Merge tag 'x86-cpu-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 4 Aug 2020 00:08:02 +0000 (17:08 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 4 Aug 2020 00:08:02 +0000 (17:08 -0700)
Pull x86 cpu updates from Ingo Molnar:

 - prepare for Intel's new SERIALIZE instruction

 - enable split-lock debugging on more CPUs

 - add more Intel CPU models

 - optimize stack canary initialization a bit

 - simplify the Spectre logic a bit

* tag 'x86-cpu-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/cpu: Refactor sync_core() for readability
  x86/cpu: Relocate sync_core() to sync_core.h
  x86/cpufeatures: Add enumeration for SERIALIZE instruction
  x86/split_lock: Enable the split lock feature on Sapphire Rapids and Alder Lake CPUs
  x86/cpu: Add Lakefield, Alder Lake and Rocket Lake models to the Intel CPU family
  x86/stackprotector: Pre-initialize canary for secondary CPUs
  x86/speculation: Merge one test in spectre_v2_user_select_mitigation()

1345 files changed:
.gitignore
Documentation/ABI/testing/sysfs-devices-mapping [new file with mode: 0644]
Documentation/RCU/Design/Requirements/Requirements.rst
Documentation/RCU/checklist.rst [new file with mode: 0644]
Documentation/RCU/checklist.txt [deleted file]
Documentation/RCU/index.rst
Documentation/RCU/lockdep-splat.rst [new file with mode: 0644]
Documentation/RCU/lockdep-splat.txt [deleted file]
Documentation/RCU/lockdep.rst [new file with mode: 0644]
Documentation/RCU/lockdep.txt [deleted file]
Documentation/RCU/rculist_nulls.rst [new file with mode: 0644]
Documentation/RCU/rculist_nulls.txt [deleted file]
Documentation/RCU/rcuref.rst [new file with mode: 0644]
Documentation/RCU/rcuref.txt [deleted file]
Documentation/RCU/stallwarn.rst [new file with mode: 0644]
Documentation/RCU/stallwarn.txt [deleted file]
Documentation/RCU/torture.rst [new file with mode: 0644]
Documentation/RCU/torture.txt [deleted file]
Documentation/admin-guide/cgroup-v2.rst
Documentation/admin-guide/ext4.rst
Documentation/admin-guide/kdump/vmcoreinfo.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/sysctl/kernel.rst
Documentation/atomic_t.txt
Documentation/block/biodoc.rst
Documentation/block/writeback_cache_control.rst
Documentation/cdrom/cdrom-standard.rst
Documentation/core-api/padata.rst
Documentation/crypto/api-intro.txt
Documentation/crypto/userspace-if.rst
Documentation/dev-tools/kcsan.rst
Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/media/allwinner,sun4i-a10-video-engine.yaml
Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
Documentation/devicetree/bindings/rng/imx-rng.txt
Documentation/devicetree/bindings/rng/ingenic,rng.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml [new file with mode: 0644]
Documentation/dontdiff
Documentation/fault-injection/fault-injection.rst
Documentation/features/core/cBPF-JIT/arch-support.txt
Documentation/features/core/eBPF-JIT/arch-support.txt
Documentation/features/core/generic-idle-thread/arch-support.txt
Documentation/features/core/jump-labels/arch-support.txt
Documentation/features/core/tracehook/arch-support.txt
Documentation/features/debug/KASAN/arch-support.txt
Documentation/features/debug/debug-vm-pgtable/arch-support.txt
Documentation/features/debug/gcov-profile-all/arch-support.txt
Documentation/features/debug/kgdb/arch-support.txt
Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
Documentation/features/debug/kprobes/arch-support.txt
Documentation/features/debug/kretprobes/arch-support.txt
Documentation/features/debug/optprobes/arch-support.txt
Documentation/features/debug/stackprotector/arch-support.txt
Documentation/features/debug/uprobes/arch-support.txt
Documentation/features/debug/user-ret-profiler/arch-support.txt
Documentation/features/io/dma-contiguous/arch-support.txt
Documentation/features/locking/cmpxchg-local/arch-support.txt
Documentation/features/locking/lockdep/arch-support.txt
Documentation/features/locking/queued-rwlocks/arch-support.txt
Documentation/features/locking/queued-spinlocks/arch-support.txt
Documentation/features/perf/kprobes-event/arch-support.txt
Documentation/features/perf/perf-regs/arch-support.txt
Documentation/features/perf/perf-stackdump/arch-support.txt
Documentation/features/sched/membarrier-sync-core/arch-support.txt
Documentation/features/sched/numa-balancing/arch-support.txt
Documentation/features/seccomp/seccomp-filter/arch-support.txt
Documentation/features/time/arch-tick-broadcast/arch-support.txt
Documentation/features/time/clockevents/arch-support.txt
Documentation/features/time/context-tracking/arch-support.txt
Documentation/features/time/irq-time-acct/arch-support.txt
Documentation/features/time/modern-timekeeping/arch-support.txt
Documentation/features/time/virt-cpuacct/arch-support.txt
Documentation/features/vm/ELF-ASLR/arch-support.txt
Documentation/features/vm/PG_uncached/arch-support.txt
Documentation/features/vm/THP/arch-support.txt
Documentation/features/vm/TLB/arch-support.txt
Documentation/features/vm/huge-vmap/arch-support.txt
Documentation/features/vm/ioremap_prot/arch-support.txt
Documentation/features/vm/pte_special/arch-support.txt
Documentation/filesystems/f2fs.rst
Documentation/filesystems/fscrypt.rst
Documentation/filesystems/locking.rst
Documentation/litmus-tests/README [new file with mode: 0644]
Documentation/litmus-tests/atomic/Atomic-RMW+mb__after_atomic-is-stronger-than-acquire.litmus [new file with mode: 0644]
Documentation/litmus-tests/atomic/Atomic-RMW-ops-are-atomic-WRT-atomic_set.litmus [new file with mode: 0644]
Documentation/litmus-tests/rcu/RCU+sync+free.litmus [new file with mode: 0644]
Documentation/litmus-tests/rcu/RCU+sync+read.litmus [new file with mode: 0644]
Documentation/locking/index.rst
Documentation/locking/locktorture.rst
Documentation/locking/mutex-design.rst
Documentation/locking/seqlock.rst [new file with mode: 0644]
Documentation/memory-barriers.txt
Documentation/networking/bareudp.rst
Documentation/networking/devlink/devlink-trap.rst
Documentation/s390/s390dbf.rst
Documentation/scheduler/index.rst
Documentation/scheduler/sched-capacity.rst [new file with mode: 0644]
Documentation/scheduler/sched-energy.rst
Documentation/trace/ftrace.rst
Documentation/translations/ko_KR/memory-barriers.txt
Documentation/x86/boot.rst
MAINTAINERS
Makefile
arch/alpha/include/asm/atomic.h
arch/alpha/include/asm/barrier.h
arch/alpha/include/asm/pgtable.h
arch/alpha/include/asm/rwonce.h [new file with mode: 0644]
arch/arc/include/asm/atomic.h
arch/arm/boot/dts/armada-38x.dtsi
arch/arm/boot/dts/imx6qdl-icore.dtsi
arch/arm/boot/dts/imx6sx-sabreauto.dts
arch/arm/boot/dts/imx6sx-sdb.dtsi
arch/arm/boot/dts/keystone-k2g-evm.dts
arch/arm/boot/dts/sun4i-a10.dtsi
arch/arm/boot/dts/sun5i.dtsi
arch/arm/boot/dts/sun7i-a20.dtsi
arch/arm/crypto/crc32-ce-core.S
arch/arm/crypto/ghash-ce-glue.c
arch/arm/crypto/sha1-armv4-large.S
arch/arm/crypto/sha256-armv4.pl
arch/arm/crypto/sha256-core.S_shipped
arch/arm/crypto/sha512-armv4.pl
arch/arm/crypto/sha512-core.S_shipped
arch/arm/include/asm/atomic.h
arch/arm/include/asm/percpu.h
arch/arm/include/asm/thread_info.h
arch/arm/include/asm/topology.h
arch/arm/include/asm/vdso/gettimeofday.h
arch/arm/kernel/hw_breakpoint.c
arch/arm/kernel/vdso.c
arch/arm/mm/mmu.c
arch/arm64/Kconfig
arch/arm64/Makefile
arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
arch/arm64/configs/defconfig
arch/arm64/crypto/ghash-ce-glue.c
arch/arm64/include/asm/acpi.h
arch/arm64/include/asm/alternative.h
arch/arm64/include/asm/atomic.h
arch/arm64/include/asm/checksum.h
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/hugetlb.h
arch/arm64/include/asm/hwcap.h
arch/arm64/include/asm/kernel-pgtable.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/perf_event.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/ptrace.h
arch/arm64/include/asm/smp.h
arch/arm64/include/asm/stage2_pgtable.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/tlb.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/include/asm/topology.h
arch/arm64/include/asm/uaccess.h
arch/arm64/include/asm/vdso.h
arch/arm64/include/asm/vdso/compat_gettimeofday.h
arch/arm64/include/asm/vdso/gettimeofday.h
arch/arm64/include/uapi/asm/hwcap.h
arch/arm64/include/uapi/asm/sigcontext.h
arch/arm64/kernel/acpi.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/crash_core.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/module-plts.c
arch/arm64/kernel/perf_event.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/stacktrace.c
arch/arm64/kernel/traps.c
arch/arm64/kernel/vdso.c
arch/arm64/kernel/vdso/vdso.lds.S
arch/arm64/kernel/vdso32/vdso.lds.S
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/kvm/hyp-init.S
arch/arm64/kvm/mmu.c
arch/arm64/kvm/sys_regs.c
arch/arm64/mm/context.c
arch/arm64/mm/hugetlbpage.c
arch/arm64/mm/init.c
arch/h8300/include/asm/atomic.h
arch/hexagon/include/asm/atomic.h
arch/ia64/include/asm/atomic.h
arch/m68k/Kbuild [new file with mode: 0644]
arch/m68k/Makefile
arch/m68k/configs/amiga_defconfig
arch/m68k/configs/apollo_defconfig
arch/m68k/configs/atari_defconfig
arch/m68k/configs/bvme6000_defconfig
arch/m68k/configs/hp300_defconfig
arch/m68k/configs/mac_defconfig
arch/m68k/configs/multi_defconfig
arch/m68k/configs/mvme147_defconfig
arch/m68k/configs/mvme16x_defconfig
arch/m68k/configs/q40_defconfig
arch/m68k/configs/sun3_defconfig
arch/m68k/configs/sun3x_defconfig
arch/m68k/emu/nfblock.c
arch/m68k/include/asm/atomic.h
arch/m68k/include/asm/raw_io.h
arch/m68k/kernel/signal.c
arch/m68k/mac/iop.c
arch/m68k/sun3/Makefile
arch/mips/include/asm/atomic.h
arch/mips/pci/pci-xtalk-bridge.c
arch/parisc/include/asm/atomic.h
arch/powerpc/include/asm/atomic.h
arch/powerpc/include/asm/dtl.h [new file with mode: 0644]
arch/powerpc/include/asm/lppaca.h
arch/powerpc/include/asm/paca.h
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/time.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/mm/book3s64/hash_utils.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/platforms/pseries/dtl.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/svm.c
arch/riscv/include/asm/atomic.h
arch/riscv/include/asm/vdso/gettimeofday.h
arch/s390/Kconfig
arch/s390/appldata/appldata_os.c
arch/s390/include/asm/asm-const.h [new file with mode: 0644]
arch/s390/include/asm/atomic.h
arch/s390/include/asm/debug.h
arch/s390/include/asm/extable.h
arch/s390/include/asm/linkage.h
arch/s390/include/asm/pci_dma.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/ptrace.h
arch/s390/include/asm/smp.h
arch/s390/include/asm/syscall_wrapper.h
arch/s390/include/asm/thread_info.h
arch/s390/include/asm/timex.h
arch/s390/include/uapi/asm/debug.h [deleted file]
arch/s390/include/uapi/asm/zcrypt.h
arch/s390/kernel/crash_dump.c
arch/s390/kernel/debug.c
arch/s390/kernel/entry.S
arch/s390/kernel/idle.c
arch/s390/kernel/kprobes.c
arch/s390/kernel/lgr.c
arch/s390/kernel/setup.c
arch/s390/kernel/smp.c
arch/s390/kernel/time.c
arch/s390/kernel/topology.c
arch/s390/kernel/traps.c
arch/s390/lib/Makefile
arch/s390/lib/error-inject.c [new file with mode: 0644]
arch/s390/mm/cmm.c
arch/s390/mm/extmem.c
arch/s390/mm/fault.c
arch/s390/mm/vmem.c
arch/s390/net/bpf_jit_comp.c
arch/s390/pci/pci_mmio.c
arch/sh/include/asm/atomic.h
arch/sh/include/asm/pgalloc.h
arch/sh/kernel/entry-common.S
arch/sparc/crypto/sha256_glue.c
arch/sparc/include/asm/atomic_32.h
arch/sparc/include/asm/atomic_64.h
arch/sparc/include/asm/percpu_64.h
arch/sparc/include/asm/trap_block.h
arch/unicore32/.gitignore [deleted file]
arch/unicore32/Kconfig [deleted file]
arch/unicore32/Kconfig.debug [deleted file]
arch/unicore32/Makefile [deleted file]
arch/unicore32/boot/Makefile [deleted file]
arch/unicore32/boot/compressed/Makefile [deleted file]
arch/unicore32/boot/compressed/head.S [deleted file]
arch/unicore32/boot/compressed/misc.c [deleted file]
arch/unicore32/boot/compressed/piggy.S.in [deleted file]
arch/unicore32/boot/compressed/vmlinux.lds.S [deleted file]
arch/unicore32/configs/defconfig [deleted file]
arch/unicore32/include/asm/Kbuild [deleted file]
arch/unicore32/include/asm/assembler.h [deleted file]
arch/unicore32/include/asm/barrier.h [deleted file]
arch/unicore32/include/asm/bitops.h [deleted file]
arch/unicore32/include/asm/bug.h [deleted file]
arch/unicore32/include/asm/cache.h [deleted file]
arch/unicore32/include/asm/cacheflush.h [deleted file]
arch/unicore32/include/asm/checksum.h [deleted file]
arch/unicore32/include/asm/cmpxchg.h [deleted file]
arch/unicore32/include/asm/cpu-single.h [deleted file]
arch/unicore32/include/asm/cputype.h [deleted file]
arch/unicore32/include/asm/delay.h [deleted file]
arch/unicore32/include/asm/dma.h [deleted file]
arch/unicore32/include/asm/elf.h [deleted file]
arch/unicore32/include/asm/fpstate.h [deleted file]
arch/unicore32/include/asm/fpu-ucf64.h [deleted file]
arch/unicore32/include/asm/gpio.h [deleted file]
arch/unicore32/include/asm/hwcap.h [deleted file]
arch/unicore32/include/asm/hwdef-copro.h [deleted file]
arch/unicore32/include/asm/io.h [deleted file]
arch/unicore32/include/asm/irq.h [deleted file]
arch/unicore32/include/asm/irqflags.h [deleted file]
arch/unicore32/include/asm/linkage.h [deleted file]
arch/unicore32/include/asm/memblock.h [deleted file]
arch/unicore32/include/asm/memory.h [deleted file]
arch/unicore32/include/asm/mmu.h [deleted file]
arch/unicore32/include/asm/mmu_context.h [deleted file]
arch/unicore32/include/asm/page.h [deleted file]
arch/unicore32/include/asm/pci.h [deleted file]
arch/unicore32/include/asm/pgalloc.h [deleted file]
arch/unicore32/include/asm/pgtable-hwdef.h [deleted file]
arch/unicore32/include/asm/pgtable.h [deleted file]
arch/unicore32/include/asm/processor.h [deleted file]
arch/unicore32/include/asm/ptrace.h [deleted file]
arch/unicore32/include/asm/stacktrace.h [deleted file]
arch/unicore32/include/asm/string.h [deleted file]
arch/unicore32/include/asm/suspend.h [deleted file]
arch/unicore32/include/asm/switch_to.h [deleted file]
arch/unicore32/include/asm/syscall.h [deleted file]
arch/unicore32/include/asm/thread_info.h [deleted file]
arch/unicore32/include/asm/timex.h [deleted file]
arch/unicore32/include/asm/tlb.h [deleted file]
arch/unicore32/include/asm/tlbflush.h [deleted file]
arch/unicore32/include/asm/traps.h [deleted file]
arch/unicore32/include/asm/uaccess.h [deleted file]
arch/unicore32/include/asm/vmalloc.h [deleted file]
arch/unicore32/include/mach/PKUnity.h [deleted file]
arch/unicore32/include/mach/bitfield.h [deleted file]
arch/unicore32/include/mach/dma.h [deleted file]
arch/unicore32/include/mach/hardware.h [deleted file]
arch/unicore32/include/mach/map.h [deleted file]
arch/unicore32/include/mach/memory.h [deleted file]
arch/unicore32/include/mach/ocd.h [deleted file]
arch/unicore32/include/mach/pm.h [deleted file]
arch/unicore32/include/mach/regs-ac97.h [deleted file]
arch/unicore32/include/mach/regs-dmac.h [deleted file]
arch/unicore32/include/mach/regs-gpio.h [deleted file]
arch/unicore32/include/mach/regs-i2c.h [deleted file]
arch/unicore32/include/mach/regs-intc.h [deleted file]
arch/unicore32/include/mach/regs-nand.h [deleted file]
arch/unicore32/include/mach/regs-ost.h [deleted file]
arch/unicore32/include/mach/regs-pci.h [deleted file]
arch/unicore32/include/mach/regs-pm.h [deleted file]
arch/unicore32/include/mach/regs-ps2.h [deleted file]
arch/unicore32/include/mach/regs-resetc.h [deleted file]
arch/unicore32/include/mach/regs-rtc.h [deleted file]
arch/unicore32/include/mach/regs-sdc.h [deleted file]
arch/unicore32/include/mach/regs-spi.h [deleted file]
arch/unicore32/include/mach/regs-uart.h [deleted file]
arch/unicore32/include/mach/regs-umal.h [deleted file]
arch/unicore32/include/mach/regs-unigfx.h [deleted file]
arch/unicore32/include/mach/uncompress.h [deleted file]
arch/unicore32/include/uapi/asm/Kbuild [deleted file]
arch/unicore32/include/uapi/asm/byteorder.h [deleted file]
arch/unicore32/include/uapi/asm/ptrace.h [deleted file]
arch/unicore32/include/uapi/asm/sigcontext.h [deleted file]
arch/unicore32/include/uapi/asm/unistd.h [deleted file]
arch/unicore32/kernel/Makefile [deleted file]
arch/unicore32/kernel/asm-offsets.c [deleted file]
arch/unicore32/kernel/clock.c [deleted file]
arch/unicore32/kernel/debug-macro.S [deleted file]
arch/unicore32/kernel/debug.S [deleted file]
arch/unicore32/kernel/dma.c [deleted file]
arch/unicore32/kernel/early_printk.c [deleted file]
arch/unicore32/kernel/elf.c [deleted file]
arch/unicore32/kernel/entry.S [deleted file]
arch/unicore32/kernel/fpu-ucf64.c [deleted file]
arch/unicore32/kernel/gpio.c [deleted file]
arch/unicore32/kernel/head.S [deleted file]
arch/unicore32/kernel/hibernate.c [deleted file]
arch/unicore32/kernel/hibernate_asm.S [deleted file]
arch/unicore32/kernel/irq.c [deleted file]
arch/unicore32/kernel/ksyms.c [deleted file]
arch/unicore32/kernel/ksyms.h [deleted file]
arch/unicore32/kernel/module.c [deleted file]
arch/unicore32/kernel/pci.c [deleted file]
arch/unicore32/kernel/pm.c [deleted file]
arch/unicore32/kernel/process.c [deleted file]
arch/unicore32/kernel/ptrace.c [deleted file]
arch/unicore32/kernel/puv3-core.c [deleted file]
arch/unicore32/kernel/puv3-nb0916.c [deleted file]
arch/unicore32/kernel/setup.c [deleted file]
arch/unicore32/kernel/setup.h [deleted file]
arch/unicore32/kernel/signal.c [deleted file]
arch/unicore32/kernel/sleep.S [deleted file]
arch/unicore32/kernel/stacktrace.c [deleted file]
arch/unicore32/kernel/sys.c [deleted file]
arch/unicore32/kernel/time.c [deleted file]
arch/unicore32/kernel/traps.c [deleted file]
arch/unicore32/kernel/vmlinux.lds.S [deleted file]
arch/unicore32/lib/Makefile [deleted file]
arch/unicore32/lib/backtrace.S [deleted file]
arch/unicore32/lib/clear_user.S [deleted file]
arch/unicore32/lib/copy_from_user.S [deleted file]
arch/unicore32/lib/copy_page.S [deleted file]
arch/unicore32/lib/copy_template.S [deleted file]
arch/unicore32/lib/copy_to_user.S [deleted file]
arch/unicore32/lib/delay.S [deleted file]
arch/unicore32/lib/findbit.S [deleted file]
arch/unicore32/lib/strncpy_from_user.S [deleted file]
arch/unicore32/lib/strnlen_user.S [deleted file]
arch/unicore32/mm/Kconfig [deleted file]
arch/unicore32/mm/Makefile [deleted file]
arch/unicore32/mm/alignment.c [deleted file]
arch/unicore32/mm/cache-ucv2.S [deleted file]
arch/unicore32/mm/extable.c [deleted file]
arch/unicore32/mm/fault.c [deleted file]
arch/unicore32/mm/flush.c [deleted file]
arch/unicore32/mm/init.c [deleted file]
arch/unicore32/mm/ioremap.c [deleted file]
arch/unicore32/mm/mm.h [deleted file]
arch/unicore32/mm/mmu.c [deleted file]
arch/unicore32/mm/pgd.c [deleted file]
arch/unicore32/mm/proc-macros.S [deleted file]
arch/unicore32/mm/proc-syms.c [deleted file]
arch/unicore32/mm/proc-ucv2.S [deleted file]
arch/unicore32/mm/tlb-ucv2.S [deleted file]
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/Makefile
arch/x86/boot/compressed/Makefile
arch/x86/boot/compressed/kaslr.c
arch/x86/boot/compressed/misc.c
arch/x86/boot/header.S
arch/x86/configs/i386_defconfig
arch/x86/configs/x86_64_defconfig
arch/x86/crypto/aes_ctrby8_avx-x86_64.S
arch/x86/crypto/aesni-intel_asm.S
arch/x86/crypto/aesni-intel_avx-x86_64.S
arch/x86/crypto/chacha-ssse3-x86_64.S
arch/x86/crypto/chacha_glue.c
arch/x86/crypto/crc32-pclmul_asm.S
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
arch/x86/crypto/curve25519-x86_64.c
arch/x86/crypto/ghash-clmulni-intel_asm.S
arch/x86/entry/common.c
arch/x86/events/amd/power.c
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/intel/lbr.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore.h
arch/x86/events/intel/uncore_snb.c
arch/x86/events/intel/uncore_snbep.c
arch/x86/events/perf_event.h
arch/x86/events/rapl.c
arch/x86/events/zhaoxin/core.c
arch/x86/include/asm/asm.h
arch/x86/include/asm/atomic.h
arch/x86/include/asm/boot.h
arch/x86/include/asm/bug.h
arch/x86/include/asm/cmpxchg_32.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/div64.h
arch/x86/include/asm/fpu/internal.h
arch/x86/include/asm/fpu/types.h
arch/x86/include/asm/fpu/xstate.h
arch/x86/include/asm/idtentry.h
arch/x86/include/asm/inst.h
arch/x86/include/asm/io_apic.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/kprobes.h
arch/x86/include/asm/mem_encrypt.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/sparsemem.h
arch/x86/include/asm/topology.h
arch/x86/include/asm/tsc.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/uv/uv_hub.h
arch/x86/include/uapi/asm/bootparam.h
arch/x86/kernel/alternative.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/fpu/core.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/i8259.c
arch/x86/kernel/idt.c
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/kvm.c
arch/x86/kernel/nmi.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/traps.c
arch/x86/kvm/lapic.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/nested.h
arch/x86/mm/fault.c
arch/x86/mm/init.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/pat/set_memory.c
arch/x86/xen/enlighten_pv.c
arch/xtensa/include/asm/atomic.h
arch/xtensa/platforms/iss/simdisk.c
block/Makefile
block/bfq-iosched.c
block/bio.c
block/blk-cgroup.c
block/blk-core.c
block/blk-crypto-fallback.c
block/blk-crypto.c
block/blk-flush.c
block/blk-ioc.c
block/blk-iocost.c
block/blk-iolatency.c
block/blk-lib.c
block/blk-merge.c
block/blk-mq-debugfs.c
block/blk-mq-sched.c
block/blk-mq-tag.c
block/blk-mq-tag.h
block/blk-mq.c
block/blk-mq.h
block/blk-softirq.c [deleted file]
block/blk-sysfs.c
block/blk-throttle.c
block/blk-timeout.c
block/blk.h
block/bounce.c
block/bsg-lib.c
block/elevator.c
block/genhd.c
block/partitions/core.c
crypto/Kconfig
crypto/acompress.c
crypto/adiantum.c
crypto/af_alg.c
crypto/algapi.c
crypto/algif_aead.c
crypto/algif_skcipher.c
crypto/api.c
crypto/authenc.c
crypto/authencesn.c
crypto/blake2b_generic.c
crypto/camellia_generic.c
crypto/ccm.c
crypto/chacha20poly1305.c
crypto/cmac.c
crypto/cryptd.c
crypto/ctr.c
crypto/cts.c
crypto/dh.c
crypto/ecc.c
crypto/ecc.h
crypto/echainiv.c
crypto/essiv.c
crypto/gcm.c
crypto/geniv.c
crypto/hmac.c
crypto/internal.h
crypto/jitterentropy.c
crypto/lrw.c
crypto/pcrypt.c
crypto/rsa-pkcs1pad.c
crypto/salsa20_generic.c
crypto/seqiv.c
crypto/sha3_generic.c
crypto/simd.c
crypto/skcipher.c
crypto/testmgr.h
crypto/vmac.c
crypto/xcbc.c
crypto/xts.c
drivers/acpi/arm64/iort.c
drivers/acpi/scan.c
drivers/atm/atmtcp.c
drivers/base/arch_topology.c
drivers/block/brd.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_proc.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_worker.c
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/mtip32xx/mtip32xx.c
drivers/block/nbd.c
drivers/block/null_blk_main.c
drivers/block/pktcdvd.c
drivers/block/ps3vram.c
drivers/block/rsxx/dev.c
drivers/block/skd_main.c
drivers/block/umem.c
drivers/block/virtio_blk.c
drivers/block/xen-blkfront.c
drivers/block/zram/zram_drv.c
drivers/bus/fsl-mc/dprc-driver.c
drivers/bus/fsl-mc/fsl-mc-bus.c
drivers/bus/fsl-mc/fsl-mc-msi.c
drivers/bus/fsl-mc/fsl-mc-private.h
drivers/cdrom/cdrom.c
drivers/char/hw_random/Kconfig
drivers/char/hw_random/Makefile
drivers/char/hw_random/ba431-rng.c [new file with mode: 0644]
drivers/char/hw_random/bcm2835-rng.c
drivers/char/hw_random/core.c
drivers/char/hw_random/hisi-rng.c
drivers/char/hw_random/ingenic-rng.c [new file with mode: 0644]
drivers/char/hw_random/ks-sa-rng.c
drivers/char/hw_random/nomadik-rng.c
drivers/char/hw_random/npcm-rng.c
drivers/char/hw_random/octeon-rng.c
drivers/char/hw_random/omap-rng.c
drivers/char/hw_random/pic32-rng.c
drivers/char/hw_random/st-rng.c
drivers/char/hw_random/virtio-rng.c
drivers/char/random.c
drivers/char/tpm/eventlog/acpi.c
drivers/char/tpm/tpm-chip.c
drivers/char/tpm/tpm.h
drivers/char/tpm/tpm2-space.c
drivers/char/tpm/tpmrm-dev.c
drivers/cpufreq/Makefile
drivers/cpufreq/unicore2-cpufreq.c [deleted file]
drivers/crypto/Kconfig
drivers/crypto/Makefile
drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
drivers/crypto/amlogic/Kconfig
drivers/crypto/amlogic/amlogic-gxl-cipher.c
drivers/crypto/amlogic/amlogic-gxl-core.c
drivers/crypto/amlogic/amlogic-gxl.h
drivers/crypto/axis/artpec6_crypto.c
drivers/crypto/bcm/cipher.c
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/caamalg_qi.c
drivers/crypto/caam/caamalg_qi2.c
drivers/crypto/caam/caamhash.c
drivers/crypto/caam/compat.h
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/dpseci.c
drivers/crypto/caam/dpseci.h
drivers/crypto/caam/dpseci_cmd.h
drivers/crypto/caam/error.c
drivers/crypto/caam/jr.c
drivers/crypto/caam/regs.h
drivers/crypto/cavium/cpt/cptvf_algs.c
drivers/crypto/cavium/cpt/cptvf_reqmanager.c
drivers/crypto/cavium/cpt/request_manager.h
drivers/crypto/cavium/nitrox/nitrox_aead.c
drivers/crypto/cavium/nitrox/nitrox_skcipher.c
drivers/crypto/ccp/ccp-crypto-aes-cmac.c
drivers/crypto/ccp/ccp-crypto-aes-galois.c
drivers/crypto/ccp/ccp-crypto-aes-xts.c
drivers/crypto/ccp/ccp-crypto-aes.c
drivers/crypto/ccp/ccp-crypto-des3.c
drivers/crypto/ccp/ccp-crypto-sha.c
drivers/crypto/ccp/ccp-crypto.h
drivers/crypto/ccp/ccp-dev-v5.c
drivers/crypto/ccp/ccp-dev.c
drivers/crypto/ccp/ccp-dev.h
drivers/crypto/ccp/ccp-ops.c
drivers/crypto/ccp/sp-dev.c
drivers/crypto/ccp/sp-dev.h
drivers/crypto/ccp/sp-pci.c
drivers/crypto/ccp/sp-platform.c
drivers/crypto/ccree/cc_cipher.c
drivers/crypto/chelsio/chcr_algo.c
drivers/crypto/chelsio/chcr_crypto.h
drivers/crypto/hisilicon/hpre/hpre_main.c
drivers/crypto/hisilicon/qm.c
drivers/crypto/hisilicon/qm.h
drivers/crypto/hisilicon/sec/sec_algs.c
drivers/crypto/hisilicon/sec2/sec.h
drivers/crypto/hisilicon/sec2/sec_crypto.c
drivers/crypto/hisilicon/sec2/sec_main.c
drivers/crypto/hisilicon/zip/zip.h
drivers/crypto/hisilicon/zip/zip_crypto.c
drivers/crypto/hisilicon/zip/zip_main.c
drivers/crypto/img-hash.c
drivers/crypto/inside-secure/safexcel.c
drivers/crypto/inside-secure/safexcel.h
drivers/crypto/inside-secure/safexcel_cipher.c
drivers/crypto/inside-secure/safexcel_hash.c
drivers/crypto/ixp4xx_crypto.c
drivers/crypto/marvell/cesa/cesa.c
drivers/crypto/marvell/cesa/cesa.h
drivers/crypto/marvell/cesa/cipher.c
drivers/crypto/marvell/cesa/hash.c
drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h
drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
drivers/crypto/marvell/octeontx/otx_cptvf_algs.h
drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c
drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h
drivers/crypto/mediatek/mtk-aes.c
drivers/crypto/mxs-dcp.c
drivers/crypto/n2_core.c
drivers/crypto/omap-aes.c
drivers/crypto/omap-aes.h
drivers/crypto/omap-des.c
drivers/crypto/omap-sham.c
drivers/crypto/picoxcell_crypto.c
drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
drivers/crypto/qat/qat_c3xxx/adf_drv.c
drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h
drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
drivers/crypto/qat/qat_c62x/adf_drv.c
drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
drivers/crypto/qat/qat_c62xvf/adf_drv.c
drivers/crypto/qat/qat_common/adf_accel_devices.h
drivers/crypto/qat/qat_common/adf_accel_engine.c
drivers/crypto/qat/qat_common/adf_admin.c
drivers/crypto/qat/qat_common/adf_aer.c
drivers/crypto/qat/qat_common/adf_cfg.c
drivers/crypto/qat/qat_common/adf_cfg.h
drivers/crypto/qat/qat_common/adf_cfg_common.h
drivers/crypto/qat/qat_common/adf_cfg_strings.h
drivers/crypto/qat/qat_common/adf_cfg_user.h
drivers/crypto/qat/qat_common/adf_common_drv.h
drivers/crypto/qat/qat_common/adf_ctl_drv.c
drivers/crypto/qat/qat_common/adf_dev_mgr.c
drivers/crypto/qat/qat_common/adf_hw_arbiter.c
drivers/crypto/qat/qat_common/adf_init.c
drivers/crypto/qat/qat_common/adf_isr.c
drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
drivers/crypto/qat/qat_common/adf_sriov.c
drivers/crypto/qat/qat_common/adf_transport.c
drivers/crypto/qat/qat_common/adf_transport.h
drivers/crypto/qat/qat_common/adf_transport_access_macros.h
drivers/crypto/qat/qat_common/adf_transport_debug.c
drivers/crypto/qat/qat_common/adf_transport_internal.h
drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
drivers/crypto/qat/qat_common/adf_vf_isr.c
drivers/crypto/qat/qat_common/icp_qat_fw.h
drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
drivers/crypto/qat/qat_common/icp_qat_fw_la.h
drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h
drivers/crypto/qat/qat_common/icp_qat_fw_pke.h
drivers/crypto/qat/qat_common/icp_qat_hal.h
drivers/crypto/qat/qat_common/icp_qat_hw.h
drivers/crypto/qat/qat_common/icp_qat_uclo.h
drivers/crypto/qat/qat_common/qat_algs.c
drivers/crypto/qat/qat_common/qat_asym_algs.c
drivers/crypto/qat/qat_common/qat_crypto.c
drivers/crypto/qat/qat_common/qat_crypto.h
drivers/crypto/qat/qat_common/qat_hal.c
drivers/crypto/qat/qat_common/qat_uclo.c
drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
drivers/crypto/qat/qat_dh895xcc/adf_drv.c
drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
drivers/crypto/qce/cipher.h
drivers/crypto/qce/common.h
drivers/crypto/qce/sha.c
drivers/crypto/qce/skcipher.c
drivers/crypto/sa2ul.c [new file with mode: 0644]
drivers/crypto/sa2ul.h [new file with mode: 0644]
drivers/crypto/sahara.c
drivers/crypto/talitos.c
drivers/crypto/ux500/hash/hash_core.c
drivers/crypto/virtio/virtio_crypto_algs.c
drivers/crypto/virtio/virtio_crypto_core.c
drivers/crypto/xilinx/zynqmp-aes-gcm.c
drivers/dax/super.c
drivers/firmware/efi/embedded-firmware.c
drivers/firmware/qemu_fw_cfg.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/bochs/bochs_kms.c
drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
drivers/gpu/drm/bridge/nwl-dsi.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/drm_gem.c
drivers/gpu/drm/drm_mipi_dbi.c
drivers/gpu/drm/drm_of.c
drivers/gpu/drm/mcde/mcde_display.c
drivers/gpu/drm/nouveau/dispnv50/disp.c
drivers/gpu/drm/nouveau/nouveau_display.c
drivers/gpu/drm/nouveau/nouveau_fbcon.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c
drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
drivers/gpu/drm/panel/panel-simple.c
drivers/hwmon/fam15h_power.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/Makefile
drivers/i2c/busses/i2c-puv3.c [deleted file]
drivers/i2c/i2c-core-slave.c
drivers/infiniband/core/cq.c
drivers/infiniband/core/ucma.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/sw/rdmavt/rc.c
drivers/input/serio/i8042-unicore32io.h [deleted file]
drivers/input/serio/i8042.h
drivers/iommu/intel/irq_remapping.c
drivers/iommu/of_iommu.c
drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
drivers/irqchip/irq-gic-v3-its.c
drivers/lightnvm/core.c
drivers/lightnvm/pblk-init.c
drivers/lightnvm/pblk-read.c
drivers/md/bcache/bcache.h
drivers/md/bcache/btree.c
drivers/md/bcache/request.c
drivers/md/bcache/request.h
drivers/md/bcache/super.c
drivers/md/dm-cache-target.c
drivers/md/dm-clone-target.c
drivers/md/dm-crypt.c
drivers/md/dm-delay.c
drivers/md/dm-era-target.c
drivers/md/dm-integrity.c
drivers/md/dm-mpath.c
drivers/md/dm-raid.c
drivers/md/dm-raid1.c
drivers/md/dm-rq.c
drivers/md/dm-snap-persistent.c
drivers/md/dm-snap.c
drivers/md/dm-table.c
drivers/md/dm-thin.c
drivers/md/dm-verity-target.c
drivers/md/dm-writecache.c
drivers/md/dm-zoned-target.c
drivers/md/dm.c
drivers/md/dm.h
drivers/md/md-faulty.c
drivers/md/md-linear.c
drivers/md/md-multipath.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/mfd/ioc3.c
drivers/mmc/core/block.c
drivers/mtd/mtdchar.c
drivers/net/bareudp.c
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/ni/nixge.c
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/usb/hso.c
drivers/net/usb/lan78xx.c
drivers/net/vxlan.c
drivers/nvdimm/blk.c
drivers/nvdimm/btt.c
drivers/nvdimm/pmem.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/target/core.c
drivers/nvme/target/loop.c
drivers/of/base.c
drivers/of/device.c
drivers/of/irq.c
drivers/pci/controller/vmd.c
drivers/pci/msi.c
drivers/pci/pci-driver.c
drivers/pci/quirks.c
drivers/perf/arm_smmuv3_pmu.c
drivers/pinctrl/qcom/Kconfig
drivers/pinctrl/qcom/pinctrl-msm.c
drivers/pinctrl/qcom/pinctrl-msm.h
drivers/pinctrl/qcom/pinctrl-sc7180.c
drivers/pwm/Kconfig
drivers/pwm/Makefile
drivers/pwm/pwm-puv3.c [deleted file]
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/rtc-puv3.c [deleted file]
drivers/s390/block/dasd.c
drivers/s390/block/dcssblk.c
drivers/s390/block/scm_blk.c
drivers/s390/block/xpram.c
drivers/s390/char/tty3270.c
drivers/s390/char/zcore.c
drivers/s390/cio/qdio.h
drivers/s390/cio/qdio_debug.c
drivers/s390/cio/qdio_main.c
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_bus.h
drivers/s390/crypto/ap_queue.c
drivers/s390/crypto/pkey_api.c
drivers/s390/crypto/zcrypt_api.c
drivers/s390/crypto/zcrypt_ccamisc.c
drivers/s390/crypto/zcrypt_cex2c.c
drivers/s390/crypto/zcrypt_cex4.c
drivers/s390/crypto/zcrypt_error.h
drivers/s390/crypto/zcrypt_msgtype50.c
drivers/s390/crypto/zcrypt_msgtype6.c
drivers/s390/crypto/zcrypt_msgtype6.h
drivers/s390/crypto/zcrypt_queue.c
drivers/scsi/scsi_lib.c
drivers/tty/sysrq.c
drivers/vhost/scsi.c
drivers/vhost/vhost.c
drivers/video/fbdev/Kconfig
drivers/video/fbdev/Makefile
drivers/video/fbdev/fb-puv3.c [deleted file]
drivers/virtio/Kconfig
drivers/virtio/virtio_balloon.c
fs/adfs/super.c
fs/affs/file.c
fs/befs/linuxvfs.c
fs/block_dev.c
fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/btrfs_inode.h
fs/btrfs/check-integrity.c
fs/btrfs/compression.c
fs/btrfs/compression.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delalloc-space.c
fs/btrfs/delalloc-space.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-io-tree.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/raid56.c
fs/btrfs/ref-verify.c
fs/btrfs/reflink.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/space-info.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h
fs/btrfs/tests/free-space-tree-tests.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-defrag.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/buffer.c
fs/crypto/Kconfig
fs/crypto/Makefile
fs/crypto/bio.c
fs/crypto/crypto.c
fs/crypto/fname.c
fs/crypto/fscrypt_private.h
fs/crypto/inline_crypt.c [new file with mode: 0644]
fs/crypto/keyring.c
fs/crypto/keysetup.c
fs/crypto/keysetup_v1.c
fs/crypto/policy.c
fs/direct-io.c
fs/efs/super.c
fs/ext4/inode.c
fs/ext4/page-io.c
fs/ext4/readpage.c
fs/ext4/super.c
fs/f2fs/compress.c
fs/f2fs/data.c
fs/f2fs/super.c
fs/hfs/inode.c
fs/internal.h
fs/io-wq.c
fs/io-wq.h
fs/io_uring.c
fs/isofs/inode.c
fs/jfs/jfs_mount.c
fs/jfs/resize.c
fs/locks.c
fs/ntfs/dir.c
fs/proc/devices.c
fs/quota/dquot.c
fs/reiserfs/procfs.c
fs/userfaultfd.c
fs/verity/open.c
fs/xfs/xfs_file.c
fs/xfs/xfs_pwork.c
include/acpi/acpi_bus.h
include/acpi/actbl3.h
include/asm-generic/Kbuild
include/asm-generic/atomic.h
include/asm-generic/barrier.h
include/asm-generic/bug.h
include/asm-generic/io.h
include/asm-generic/qspinlock.h
include/asm-generic/qspinlock_types.h
include/asm-generic/rwonce.h [new file with mode: 0644]
include/asm-generic/tlb.h
include/asm-generic/vmlinux.lds.h
include/crypto/acompress.h
include/crypto/algapi.h
include/crypto/chacha.h
include/crypto/chacha20poly1305.h
include/crypto/hash.h
include/crypto/if_alg.h
include/crypto/internal/geniv.h
include/crypto/sha.h
include/crypto/skcipher.h
include/drm/drm_mode_config.h
include/linux/acpi.h
include/linux/acpi_iort.h
include/linux/arch_topology.h
include/linux/arm-smccc.h
include/linux/backing-dev-defs.h
include/linux/backing-dev.h
include/linux/bio.h
include/linux/blk-cgroup.h
include/linux/blk-mq.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/buffer_head.h
include/linux/cdrom.h
include/linux/compiler.h
include/linux/compiler_types.h
include/linux/context_tracking.h
include/linux/cpu.h
include/linux/crypto.h
include/linux/dasd_mod.h
include/linux/decompress/unzstd.h [new file with mode: 0644]
include/linux/device-mapper.h
include/linux/fs.h
include/linux/fscrypt.h
include/linux/fsverity.h
include/linux/ftrace.h
include/linux/genhd.h
include/linux/hardirq.h
include/linux/i2c.h
include/linux/instrumentation.h [new file with mode: 0644]
include/linux/irq.h
include/linux/irqflags.h
include/linux/jbd2.h
include/linux/kprobes.h
include/linux/lightnvm.h
include/linux/list.h
include/linux/lockdep.h
include/linux/lockdep_types.h [new file with mode: 0644]
include/linux/math64.h
include/linux/memblock.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mpi.h
include/linux/nospec.h
include/linux/of.h
include/linux/of_device.h
include/linux/of_iommu.h
include/linux/of_irq.h
include/linux/padata.h
include/linux/pagemap.h
include/linux/percpu-refcount.h
include/linux/perf_event.h
include/linux/psi_types.h
include/linux/ptr_ring.h
include/linux/random.h
include/linux/rculist.h
include/linux/rculist_nulls.h
include/linux/rcupdate.h
include/linux/rcupdate_trace.h
include/linux/rcutiny.h
include/linux/rcutree.h
include/linux/rhashtable.h
include/linux/rwsem.h
include/linux/sched.h
include/linux/sched/isolation.h
include/linux/sched/loadavg.h
include/linux/sched/mm.h
include/linux/sched/sysctl.h
include/linux/sched/task.h
include/linux/sched/topology.h
include/linux/sched_clock.h
include/linux/seqlock.h
include/linux/spinlock.h
include/linux/spinlock_types.h
include/linux/torture.h
include/linux/tpm.h
include/linux/tpm_eventlog.h
include/linux/types.h
include/net/addrconf.h
include/net/devlink.h
include/net/xfrm.h
include/rdma/rdmavt_qp.h
include/trace/events/block.h
include/trace/events/btrfs.h
include/trace/events/rcu.h
include/trace/events/sched.h
include/uapi/linux/btrfs.h
include/uapi/linux/btrfs_tree.h
include/uapi/linux/io_uring.h
include/uapi/linux/perf_event.h
include/vdso/datapage.h
init/Kconfig
init/do_mounts_initrd.c
kernel/audit.c
kernel/audit.h
kernel/auditsc.c
kernel/bpf/btf.c
kernel/bpf/hashtab.c
kernel/cgroup/rstat.c
kernel/crash_core.c
kernel/events/core.c
kernel/fork.c
kernel/futex.c
kernel/irq/debugfs.c
kernel/irq/manage.c
kernel/kallsyms.c
kernel/kcsan/Makefile
kernel/kcsan/atomic.h
kernel/kcsan/core.c
kernel/kcsan/kcsan-test.c [new file with mode: 0644]
kernel/kcsan/kcsan.h
kernel/kcsan/report.c
kernel/kcsan/selftest.c [new file with mode: 0644]
kernel/kcsan/test.c [deleted file]
kernel/kprobes.c
kernel/kthread.c
kernel/locking/lockdep.c
kernel/locking/locktorture.c
kernel/locking/osq_lock.c
kernel/padata.c
kernel/rcu/Kconfig.debug
kernel/rcu/Makefile
kernel/rcu/rcuperf.c
kernel/rcu/rcutorture.c
kernel/rcu/refscale.c [new file with mode: 0644]
kernel/rcu/srcutree.c
kernel/rcu/tasks.h
kernel/rcu/tiny.c
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_exp.h
kernel/rcu/tree_plugin.h
kernel/rcu/tree_stall.h
kernel/rcu/update.c
kernel/reboot.c
kernel/sched/core.c
kernel/sched/cpudeadline.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/cputime.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/isolation.c
kernel/sched/loadavg.c
kernel/sched/pelt.c
kernel/sched/pelt.h
kernel/sched/psi.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/stop_task.c
kernel/sched/topology.c
kernel/sched/wait.c
kernel/signal.c
kernel/smp.c
kernel/softirq.c
kernel/sysctl.c
kernel/time/sched_clock.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timer.c
kernel/torture.c
kernel/trace/blktrace.c
kernel/trace/ftrace.c
lib/Kconfig
lib/Kconfig.debug
lib/Kconfig.kcsan
lib/Makefile
lib/cpumask.c
lib/crc-t10dif.c
lib/crypto/chacha20poly1305.c
lib/crypto/sha256.c
lib/debugobjects.c
lib/decompress.c
lib/decompress_unzstd.c [new file with mode: 0644]
lib/math/div64.c
lib/mpi/Makefile
lib/mpi/mpi-sub-ui.c [new file with mode: 0644]
lib/random32.c
lib/rhashtable.c
lib/sbitmap.c
lib/test_vmalloc.c
lib/zstd/fse_decompress.c
lib/zstd/zstd_internal.h
mm/backing-dev.c
mm/filemap.c
mm/list_lru.c
mm/memblock.c
mm/memory.c
mm/mmap.c
mm/page_io.c
mm/swapfile.c
net/9p/trans_fd.c
net/bluetooth/hci_event.c
net/bpfilter/bpfilter_kern.c
net/compat.c
net/core/devlink.c
net/core/net-sysfs.c
net/core/sock.c
net/ipv4/fib_trie.c
net/ipv6/anycast.c
net/ipv6/esp6.c
net/ipv6/ipv6_sockglue.c
net/ipv6/route.c
net/key/af_key.c
net/mac80211/cfg.c
net/mac80211/mesh.c
net/mac80211/mesh_pathtbl.c
net/mac80211/sta_info.c
net/mac80211/tx.c
net/mac80211/util.c
net/mptcp/crypto.c
net/mptcp/protocol.c
net/rds/recv.c
net/rxrpc/call_object.c
net/rxrpc/conn_object.c
net/rxrpc/recvmsg.c
net/rxrpc/sendmsg.c
net/sched/act_ct.c
net/wireless/nl80211.c
net/xfrm/espintcp.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_user.c
scripts/Makefile.kcsan
scripts/Makefile.lib
scripts/Makefile.modpost
scripts/checkpatch.pl
scripts/kconfig/.gitignore
scripts/kconfig/Makefile
scripts/kconfig/qconf.cc
scripts/kconfig/qconf.h
scripts/mod/modpost.c
scripts/recordmcount.c
scripts/sorttable.c
security/loadpin/loadpin.c
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_controller.h
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/codecs/cros_ec_codec.c
sound/usb/pcm.c
tools/bpf/Makefile
tools/cgroup/iocost_monitor.py
tools/include/linux/irqflags.h
tools/include/uapi/linux/filter.h [new file with mode: 0644]
tools/include/uapi/linux/perf_event.h
tools/io_uring/liburing.h
tools/lib/traceevent/event-parse.c
tools/lib/traceevent/plugins/Makefile
tools/memory-model/Documentation/explanation.txt
tools/memory-model/Documentation/recipes.txt
tools/memory-model/Documentation/references.txt
tools/memory-model/README
tools/objtool/arch.h
tools/objtool/arch/x86/decode.c
tools/objtool/check.c
tools/objtool/check.h
tools/objtool/elf.c
tools/objtool/elf.h
tools/objtool/orc_gen.c
tools/objtool/special.c
tools/perf/arch/arm/util/auxtrace.c
tools/perf/tests/shell/record+zstd_comp_decomp.sh
tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
tools/testing/selftests/bpf/test_offload.py
tools/testing/selftests/bpf/verifier/event_output.c
tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
tools/testing/selftests/net/forwarding/ethtool.sh
tools/testing/selftests/net/psock_fanout.c
tools/testing/selftests/net/rxtimestamp.c
tools/testing/selftests/net/so_txtime.c
tools/testing/selftests/net/tcp_mmap.c
tools/testing/selftests/rcutorture/bin/configinit.sh
tools/testing/selftests/rcutorture/bin/console-badness.sh [new file with mode: 0755]
tools/testing/selftests/rcutorture/bin/functions.sh
tools/testing/selftests/rcutorture/bin/jitter.sh
tools/testing/selftests/rcutorture/bin/kvm-build.sh
tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh [new file with mode: 0755]
tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh [new file with mode: 0755]
tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
tools/testing/selftests/rcutorture/bin/kvm-transform.sh [new file with mode: 0755]
tools/testing/selftests/rcutorture/bin/kvm.sh
tools/testing/selftests/rcutorture/bin/parse-console.sh
tools/testing/selftests/rcutorture/configs/refscale/CFLIST [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/refscale/CFcommon [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/refscale/PREEMPT [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh [new file with mode: 0644]
usr/Kconfig
usr/Makefile

index d5f4804..162bd2b 100644 (file)
@@ -44,6 +44,7 @@
 *.tab.[ch]
 *.tar
 *.xz
+*.zst
 Module.symvers
 modules.builtin
 modules.order
diff --git a/Documentation/ABI/testing/sysfs-devices-mapping b/Documentation/ABI/testing/sysfs-devices-mapping
new file mode 100644 (file)
index 0000000..490ccfd
--- /dev/null
@@ -0,0 +1,33 @@
+What:           /sys/devices/uncore_iio_x/dieX
+Date:           February 2020
+Contact:        Roman Sudarikov <roman.sudarikov@linux.intel.com>
+Description:
+                Each IIO stack (PCIe root port) has its own IIO PMON block, so
+                each dieX file (where X is die number) holds "Segment:Root Bus"
+                for PCIe root port, which can be monitored by that IIO PMON
+                block.
+                For example, on 4-die Xeon platform with up to 6 IIO stacks per
+                die and, therefore, 6 IIO PMON blocks per die, the mapping of
+                IIO PMON block 0 is exposed as follows:
+
+                $ ls /sys/devices/uncore_iio_0/die*
+                -r--r--r-- /sys/devices/uncore_iio_0/die0
+                -r--r--r-- /sys/devices/uncore_iio_0/die1
+                -r--r--r-- /sys/devices/uncore_iio_0/die2
+                -r--r--r-- /sys/devices/uncore_iio_0/die3
+
+                $ tail /sys/devices/uncore_iio_0/die*
+                ==> /sys/devices/uncore_iio_0/die0 <==
+                0000:00
+                ==> /sys/devices/uncore_iio_0/die1 <==
+                0000:40
+                ==> /sys/devices/uncore_iio_0/die2 <==
+                0000:80
+                ==> /sys/devices/uncore_iio_0/die3 <==
+                0000:c0
+
+                Which means:
+                IIO PMU 0 on die 0 belongs to PCI RP on bus 0x00, domain 0x0000
+                IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
+                IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
+                IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000
index 75b8ca0..8f41ad0 100644 (file)
@@ -463,7 +463,7 @@ again without disrupting RCU readers.
 This guarantee was only partially premeditated. DYNIX/ptx used an
 explicit memory barrier for publication, but had nothing resembling
 ``rcu_dereference()`` for subscription, nor did it have anything
-resembling the ``smp_read_barrier_depends()`` that was later subsumed
+resembling the dependency-ordering barrier that was later subsumed
 into ``rcu_dereference()`` and later still into ``READ_ONCE()``. The
 need for these operations made itself known quite suddenly at a
 late-1990s meeting with the DEC Alpha architects, back in the days when
@@ -2583,7 +2583,12 @@ not work to have these markers in the trampoline itself, because there
 would need to be instructions following ``rcu_read_unlock()``. Although
 ``synchronize_rcu()`` would guarantee that execution reached the
 ``rcu_read_unlock()``, it would not be able to guarantee that execution
-had completely left the trampoline.
+had completely left the trampoline. Worse yet, in some situations
+the trampoline's protection must extend a few instructions *prior* to
+execution reaching the trampoline.  For example, these few instructions
+might calculate the address of the trampoline, so that entering the
+trampoline would be pre-ordained a surprisingly long time before execution
+actually reached the trampoline itself.
 
 The solution, in the form of `Tasks
 RCU <https://lwn.net/Articles/607117/>`__, is to have implicit read-side
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
new file mode 100644 (file)
index 0000000..2efed99
--- /dev/null
@@ -0,0 +1,465 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+Review Checklist for RCU Patches
+================================
+
+
+This document contains a checklist for producing and reviewing patches
+that make use of RCU.  Violating any of the rules listed below will
+result in the same sorts of problems that leaving out a locking primitive
+would cause.  This list is based on experiences reviewing such patches
+over a rather long period of time, but improvements are always welcome!
+
+0.     Is RCU being applied to a read-mostly situation?  If the data
+       structure is updated more than about 10% of the time, then you
+       should strongly consider some other approach, unless detailed
+       performance measurements show that RCU is nonetheless the right
+       tool for the job.  Yes, RCU does reduce read-side overhead by
+       increasing write-side overhead, which is exactly why normal uses
+       of RCU will do much more reading than updating.
+
+       Another exception is where performance is not an issue, and RCU
+       provides a simpler implementation.  An example of this situation
+       is the dynamic NMI code in the Linux 2.6 kernel, at least on
+       architectures where NMIs are rare.
+
+       Yet another exception is where the low real-time latency of RCU's
+       read-side primitives is critically important.
+
+       One final exception is where RCU readers are used to prevent
+       the ABA problem (https://en.wikipedia.org/wiki/ABA_problem)
+       for lockless updates.  This does result in the mildly
+       counter-intuitive situation where rcu_read_lock() and
+       rcu_read_unlock() are used to protect updates, however, this
+       approach provides the same potential simplifications that garbage
+       collectors do.
+
+1.     Does the update code have proper mutual exclusion?
+
+       RCU does allow -readers- to run (almost) naked, but -writers- must
+       still use some sort of mutual exclusion, such as:
+
+       a.      locking,
+       b.      atomic operations, or
+       c.      restricting updates to a single task.
+
+       If you choose #b, be prepared to describe how you have handled
+       memory barriers on weakly ordered machines (pretty much all of
+       them -- even x86 allows later loads to be reordered to precede
+       earlier stores), and be prepared to explain why this added
+       complexity is worthwhile.  If you choose #c, be prepared to
+       explain how this single task does not become a major bottleneck on
+       big multiprocessor machines (for example, if the task is updating
+       information relating to itself that other tasks can read, there
+       by definition can be no bottleneck).  Note that the definition
+       of "large" has changed significantly:  Eight CPUs was "large"
+       in the year 2000, but a hundred CPUs was unremarkable in 2017.
+
+2.     Do the RCU read-side critical sections make proper use of
+       rcu_read_lock() and friends?  These primitives are needed
+       to prevent grace periods from ending prematurely, which
+       could result in data being unceremoniously freed out from
+       under your read-side code, which can greatly increase the
+       actuarial risk of your kernel.
+
+       As a rough rule of thumb, any dereference of an RCU-protected
+       pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(),
+       rcu_read_lock_sched(), or by the appropriate update-side lock.
+       Disabling of preemption can serve as rcu_read_lock_sched(), but
+       is less readable and prevents lockdep from detecting locking issues.
+
+       Letting RCU-protected pointers "leak" out of an RCU read-side
+       critical section is every bit as bad as letting them leak out
+       from under a lock.  Unless, of course, you have arranged some
+       other means of protection, such as a lock or a reference count
+       -before- letting them out of the RCU read-side critical section.
+
+3.     Does the update code tolerate concurrent accesses?
+
+       The whole point of RCU is to permit readers to run without
+       any locks or atomic operations.  This means that readers will
+       be running while updates are in progress.  There are a number
+       of ways to handle this concurrency, depending on the situation:
+
+       a.      Use the RCU variants of the list and hlist update
+               primitives to add, remove, and replace elements on
+               an RCU-protected list.  Alternatively, use the other
+               RCU-protected data structures that have been added to
+               the Linux kernel.
+
+               This is almost always the best approach.
+
+       b.      Proceed as in (a) above, but also maintain per-element
+               locks (that are acquired by both readers and writers)
+               that guard per-element state.  Of course, fields that
+               the readers refrain from accessing can be guarded by
+               some other lock acquired only by updaters, if desired.
+
+               This works quite well, also.
+
+       c.      Make updates appear atomic to readers.  For example,
+               pointer updates to properly aligned fields will
+               appear atomic, as will individual atomic primitives.
+               Sequences of operations performed under a lock will -not-
+               appear to be atomic to RCU readers, nor will sequences
+               of multiple atomic primitives.
+
+               This can work, but is starting to get a bit tricky.
+
+       d.      Carefully order the updates and the reads so that
+               readers see valid data at all phases of the update.
+               This is often more difficult than it sounds, especially
+               given modern CPUs' tendency to reorder memory references.
+               One must usually liberally sprinkle memory barriers
+               (smp_wmb(), smp_rmb(), smp_mb()) through the code,
+               making it difficult to understand and to test.
+
+               It is usually better to group the changing data into
+               a separate structure, so that the change may be made
+               to appear atomic by updating a pointer to reference
+               a new structure containing updated values.
+
+4.     Weakly ordered CPUs pose special challenges.  Almost all CPUs
+       are weakly ordered -- even x86 CPUs allow later loads to be
+       reordered to precede earlier stores.  RCU code must take all of
+       the following measures to prevent memory-corruption problems:
+
+       a.      Readers must maintain proper ordering of their memory
+               accesses.  The rcu_dereference() primitive ensures that
+               the CPU picks up the pointer before it picks up the data
+               that the pointer points to.  This really is necessary
+               on Alpha CPUs.  If you don't believe me, see:
+
+                       http://www.openvms.compaq.com/wizard/wiz_2637.html
+
+               The rcu_dereference() primitive is also an excellent
+               documentation aid, letting the person reading the
+               code know exactly which pointers are protected by RCU.
+               Please note that compilers can also reorder code, and
+               they are becoming increasingly aggressive about doing
+               just that.  The rcu_dereference() primitive therefore also
+               prevents destructive compiler optimizations.  However,
+               with a bit of devious creativity, it is possible to
+               mishandle the return value from rcu_dereference().
+               Please see rcu_dereference.txt in this directory for
+               more information.
+
+               The rcu_dereference() primitive is used by the
+               various "_rcu()" list-traversal primitives, such
+               as the list_for_each_entry_rcu().  Note that it is
+               perfectly legal (if redundant) for update-side code to
+               use rcu_dereference() and the "_rcu()" list-traversal
+               primitives.  This is particularly useful in code that
+               is common to readers and updaters.  However, lockdep
+               will complain if you access rcu_dereference() outside
+               of an RCU read-side critical section.  See lockdep.rst
+               to learn what to do about this.
+
+               Of course, neither rcu_dereference() nor the "_rcu()"
+               list-traversal primitives can substitute for a good
+               concurrency design coordinating among multiple updaters.
+
+       b.      If the list macros are being used, the list_add_tail_rcu()
+               and list_add_rcu() primitives must be used in order
+               to prevent weakly ordered machines from misordering
+               structure initialization and pointer planting.
+               Similarly, if the hlist macros are being used, the
+               hlist_add_head_rcu() primitive is required.
+
+       c.      If the list macros are being used, the list_del_rcu()
+               primitive must be used to keep list_del()'s pointer
+               poisoning from inflicting toxic effects on concurrent
+               readers.  Similarly, if the hlist macros are being used,
+               the hlist_del_rcu() primitive is required.
+
+               The list_replace_rcu() and hlist_replace_rcu() primitives
+               may be used to replace an old structure with a new one
+               in their respective types of RCU-protected lists.
+
+       d.      Rules similar to (4b) and (4c) apply to the "hlist_nulls"
+               type of RCU-protected linked lists.
+
+       e.      Updates must ensure that initialization of a given
+               structure happens before pointers to that structure are
+               publicized.  Use the rcu_assign_pointer() primitive
+               when publicizing a pointer to a structure that can
+               be traversed by an RCU read-side critical section.
+
+5.     If call_rcu() or call_srcu() is used, the callback function will
+       be called from softirq context.  In particular, it cannot block.
+
+6.     Since synchronize_rcu() can block, it cannot be called
+       from any sort of irq context.  The same rule applies
+       for synchronize_srcu(), synchronize_rcu_expedited(), and
+       synchronize_srcu_expedited().
+
+       The expedited forms of these primitives have the same semantics
+       as the non-expedited forms, but expediting is both expensive and
+       (with the exception of synchronize_srcu_expedited()) unfriendly
+       to real-time workloads.  Use of the expedited primitives should
+       be restricted to rare configuration-change operations that would
+       not normally be undertaken while a real-time workload is running.
+       However, real-time workloads can use the rcupdate.rcu_normal kernel
+       boot parameter to completely disable expedited grace periods,
+       though this might have performance implications.
+
+       In particular, if you find yourself invoking one of the expedited
+       primitives repeatedly in a loop, please do everyone a favor:
+       Restructure your code so that it batches the updates, allowing
+       a single non-expedited primitive to cover the entire batch.
+       This will very likely be faster than the loop containing the
+       expedited primitive, and will be much much easier on the rest
+       of the system, especially to real-time workloads running on
+       the rest of the system.
+
+7.     As of v4.20, a given kernel implements only one RCU flavor,
+       which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y.
+       If the updater uses call_rcu() or synchronize_rcu(),
+       then the corresponding readers may use rcu_read_lock() and
+       rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(),
+       or any pair of primitives that disables and re-enables preemption,
+       for example, rcu_read_lock_sched() and rcu_read_unlock_sched().
+       If the updater uses synchronize_srcu() or call_srcu(),
+       then the corresponding readers must use srcu_read_lock() and
+       srcu_read_unlock(), and with the same srcu_struct.  The rules for
+       the expedited primitives are the same as for their non-expedited
+       counterparts.  Mixing things up will result in confusion and
+       broken kernels, and has even resulted in an exploitable security
+       issue.
+
+       One exception to this rule: rcu_read_lock() and rcu_read_unlock()
+       may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
+       in cases where local bottom halves are already known to be
+       disabled, for example, in irq or softirq context.  Commenting
+       such cases is a must, of course!  And the jury is still out on
+       whether the increased speed is worth it.
+
+8.     Although synchronize_rcu() is slower than is call_rcu(), it
+       usually results in simpler code.  So, unless update performance is
+       critically important, the updaters cannot block, or the latency of
+       synchronize_rcu() is visible from userspace, synchronize_rcu()
+       should be used in preference to call_rcu().  Furthermore,
+       kfree_rcu() usually results in even simpler code than does
+       synchronize_rcu() without synchronize_rcu()'s multi-millisecond
+       latency.  So please take advantage of kfree_rcu()'s "fire and
+       forget" memory-freeing capabilities where it applies.
+
+       An especially important property of the synchronize_rcu()
+       primitive is that it automatically self-limits: if grace periods
+       are delayed for whatever reason, then the synchronize_rcu()
+       primitive will correspondingly delay updates.  In contrast,
+       code using call_rcu() should explicitly limit update rate in
+       cases where grace periods are delayed, as failing to do so can
+       result in excessive realtime latencies or even OOM conditions.
+
+       Ways of gaining this self-limiting property when using call_rcu()
+       include:
+
+       a.      Keeping a count of the number of data-structure elements
+               used by the RCU-protected data structure, including
+               those waiting for a grace period to elapse.  Enforce a
+               limit on this number, stalling updates as needed to allow
+               previously deferred frees to complete.  Alternatively,
+               limit only the number awaiting deferred free rather than
+               the total number of elements.
+
+               One way to stall the updates is to acquire the update-side
+               mutex.  (Don't try this with a spinlock -- other CPUs
+               spinning on the lock could prevent the grace period
+               from ever ending.)  Another way to stall the updates
+               is for the updates to use a wrapper function around
+               the memory allocator, so that this wrapper function
+               simulates OOM when there is too much memory awaiting an
+               RCU grace period.  There are of course many other
+               variations on this theme.
+
+       b.      Limiting update rate.  For example, if updates occur only
+               once per hour, then no explicit rate limiting is
+               required, unless your system is already badly broken.
+               Older versions of the dcache subsystem take this approach,
+               guarding updates with a global lock, limiting their rate.
+
+       c.      Trusted update -- if updates can only be done manually by
+               superuser or some other trusted user, then it might not
+               be necessary to automatically limit them.  The theory
+               here is that superuser already has lots of ways to crash
+               the machine.
+
+       d.      Periodically invoke synchronize_rcu(), permitting a limited
+               number of updates per grace period.
+
+       The same cautions apply to call_srcu() and kfree_rcu().
+
+       Note that although these primitives do take action to avoid memory
+       exhaustion when any given CPU has too many callbacks, a determined
+       user could still exhaust memory.  This is especially the case
+       if a system with a large number of CPUs has been configured to
+       offload all of its RCU callbacks onto a single CPU, or if the
+       system has relatively little free memory.
+
+9.     All RCU list-traversal primitives, which include
+       rcu_dereference(), list_for_each_entry_rcu(), and
+       list_for_each_safe_rcu(), must be either within an RCU read-side
+       critical section or must be protected by appropriate update-side
+       locks.  RCU read-side critical sections are delimited by
+       rcu_read_lock() and rcu_read_unlock(), or by similar primitives
+       such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which
+       case the matching rcu_dereference() primitive must be used in
+       order to keep lockdep happy, in this case, rcu_dereference_bh().
+
+       The reason that it is permissible to use RCU list-traversal
+       primitives when the update-side lock is held is that doing so
+       can be quite helpful in reducing code bloat when common code is
+       shared between readers and updaters.  Additional primitives
+       are provided for this case, as discussed in lockdep.rst.
+
+10.    Conversely, if you are in an RCU read-side critical section,
+       and you don't hold the appropriate update-side lock, you -must-
+       use the "_rcu()" variants of the list macros.  Failing to do so
+       will break Alpha, cause aggressive compilers to generate bad code,
+       and confuse people trying to read your code.
+
+11.    Any lock acquired by an RCU callback must be acquired elsewhere
+       with softirq disabled, e.g., via spin_lock_irqsave(),
+       spin_lock_bh(), etc.  Failing to disable softirq on a given
+       acquisition of that lock will result in deadlock as soon as
+       the RCU softirq handler happens to run your RCU callback while
+       interrupting that acquisition's critical section.
+
+12.    RCU callbacks can be and are executed in parallel.  In many cases,
+       the callback code is simply a wrapper around kfree(), so that this
+       is not an issue (or, more accurately, to the extent that it is
+       an issue, the memory-allocator locking handles it).  However,
+       if the callbacks do manipulate a shared data structure, they
+       must use whatever locking or other synchronization is required
+       to safely access and/or modify that data structure.
+
+       Do not assume that RCU callbacks will be executed on the same
+       CPU that executed the corresponding call_rcu() or call_srcu().
+       For example, if a given CPU goes offline while having an RCU
+       callback pending, then that RCU callback will execute on some
+       surviving CPU.  (If this was not the case, a self-spawning RCU
+       callback would prevent the victim CPU from ever going offline.)
+       Furthermore, CPUs designated by rcu_nocbs= might well -always-
+       have their RCU callbacks executed on some other CPUs, in fact,
+       for some real-time workloads, this is the whole point of using
+       the rcu_nocbs= kernel boot parameter.
+
+13.    Unlike other forms of RCU, it -is- permissible to block in an
+       SRCU read-side critical section (demarked by srcu_read_lock()
+       and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
+       Please note that if you don't need to sleep in read-side critical
+       sections, you should be using RCU rather than SRCU, because RCU
+       is almost always faster and easier to use than is SRCU.
+
+       Also unlike other forms of RCU, explicit initialization and
+       cleanup is required either at build time via DEFINE_SRCU()
+       or DEFINE_STATIC_SRCU() or at runtime via init_srcu_struct()
+       and cleanup_srcu_struct().  These last two are passed a
+       "struct srcu_struct" that defines the scope of a given
+       SRCU domain.  Once initialized, the srcu_struct is passed
+       to srcu_read_lock(), srcu_read_unlock(), synchronize_srcu(),
+       synchronize_srcu_expedited(), and call_srcu().  A given
+       synchronize_srcu() waits only for SRCU read-side critical
+       sections governed by srcu_read_lock() and srcu_read_unlock()
+       calls that have been passed the same srcu_struct.  This property
+       is what makes sleeping read-side critical sections tolerable --
+       a given subsystem delays only its own updates, not those of other
+       subsystems using SRCU.  Therefore, SRCU is less prone to OOM the
+       system than RCU would be if RCU's read-side critical sections
+       were permitted to sleep.
+
+       The ability to sleep in read-side critical sections does not
+       come for free.  First, corresponding srcu_read_lock() and
+       srcu_read_unlock() calls must be passed the same srcu_struct.
+       Second, grace-period-detection overhead is amortized only
+       over those updates sharing a given srcu_struct, rather than
+       being globally amortized as they are for other forms of RCU.
+       Therefore, SRCU should be used in preference to rw_semaphore
+       only in extremely read-intensive situations, or in situations
+       requiring SRCU's read-side deadlock immunity or low read-side
+       realtime latency.  You should also consider percpu_rw_semaphore
+       when you need lightweight readers.
+
+       SRCU's expedited primitive (synchronize_srcu_expedited())
+       never sends IPIs to other CPUs, so it is easier on
+       real-time workloads than is synchronize_rcu_expedited().
+
+       Note that rcu_assign_pointer() relates to SRCU just as it does to
+       other forms of RCU, but instead of rcu_dereference() you should
+       use srcu_dereference() in order to avoid lockdep splats.
+
+14.    The whole point of call_rcu(), synchronize_rcu(), and friends
+       is to wait until all pre-existing readers have finished before
+       carrying out some otherwise-destructive operation.  It is
+       therefore critically important to -first- remove any path
+       that readers can follow that could be affected by the
+       destructive operation, and -only- -then- invoke call_rcu(),
+       synchronize_rcu(), or friends.
+
+       Because these primitives only wait for pre-existing readers, it
+       is the caller's responsibility to guarantee that any subsequent
+       readers will execute safely.
+
+15.    The various RCU read-side primitives do -not- necessarily contain
+       memory barriers.  You should therefore plan for the CPU
+       and the compiler to freely reorder code into and out of RCU
+       read-side critical sections.  It is the responsibility of the
+       RCU update-side primitives to deal with this.
+
+       For SRCU readers, you can use smp_mb__after_srcu_read_unlock()
+       immediately after an srcu_read_unlock() to get a full barrier.
+
+16.    Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
+       __rcu sparse checks to validate your RCU code.  These can help
+       find problems as follows:
+
+       CONFIG_PROVE_LOCKING:
+               check that accesses to RCU-protected data
+               structures are carried out under the proper RCU
+               read-side critical section, while holding the right
+               combination of locks, or whatever other conditions
+               are appropriate.
+
+       CONFIG_DEBUG_OBJECTS_RCU_HEAD:
+               check that you don't pass the
+               same object to call_rcu() (or friends) before an RCU
+               grace period has elapsed since the last time that you
+               passed that same object to call_rcu() (or friends).
+
+       __rcu sparse checks:
+               tag the pointer to the RCU-protected data
+               structure with __rcu, and sparse will warn you if you
+               access that pointer without the services of one of the
+               variants of rcu_dereference().
+
+       These debugging aids can help you find problems that are
+       otherwise extremely difficult to spot.
+
+17.    If you register a callback using call_rcu() or call_srcu(), and
+       pass in a function defined within a loadable module, then it is
+       necessary to wait for all pending callbacks to be invoked after
+       the last invocation and before unloading that module.  Note that
+       it is absolutely -not- sufficient to wait for a grace period!
+       The current (say) synchronize_rcu() implementation is -not-
+       guaranteed to wait for callbacks registered on other CPUs.
+       Or even on the current CPU if that CPU recently went offline
+       and came back online.
+
+       You instead need to use one of the barrier functions:
+
+       -       call_rcu() -> rcu_barrier()
+       -       call_srcu() -> srcu_barrier()
+
+       However, these barrier functions are absolutely -not- guaranteed
+       to wait for a grace period.  In fact, if there are no call_rcu()
+       callbacks waiting anywhere in the system, rcu_barrier() is within
+       its rights to return immediately.
+
+       So if you need to wait for both an RCU grace period and for
+       all pre-existing call_rcu() callbacks, you will need to execute
+       both rcu_barrier() and synchronize_rcu(), if necessary, using
+       something like workqueues to execute them concurrently.
+
+       See rcubarrier.txt for more information.
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
deleted file mode 100644 (file)
index e98ff26..0000000
+++ /dev/null
@@ -1,458 +0,0 @@
-Review Checklist for RCU Patches
-
-
-This document contains a checklist for producing and reviewing patches
-that make use of RCU.  Violating any of the rules listed below will
-result in the same sorts of problems that leaving out a locking primitive
-would cause.  This list is based on experiences reviewing such patches
-over a rather long period of time, but improvements are always welcome!
-
-0.     Is RCU being applied to a read-mostly situation?  If the data
-       structure is updated more than about 10% of the time, then you
-       should strongly consider some other approach, unless detailed
-       performance measurements show that RCU is nonetheless the right
-       tool for the job.  Yes, RCU does reduce read-side overhead by
-       increasing write-side overhead, which is exactly why normal uses
-       of RCU will do much more reading than updating.
-
-       Another exception is where performance is not an issue, and RCU
-       provides a simpler implementation.  An example of this situation
-       is the dynamic NMI code in the Linux 2.6 kernel, at least on
-       architectures where NMIs are rare.
-
-       Yet another exception is where the low real-time latency of RCU's
-       read-side primitives is critically important.
-
-       One final exception is where RCU readers are used to prevent
-       the ABA problem (https://en.wikipedia.org/wiki/ABA_problem)
-       for lockless updates.  This does result in the mildly
-       counter-intuitive situation where rcu_read_lock() and
-       rcu_read_unlock() are used to protect updates, however, this
-       approach provides the same potential simplifications that garbage
-       collectors do.
-
-1.     Does the update code have proper mutual exclusion?
-
-       RCU does allow -readers- to run (almost) naked, but -writers- must
-       still use some sort of mutual exclusion, such as:
-
-       a.      locking,
-       b.      atomic operations, or
-       c.      restricting updates to a single task.
-
-       If you choose #b, be prepared to describe how you have handled
-       memory barriers on weakly ordered machines (pretty much all of
-       them -- even x86 allows later loads to be reordered to precede
-       earlier stores), and be prepared to explain why this added
-       complexity is worthwhile.  If you choose #c, be prepared to
-       explain how this single task does not become a major bottleneck on
-       big multiprocessor machines (for example, if the task is updating
-       information relating to itself that other tasks can read, there
-       by definition can be no bottleneck).  Note that the definition
-       of "large" has changed significantly:  Eight CPUs was "large"
-       in the year 2000, but a hundred CPUs was unremarkable in 2017.
-
-2.     Do the RCU read-side critical sections make proper use of
-       rcu_read_lock() and friends?  These primitives are needed
-       to prevent grace periods from ending prematurely, which
-       could result in data being unceremoniously freed out from
-       under your read-side code, which can greatly increase the
-       actuarial risk of your kernel.
-
-       As a rough rule of thumb, any dereference of an RCU-protected
-       pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(),
-       rcu_read_lock_sched(), or by the appropriate update-side lock.
-       Disabling of preemption can serve as rcu_read_lock_sched(), but
-       is less readable and prevents lockdep from detecting locking issues.
-
-       Letting RCU-protected pointers "leak" out of an RCU read-side
-       critical section is every bit as bad as letting them leak out
-       from under a lock.  Unless, of course, you have arranged some
-       other means of protection, such as a lock or a reference count
-       -before- letting them out of the RCU read-side critical section.
-
-3.     Does the update code tolerate concurrent accesses?
-
-       The whole point of RCU is to permit readers to run without
-       any locks or atomic operations.  This means that readers will
-       be running while updates are in progress.  There are a number
-       of ways to handle this concurrency, depending on the situation:
-
-       a.      Use the RCU variants of the list and hlist update
-               primitives to add, remove, and replace elements on
-               an RCU-protected list.  Alternatively, use the other
-               RCU-protected data structures that have been added to
-               the Linux kernel.
-
-               This is almost always the best approach.
-
-       b.      Proceed as in (a) above, but also maintain per-element
-               locks (that are acquired by both readers and writers)
-               that guard per-element state.  Of course, fields that
-               the readers refrain from accessing can be guarded by
-               some other lock acquired only by updaters, if desired.
-
-               This works quite well, also.
-
-       c.      Make updates appear atomic to readers.  For example,
-               pointer updates to properly aligned fields will
-               appear atomic, as will individual atomic primitives.
-               Sequences of operations performed under a lock will -not-
-               appear to be atomic to RCU readers, nor will sequences
-               of multiple atomic primitives.
-
-               This can work, but is starting to get a bit tricky.
-
-       d.      Carefully order the updates and the reads so that
-               readers see valid data at all phases of the update.
-               This is often more difficult than it sounds, especially
-               given modern CPUs' tendency to reorder memory references.
-               One must usually liberally sprinkle memory barriers
-               (smp_wmb(), smp_rmb(), smp_mb()) through the code,
-               making it difficult to understand and to test.
-
-               It is usually better to group the changing data into
-               a separate structure, so that the change may be made
-               to appear atomic by updating a pointer to reference
-               a new structure containing updated values.
-
-4.     Weakly ordered CPUs pose special challenges.  Almost all CPUs
-       are weakly ordered -- even x86 CPUs allow later loads to be
-       reordered to precede earlier stores.  RCU code must take all of
-       the following measures to prevent memory-corruption problems:
-
-       a.      Readers must maintain proper ordering of their memory
-               accesses.  The rcu_dereference() primitive ensures that
-               the CPU picks up the pointer before it picks up the data
-               that the pointer points to.  This really is necessary
-               on Alpha CPUs.  If you don't believe me, see:
-
-                       http://www.openvms.compaq.com/wizard/wiz_2637.html
-
-               The rcu_dereference() primitive is also an excellent
-               documentation aid, letting the person reading the
-               code know exactly which pointers are protected by RCU.
-               Please note that compilers can also reorder code, and
-               they are becoming increasingly aggressive about doing
-               just that.  The rcu_dereference() primitive therefore also
-               prevents destructive compiler optimizations.  However,
-               with a bit of devious creativity, it is possible to
-               mishandle the return value from rcu_dereference().
-               Please see rcu_dereference.txt in this directory for
-               more information.
-
-               The rcu_dereference() primitive is used by the
-               various "_rcu()" list-traversal primitives, such
-               as the list_for_each_entry_rcu().  Note that it is
-               perfectly legal (if redundant) for update-side code to
-               use rcu_dereference() and the "_rcu()" list-traversal
-               primitives.  This is particularly useful in code that
-               is common to readers and updaters.  However, lockdep
-               will complain if you access rcu_dereference() outside
-               of an RCU read-side critical section.  See lockdep.txt
-               to learn what to do about this.
-
-               Of course, neither rcu_dereference() nor the "_rcu()"
-               list-traversal primitives can substitute for a good
-               concurrency design coordinating among multiple updaters.
-
-       b.      If the list macros are being used, the list_add_tail_rcu()
-               and list_add_rcu() primitives must be used in order
-               to prevent weakly ordered machines from misordering
-               structure initialization and pointer planting.
-               Similarly, if the hlist macros are being used, the
-               hlist_add_head_rcu() primitive is required.
-
-       c.      If the list macros are being used, the list_del_rcu()
-               primitive must be used to keep list_del()'s pointer
-               poisoning from inflicting toxic effects on concurrent
-               readers.  Similarly, if the hlist macros are being used,
-               the hlist_del_rcu() primitive is required.
-
-               The list_replace_rcu() and hlist_replace_rcu() primitives
-               may be used to replace an old structure with a new one
-               in their respective types of RCU-protected lists.
-
-       d.      Rules similar to (4b) and (4c) apply to the "hlist_nulls"
-               type of RCU-protected linked lists.
-
-       e.      Updates must ensure that initialization of a given
-               structure happens before pointers to that structure are
-               publicized.  Use the rcu_assign_pointer() primitive
-               when publicizing a pointer to a structure that can
-               be traversed by an RCU read-side critical section.
-
-5.     If call_rcu() or call_srcu() is used, the callback function will
-       be called from softirq context.  In particular, it cannot block.
-
-6.     Since synchronize_rcu() can block, it cannot be called
-       from any sort of irq context.  The same rule applies
-       for synchronize_srcu(), synchronize_rcu_expedited(), and
-       synchronize_srcu_expedited().
-
-       The expedited forms of these primitives have the same semantics
-       as the non-expedited forms, but expediting is both expensive and
-       (with the exception of synchronize_srcu_expedited()) unfriendly
-       to real-time workloads.  Use of the expedited primitives should
-       be restricted to rare configuration-change operations that would
-       not normally be undertaken while a real-time workload is running.
-       However, real-time workloads can use rcupdate.rcu_normal kernel
-       boot parameter to completely disable expedited grace periods,
-       though this might have performance implications.
-
-       In particular, if you find yourself invoking one of the expedited
-       primitives repeatedly in a loop, please do everyone a favor:
-       Restructure your code so that it batches the updates, allowing
-       a single non-expedited primitive to cover the entire batch.
-       This will very likely be faster than the loop containing the
-       expedited primitive, and will be much much easier on the rest
-       of the system, especially to real-time workloads running on
-       the rest of the system.
-
-7.     As of v4.20, a given kernel implements only one RCU flavor,
-       which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y.
-       If the updater uses call_rcu() or synchronize_rcu(),
-       then the corresponding readers may use rcu_read_lock() and
-       rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(),
-       or any pair of primitives that disables and re-enables preemption,
-       for example, rcu_read_lock_sched() and rcu_read_unlock_sched().
-       If the updater uses synchronize_srcu() or call_srcu(),
-       then the corresponding readers must use srcu_read_lock() and
-       srcu_read_unlock(), and with the same srcu_struct.  The rules for
-       the expedited primitives are the same as for their non-expedited
-       counterparts.  Mixing things up will result in confusion and
-       broken kernels, and has even resulted in an exploitable security
-       issue.
-
-       One exception to this rule: rcu_read_lock() and rcu_read_unlock()
-       may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
-       in cases where local bottom halves are already known to be
-       disabled, for example, in irq or softirq context.  Commenting
-       such cases is a must, of course!  And the jury is still out on
-       whether the increased speed is worth it.
-
-8.     Although synchronize_rcu() is slower than is call_rcu(), it
-       usually results in simpler code.  So, unless update performance is
-       critically important, the updaters cannot block, or the latency of
-       synchronize_rcu() is visible from userspace, synchronize_rcu()
-       should be used in preference to call_rcu().  Furthermore,
-       kfree_rcu() usually results in even simpler code than does
-       synchronize_rcu() without synchronize_rcu()'s multi-millisecond
-       latency.  So please take advantage of kfree_rcu()'s "fire and
-       forget" memory-freeing capabilities where it applies.
-
-       An especially important property of the synchronize_rcu()
-       primitive is that it automatically self-limits: if grace periods
-       are delayed for whatever reason, then the synchronize_rcu()
-       primitive will correspondingly delay updates.  In contrast,
-       code using call_rcu() should explicitly limit update rate in
-       cases where grace periods are delayed, as failing to do so can
-       result in excessive realtime latencies or even OOM conditions.
-
-       Ways of gaining this self-limiting property when using call_rcu()
-       include:
-
-       a.      Keeping a count of the number of data-structure elements
-               used by the RCU-protected data structure, including
-               those waiting for a grace period to elapse.  Enforce a
-               limit on this number, stalling updates as needed to allow
-               previously deferred frees to complete.  Alternatively,
-               limit only the number awaiting deferred free rather than
-               the total number of elements.
-
-               One way to stall the updates is to acquire the update-side
-               mutex.  (Don't try this with a spinlock -- other CPUs
-               spinning on the lock could prevent the grace period
-               from ever ending.)  Another way to stall the updates
-               is for the updates to use a wrapper function around
-               the memory allocator, so that this wrapper function
-               simulates OOM when there is too much memory awaiting an
-               RCU grace period.  There are of course many other
-               variations on this theme.
-
-       b.      Limiting update rate.  For example, if updates occur only
-               once per hour, then no explicit rate limiting is
-               required, unless your system is already badly broken.
-               Older versions of the dcache subsystem take this approach,
-               guarding updates with a global lock, limiting their rate.
-
-       c.      Trusted update -- if updates can only be done manually by
-               superuser or some other trusted user, then it might not
-               be necessary to automatically limit them.  The theory
-               here is that superuser already has lots of ways to crash
-               the machine.
-
-       d.      Periodically invoke synchronize_rcu(), permitting a limited
-               number of updates per grace period.
-
-       The same cautions apply to call_srcu() and kfree_rcu().
-
-       Note that although these primitives do take action to avoid memory
-       exhaustion when any given CPU has too many callbacks, a determined
-       user could still exhaust memory.  This is especially the case
-       if a system with a large number of CPUs has been configured to
-       offload all of its RCU callbacks onto a single CPU, or if the
-       system has relatively little free memory.
-
-9.     All RCU list-traversal primitives, which include
-       rcu_dereference(), list_for_each_entry_rcu(), and
-       list_for_each_safe_rcu(), must be either within an RCU read-side
-       critical section or must be protected by appropriate update-side
-       locks.  RCU read-side critical sections are delimited by
-       rcu_read_lock() and rcu_read_unlock(), or by similar primitives
-       such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which
-       case the matching rcu_dereference() primitive must be used in
-       order to keep lockdep happy, in this case, rcu_dereference_bh().
-
-       The reason that it is permissible to use RCU list-traversal
-       primitives when the update-side lock is held is that doing so
-       can be quite helpful in reducing code bloat when common code is
-       shared between readers and updaters.  Additional primitives
-       are provided for this case, as discussed in lockdep.txt.
-
-10.    Conversely, if you are in an RCU read-side critical section,
-       and you don't hold the appropriate update-side lock, you -must-
-       use the "_rcu()" variants of the list macros.  Failing to do so
-       will break Alpha, cause aggressive compilers to generate bad code,
-       and confuse people trying to read your code.
-
-11.    Any lock acquired by an RCU callback must be acquired elsewhere
-       with softirq disabled, e.g., via spin_lock_irqsave(),
-       spin_lock_bh(), etc.  Failing to disable softirq on a given
-       acquisition of that lock will result in deadlock as soon as
-       the RCU softirq handler happens to run your RCU callback while
-       interrupting that acquisition's critical section.
-
-12.    RCU callbacks can be and are executed in parallel.  In many cases,
-       the callback code is simply a wrapper around kfree(), so that this
-       is not an issue (or, more accurately, to the extent that it is
-       an issue, the memory-allocator locking handles it).  However,
-       if the callbacks do manipulate a shared data structure, they
-       must use whatever locking or other synchronization is required
-       to safely access and/or modify that data structure.
-
-       Do not assume that RCU callbacks will be executed on the same
-       CPU that executed the corresponding call_rcu() or call_srcu().
-       For example, if a given CPU goes offline while having an RCU
-       callback pending, then that RCU callback will execute on some
-       surviving CPU.  (If this was not the case, a self-spawning RCU
-       callback would prevent the victim CPU from ever going offline.)
-       Furthermore, CPUs designated by rcu_nocbs= might well -always-
-       have their RCU callbacks executed on some other CPUs, in fact,
-       for some  real-time workloads, this is the whole point of using
-       the rcu_nocbs= kernel boot parameter.
-
-13.    Unlike other forms of RCU, it -is- permissible to block in an
-       SRCU read-side critical section (demarked by srcu_read_lock()
-       and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
-       Please note that if you don't need to sleep in read-side critical
-       sections, you should be using RCU rather than SRCU, because RCU
-       is almost always faster and easier to use than is SRCU.
-
-       Also unlike other forms of RCU, explicit initialization and
-       cleanup is required either at build time via DEFINE_SRCU()
-       or DEFINE_STATIC_SRCU() or at runtime via init_srcu_struct()
-       and cleanup_srcu_struct().  These last two are passed a
-       "struct srcu_struct" that defines the scope of a given
-       SRCU domain.  Once initialized, the srcu_struct is passed
-       to srcu_read_lock(), srcu_read_unlock() synchronize_srcu(),
-       synchronize_srcu_expedited(), and call_srcu().  A given
-       synchronize_srcu() waits only for SRCU read-side critical
-       sections governed by srcu_read_lock() and srcu_read_unlock()
-       calls that have been passed the same srcu_struct.  This property
-       is what makes sleeping read-side critical sections tolerable --
-       a given subsystem delays only its own updates, not those of other
-       subsystems using SRCU.  Therefore, SRCU is less prone to OOM the
-       system than RCU would be if RCU's read-side critical sections
-       were permitted to sleep.
-
-       The ability to sleep in read-side critical sections does not
-       come for free.  First, corresponding srcu_read_lock() and
-       srcu_read_unlock() calls must be passed the same srcu_struct.
-       Second, grace-period-detection overhead is amortized only
-       over those updates sharing a given srcu_struct, rather than
-       being globally amortized as they are for other forms of RCU.
-       Therefore, SRCU should be used in preference to rw_semaphore
-       only in extremely read-intensive situations, or in situations
-       requiring SRCU's read-side deadlock immunity or low read-side
-       realtime latency.  You should also consider percpu_rw_semaphore
-       when you need lightweight readers.
-
-       SRCU's expedited primitive (synchronize_srcu_expedited())
-       never sends IPIs to other CPUs, so it is easier on
-       real-time workloads than is synchronize_rcu_expedited().
-
-       Note that rcu_assign_pointer() relates to SRCU just as it does to
-       other forms of RCU, but instead of rcu_dereference() you should
-       use srcu_dereference() in order to avoid lockdep splats.
-
-14.    The whole point of call_rcu(), synchronize_rcu(), and friends
-       is to wait until all pre-existing readers have finished before
-       carrying out some otherwise-destructive operation.  It is
-       therefore critically important to -first- remove any path
-       that readers can follow that could be affected by the
-       destructive operation, and -only- -then- invoke call_rcu(),
-       synchronize_rcu(), or friends.
-
-       Because these primitives only wait for pre-existing readers, it
-       is the caller's responsibility to guarantee that any subsequent
-       readers will execute safely.
-
-15.    The various RCU read-side primitives do -not- necessarily contain
-       memory barriers.  You should therefore plan for the CPU
-       and the compiler to freely reorder code into and out of RCU
-       read-side critical sections.  It is the responsibility of the
-       RCU update-side primitives to deal with this.
-
-       For SRCU readers, you can use smp_mb__after_srcu_read_unlock()
-       immediately after an srcu_read_unlock() to get a full barrier.
-
-16.    Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
-       __rcu sparse checks to validate your RCU code.  These can help
-       find problems as follows:
-
-       CONFIG_PROVE_LOCKING: check that accesses to RCU-protected data
-               structures are carried out under the proper RCU
-               read-side critical section, while holding the right
-               combination of locks, or whatever other conditions
-               are appropriate.
-
-       CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the
-               same object to call_rcu() (or friends) before an RCU
-               grace period has elapsed since the last time that you
-               passed that same object to call_rcu() (or friends).
-
-       __rcu sparse checks: tag the pointer to the RCU-protected data
-               structure with __rcu, and sparse will warn you if you
-               access that pointer without the services of one of the
-               variants of rcu_dereference().
-
-       These debugging aids can help you find problems that are
-       otherwise extremely difficult to spot.
-
-17.    If you register a callback using call_rcu() or call_srcu(), and
-       pass in a function defined within a loadable module, then it in
-       necessary to wait for all pending callbacks to be invoked after
-       the last invocation and before unloading that module.  Note that
-       it is absolutely -not- sufficient to wait for a grace period!
-       The current (say) synchronize_rcu() implementation is -not-
-       guaranteed to wait for callbacks registered on other CPUs.
-       Or even on the current CPU if that CPU recently went offline
-       and came back online.
-
-       You instead need to use one of the barrier functions:
-
-       o       call_rcu() -> rcu_barrier()
-       o       call_srcu() -> srcu_barrier()
-
-       However, these barrier functions are absolutely -not- guaranteed
-       to wait for a grace period.  In fact, if there are no call_rcu()
-       callbacks waiting anywhere in the system, rcu_barrier() is within
-       its rights to return immediately.
-
-       So if you need to wait for both an RCU grace period and for
-       all pre-existing call_rcu() callbacks, you will need to execute
-       both rcu_barrier() and synchronize_rcu(), if necessary, using
-       something like workqueues to to execute them concurrently.
-
-       See rcubarrier.txt for more information.
index 81a0a1e..e703d3d 100644 (file)
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 .. _rcu_concepts:
 
 ============
@@ -8,10 +10,17 @@ RCU concepts
    :maxdepth: 3
 
    arrayRCU
+   checklist
+   lockdep
+   lockdep-splat
    rcubarrier
    rcu_dereference
    whatisRCU
    rcu
+   rculist_nulls
+   rcuref
+   torture
+   stallwarn
    listRCU
    NMI-RCU
    UP
diff --git a/Documentation/RCU/lockdep-splat.rst b/Documentation/RCU/lockdep-splat.rst
new file mode 100644 (file)
index 0000000..2a5c79d
--- /dev/null
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Lockdep-RCU Splat
+=================
+
+Lockdep-RCU was added to the Linux kernel in early 2010
+(http://lwn.net/Articles/371986/).  This facility checks for some common
+misuses of the RCU API, most notably using one of the rcu_dereference()
+family to access an RCU-protected pointer without the proper protection.
+When such misuse is detected, a lockdep-RCU splat is emitted.
+
+The usual cause of a lockdep-RCU splat is someone accessing an
+RCU-protected data structure without either (1) being in the right kind of
+RCU read-side critical section or (2) holding the right update-side lock.
+This problem can therefore be serious: it might result in random memory
+overwriting or worse.  There can of course be false positives, this
+being the real world and all that.
+
+So let's look at an example RCU lockdep splat from 3.0-rc5, one that
+has long since been fixed::
+
+    =============================
+    WARNING: suspicious RCU usage
+    -----------------------------
+    block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
+
+other info that might help us debug this::
+
+    rcu_scheduler_active = 1, debug_locks = 0
+    3 locks held by scsi_scan_6/1552:
+    #0:  (&shost->scan_mutex){+.+.}, at: [<ffffffff8145efca>]
+    scsi_scan_host_selected+0x5a/0x150
+    #1:  (&eq->sysfs_lock){+.+.}, at: [<ffffffff812a5032>]
+    elevator_exit+0x22/0x60
+    #2:  (&(&q->__queue_lock)->rlock){-.-.}, at: [<ffffffff812b6233>]
+    cfq_exit_queue+0x43/0x190
+
+    stack backtrace:
+    Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
+    Call Trace:
+    [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
+    [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
+    [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
+    [<ffffffff812a5046>] elevator_exit+0x36/0x60
+    [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
+    [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
+    [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
+    [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
+    [<ffffffff817da069>] ? error_exit+0x29/0xb0
+    [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
+    [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
+    [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
+    [<ffffffff817da069>] ? error_exit+0x29/0xb0
+    [<ffffffff812bcc60>] ? kobject_del+0x40/0x40
+    [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
+    [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
+    [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
+    [<ffffffff8145f170>] do_scan_async+0x20/0x160
+    [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
+    [<ffffffff810975b6>] kthread+0xa6/0xb0
+    [<ffffffff817db154>] kernel_thread_helper+0x4/0x10
+    [<ffffffff81066430>] ? finish_task_switch+0x80/0x110
+    [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
+    [<ffffffff81097510>] ? __kthread_init_worker+0x70/0x70
+    [<ffffffff817db150>] ? gs_change+0xb/0xb
+
+Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows::
+
+       if (rcu_dereference(ioc->ioc_data) == cic) {
+
+This form says that it must be in a plain vanilla RCU read-side critical
+section, but the "other info" list above shows that this is not the
+case.  Instead, we hold three locks, one of which might be RCU related.
+And maybe that lock really does protect this reference.  If so, the fix
+is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
+take the struct request_queue "q" from cfq_exit_queue() as an argument,
+which would permit us to invoke rcu_dereference_protected as follows::
+
+       if (rcu_dereference_protected(ioc->ioc_data,
+                                     lockdep_is_held(&q->queue_lock)) == cic) {
+
+With this change, there would be no lockdep-RCU splat emitted if this
+code was invoked either from within an RCU read-side critical section
+or with the ->queue_lock held.  In particular, this would have suppressed
+the above lockdep-RCU splat because ->queue_lock is held (see #2 in the
+list above).
+
+On the other hand, perhaps we really do need an RCU read-side critical
+section.  In this case, the critical section must span the use of the
+return value from rcu_dereference(), or at least until there is some
+reference count incremented or some such.  One way to handle this is to
+add rcu_read_lock() and rcu_read_unlock() as follows::
+
+       rcu_read_lock();
+       if (rcu_dereference(ioc->ioc_data) == cic) {
+               spin_lock(&ioc->lock);
+               rcu_assign_pointer(ioc->ioc_data, NULL);
+               spin_unlock(&ioc->lock);
+       }
+       rcu_read_unlock();
+
+With this change, the rcu_dereference() is always within an RCU
+read-side critical section, which again would have suppressed the
+above lockdep-RCU splat.
+
+But in this particular case, we don't actually dereference the pointer
+returned from rcu_dereference().  Instead, that pointer is just compared
+to the cic pointer, which means that the rcu_dereference() can be replaced
+by rcu_access_pointer() as follows::
+
+       if (rcu_access_pointer(ioc->ioc_data) == cic) {
+
+Because it is legal to invoke rcu_access_pointer() without protection,
+this change would also suppress the above lockdep-RCU splat.
diff --git a/Documentation/RCU/lockdep-splat.txt b/Documentation/RCU/lockdep-splat.txt
deleted file mode 100644 (file)
index b809631..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-Lockdep-RCU was added to the Linux kernel in early 2010
-(http://lwn.net/Articles/371986/).  This facility checks for some common
-misuses of the RCU API, most notably using one of the rcu_dereference()
-family to access an RCU-protected pointer without the proper protection.
-When such misuse is detected, an lockdep-RCU splat is emitted.
-
-The usual cause of a lockdep-RCU slat is someone accessing an
-RCU-protected data structure without either (1) being in the right kind of
-RCU read-side critical section or (2) holding the right update-side lock.
-This problem can therefore be serious: it might result in random memory
-overwriting or worse.  There can of course be false positives, this
-being the real world and all that.
-
-So let's look at an example RCU lockdep splat from 3.0-rc5, one that
-has long since been fixed:
-
-=============================
-WARNING: suspicious RCU usage
------------------------------
-block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
-
-other info that might help us debug this:
-
-
-rcu_scheduler_active = 1, debug_locks = 0
-3 locks held by scsi_scan_6/1552:
- #0:  (&shost->scan_mutex){+.+.}, at: [<ffffffff8145efca>]
-scsi_scan_host_selected+0x5a/0x150
- #1:  (&eq->sysfs_lock){+.+.}, at: [<ffffffff812a5032>]
-elevator_exit+0x22/0x60
- #2:  (&(&q->__queue_lock)->rlock){-.-.}, at: [<ffffffff812b6233>]
-cfq_exit_queue+0x43/0x190
-
-stack backtrace:
-Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
-Call Trace:
- [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
- [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
- [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
- [<ffffffff812a5046>] elevator_exit+0x36/0x60
- [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
- [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
- [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
- [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
- [<ffffffff817da069>] ? error_exit+0x29/0xb0
- [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
- [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
- [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
- [<ffffffff817da069>] ? error_exit+0x29/0xb0
- [<ffffffff812bcc60>] ? kobject_del+0x40/0x40
- [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
- [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
- [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
- [<ffffffff8145f170>] do_scan_async+0x20/0x160
- [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
- [<ffffffff810975b6>] kthread+0xa6/0xb0
- [<ffffffff817db154>] kernel_thread_helper+0x4/0x10
- [<ffffffff81066430>] ? finish_task_switch+0x80/0x110
- [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
- [<ffffffff81097510>] ? __kthread_init_worker+0x70/0x70
- [<ffffffff817db150>] ? gs_change+0xb/0xb
-
-Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows:
-
-       if (rcu_dereference(ioc->ioc_data) == cic) {
-
-This form says that it must be in a plain vanilla RCU read-side critical
-section, but the "other info" list above shows that this is not the
-case.  Instead, we hold three locks, one of which might be RCU related.
-And maybe that lock really does protect this reference.  If so, the fix
-is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
-take the struct request_queue "q" from cfq_exit_queue() as an argument,
-which would permit us to invoke rcu_dereference_protected as follows:
-
-       if (rcu_dereference_protected(ioc->ioc_data,
-                                     lockdep_is_held(&q->queue_lock)) == cic) {
-
-With this change, there would be no lockdep-RCU splat emitted if this
-code was invoked either from within an RCU read-side critical section
-or with the ->queue_lock held.  In particular, this would have suppressed
-the above lockdep-RCU splat because ->queue_lock is held (see #2 in the
-list above).
-
-On the other hand, perhaps we really do need an RCU read-side critical
-section.  In this case, the critical section must span the use of the
-return value from rcu_dereference(), or at least until there is some
-reference count incremented or some such.  One way to handle this is to
-add rcu_read_lock() and rcu_read_unlock() as follows:
-
-       rcu_read_lock();
-       if (rcu_dereference(ioc->ioc_data) == cic) {
-               spin_lock(&ioc->lock);
-               rcu_assign_pointer(ioc->ioc_data, NULL);
-               spin_unlock(&ioc->lock);
-       }
-       rcu_read_unlock();
-
-With this change, the rcu_dereference() is always within an RCU
-read-side critical section, which again would have suppressed the
-above lockdep-RCU splat.
-
-But in this particular case, we don't actually dereference the pointer
-returned from rcu_dereference().  Instead, that pointer is just compared
-to the cic pointer, which means that the rcu_dereference() can be replaced
-by rcu_access_pointer() as follows:
-
-       if (rcu_access_pointer(ioc->ioc_data) == cic) {
-
-Because it is legal to invoke rcu_access_pointer() without protection,
-this change would also suppress the above lockdep-RCU splat.
diff --git a/Documentation/RCU/lockdep.rst b/Documentation/RCU/lockdep.rst
new file mode 100644 (file)
index 0000000..f1fc8ae
--- /dev/null
@@ -0,0 +1,116 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+RCU and lockdep checking
+========================
+
+All flavors of RCU have lockdep checking available, so that lockdep is
+aware of when each task enters and leaves any flavor of RCU read-side
+critical section.  Each flavor of RCU is tracked separately (but note
+that this is not the case in 2.6.32 and earlier).  This allows lockdep's
+tracking to include RCU state, which can sometimes help when debugging
+deadlocks and the like.
+
+In addition, RCU provides the following primitives that check lockdep's
+state::
+
+       rcu_read_lock_held() for normal RCU.
+       rcu_read_lock_bh_held() for RCU-bh.
+       rcu_read_lock_sched_held() for RCU-sched.
+       srcu_read_lock_held() for SRCU.
+
+These functions are conservative, and will therefore return 1 if they
+aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set).
+This prevents things like WARN_ON(!rcu_read_lock_held()) from giving false
+positives when lockdep is disabled.
+
+In addition, a separate kernel config parameter CONFIG_PROVE_RCU enables
+checking of rcu_dereference() primitives:
+
+       rcu_dereference(p):
+               Check for RCU read-side critical section.
+       rcu_dereference_bh(p):
+               Check for RCU-bh read-side critical section.
+       rcu_dereference_sched(p):
+               Check for RCU-sched read-side critical section.
+       srcu_dereference(p, sp):
+               Check for SRCU read-side critical section.
+       rcu_dereference_check(p, c):
+               Use explicit check expression "c" along with
+               rcu_read_lock_held().  This is useful in code that is
+               invoked by both RCU readers and updaters.
+       rcu_dereference_bh_check(p, c):
+               Use explicit check expression "c" along with
+               rcu_read_lock_bh_held().  This is useful in code that
+               is invoked by both RCU-bh readers and updaters.
+       rcu_dereference_sched_check(p, c):
+               Use explicit check expression "c" along with
+               rcu_read_lock_sched_held().  This is useful in code that
+               is invoked by both RCU-sched readers and updaters.
+       srcu_dereference_check(p, c):
+               Use explicit check expression "c" along with
+               srcu_read_lock_held().  This is useful in code that
+               is invoked by both SRCU readers and updaters.
+       rcu_dereference_raw(p):
+               Don't check.  (Use sparingly, if at all.)
+       rcu_dereference_protected(p, c):
+               Use explicit check expression "c", and omit all barriers
+               and compiler constraints.  This is useful when the data
+               structure cannot change, for example, in code that is
+               invoked only by updaters.
+       rcu_access_pointer(p):
+               Return the value of the pointer and omit all barriers,
+               but retain the compiler constraints that prevent duplicating
+               or coalescing.  This is useful when testing the
+               value of the pointer itself, for example, against NULL.
+
+The rcu_dereference_check() check expression can be any boolean
+expression, but would normally include a lockdep expression.  However,
+any boolean expression can be used.  For a moderately ornate example,
+consider the following::
+
+       file = rcu_dereference_check(fdt->fd[fd],
+                                    lockdep_is_held(&files->file_lock) ||
+                                    atomic_read(&files->count) == 1);
+
+This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
+and, if CONFIG_PROVE_RCU is configured, verifies that this expression
+is used in:
+
+1.     An RCU read-side critical section (implicit), or
+2.     with files->file_lock held, or
+3.     on an unshared files_struct.
+
+In case (1), the pointer is picked up in an RCU-safe manner for vanilla
+RCU read-side critical sections, in case (2) the ->file_lock prevents
+any change from taking place, and finally, in case (3) the current task
+is the only task accessing the files_struct, again preventing any change
+from taking place.  If the above statement was invoked only from updater
+code, it could instead be written as follows::
+
+       file = rcu_dereference_protected(fdt->fd[fd],
+                                        lockdep_is_held(&files->file_lock) ||
+                                        atomic_read(&files->count) == 1);
+
+This would verify cases #2 and #3 above, and furthermore lockdep would
+complain if this was used in an RCU read-side critical section unless one
+of these two cases held.  Because rcu_dereference_protected() omits all
+barriers and compiler constraints, it generates better code than do the
+other flavors of rcu_dereference().  On the other hand, it is illegal
+to use rcu_dereference_protected() if either the RCU-protected pointer
+or the RCU-protected data that it points to can change concurrently.
+
+Like rcu_dereference(), when lockdep is enabled, RCU list and hlist
+traversal primitives check for being called from within an RCU read-side
+critical section.  However, a lockdep expression can be passed to them
+as an additional optional argument.  With this lockdep expression, these
+traversal primitives will complain only if the lockdep expression is
+false and they are called from outside any RCU read-side critical section.
+
+For example, the workqueue for_each_pwq() macro is intended to be used
+either within an RCU read-side critical section or with wq->mutex held.
+It is thus implemented as follows::
+
+       #define for_each_pwq(pwq, wq)
+               list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,
+                                       lock_is_held(&(wq->mutex).dep_map))
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
deleted file mode 100644 (file)
index 89db949..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-RCU and lockdep checking
-
-All flavors of RCU have lockdep checking available, so that lockdep is
-aware of when each task enters and leaves any flavor of RCU read-side
-critical section.  Each flavor of RCU is tracked separately (but note
-that this is not the case in 2.6.32 and earlier).  This allows lockdep's
-tracking to include RCU state, which can sometimes help when debugging
-deadlocks and the like.
-
-In addition, RCU provides the following primitives that check lockdep's
-state:
-
-       rcu_read_lock_held() for normal RCU.
-       rcu_read_lock_bh_held() for RCU-bh.
-       rcu_read_lock_sched_held() for RCU-sched.
-       srcu_read_lock_held() for SRCU.
-
-These functions are conservative, and will therefore return 1 if they
-aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set).
-This prevents things like WARN_ON(!rcu_read_lock_held()) from giving false
-positives when lockdep is disabled.
-
-In addition, a separate kernel config parameter CONFIG_PROVE_RCU enables
-checking of rcu_dereference() primitives:
-
-       rcu_dereference(p):
-               Check for RCU read-side critical section.
-       rcu_dereference_bh(p):
-               Check for RCU-bh read-side critical section.
-       rcu_dereference_sched(p):
-               Check for RCU-sched read-side critical section.
-       srcu_dereference(p, sp):
-               Check for SRCU read-side critical section.
-       rcu_dereference_check(p, c):
-               Use explicit check expression "c" along with
-               rcu_read_lock_held().  This is useful in code that is
-               invoked by both RCU readers and updaters.
-       rcu_dereference_bh_check(p, c):
-               Use explicit check expression "c" along with
-               rcu_read_lock_bh_held().  This is useful in code that
-               is invoked by both RCU-bh readers and updaters.
-       rcu_dereference_sched_check(p, c):
-               Use explicit check expression "c" along with
-               rcu_read_lock_sched_held().  This is useful in code that
-               is invoked by both RCU-sched readers and updaters.
-       srcu_dereference_check(p, c):
-               Use explicit check expression "c" along with
-               srcu_read_lock_held()().  This is useful in code that
-               is invoked by both SRCU readers and updaters.
-       rcu_dereference_raw(p):
-               Don't check.  (Use sparingly, if at all.)
-       rcu_dereference_protected(p, c):
-               Use explicit check expression "c", and omit all barriers
-               and compiler constraints.  This is useful when the data
-               structure cannot change, for example, in code that is
-               invoked only by updaters.
-       rcu_access_pointer(p):
-               Return the value of the pointer and omit all barriers,
-               but retain the compiler constraints that prevent duplicating
-               or coalescsing.  This is useful when when testing the
-               value of the pointer itself, for example, against NULL.
-
-The rcu_dereference_check() check expression can be any boolean
-expression, but would normally include a lockdep expression.  However,
-any boolean expression can be used.  For a moderately ornate example,
-consider the following:
-
-       file = rcu_dereference_check(fdt->fd[fd],
-                                    lockdep_is_held(&files->file_lock) ||
-                                    atomic_read(&files->count) == 1);
-
-This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
-and, if CONFIG_PROVE_RCU is configured, verifies that this expression
-is used in:
-
-1.     An RCU read-side critical section (implicit), or
-2.     with files->file_lock held, or
-3.     on an unshared files_struct.
-
-In case (1), the pointer is picked up in an RCU-safe manner for vanilla
-RCU read-side critical sections, in case (2) the ->file_lock prevents
-any change from taking place, and finally, in case (3) the current task
-is the only task accessing the file_struct, again preventing any change
-from taking place.  If the above statement was invoked only from updater
-code, it could instead be written as follows:
-
-       file = rcu_dereference_protected(fdt->fd[fd],
-                                        lockdep_is_held(&files->file_lock) ||
-                                        atomic_read(&files->count) == 1);
-
-This would verify cases #2 and #3 above, and furthermore lockdep would
-complain if this was used in an RCU read-side critical section unless one
-of these two cases held.  Because rcu_dereference_protected() omits all
-barriers and compiler constraints, it generates better code than do the
-other flavors of rcu_dereference().  On the other hand, it is illegal
-to use rcu_dereference_protected() if either the RCU-protected pointer
-or the RCU-protected data that it points to can change concurrently.
-
-Like rcu_dereference(), when lockdep is enabled, RCU list and hlist
-traversal primitives check for being called from within an RCU read-side
-critical section.  However, a lockdep expression can be passed to them
-as a additional optional argument.  With this lockdep expression, these
-traversal primitives will complain only if the lockdep expression is
-false and they are called from outside any RCU read-side critical section.
-
-For example, the workqueue for_each_pwq() macro is intended to be used
-either within an RCU read-side critical section or with wq->mutex held.
-It is thus implemented as follows:
-
-       #define for_each_pwq(pwq, wq)
-               list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,
-                                       lock_is_held(&(wq->mutex).dep_map))
diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
new file mode 100644 (file)
index 0000000..a9fc774
--- /dev/null
@@ -0,0 +1,200 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================================
+Using RCU hlist_nulls to protect list and objects
+=================================================
+
+This section describes how to use hlist_nulls to
+protect read-mostly linked lists and
+objects using SLAB_TYPESAFE_BY_RCU allocations.
+
+Please read the basics in Documentation/RCU/listRCU.rst
+
+Using 'nulls'
+=============
+
+Using special markers (called 'nulls') is a convenient way
+to solve the following problem:
+
+A typical RCU linked list managing objects which are
+allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
+use following algos :
+
+1) Lookup algo
+--------------
+
+::
+
+  rcu_read_lock()
+  begin:
+  obj = lockless_lookup(key);
+  if (obj) {
+    if (!try_get_ref(obj)) // might fail for free objects
+      goto begin;
+    /*
+    * Because a writer could delete object, and a writer could
+    * reuse this object before the RCU grace period, we
+    * must check key after getting the reference on object
+    */
+    if (obj->key != key) { // not the object we expected
+      put_ref(obj);
+      goto begin;
+    }
+  }
+  rcu_read_unlock();
+
+Beware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu()
+but a version with an additional memory barrier (smp_rmb())
+
+::
+
+  lockless_lookup(key)
+  {
+    struct hlist_node *node, *next;
+    for (pos = rcu_dereference((head)->first);
+        pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
+        ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+        pos = rcu_dereference(next))
+      if (obj->key == key)
+        return obj;
+    return NULL;
+  }
+
+And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
+
+  struct hlist_node *node;
+  for (pos = rcu_dereference((head)->first);
+        pos && ({ prefetch(pos->next); 1; }) &&
+        ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+        pos = rcu_dereference(pos->next))
+   if (obj->key == key)
+     return obj;
+  return NULL;
+
+Quoting Corey Minyard::
+
+  "If the object is moved from one list to another list in-between the
+  time the hash is calculated and the next field is accessed, and the
+  object has moved to the end of a new list, the traversal will not
+  complete properly on the list it should have, since the object will
+  be on the end of the new list and there's not a way to tell it's on a
+  new list and restart the list traversal. I think that this can be
+  solved by pre-fetching the "next" field (with proper barriers) before
+  checking the key."
+
+2) Insert algo
+--------------
+
+We need to make sure a reader cannot read the new 'obj->obj_next' value
+and previous value of 'obj->key'. Or else, an item could be deleted
+from a chain, and inserted into another chain. If new chain was empty
+before the move, 'next' pointer is NULL, and lockless reader can
+not detect it missed following items in original chain.
+
+::
+
+  /*
+  * Please note that new inserts are done at the head of list,
+  * not in the middle or end.
+  */
+  obj = kmem_cache_alloc(...);
+  lock_chain(); // typically a spin_lock()
+  obj->key = key;
+  /*
+  * we need to make sure obj->key is updated before obj->next
+  * or obj->refcnt
+  */
+  smp_wmb();
+  atomic_set(&obj->refcnt, 1);
+  hlist_add_head_rcu(&obj->obj_node, list);
+  unlock_chain(); // typically a spin_unlock()
+
+
+3) Remove algo
+--------------
+Nothing special here, we can use a standard RCU hlist deletion.
+But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
+very very fast (before the end of RCU grace period)
+
+::
+
+  if (put_last_reference_on(obj)) {
+    lock_chain(); // typically a spin_lock()
+    hlist_del_init_rcu(&obj->obj_node);
+    unlock_chain(); // typically a spin_unlock()
+    kmem_cache_free(cachep, obj);
+  }
+
+
+
+--------------------------------------------------------------------------
+
+Avoiding extra smp_rmb()
+========================
+
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
+and extra smp_wmb() in insert function.
+
+For example, if we choose to store the slot number as the 'nulls'
+end-of-list marker for each slot of the hash table, we can detect
+a race (some writer did a delete and/or a move of an object
+to another chain) checking the final 'nulls' value if
+the lookup met the end of chain. If final 'nulls' value
+is not the slot number, then we must restart the lookup at
+the beginning. If the object was moved to the same chain,
+then the reader doesn't care : It might eventually
+scan the list again without harm.
+
+
+1) lookup algo
+--------------
+
+::
+
+  head = &table[slot];
+  rcu_read_lock();
+  begin:
+  hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
+    if (obj->key == key) {
+      if (!try_get_ref(obj)) // might fail for free objects
+        goto begin;
+      if (obj->key != key) { // not the object we expected
+        put_ref(obj);
+        goto begin;
+      }
+      goto out;
+    }
+  }
+  /*
+  * if the nulls value we got at the end of this lookup is
+  * not the expected one, we must restart lookup.
+  * We probably met an item that was moved to another chain.
+  */
+  if (get_nulls_value(node) != slot)
+  goto begin;
+  obj = NULL;
+
+  out:
+  rcu_read_unlock();
+
+2) Insert function
+------------------
+
+::
+
+  /*
+  * Please note that new inserts are done at the head of list,
+  * not in the middle or end.
+  */
+  obj = kmem_cache_alloc(cachep);
+  lock_chain(); // typically a spin_lock()
+  obj->key = key;
+  /*
+  * changes to obj->key must be visible before refcnt one
+  */
+  smp_wmb();
+  atomic_set(&obj->refcnt, 1);
+  /*
+  * insert obj in RCU way (readers might be traversing chain)
+  */
+  hlist_nulls_add_head_rcu(&obj->obj_node, list);
+  unlock_chain(); // typically a spin_unlock()
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
deleted file mode 100644 (file)
index 23f115d..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-Using hlist_nulls to protect read-mostly linked lists and
-objects using SLAB_TYPESAFE_BY_RCU allocations.
-
-Please read the basics in Documentation/RCU/listRCU.rst
-
-Using special makers (called 'nulls') is a convenient way
-to solve following problem :
-
-A typical RCU linked list managing objects which are
-allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
-use following algos :
-
-1) Lookup algo
---------------
-rcu_read_lock()
-begin:
-obj = lockless_lookup(key);
-if (obj) {
-  if (!try_get_ref(obj)) // might fail for free objects
-    goto begin;
-  /*
-   * Because a writer could delete object, and a writer could
-   * reuse these object before the RCU grace period, we
-   * must check key after getting the reference on object
-   */
-  if (obj->key != key) { // not the object we expected
-     put_ref(obj);
-     goto begin;
-   }
-}
-rcu_read_unlock();
-
-Beware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu()
-but a version with an additional memory barrier (smp_rmb())
-
-lockless_lookup(key)
-{
-   struct hlist_node *node, *next;
-   for (pos = rcu_dereference((head)->first);
-          pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
-          ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
-          pos = rcu_dereference(next))
-      if (obj->key == key)
-         return obj;
-   return NULL;
-
-And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb() :
-
-   struct hlist_node *node;
-   for (pos = rcu_dereference((head)->first);
-               pos && ({ prefetch(pos->next); 1; }) &&
-               ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
-               pos = rcu_dereference(pos->next))
-      if (obj->key == key)
-         return obj;
-   return NULL;
-}
-
-Quoting Corey Minyard :
-
-"If the object is moved from one list to another list in-between the
- time the hash is calculated and the next field is accessed, and the
- object has moved to the end of a new list, the traversal will not
- complete properly on the list it should have, since the object will
- be on the end of the new list and there's not a way to tell it's on a
- new list and restart the list traversal.  I think that this can be
- solved by pre-fetching the "next" field (with proper barriers) before
- checking the key."
-
-2) Insert algo :
-----------------
-
-We need to make sure a reader cannot read the new 'obj->obj_next' value
-and previous value of 'obj->key'. Or else, an item could be deleted
-from a chain, and inserted into another chain. If new chain was empty
-before the move, 'next' pointer is NULL, and lockless reader can
-not detect it missed following items in original chain.
-
-/*
- * Please note that new inserts are done at the head of list,
- * not in the middle or end.
- */
-obj = kmem_cache_alloc(...);
-lock_chain(); // typically a spin_lock()
-obj->key = key;
-/*
- * we need to make sure obj->key is updated before obj->next
- * or obj->refcnt
- */
-smp_wmb();
-atomic_set(&obj->refcnt, 1);
-hlist_add_head_rcu(&obj->obj_node, list);
-unlock_chain(); // typically a spin_unlock()
-
-
-3) Remove algo
---------------
-Nothing special here, we can use a standard RCU hlist deletion.
-But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
-very very fast (before the end of RCU grace period)
-
-if (put_last_reference_on(obj) {
-   lock_chain(); // typically a spin_lock()
-   hlist_del_init_rcu(&obj->obj_node);
-   unlock_chain(); // typically a spin_unlock()
-   kmem_cache_free(cachep, obj);
-}
-
-
-
---------------------------------------------------------------------------
-With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
-and extra smp_wmb() in insert function.
-
-For example, if we choose to store the slot number as the 'nulls'
-end-of-list marker for each slot of the hash table, we can detect
-a race (some writer did a delete and/or a move of an object
-to another chain) checking the final 'nulls' value if
-the lookup met the end of chain. If final 'nulls' value
-is not the slot number, then we must restart the lookup at
-the beginning. If the object was moved to the same chain,
-then the reader doesn't care : It might eventually
-scan the list again without harm.
-
-
-1) lookup algo
-
- head = &table[slot];
- rcu_read_lock();
-begin:
- hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
-   if (obj->key == key) {
-      if (!try_get_ref(obj)) // might fail for free objects
-         goto begin;
-      if (obj->key != key) { // not the object we expected
-         put_ref(obj);
-         goto begin;
-      }
-  goto out;
- }
-/*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
-   goto begin;
- obj = NULL;
-
-out:
- rcu_read_unlock();
-
-2) Insert function :
---------------------
-
-/*
- * Please note that new inserts are done at the head of list,
- * not in the middle or end.
- */
-obj = kmem_cache_alloc(cachep);
-lock_chain(); // typically a spin_lock()
-obj->key = key;
-/*
- * changes to obj->key must be visible before refcnt one
- */
-smp_wmb();
-atomic_set(&obj->refcnt, 1);
-/*
- * insert obj in RCU way (readers might be traversing chain)
- */
-hlist_nulls_add_head_rcu(&obj->obj_node, list);
-unlock_chain(); // typically a spin_unlock()
diff --git a/Documentation/RCU/rcuref.rst b/Documentation/RCU/rcuref.rst
new file mode 100644 (file)
index 0000000..b33aeb1
--- /dev/null
@@ -0,0 +1,158 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================================================
+Reference-count design for elements of lists/arrays protected by RCU
+====================================================================
+
+
+Please note that the percpu-ref feature is likely your first
+stop if you need to combine reference counts and RCU.  Please see
+include/linux/percpu-refcount.h for more information.  However, in
+those unusual cases where percpu-ref would consume too much memory,
+please read on.
+
+------------------------------------------------------------------------
+
+Reference counting on elements of lists which are protected by traditional
+reader/writer spinlocks or semaphores is straightforward:
+
+CODE LISTING A::
+
+    1.                                     2.
+    add()                                  search_and_reference()
+    {                                      {
+       alloc_object                            read_lock(&list_lock);
+       ...                                     search_for_element
+       atomic_set(&el->rc, 1);                 atomic_inc(&el->rc);
+       write_lock(&list_lock);                  ...
+       add_element                             read_unlock(&list_lock);
+       ...                                     ...
+       write_unlock(&list_lock);          }
+    }
+
+    3.                                     4.
+    release_referenced()                   delete()
+    {                                      {
+       ...                                     write_lock(&list_lock);
+       if(atomic_dec_and_test(&el->rc))        ...
+           kfree(el);
+       ...                                     remove_element
+    }                                          write_unlock(&list_lock);
+                                               ...
+                                               if (atomic_dec_and_test(&el->rc))
+                                                   kfree(el);
+                                               ...
+                                           }
+
+If this list/array is made lock free using RCU as in changing the
+write_lock() in add() and delete() to spin_lock() and changing read_lock()
+in search_and_reference() to rcu_read_lock(), the atomic_inc() in
+search_and_reference() could potentially hold reference to an element which
+has already been deleted from the list/array.  Use atomic_inc_not_zero()
+in this scenario as follows:
+
+CODE LISTING B::
+
+    1.                                     2.
+    add()                                  search_and_reference()
+    {                                      {
+       alloc_object                            rcu_read_lock();
+       ...                                     search_for_element
+       atomic_set(&el->rc, 1);                 if (!atomic_inc_not_zero(&el->rc)) {
+       spin_lock(&list_lock);                      rcu_read_unlock();
+                                                   return FAIL;
+       add_element                             }
+       ...                                     ...
+       spin_unlock(&list_lock);                rcu_read_unlock();
+    }                                      }
+    3.                                     4.
+    release_referenced()                   delete()
+    {                                      {
+       ...                                     spin_lock(&list_lock);
+       if (atomic_dec_and_test(&el->rc))       ...
+           call_rcu(&el->head, el_free);       remove_element
+       ...                                     spin_unlock(&list_lock);
+    }                                          ...
+                                               if (atomic_dec_and_test(&el->rc))
+                                                   call_rcu(&el->head, el_free);
+                                               ...
+                                           }
+
+Sometimes, a reference to the element needs to be obtained in the
+update (write) stream. In such cases, atomic_inc_not_zero() might be
+overkill, since we hold the update-side spinlock.  One might instead
+use atomic_inc() in such cases.
+
+It is not always convenient to deal with "FAIL" in the
+search_and_reference() code path.  In such cases, the
+atomic_dec_and_test() may be moved from delete() to el_free()
+as follows:
+
+CODE LISTING C::
+
+    1.                                     2.
+    add()                                  search_and_reference()
+    {                                      {
+       alloc_object                            rcu_read_lock();
+       ...                                     search_for_element
+       atomic_set(&el->rc, 1);                 atomic_inc(&el->rc);
+       spin_lock(&list_lock);                  ...
+
+       add_element                             rcu_read_unlock();
+       ...                                 }
+       spin_unlock(&list_lock);            4.
+    }                                      delete()
+    3.                                     {
+    release_referenced()                       spin_lock(&list_lock);
+    {                                          ...
+       ...                                     remove_element
+       if (atomic_dec_and_test(&el->rc))       spin_unlock(&list_lock);
+           kfree(el);                          ...
+       ...                                     call_rcu(&el->head, el_free);
+    }                                          ...
+    5.                                     }
+    void el_free(struct rcu_head *rhp)
+    {
+       release_referenced();
+    }
+
+The key point is that the initial reference added by add() is not removed
+until after a grace period has elapsed following removal.  This means that
+search_and_reference() cannot find this element, which means that the value
+of el->rc cannot increase.  Thus, once it reaches zero, there are no
+readers that can or ever will be able to reference the element.  The
+element can therefore safely be freed. This in turn guarantees that if
+any reader finds the element, that reader may safely acquire a reference
+without checking the value of the reference counter.
+
+A clear advantage of the RCU-based pattern in listing C over the one
+in listing B is that any call to search_and_reference() that locates
+a given object will succeed in obtaining a reference to that object,
+even given a concurrent invocation of delete() for that same object.
+Similarly, a clear advantage of both listings B and C over listing A is
+that a call to delete() is not delayed even if there are an arbitrarily
+large number of calls to search_and_reference() searching for the same
+object that delete() was invoked on.  Instead, all that is delayed is
+the eventual invocation of kfree(), which is usually not a problem on
+modern computer systems, even the small ones.
+
+In cases where delete() can sleep, synchronize_rcu() can be called from
+delete(), so that el_free() can be subsumed into delete() as follows::
+
+    4.
+    delete()
+    {
+       spin_lock(&list_lock);
+       ...
+       remove_element
+       spin_unlock(&list_lock);
+       ...
+       synchronize_rcu();
+       if (atomic_dec_and_test(&el->rc))
+           kfree(el);
+       ...
+    }
+
+As additional examples in the kernel, the pattern in listing C is used by
+reference counting of struct pid, while the pattern in listing B is used by
+struct posix_acl.
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
deleted file mode 100644 (file)
index 5e6429d..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-Reference-count design for elements of lists/arrays protected by RCU.
-
-
-Please note that the percpu-ref feature is likely your first
-stop if you need to combine reference counts and RCU.  Please see
-include/linux/percpu-refcount.h for more information.  However, in
-those unusual cases where percpu-ref would consume too much memory,
-please read on.
-
-------------------------------------------------------------------------
-
-Reference counting on elements of lists which are protected by traditional
-reader/writer spinlocks or semaphores are straightforward:
-
-CODE LISTING A:
-1.                             2.
-add()                          search_and_reference()
-{                              {
-    alloc_object                   read_lock(&list_lock);
-    ...                                    search_for_element
-    atomic_set(&el->rc, 1);        atomic_inc(&el->rc);
-    write_lock(&list_lock);         ...
-    add_element                            read_unlock(&list_lock);
-    ...                                    ...
-    write_unlock(&list_lock);  }
-}
-
-3.                                     4.
-release_referenced()                   delete()
-{                                      {
-    ...                                            write_lock(&list_lock);
-    if(atomic_dec_and_test(&el->rc))       ...
-       kfree(el);
-    ...                                            remove_element
-}                                          write_unlock(&list_lock);
-                                           ...
-                                           if (atomic_dec_and_test(&el->rc))
-                                               kfree(el);
-                                           ...
-                                       }
-
-If this list/array is made lock free using RCU as in changing the
-write_lock() in add() and delete() to spin_lock() and changing read_lock()
-in search_and_reference() to rcu_read_lock(), the atomic_inc() in
-search_and_reference() could potentially hold reference to an element which
-has already been deleted from the list/array.  Use atomic_inc_not_zero()
-in this scenario as follows:
-
-CODE LISTING B:
-1.                                     2.
-add()                                  search_and_reference()
-{                                      {
-    alloc_object                           rcu_read_lock();
-    ...                                            search_for_element
-    atomic_set(&el->rc, 1);                if (!atomic_inc_not_zero(&el->rc)) {
-    spin_lock(&list_lock);                     rcu_read_unlock();
-                                               return FAIL;
-    add_element                                    }
-    ...                                            ...
-    spin_unlock(&list_lock);               rcu_read_unlock();
-}                                      }
-3.                                     4.
-release_referenced()                   delete()
-{                                      {
-    ...                                            spin_lock(&list_lock);
-    if (atomic_dec_and_test(&el->rc))       ...
-        call_rcu(&el->head, el_free);       remove_element
-    ...                                     spin_unlock(&list_lock);
-}                                          ...
-                                           if (atomic_dec_and_test(&el->rc))
-                                               call_rcu(&el->head, el_free);
-                                           ...
-                                       }
-
-Sometimes, a reference to the element needs to be obtained in the
-update (write) stream.  In such cases, atomic_inc_not_zero() might be
-overkill, since we hold the update-side spinlock.  One might instead
-use atomic_inc() in such cases.
-
-It is not always convenient to deal with "FAIL" in the
-search_and_reference() code path.  In such cases, the
-atomic_dec_and_test() may be moved from delete() to el_free()
-as follows:
-
-CODE LISTING C:
-1.                                     2.
-add()                                  search_and_reference()
-{                                      {
-    alloc_object                           rcu_read_lock();
-    ...                                            search_for_element
-    atomic_set(&el->rc, 1);                atomic_inc(&el->rc);
-    spin_lock(&list_lock);                 ...
-
-    add_element                                    rcu_read_unlock();
-    ...                                        }
-    spin_unlock(&list_lock);           4.
-}                                      delete()
-3.                                     {
-release_referenced()                       spin_lock(&list_lock);
-{                                          ...
-    ...                                            remove_element
-    if (atomic_dec_and_test(&el->rc))       spin_unlock(&list_lock);
-        kfree(el);                         ...
-    ...                                     call_rcu(&el->head, el_free);
-}                                          ...
-5.                                     }
-void el_free(struct rcu_head *rhp)
-{
-    release_referenced();
-}
-
-The key point is that the initial reference added by add() is not removed
-until after a grace period has elapsed following removal.  This means that
-search_and_reference() cannot find this element, which means that the value
-of el->rc cannot increase.  Thus, once it reaches zero, there are no
-readers that can or ever will be able to reference the element.  The
-element can therefore safely be freed.  This in turn guarantees that if
-any reader finds the element, that reader may safely acquire a reference
-without checking the value of the reference counter.
-
-A clear advantage of the RCU-based pattern in listing C over the one
-in listing B is that any call to search_and_reference() that locates
-a given object will succeed in obtaining a reference to that object,
-even given a concurrent invocation of delete() for that same object.
-Similarly, a clear advantage of both listings B and C over listing A is
-that a call to delete() is not delayed even if there are an arbitrarily
-large number of calls to search_and_reference() searching for the same
-object that delete() was invoked on.  Instead, all that is delayed is
-the eventual invocation of kfree(), which is usually not a problem on
-modern computer systems, even the small ones.
-
-In cases where delete() can sleep, synchronize_rcu() can be called from
-delete(), so that el_free() can be subsumed into delete as follows:
-
-4.
-delete()
-{
-    spin_lock(&list_lock);
-    ...
-    remove_element
-    spin_unlock(&list_lock);
-    ...
-    synchronize_rcu();
-    if (atomic_dec_and_test(&el->rc))
-       kfree(el);
-    ...
-}
-
-As additional examples in the kernel, the pattern in listing C is used by
-reference counting of struct pid, while the pattern in listing B is used by
-struct posix_acl.
diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
new file mode 100644 (file)
index 0000000..c9ab6af
--- /dev/null
@@ -0,0 +1,336 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Using RCU's CPU Stall Detector
+==============================
+
+This document first discusses what sorts of issues RCU's CPU stall
+detector can locate, and then discusses kernel parameters and Kconfig
+options that can be used to fine-tune the detector's operation.  Finally,
+this document explains the stall detector's "splat" format.
+
+
+What Causes RCU CPU Stall Warnings?
+===================================
+
+So your kernel printed an RCU CPU stall warning.  The next question is
+"What caused it?"  The following problems can result in RCU CPU stall
+warnings:
+
+-      A CPU looping in an RCU read-side critical section.
+
+-      A CPU looping with interrupts disabled.
+
+-      A CPU looping with preemption disabled.
+
+-      A CPU looping with bottom halves disabled.
+
+-      For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
+       without invoking schedule().  If the looping in the kernel is
+       really expected and desirable behavior, you might need to add
+       some calls to cond_resched().
+
+-      Booting Linux using a console connection that is too slow to
+       keep up with the boot-time console-message rate.  For example,
+       a 115Kbaud serial console can be -way- too slow to keep up
+       with boot-time message rates, and will frequently result in
+       RCU CPU stall warning messages.  Especially if you have added
+       debug printk()s.
+
+-      Anything that prevents RCU's grace-period kthreads from running.
+       This can result in the "All QSes seen" console-log message.
+       This message will include information on when the kthread last
+       ran and how often it should be expected to run.  It can also
+       result in the ``rcu_.*kthread starved for`` console-log message,
+       which will include additional debugging information.
+
+-      A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
+       happen to preempt a low-priority task in the middle of an RCU
+       read-side critical section.   This is especially damaging if
+       that low-priority task is not permitted to run on any other CPU,
+       in which case the next RCU grace period can never complete, which
+       will eventually cause the system to run out of memory and hang.
+       While the system is in the process of running itself out of
+       memory, you might see stall-warning messages.
+
+-      A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
+       is running at a higher priority than the RCU softirq threads.
+       This will prevent RCU callbacks from ever being invoked,
+       and in a CONFIG_PREEMPT_RCU kernel will further prevent
+       RCU grace periods from ever completing.  Either way, the
+       system will eventually run out of memory and hang.  In the
+       CONFIG_PREEMPT_RCU case, you might see stall-warning
+       messages.
+
+       You can use the rcutree.kthread_prio kernel boot parameter to
+       increase the scheduling priority of RCU's kthreads, which can
+       help avoid this problem.  However, please note that doing this
+       can increase your system's context-switch rate and thus degrade
+       performance.
+
+-      A periodic interrupt whose handler takes longer than the time
+       interval between successive pairs of interrupts.  This can
+       prevent RCU's kthreads and softirq handlers from running.
+       Note that certain high-overhead debugging options, for example
+       the function_graph tracer, can result in interrupt handlers taking
+       considerably longer than normal, which can in turn result in
+       RCU CPU stall warnings.
+
+-      Testing a workload on a fast system, tuning the stall-warning
+       timeout down to just barely avoid RCU CPU stall warnings, and then
+       running the same workload with the same stall-warning timeout on a
+       slow system.  Note that thermal throttling and on-demand governors
+       can cause a single system to be sometimes fast and sometimes slow!
+
+-      A hardware or software issue shuts off the scheduler-clock
+       interrupt on a CPU that is not in dyntick-idle mode.  This
+       problem really has happened, and seems to be most likely to
+       result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
+
+-      A hardware or software issue that prevents time-based wakeups
+       from occurring.  These issues can range from misconfigured or
+       buggy timer hardware through bugs in the interrupt or exception
+       path (whether hardware, firmware, or software) through bugs
+       in Linux's timer subsystem through bugs in the scheduler, and,
+       yes, even including bugs in RCU itself.
+
+-      A bug in the RCU implementation.
+
+-      A hardware failure.  This is quite unlikely, but has occurred
+       at least once in real life.  A CPU failed in a running system,
+       becoming unresponsive, but not causing an immediate crash.
+       This resulted in a series of RCU CPU stall warnings, eventually
+       leading to the realization that the CPU had failed.
+
+The RCU, RCU-sched, and RCU-tasks implementations have CPU stall warnings.
+Note that SRCU does -not- have CPU stall warnings.  Please note that
+RCU only detects CPU stalls when there is a grace period in progress.
+No grace period, no CPU stall warnings.
+
+To diagnose the cause of the stall, inspect the stack traces.
+The offending function will usually be near the top of the stack.
+If you have a series of stall warnings from a single extended stall,
+comparing the stack traces can often help determine where the stall
+is occurring, which will usually be in the function nearest the top of
+that portion of the stack which remains the same from trace to trace.
+If you can reliably trigger the stall, ftrace can be quite helpful.
+
+RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
+and with RCU's event tracing.  For information on RCU's event tracing,
+see include/trace/events/rcu.h.
+
+
+Fine-Tuning the RCU CPU Stall Detector
+======================================
+
+The rcupdate.rcu_cpu_stall_suppress module parameter disables RCU's
+CPU stall detector, which detects conditions that unduly delay RCU grace
+periods.  This module parameter enables CPU stall detection by default,
+but may be overridden via boot-time parameter or at runtime via sysfs.
+The stall detector's idea of what constitutes "unduly delayed" is
+controlled by a set of kernel configuration variables and cpp macros:
+
+CONFIG_RCU_CPU_STALL_TIMEOUT
+----------------------------
+
+       This kernel configuration parameter defines the period of time
+       that RCU will wait from the beginning of a grace period until it
+       issues an RCU CPU stall warning.  This time period is normally
+       21 seconds.
+
+       This configuration parameter may be changed at runtime via
+       /sys/module/rcupdate/parameters/rcu_cpu_stall_timeout; however,
+       this parameter is checked only at the beginning of a cycle.
+       So if you are 10 seconds into a 40-second stall, setting this
+       sysfs parameter to (say) five will shorten the timeout for the
+       -next- stall, or the following warning for the current stall
+       (assuming the stall lasts long enough).  It will not affect the
+       timing of the next warning for the current stall.
+
+       Stall-warning messages may be enabled and disabled completely via
+       /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
+RCU_STALL_DELAY_DELTA
+---------------------
+
+       Although the lockdep facility is extremely useful, it does add
+       some overhead.  Therefore, under CONFIG_PROVE_RCU, the
+       RCU_STALL_DELAY_DELTA macro allows five extra seconds before
+       giving an RCU CPU stall warning message.  (This is a cpp
+       macro, not a kernel configuration parameter.)
+
+RCU_STALL_RAT_DELAY
+-------------------
+
+       The CPU stall detector tries to make the offending CPU print its
+       own warnings, as this often gives better-quality stack traces.
+       However, if the offending CPU does not detect its own stall in
+       the number of jiffies specified by RCU_STALL_RAT_DELAY, then
+       some other CPU will complain.  This delay is normally set to
+       two jiffies.  (This is a cpp macro, not a kernel configuration
+       parameter.)
+
+rcupdate.rcu_task_stall_timeout
+-------------------------------
+
+       This boot/sysfs parameter controls the RCU-tasks stall warning
+       interval.  A value of zero or less suppresses RCU-tasks stall
+       warnings.  A positive value sets the stall-warning interval
+       in seconds.  An RCU-tasks stall warning starts with the line:
+
+               INFO: rcu_tasks detected stalls on tasks:
+
+       And continues with the output of sched_show_task() for each
+       task stalling the current RCU-tasks grace period.
+
+
+Interpreting RCU's CPU Stall-Detector "Splats"
+==============================================
+
+For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
+it will print a message similar to the following::
+
+       INFO: rcu_sched detected stalls on CPUs/tasks:
+       2-...: (3 GPs behind) idle=06c/0/0 softirq=1453/1455 fqs=0
+       16-...: (0 ticks this GP) idle=81c/0/0 softirq=764/764 fqs=0
+       (detected by 32, t=2603 jiffies, g=7075, q=625)
+
+This message indicates that CPU 32 detected that CPUs 2 and 16 were both
+causing stalls, and that the stall was affecting RCU-sched.  This message
+will normally be followed by stack dumps for each CPU.  Please note that
+PREEMPT_RCU builds can be stalled by tasks as well as by CPUs, and that
+the tasks will be indicated by PID, for example, "P3421".  It is even
+possible for an rcu_state stall to be caused by both CPUs -and- tasks,
+in which case the offending CPUs and tasks will all be called out in the list.
+
+CPU 2's "(3 GPs behind)" indicates that this CPU has not interacted with
+the RCU core for the past three grace periods.  In contrast, CPU 16's "(0
+ticks this GP)" indicates that this CPU has not taken any scheduling-clock
+interrupts during the current stalled grace period.
+
+The "idle=" portion of the message prints the dyntick-idle state.
+The hex number before the first "/" is the low-order 12 bits of the
+dynticks counter, which will have an even-numbered value if the CPU
+is in dyntick-idle mode and an odd-numbered value otherwise.  The hex
+number between the two "/"s is the value of the nesting, which will be
+a small non-negative number if in the idle loop (as shown above) and a
+very large positive number otherwise.
+
+The "softirq=" portion of the message tracks the number of RCU softirq
+handlers that the stalled CPU has executed.  The number before the "/"
+is the number that had executed since boot at the time that this CPU
+last noted the beginning of a grace period, which might be the current
+(stalled) grace period, or it might be some earlier grace period (for
+example, if the CPU might have been in dyntick-idle mode for an extended
+time period).  The number after the "/" is the number that have executed
+since boot until the current time.  If this latter number stays constant
+across repeated stall-warning messages, it is possible that RCU's softirq
+handlers are no longer able to execute on this CPU.  This can happen if
+the stalled CPU is spinning with interrupts disabled, or, in -rt
+kernels, if a high-priority process is starving RCU's softirq handler.
+
+The "fqs=" shows the number of force-quiescent-state idle/offline
+detection passes that the grace-period kthread has made across this
+CPU since the last time that this CPU noted the beginning of a grace
+period.
+
+The "detected by" line indicates which CPU detected the stall (in this
+case, CPU 32), how many jiffies have elapsed since the start of the grace
+period (in this case 2603), the grace-period sequence number (7075), and
+an estimate of the total number of RCU callbacks queued across all CPUs
+(625 in this case).
+
+In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed
+for each CPU::
+
+       0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 dyntick_enabled: 1
+
+The "last_accelerate:" prints the low-order 16 bits (in hex) of the
+jiffies counter when this CPU last invoked rcu_try_advance_all_cbs()
+from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from
+rcu_prepare_for_idle(). "dyntick_enabled: 1" indicates that dyntick-idle
+processing is enabled.
+
+If the grace period ends just as the stall warning starts printing,
+there will be a spurious stall-warning message, which will include
+the following::
+
+       INFO: Stall ended before state dump start
+
+This is rare, but does happen from time to time in real life.  It is also
+possible for a zero-jiffy stall to be flagged in this case, depending
+on how the stall warning and the grace-period initialization happen to
+interact.  Please note that it is not possible to entirely eliminate this
+sort of false positive without resorting to things like stop_machine(),
+which is overkill for this sort of problem.
+
+If all CPUs and tasks have passed through quiescent states, but the
+grace period has nevertheless failed to end, the stall-warning splat
+will include something like the following::
+
+       All QSes seen, last rcu_preempt kthread activity 23807 (4297905177-4297881370), jiffies_till_next_fqs=3, root ->qsmask 0x0
+
+The "23807" indicates that it has been more than 23 thousand jiffies
+since the grace-period kthread ran.  The "jiffies_till_next_fqs"
+indicates how frequently that kthread should run, giving the number
+of jiffies between force-quiescent-state scans, in this case three,
+which is way less than 23807.  Finally, the root rcu_node structure's
+->qsmask field is printed, which will normally be zero.
+
+If the relevant grace-period kthread has been unable to run prior to
+the stall warning, as was the case in the "All QSes seen" line above,
+the following additional line is printed::
+
+       kthread starved for 23807 jiffies! g7075 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
+
+Starving the grace-period kthreads of CPU time can of course result
+in RCU CPU stall warnings even when all CPUs and tasks have passed
+through the required quiescent states.  The "g" number shows the current
+grace-period sequence number, the "f" precedes the ->gp_flags command
+to the grace-period kthread, the "RCU_GP_WAIT_FQS" indicates that the
+kthread is waiting for a short timeout, the "state" precedes the value of the
+task_struct ->state field, and the "cpu" indicates that the grace-period
+kthread last ran on CPU 5.
+
+
+Multiple Warnings From One Stall
+================================
+
+If a stall lasts long enough, multiple stall-warning messages will be
+printed for it.  The second and subsequent messages are printed at
+longer intervals, so that the time between (say) the first and second
+message will be about three times the interval between the beginning
+of the stall and the first message.
+
+
+Stall Warnings for Expedited Grace Periods
+==========================================
+
+If an expedited grace period detects a stall, it will place a message
+like the following in dmesg::
+
+       INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 7-... } 21119 jiffies s: 73 root: 0x2/.
+
+This indicates that CPU 7 has failed to respond to a reschedule IPI.
+The three periods (".") following the CPU number indicate that the CPU
+is online (otherwise the first period would instead have been "O"),
+that the CPU was online at the beginning of the expedited grace period
+(otherwise the second period would have instead been "o"), and that
+the CPU has been online at least once since boot (otherwise, the third
+period would instead have been "N").  The number before the "jiffies"
+indicates that the expedited grace period has been going on for 21,119
+jiffies.  The number following the "s:" indicates that the expedited
+grace-period sequence counter is 73.  The fact that this last value is
+odd indicates that an expedited grace period is in flight.  The number
+following "root:" is a bitmask that indicates which children of the root
+rcu_node structure correspond to CPUs and/or tasks that are blocking the
+current expedited grace period.  If the tree had more than one level,
+additional hex numbers would be printed for the states of the other
+rcu_node structures in the tree.
+
+As with normal grace periods, PREEMPT_RCU builds can be stalled by
+tasks as well as by CPUs, and the tasks will be indicated by PID,
+for example, "P3421".
+
+It is entirely possible to see stall warnings from normal and from
+expedited grace periods at about the same time during the same run.
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
deleted file mode 100644 (file)
index a360a87..0000000
+++ /dev/null
@@ -1,316 +0,0 @@
-Using RCU's CPU Stall Detector
-
-This document first discusses what sorts of issues RCU's CPU stall
-detector can locate, and then discusses kernel parameters and Kconfig
-options that can be used to fine-tune the detector's operation.  Finally,
-this document explains the stall detector's "splat" format.
-
-
-What Causes RCU CPU Stall Warnings?
-
-So your kernel printed an RCU CPU stall warning.  The next question is
-"What caused it?"  The following problems can result in RCU CPU stall
-warnings:
-
-o      A CPU looping in an RCU read-side critical section.
-
-o      A CPU looping with interrupts disabled.
-
-o      A CPU looping with preemption disabled.
-
-o      A CPU looping with bottom halves disabled.
-
-o      For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
-       without invoking schedule().  If the looping in the kernel is
-       really expected and desirable behavior, you might need to add
-       some calls to cond_resched().
-
-o      Booting Linux using a console connection that is too slow to
-       keep up with the boot-time console-message rate.  For example,
-       a 115Kbaud serial console can be -way- too slow to keep up
-       with boot-time message rates, and will frequently result in
-       RCU CPU stall warning messages.  Especially if you have added
-       debug printk()s.
-
-o      Anything that prevents RCU's grace-period kthreads from running.
-       This can result in the "All QSes seen" console-log message.
-       This message will include information on when the kthread last
-       ran and how often it should be expected to run.  It can also
-       result in the "rcu_.*kthread starved for" console-log message,
-       which will include additional debugging information.
-
-o      A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
-       happen to preempt a low-priority task in the middle of an RCU
-       read-side critical section.   This is especially damaging if
-       that low-priority task is not permitted to run on any other CPU,
-       in which case the next RCU grace period can never complete, which
-       will eventually cause the system to run out of memory and hang.
-       While the system is in the process of running itself out of
-       memory, you might see stall-warning messages.
-
-o      A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
-       is running at a higher priority than the RCU softirq threads.
-       This will prevent RCU callbacks from ever being invoked,
-       and in a CONFIG_PREEMPT_RCU kernel will further prevent
-       RCU grace periods from ever completing.  Either way, the
-       system will eventually run out of memory and hang.  In the
-       CONFIG_PREEMPT_RCU case, you might see stall-warning
-       messages.
-
-       You can use the rcutree.kthread_prio kernel boot parameter to
-       increase the scheduling priority of RCU's kthreads, which can
-       help avoid this problem.  However, please note that doing this
-       can increase your system's context-switch rate and thus degrade
-       performance.
-
-o      A periodic interrupt whose handler takes longer than the time
-       interval between successive pairs of interrupts.  This can
-       prevent RCU's kthreads and softirq handlers from running.
-       Note that certain high-overhead debugging options, for example
-       the function_graph tracer, can result in interrupt handler taking
-       considerably longer than normal, which can in turn result in
-       RCU CPU stall warnings.
-
-o      Testing a workload on a fast system, tuning the stall-warning
-       timeout down to just barely avoid RCU CPU stall warnings, and then
-       running the same workload with the same stall-warning timeout on a
-       slow system.  Note that thermal throttling and on-demand governors
-       can cause a single system to be sometimes fast and sometimes slow!
-
-o      A hardware or software issue shuts off the scheduler-clock
-       interrupt on a CPU that is not in dyntick-idle mode.  This
-       problem really has happened, and seems to be most likely to
-       result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
-
-o      A bug in the RCU implementation.
-
-o      A hardware failure.  This is quite unlikely, but has occurred
-       at least once in real life.  A CPU failed in a running system,
-       becoming unresponsive, but not causing an immediate crash.
-       This resulted in a series of RCU CPU stall warnings, eventually
-       leading the realization that the CPU had failed.
-
-The RCU, RCU-sched, and RCU-tasks implementations have CPU stall warning.
-Note that SRCU does -not- have CPU stall warnings.  Please note that
-RCU only detects CPU stalls when there is a grace period in progress.
-No grace period, no CPU stall warnings.
-
-To diagnose the cause of the stall, inspect the stack traces.
-The offending function will usually be near the top of the stack.
-If you have a series of stall warnings from a single extended stall,
-comparing the stack traces can often help determine where the stall
-is occurring, which will usually be in the function nearest the top of
-that portion of the stack which remains the same from trace to trace.
-If you can reliably trigger the stall, ftrace can be quite helpful.
-
-RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
-and with RCU's event tracing.  For information on RCU's event tracing,
-see include/trace/events/rcu.h.
-
-
-Fine-Tuning the RCU CPU Stall Detector
-
-The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
-CPU stall detector, which detects conditions that unduly delay RCU grace
-periods.  This module parameter enables CPU stall detection by default,
-but may be overridden via boot-time parameter or at runtime via sysfs.
-The stall detector's idea of what constitutes "unduly delayed" is
-controlled by a set of kernel configuration variables and cpp macros:
-
-CONFIG_RCU_CPU_STALL_TIMEOUT
-
-       This kernel configuration parameter defines the period of time
-       that RCU will wait from the beginning of a grace period until it
-       issues an RCU CPU stall warning.  This time period is normally
-       21 seconds.
-
-       This configuration parameter may be changed at runtime via the
-       /sys/module/rcupdate/parameters/rcu_cpu_stall_timeout, however
-       this parameter is checked only at the beginning of a cycle.
-       So if you are 10 seconds into a 40-second stall, setting this
-       sysfs parameter to (say) five will shorten the timeout for the
-       -next- stall, or the following warning for the current stall
-       (assuming the stall lasts long enough).  It will not affect the
-       timing of the next warning for the current stall.
-
-       Stall-warning messages may be enabled and disabled completely via
-       /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
-
-RCU_STALL_DELAY_DELTA
-
-       Although the lockdep facility is extremely useful, it does add
-       some overhead.  Therefore, under CONFIG_PROVE_RCU, the
-       RCU_STALL_DELAY_DELTA macro allows five extra seconds before
-       giving an RCU CPU stall warning message.  (This is a cpp
-       macro, not a kernel configuration parameter.)
-
-RCU_STALL_RAT_DELAY
-
-       The CPU stall detector tries to make the offending CPU print its
-       own warnings, as this often gives better-quality stack traces.
-       However, if the offending CPU does not detect its own stall in
-       the number of jiffies specified by RCU_STALL_RAT_DELAY, then
-       some other CPU will complain.  This delay is normally set to
-       two jiffies.  (This is a cpp macro, not a kernel configuration
-       parameter.)
-
-rcupdate.rcu_task_stall_timeout
-
-       This boot/sysfs parameter controls the RCU-tasks stall warning
-       interval.  A value of zero or less suppresses RCU-tasks stall
-       warnings.  A positive value sets the stall-warning interval
-       in seconds.  An RCU-tasks stall warning starts with the line:
-
-               INFO: rcu_tasks detected stalls on tasks:
-
-       And continues with the output of sched_show_task() for each
-       task stalling the current RCU-tasks grace period.
-
-
-Interpreting RCU's CPU Stall-Detector "Splats"
-
-For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
-it will print a message similar to the following:
-
-       INFO: rcu_sched detected stalls on CPUs/tasks:
-       2-...: (3 GPs behind) idle=06c/0/0 softirq=1453/1455 fqs=0
-       16-...: (0 ticks this GP) idle=81c/0/0 softirq=764/764 fqs=0
-       (detected by 32, t=2603 jiffies, g=7075, q=625)
-
-This message indicates that CPU 32 detected that CPUs 2 and 16 were both
-causing stalls, and that the stall was affecting RCU-sched.  This message
-will normally be followed by stack dumps for each CPU.  Please note that
-PREEMPT_RCU builds can be stalled by tasks as well as by CPUs, and that
-the tasks will be indicated by PID, for example, "P3421".  It is even
-possible for an rcu_state stall to be caused by both CPUs -and- tasks,
-in which case the offending CPUs and tasks will all be called out in the list.
-
-CPU 2's "(3 GPs behind)" indicates that this CPU has not interacted with
-the RCU core for the past three grace periods.  In contrast, CPU 16's "(0
-ticks this GP)" indicates that this CPU has not taken any scheduling-clock
-interrupts during the current stalled grace period.
-
-The "idle=" portion of the message prints the dyntick-idle state.
-The hex number before the first "/" is the low-order 12 bits of the
-dynticks counter, which will have an even-numbered value if the CPU
-is in dyntick-idle mode and an odd-numbered value otherwise.  The hex
-number between the two "/"s is the value of the nesting, which will be
-a small non-negative number if in the idle loop (as shown above) and a
-very large positive number otherwise.
-
-The "softirq=" portion of the message tracks the number of RCU softirq
-handlers that the stalled CPU has executed.  The number before the "/"
-is the number that had executed since boot at the time that this CPU
-last noted the beginning of a grace period, which might be the current
-(stalled) grace period, or it might be some earlier grace period (for
-example, if the CPU might have been in dyntick-idle mode for an extended
-time period.  The number after the "/" is the number that have executed
-since boot until the current time.  If this latter number stays constant
-across repeated stall-warning messages, it is possible that RCU's softirq
-handlers are no longer able to execute on this CPU.  This can happen if
-the stalled CPU is spinning with interrupts are disabled, or, in -rt
-kernels, if a high-priority process is starving RCU's softirq handler.
-
-The "fqs=" shows the number of force-quiescent-state idle/offline
-detection passes that the grace-period kthread has made across this
-CPU since the last time that this CPU noted the beginning of a grace
-period.
-
-The "detected by" line indicates which CPU detected the stall (in this
-case, CPU 32), how many jiffies have elapsed since the start of the grace
-period (in this case 2603), the grace-period sequence number (7075), and
-an estimate of the total number of RCU callbacks queued across all CPUs
-(625 in this case).
-
-In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed
-for each CPU:
-
-       0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 dyntick_enabled: 1
-
-The "last_accelerate:" prints the low-order 16 bits (in hex) of the
-jiffies counter when this CPU last invoked rcu_try_advance_all_cbs()
-from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from
-rcu_prepare_for_idle(). "dyntick_enabled: 1" indicates that dyntick-idle
-processing is enabled.
-
-If the grace period ends just as the stall warning starts printing,
-there will be a spurious stall-warning message, which will include
-the following:
-
-       INFO: Stall ended before state dump start
-
-This is rare, but does happen from time to time in real life.  It is also
-possible for a zero-jiffy stall to be flagged in this case, depending
-on how the stall warning and the grace-period initialization happen to
-interact.  Please note that it is not possible to entirely eliminate this
-sort of false positive without resorting to things like stop_machine(),
-which is overkill for this sort of problem.
-
-If all CPUs and tasks have passed through quiescent states, but the
-grace period has nevertheless failed to end, the stall-warning splat
-will include something like the following:
-
-       All QSes seen, last rcu_preempt kthread activity 23807 (4297905177-4297881370), jiffies_till_next_fqs=3, root ->qsmask 0x0
-
-The "23807" indicates that it has been more than 23 thousand jiffies
-since the grace-period kthread ran.  The "jiffies_till_next_fqs"
-indicates how frequently that kthread should run, giving the number
-of jiffies between force-quiescent-state scans, in this case three,
-which is way less than 23807.  Finally, the root rcu_node structure's
-->qsmask field is printed, which will normally be zero.
-
-If the relevant grace-period kthread has been unable to run prior to
-the stall warning, as was the case in the "All QSes seen" line above,
-the following additional line is printed:
-
-       kthread starved for 23807 jiffies! g7075 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
-
-Starving the grace-period kthreads of CPU time can of course result
-in RCU CPU stall warnings even when all CPUs and tasks have passed
-through the required quiescent states.  The "g" number shows the current
-grace-period sequence number, the "f" precedes the ->gp_flags command
-to the grace-period kthread, the "RCU_GP_WAIT_FQS" indicates that the
-kthread is waiting for a short timeout, the "state" precedes value of the
-task_struct ->state field, and the "cpu" indicates that the grace-period
-kthread last ran on CPU 5.
-
-
-Multiple Warnings From One Stall
-
-If a stall lasts long enough, multiple stall-warning messages will be
-printed for it.  The second and subsequent messages are printed at
-longer intervals, so that the time between (say) the first and second
-message will be about three times the interval between the beginning
-of the stall and the first message.
-
-
-Stall Warnings for Expedited Grace Periods
-
-If an expedited grace period detects a stall, it will place a message
-like the following in dmesg:
-
-       INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 7-... } 21119 jiffies s: 73 root: 0x2/.
-
-This indicates that CPU 7 has failed to respond to a reschedule IPI.
-The three periods (".") following the CPU number indicate that the CPU
-is online (otherwise the first period would instead have been "O"),
-that the CPU was online at the beginning of the expedited grace period
-(otherwise the second period would have instead been "o"), and that
-the CPU has been online at least once since boot (otherwise, the third
-period would instead have been "N").  The number before the "jiffies"
-indicates that the expedited grace period has been going on for 21,119
-jiffies.  The number following the "s:" indicates that the expedited
-grace-period sequence counter is 73.  The fact that this last value is
-odd indicates that an expedited grace period is in flight.  The number
-following "root:" is a bitmask that indicates which children of the root
-rcu_node structure correspond to CPUs and/or tasks that are blocking the
-current expedited grace period.  If the tree had more than one level,
-additional hex numbers would be printed for the states of the other
-rcu_node structures in the tree.
-
-As with normal grace periods, PREEMPT_RCU builds can be stalled by
-tasks as well as by CPUs, and that the tasks will be indicated by PID,
-for example, "P3421".
-
-It is entirely possible to see stall warnings from normal and from
-expedited grace periods at about the same time during the same run.
diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst
new file mode 100644 (file)
index 0000000..a901477
--- /dev/null
@@ -0,0 +1,293 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+RCU Torture Test Operation
+==========================
+
+
+CONFIG_RCU_TORTURE_TEST
+=======================
+
+The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
+implementations.  It creates an rcutorture kernel module that can
+be loaded to run a torture test.  The test periodically outputs
+status messages via printk(), which can be examined via the dmesg
+command (perhaps grepping for "torture").  The test is started
+when the module is loaded, and stops when the module is unloaded.
+
+Module parameters are prefixed by "rcutorture." in
+Documentation/admin-guide/kernel-parameters.txt.
+
+Output
+======
+
+The statistics output is as follows::
+
+       rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+       rcu-torture: rtc:           (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
+       rcu-torture: Reader Pipe:  727860534 34213 0 0 0 0 0 0 0 0 0
+       rcu-torture: Reader Batch:  727877838 17003 0 0 0 0 0 0 0 0 0
+       rcu-torture: Free-Block Circulation:  155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
+       rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+
+The command "dmesg | grep torture:" will extract this information on
+most systems.  On more esoteric configurations, it may be necessary to
+use other commands to access the output of the printk()s used by
+the RCU torture test.  The printk()s use KERN_ALERT, so they should
+be evident.  ;-)
+
+The first and last lines show the rcutorture module parameters, and the
+last line shows either "SUCCESS" or "FAILURE", based on rcutorture's
+automatic determination as to whether RCU operated correctly.
+
+The entries are as follows:
+
+*      "rtc": The hexadecimal address of the structure currently visible
+       to readers.
+
+*      "ver": The number of times since boot that the RCU writer task
+       has changed the structure visible to readers.
+
+*      "tfle": If non-zero, indicates that the "torture freelist"
+       containing structures to be placed into the "rtc" area is empty.
+       This condition is important, since it can fool you into thinking
+       that RCU is working when it is not.  :-/
+
+*      "rta": Number of structures allocated from the torture freelist.
+
+*      "rtaf": Number of allocations from the torture freelist that have
+       failed due to the list being empty.  It is not unusual for this
+       to be non-zero, but it is bad for it to be a large fraction of
+       the value indicated by "rta".
+
+*      "rtf": Number of frees into the torture freelist.
+
+*      "rtmbe": A non-zero value indicates that rcutorture believes that
+       rcu_assign_pointer() and rcu_dereference() are not working
+       correctly.  This value should be zero.
+
+*      "rtbe": A non-zero value indicates that one of the rcu_barrier()
+       family of functions is not working correctly.
+
+*      "rtbke": rcutorture was unable to create the real-time kthreads
+       used to force RCU priority inversion.  This value should be zero.
+
+*      "rtbre": Although rcutorture successfully created the kthreads
+       used to force RCU priority inversion, it was unable to set them
+       to the real-time priority level of 1.  This value should be zero.
+
+*      "rtbf": The number of times that RCU priority boosting failed
+       to resolve RCU priority inversion.
+
+*      "rtb": The number of times that rcutorture attempted to force
+       an RCU priority inversion condition.  If you are testing RCU
+       priority boosting via the "test_boost" module parameter, this
+       value should be non-zero.
+
+*      "nt": The number of times rcutorture ran RCU read-side code from
+       within a timer handler.  This value should be non-zero only
+       if you specified the "irqreader" module parameter.
+
+*      "Reader Pipe": Histogram of "ages" of structures seen by readers.
+       If any entries past the first two are non-zero, RCU is broken.
+       And rcutorture prints the error flag string "!!!" to make sure
+       you notice.  The age of a newly allocated structure is zero,
+       it becomes one when removed from reader visibility, and is
+       incremented once per grace period subsequently -- and is freed
+       after passing through (RCU_TORTURE_PIPE_LEN-2) grace periods.
+
+       The output displayed above was taken from a correctly working
+       RCU.  If you want to see what it looks like when broken, break
+       it yourself.  ;-)
+
+*      "Reader Batch": Another histogram of "ages" of structures seen
+       by readers, but in terms of counter flips (or batches) rather
+       than in terms of grace periods.  The legal number of non-zero
+       entries is again two.  The reason for this separate view is that
+       it is sometimes easier to get the third entry to show up in the
+       "Reader Batch" list than in the "Reader Pipe" list.
+
+*      "Free-Block Circulation": Shows the number of torture structures
+       that have reached a given point in the pipeline.  The first element
+       should closely correspond to the number of structures allocated,
+       the second to the number that have been removed from reader view,
+       and all but the last remaining to the corresponding number of
+       passes through a grace period.  The last entry should be zero,
+       as it is only incremented if a torture structure's counter
+       somehow gets incremented farther than it should.
+
+Different implementations of RCU can provide implementation-specific
+additional information.  For example, Tree SRCU provides the following
+additional line::
+
+       srcud-torture: Tree SRCU per-CPU(idx=0): 0(35,-21) 1(-4,24) 2(1,1) 3(-26,20) 4(28,-47) 5(-9,4) 6(-10,14) 7(-14,11) T(1,6)
+
+This line shows the per-CPU counter state, in this case for Tree SRCU
+using a dynamically allocated srcu_struct (hence "srcud-" rather than
+"srcu-").  The numbers in parentheses are the values of the "old" and
+"current" counters for the corresponding CPU.  The "idx" value maps the
+"old" and "current" values to the underlying array, and is useful for
+debugging.  The final "T" entry contains the totals of the counters.
+
+Usage on Specific Kernel Builds
+===============================
+
+It is sometimes desirable to torture RCU on a specific kernel build,
+for example, when preparing to put that kernel build into production.
+In that case, the kernel should be built with CONFIG_RCU_TORTURE_TEST=m
+so that the test can be started using modprobe and terminated using rmmod.
+
+For example, the following script may be used to torture RCU::
+
+       #!/bin/sh
+
+       modprobe rcutorture
+       sleep 3600
+       rmmod rcutorture
+       dmesg | grep torture:
+
+The output can be manually inspected for the error flag of "!!!".
+One could of course create a more elaborate script that automatically
+checked for such errors.  The "rmmod" command forces a "SUCCESS",
+"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed.  The first
+two are self-explanatory, while the last indicates that while there
+were no RCU failures, CPU-hotplug problems were detected.
+
+
+Usage on Mainline Kernels
+=========================
+
+When using rcutorture to test changes to RCU itself, it is often
+necessary to build a number of kernels in order to test that change
+across a broad range of combinations of the relevant Kconfig options
+and of the relevant kernel boot parameters.  In this situation, use
+of modprobe and rmmod can be quite time-consuming and error-prone.
+
+Therefore, the tools/testing/selftests/rcutorture/bin/kvm.sh
+script is available for mainline testing for x86, arm64, and
+powerpc.  By default, it will run the series of tests specified by
+tools/testing/selftests/rcutorture/configs/rcu/CFLIST, with each test
+running for 30 minutes within a guest OS using a minimal userspace
+supplied by an automatically generated initrd.  After the tests are
+complete, the resulting build products and console output are analyzed
+for errors and the results of the runs are summarized.
+
+On larger systems, rcutorture testing can be accelerated by passing the
+--cpus argument to kvm.sh.  For example, on a 64-CPU system, "--cpus 43"
+would use up to 43 CPUs to run tests concurrently, which as of v5.4 would
+complete all the scenarios in two batches, reducing the time to complete
+from about eight hours to about one hour (not counting the time to build
+the sixteen kernels).  The "--dryrun sched" argument will not run tests,
+but rather tell you how the tests would be scheduled into batches.  This
+can be useful when working out how many CPUs to specify in the --cpus
+argument.
+
+Not all changes require that all scenarios be run.  For example, a change
+to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the
+--configs argument to kvm.sh as follows:  "--configs 'SRCU-N SRCU-P'".
+Large systems can run multiple copies of the full set of scenarios,
+for example, a system with 448 hardware threads can run five instances
+of the full set concurrently.  To make this happen::
+
+       kvm.sh --cpus 448 --configs '5*CFLIST'
+
+Alternatively, such a system can run 56 concurrent instances of a single
+eight-CPU scenario::
+
+       kvm.sh --cpus 448 --configs '56*TREE04'
+
+Or 28 concurrent instances of each of two eight-CPU scenarios::
+
+       kvm.sh --cpus 448 --configs '28*TREE03 28*TREE04'
+
+Of course, each concurrent instance will use memory, which can be
+limited using the --memory argument, which defaults to 512M.  Small
+values for memory may require disabling the callback-flooding tests
+using the --bootargs parameter discussed below.
+
+Sometimes additional debugging is useful, and in such cases the --kconfig
+parameter to kvm.sh may be used, for example, ``--kconfig 'CONFIG_KASAN=y'``.
+
+Kernel boot arguments can also be supplied, for example, to control
+rcutorture's module parameters.  For example, to test a change to RCU's
+CPU stall-warning code, use "--bootargs 'rcutorture.stall_cpu=30'".
+This will of course result in the scripting reporting a failure, namely
+the resulting RCU CPU stall warning.  As noted above, reducing memory may
+require disabling rcutorture's callback-flooding tests::
+
+       kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \
+               --bootargs 'rcutorture.fwd_progress=0'
+
+Sometimes all that is needed is a full set of kernel builds.  This is
+what the --buildonly argument does.
+
+Finally, the --trust-make argument allows each kernel build to reuse what
+it can from the previous kernel build.
+
+There are additional more arcane arguments that are documented in the
+source code of the kvm.sh script.
+
+If a run contains failures, the number of buildtime and runtime failures
+is listed at the end of the kvm.sh output, which you really should redirect
+to a file.  The build products and console output of each run are kept in
+tools/testing/selftests/rcutorture/res in timestamped directories.  A
+given directory can be supplied to kvm-find-errors.sh in order to have
+it cycle you through summaries of errors and full error logs.  For example::
+
+       tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh \
+               tools/testing/selftests/rcutorture/res/2020.01.20-15.54.23
+
+However, it is often more convenient to access the files directly.
+Files pertaining to all scenarios in a run reside in the top-level
+directory (2020.01.20-15.54.23 in the example above), while per-scenario
+files reside in a subdirectory named after the scenario (for example,
+"TREE04").  If a given scenario ran more than once (as in "--configs
+'56*TREE04'" above), the directories corresponding to the second and
+subsequent runs of that scenario include a sequence number, for example,
+"TREE04.2", "TREE04.3", and so on.
+
+The most frequently used file in the top-level directory is testid.txt.
+If the test ran in a git repository, then this file contains the commit
+that was tested and any uncommitted changes in diff format.
+
+The most frequently used files in each per-scenario-run directory are:
+
+.config:
+       This file contains the Kconfig options.
+
+Make.out:
+       This contains build output for a specific scenario.
+
+console.log:
+       This contains the console output for a specific scenario.
+       This file may be examined once the kernel has booted, but
+       it might not exist if the build failed.
+
+vmlinux:
+       This contains the kernel, which can be useful with tools like
+       objdump and gdb.
+
+A number of additional files are available, but are less frequently used.
+Many are intended for debugging of rcutorture itself or of its scripting.
+
+As of v5.4, a successful run with the default set of scenarios produces
+the following summary at the end of the run on a 12-CPU system::
+
+    SRCU-N ------- 804233 GPs (148.932/s) [srcu: g10008272 f0x0 ]
+    SRCU-P ------- 202320 GPs (37.4667/s) [srcud: g1809476 f0x0 ]
+    SRCU-t ------- 1122086 GPs (207.794/s) [srcu: g0 f0x0 ]
+    SRCU-u ------- 1111285 GPs (205.794/s) [srcud: g1 f0x0 ]
+    TASKS01 ------- 19666 GPs (3.64185/s) [tasks: g0 f0x0 ]
+    TASKS02 ------- 20541 GPs (3.80389/s) [tasks: g0 f0x0 ]
+    TASKS03 ------- 19416 GPs (3.59556/s) [tasks: g0 f0x0 ]
+    TINY01 ------- 836134 GPs (154.84/s) [rcu: g0 f0x0 ] n_max_cbs: 34198
+    TINY02 ------- 850371 GPs (157.476/s) [rcu: g0 f0x0 ] n_max_cbs: 2631
+    TREE01 ------- 162625 GPs (30.1157/s) [rcu: g1124169 f0x0 ]
+    TREE02 ------- 333003 GPs (61.6672/s) [rcu: g2647753 f0x0 ] n_max_cbs: 35844
+    TREE03 ------- 306623 GPs (56.782/s) [rcu: g2975325 f0x0 ] n_max_cbs: 1496497
+    CPU count limited from 16 to 12
+    TREE04 ------- 246149 GPs (45.5831/s) [rcu: g1695737 f0x0 ] n_max_cbs: 434961
+    TREE05 ------- 314603 GPs (58.2598/s) [rcu: g2257741 f0x2 ] n_max_cbs: 193997
+    TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732
+    CPU count limited from 16 to 12
+    TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
deleted file mode 100644 (file)
index af712a3..0000000
+++ /dev/null
@@ -1,282 +0,0 @@
-RCU Torture Test Operation
-
-
-CONFIG_RCU_TORTURE_TEST
-
-The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
-implementations.  It creates an rcutorture kernel module that can
-be loaded to run a torture test.  The test periodically outputs
-status messages via printk(), which can be examined via the dmesg
-command (perhaps grepping for "torture").  The test is started
-when the module is loaded, and stops when the module is unloaded.
-
-Module parameters are prefixed by "rcutorture." in
-Documentation/admin-guide/kernel-parameters.txt.
-
-OUTPUT
-
-The statistics output is as follows:
-
-       rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
-       rcu-torture: rtc:           (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
-       rcu-torture: Reader Pipe:  727860534 34213 0 0 0 0 0 0 0 0 0
-       rcu-torture: Reader Batch:  727877838 17003 0 0 0 0 0 0 0 0 0
-       rcu-torture: Free-Block Circulation:  155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
-       rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
-
-The command "dmesg | grep torture:" will extract this information on
-most systems.  On more esoteric configurations, it may be necessary to
-use other commands to access the output of the printk()s used by
-the RCU torture test.  The printk()s use KERN_ALERT, so they should
-be evident.  ;-)
-
-The first and last lines show the rcutorture module parameters, and the
-last line shows either "SUCCESS" or "FAILURE", based on rcutorture's
-automatic determination as to whether RCU operated correctly.
-
-The entries are as follows:
-
-o      "rtc": The hexadecimal address of the structure currently visible
-       to readers.
-
-o      "ver": The number of times since boot that the RCU writer task
-       has changed the structure visible to readers.
-
-o      "tfle": If non-zero, indicates that the "torture freelist"
-       containing structures to be placed into the "rtc" area is empty.
-       This condition is important, since it can fool you into thinking
-       that RCU is working when it is not.  :-/
-
-o      "rta": Number of structures allocated from the torture freelist.
-
-o      "rtaf": Number of allocations from the torture freelist that have
-       failed due to the list being empty.  It is not unusual for this
-       to be non-zero, but it is bad for it to be a large fraction of
-       the value indicated by "rta".
-
-o      "rtf": Number of frees into the torture freelist.
-
-o      "rtmbe": A non-zero value indicates that rcutorture believes that
-       rcu_assign_pointer() and rcu_dereference() are not working
-       correctly.  This value should be zero.
-
-o      "rtbe": A non-zero value indicates that one of the rcu_barrier()
-       family of functions is not working correctly.
-
-o      "rtbke": rcutorture was unable to create the real-time kthreads
-       used to force RCU priority inversion.  This value should be zero.
-
-o      "rtbre": Although rcutorture successfully created the kthreads
-       used to force RCU priority inversion, it was unable to set them
-       to the real-time priority level of 1.  This value should be zero.
-
-o      "rtbf": The number of times that RCU priority boosting failed
-       to resolve RCU priority inversion.
-
-o      "rtb": The number of times that rcutorture attempted to force
-       an RCU priority inversion condition.  If you are testing RCU
-       priority boosting via the "test_boost" module parameter, this
-       value should be non-zero.
-
-o      "nt": The number of times rcutorture ran RCU read-side code from
-       within a timer handler.  This value should be non-zero only
-       if you specified the "irqreader" module parameter.
-
-o      "Reader Pipe": Histogram of "ages" of structures seen by readers.
-       If any entries past the first two are non-zero, RCU is broken.
-       And rcutorture prints the error flag string "!!!" to make sure
-       you notice.  The age of a newly allocated structure is zero,
-       it becomes one when removed from reader visibility, and is
-       incremented once per grace period subsequently -- and is freed
-       after passing through (RCU_TORTURE_PIPE_LEN-2) grace periods.
-
-       The output displayed above was taken from a correctly working
-       RCU.  If you want to see what it looks like when broken, break
-       it yourself.  ;-)
-
-o      "Reader Batch": Another histogram of "ages" of structures seen
-       by readers, but in terms of counter flips (or batches) rather
-       than in terms of grace periods.  The legal number of non-zero
-       entries is again two.  The reason for this separate view is that
-       it is sometimes easier to get the third entry to show up in the
-       "Reader Batch" list than in the "Reader Pipe" list.
-
-o      "Free-Block Circulation": Shows the number of torture structures
-       that have reached a given point in the pipeline.  The first element
-       should closely correspond to the number of structures allocated,
-       the second to the number that have been removed from reader view,
-       and all but the last remaining to the corresponding number of
-       passes through a grace period.  The last entry should be zero,
-       as it is only incremented if a torture structure's counter
-       somehow gets incremented farther than it should.
-
-Different implementations of RCU can provide implementation-specific
-additional information.  For example, Tree SRCU provides the following
-additional line:
-
-       srcud-torture: Tree SRCU per-CPU(idx=0): 0(35,-21) 1(-4,24) 2(1,1) 3(-26,20) 4(28,-47) 5(-9,4) 6(-10,14) 7(-14,11) T(1,6)
-
-This line shows the per-CPU counter state, in this case for Tree SRCU
-using a dynamically allocated srcu_struct (hence "srcud-" rather than
-"srcu-").  The numbers in parentheses are the values of the "old" and
-"current" counters for the corresponding CPU.  The "idx" value maps the
-"old" and "current" values to the underlying array, and is useful for
-debugging.  The final "T" entry contains the totals of the counters.
-
-
-USAGE ON SPECIFIC KERNEL BUILDS
-
-It is sometimes desirable to torture RCU on a specific kernel build,
-for example, when preparing to put that kernel build into production.
-In that case, the kernel should be built with CONFIG_RCU_TORTURE_TEST=m
-so that the test can be started using modprobe and terminated using rmmod.
-
-For example, the following script may be used to torture RCU:
-
-       #!/bin/sh
-
-       modprobe rcutorture
-       sleep 3600
-       rmmod rcutorture
-       dmesg | grep torture:
-
-The output can be manually inspected for the error flag of "!!!".
-One could of course create a more elaborate script that automatically
-checked for such errors.  The "rmmod" command forces a "SUCCESS",
-"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed.  The first
-two are self-explanatory, while the last indicates that while there
-were no RCU failures, CPU-hotplug problems were detected.
-
-
-USAGE ON MAINLINE KERNELS
-
-When using rcutorture to test changes to RCU itself, it is often
-necessary to build a number of kernels in order to test that change
-across a broad range of combinations of the relevant Kconfig options
-and of the relevant kernel boot parameters.  In this situation, use
-of modprobe and rmmod can be quite time-consuming and error-prone.
-
-Therefore, the tools/testing/selftests/rcutorture/bin/kvm.sh
-script is available for mainline testing for x86, arm64, and
-powerpc.  By default, it will run the series of tests specified by
-tools/testing/selftests/rcutorture/configs/rcu/CFLIST, with each test
-running for 30 minutes within a guest OS using a minimal userspace
-supplied by an automatically generated initrd.  After the tests are
-complete, the resulting build products and console output are analyzed
-for errors and the results of the runs are summarized.
-
-On larger systems, rcutorture testing can be accelerated by passing the
---cpus argument to kvm.sh.  For example, on a 64-CPU system, "--cpus 43"
-would use up to 43 CPUs to run tests concurrently, which as of v5.4 would
-complete all the scenarios in two batches, reducing the time to complete
-from about eight hours to about one hour (not counting the time to build
-the sixteen kernels).  The "--dryrun sched" argument will not run tests,
-but rather tell you how the tests would be scheduled into batches.  This
-can be useful when working out how many CPUs to specify in the --cpus
-argument.
-
-Not all changes require that all scenarios be run.  For example, a change
-to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the
---configs argument to kvm.sh as follows:  "--configs 'SRCU-N SRCU-P'".
-Large systems can run multiple copies of of the full set of scenarios,
-for example, a system with 448 hardware threads can run five instances
-of the full set concurrently.  To make this happen:
-
-       kvm.sh --cpus 448 --configs '5*CFLIST'
-
-Alternatively, such a system can run 56 concurrent instances of a single
-eight-CPU scenario:
-
-       kvm.sh --cpus 448 --configs '56*TREE04'
-
-Or 28 concurrent instances of each of two eight-CPU scenarios:
-
-       kvm.sh --cpus 448 --configs '28*TREE03 28*TREE04'
-
-Of course, each concurrent instance will use memory, which can be
-limited using the --memory argument, which defaults to 512M.  Small
-values for memory may require disabling the callback-flooding tests
-using the --bootargs parameter discussed below.
-
-Sometimes additional debugging is useful, and in such cases the --kconfig
-parameter to kvm.sh may be used, for example, "--kconfig 'CONFIG_KASAN=y'".
-
-Kernel boot arguments can also be supplied, for example, to control
-rcutorture's module parameters.  For example, to test a change to RCU's
-CPU stall-warning code, use "--bootargs 'rcutorture.stall_cpu=30'".
-This will of course result in the scripting reporting a failure, namely
-the resuling RCU CPU stall warning.  As noted above, reducing memory may
-require disabling rcutorture's callback-flooding tests:
-
-       kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \
-               --bootargs 'rcutorture.fwd_progress=0'
-
-Sometimes all that is needed is a full set of kernel builds.  This is
-what the --buildonly argument does.
-
-Finally, the --trust-make argument allows each kernel build to reuse what
-it can from the previous kernel build.
-
-There are additional more arcane arguments that are documented in the
-source code of the kvm.sh script.
-
-If a run contains failures, the number of buildtime and runtime failures
-is listed at the end of the kvm.sh output, which you really should redirect
-to a file.  The build products and console output of each run is kept in
-tools/testing/selftests/rcutorture/res in timestamped directories.  A
-given directory can be supplied to kvm-find-errors.sh in order to have
-it cycle you through summaries of errors and full error logs.  For example:
-
-       tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh \
-               tools/testing/selftests/rcutorture/res/2020.01.20-15.54.23
-
-However, it is often more convenient to access the files directly.
-Files pertaining to all scenarios in a run reside in the top-level
-directory (2020.01.20-15.54.23 in the example above), while per-scenario
-files reside in a subdirectory named after the scenario (for example,
-"TREE04").  If a given scenario ran more than once (as in "--configs
-'56*TREE04'" above), the directories corresponding to the second and
-subsequent runs of that scenario include a sequence number, for example,
-"TREE04.2", "TREE04.3", and so on.
-
-The most frequently used file in the top-level directory is testid.txt.
-If the test ran in a git repository, then this file contains the commit
-that was tested and any uncommitted changes in diff format.
-
-The most frequently used files in each per-scenario-run directory are:
-
-.config: This file contains the Kconfig options.
-
-Make.out: This contains build output for a specific scenario.
-
-console.log: This contains the console output for a specific scenario.
-       This file may be examined once the kernel has booted, but
-       it might not exist if the build failed.
-
-vmlinux: This contains the kernel, which can be useful with tools like
-       objdump and gdb.
-
-A number of additional files are available, but are less frequently used.
-Many are intended for debugging of rcutorture itself or of its scripting.
-
-As of v5.4, a successful run with the default set of scenarios produces
-the following summary at the end of the run on a 12-CPU system:
-
-SRCU-N ------- 804233 GPs (148.932/s) [srcu: g10008272 f0x0 ]
-SRCU-P ------- 202320 GPs (37.4667/s) [srcud: g1809476 f0x0 ]
-SRCU-t ------- 1122086 GPs (207.794/s) [srcu: g0 f0x0 ]
-SRCU-u ------- 1111285 GPs (205.794/s) [srcud: g1 f0x0 ]
-TASKS01 ------- 19666 GPs (3.64185/s) [tasks: g0 f0x0 ]
-TASKS02 ------- 20541 GPs (3.80389/s) [tasks: g0 f0x0 ]
-TASKS03 ------- 19416 GPs (3.59556/s) [tasks: g0 f0x0 ]
-TINY01 ------- 836134 GPs (154.84/s) [rcu: g0 f0x0 ] n_max_cbs: 34198
-TINY02 ------- 850371 GPs (157.476/s) [rcu: g0 f0x0 ] n_max_cbs: 2631
-TREE01 ------- 162625 GPs (30.1157/s) [rcu: g1124169 f0x0 ]
-TREE02 ------- 333003 GPs (61.6672/s) [rcu: g2647753 f0x0 ] n_max_cbs: 35844
-TREE03 ------- 306623 GPs (56.782/s) [rcu: g2975325 f0x0 ] n_max_cbs: 1496497
-CPU count limited from 16 to 12
-TREE04 ------- 246149 GPs (45.5831/s) [rcu: g1695737 f0x0 ] n_max_cbs: 434961
-TREE05 ------- 314603 GPs (58.2598/s) [rcu: g2257741 f0x2 ] n_max_cbs: 193997
-TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732
-CPU count limited from 16 to 12
-TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011
index d09471a..a789755 100644 (file)
@@ -1483,8 +1483,7 @@ IO Interface Files
 ~~~~~~~~~~~~~~~~~~
 
   io.stat
-       A read-only nested-keyed file which exists on non-root
-       cgroups.
+       A read-only nested-keyed file.
 
        Lines are keyed by $MAJ:$MIN device numbers and not ordered.
        The following nested keys are defined.
index 9443fce..2162d79 100644 (file)
@@ -395,6 +395,13 @@ When mounting an ext4 filesystem, the following option are accepted:
         Documentation/filesystems/dax.txt.  Note that this option is
         incompatible with data=journal.
 
+  inlinecrypt
+        When possible, encrypt/decrypt the contents of encrypted files using the
+        blk-crypto framework rather than filesystem-layer encryption. This
+        allows the use of inline encryption hardware. The on-disk format is
+        unaffected. For more details, see
+        Documentation/block/inline-encryption.rst.
+
 Data Mode
 =========
 There are 3 different data modes:
index e4ee8b2..2baad0b 100644 (file)
@@ -93,6 +93,11 @@ It exists in the sparse memory mapping model, and it is also somewhat
 similar to the mem_map variable, both of them are used to translate an
 address.
 
+MAX_PHYSMEM_BITS
+----------------
+
+Defines the maximum supported physical address space memory.
+
 page
 ----
 
@@ -399,6 +404,17 @@ KERNELPACMASK
 The mask to extract the Pointer Authentication Code from a kernel virtual
 address.
 
+TCR_EL1.T1SZ
+------------
+
+Indicates the size offset of the memory region addressed by TTBR1_EL1.
+The region size is 2^(64-T1SZ) bytes.
+
+TTBR1_EL1 is the table base address register specified by ARMv8-A
+architecture which is used to look up the page-tables for the Virtual
+addresses in the higher VA range (refer to ARMv8 ARM document for
+more details).
+
 arm
 ===
 
index fb95fad..d35fd3c 100644 (file)
                        latencies, which will choose a value aligned
                        with the appropriate hardware boundaries.
 
+       rcutree.rcu_min_cached_objs= [KNL]
+                       Minimum number of objects which are cached and
+                       maintained per one CPU. Object size is equal
+                       to PAGE_SIZE. The cache reduces the pressure
+                       on the page allocator and also helps the
+                       whole algorithm behave better in low-memory
+                       conditions.
+
        rcutree.jiffies_till_first_fqs= [KNL]
                        Set delay from grace-period initialization to
                        first attempt to force quiescent states.
                        Set time (jiffies) between CPU-hotplug operations,
                        or zero to disable CPU-hotplug testing.
 
+       rcutorture.read_exit= [KNL]
+                       Set the number of read-then-exit kthreads used
+                       to test the interaction of RCU updaters and
+                       task-exit processing.
+
+       rcutorture.read_exit_burst= [KNL]
+                       The number of times in a given read-then-exit
+                       episode that a set of read-then-exit kthreads
+                       is spawned.
+
+       rcutorture.read_exit_delay= [KNL]
+                       The delay, in seconds, between successive
+                       read-then-exit testing episodes.
+
        rcutorture.shuffle_interval= [KNL]
                        Set task-shuffle interval (s).  Shuffling tasks
                        allows some CPUs to go into dyntick-idle mode
                              reboot_cpu is s[mp]#### with #### being the processor
                                        to be used for rebooting.
 
+       refscale.holdoff= [KNL]
+                       Set test-start holdoff period.  The purpose of
+                       this parameter is to delay the start of the
+                       test until boot completes in order to avoid
+                       interference.
+
+       refscale.loops= [KNL]
+                       Set the number of loops over the synchronization
+                       primitive under test.  Increasing this number
+                       reduces noise due to loop start/end overhead,
+                       but the default has already reduced the per-pass
+                       noise to a handful of picoseconds on ca. 2020
+                       x86 laptops.
+
+       refscale.nreaders= [KNL]
+                       Set number of readers.  The default value of -1
+                       selects N, where N is roughly 75% of the number
+                       of CPUs.  A value of zero is an interesting choice.
+
+       refscale.nruns= [KNL]
+                       Set number of runs, each of which is dumped onto
+                       the console log.
+
+       refscale.readdelay= [KNL]
+                       Set the read-side critical-section duration,
+                       measured in microseconds.
+
+       refscale.scale_type= [KNL]
+                       Specify the read-protection implementation to test.
+
+       refscale.shutdown= [KNL]
+                       Shut down the system at the end of the performance
+                       test.  This defaults to 1 (shut it down) when
+                       refscale is built into the kernel and to 0 (leave
+                       it running) when refscale is built as a module.
+
+       refscale.verbose= [KNL]
+                       Enable additional printk() statements.
+
        relax_domain_level=
                        [KNL, SMP] Set scheduler's default relax_domain_level.
                        See Documentation/admin-guide/cgroup-v1/cpusets.rst.
                        Prevent the CPU-hotplug component of torturing
                        until after init has spawned.
 
+       torture.ftrace_dump_at_shutdown= [KNL]
+                       Dump the ftrace buffer at torture-test shutdown,
+                       even if there were no errors.  This can be a
+                       very costly operation when many torture tests
+                       are running concurrently, especially on systems
+                       with rotating-rust storage.
+
        tp720=          [HW,PS2]
 
        tpm_suspend_pcr=[HW,TPM]
index 83acf50..55bf6b4 100644 (file)
@@ -1062,6 +1062,60 @@ Enables/disables scheduler statistics. Enabling this feature
 incurs a small amount of overhead in the scheduler but is
 useful for debugging and performance tuning.
 
+sched_util_clamp_min:
+=====================
+
+Max allowed *minimum* utilization.
+
+Default value is 1024, which is the maximum possible value.
+
+It means that any requested uclamp.min value cannot be greater than
+sched_util_clamp_min, i.e., it is restricted to the range
+[0:sched_util_clamp_min].
+
+sched_util_clamp_max:
+=====================
+
+Max allowed *maximum* utilization.
+
+Default value is 1024, which is the maximum possible value.
+
+It means that any requested uclamp.max value cannot be greater than
+sched_util_clamp_max, i.e., it is restricted to the range
+[0:sched_util_clamp_max].
+
+sched_util_clamp_min_rt_default:
+================================
+
+By default Linux is tuned for performance, which means that RT tasks always run
+at the highest frequency and most capable (highest capacity) CPU (in
+heterogeneous systems).
+
+Uclamp achieves this by setting the requested uclamp.min of all RT tasks to
+1024 by default, which effectively boosts the tasks to run at the highest
+frequency and biases them to run on the biggest CPU.
+
+This knob allows admins to change the default behavior when uclamp is being
+used. In battery powered devices particularly, running at the maximum
+capacity and frequency will increase energy consumption and shorten the battery
+life.
+
+This knob is only effective for RT tasks whose requested uclamp.min value the
+user hasn't modified via the sched_setattr() syscall.
+
+This knob will not escape the range constraint imposed by sched_util_clamp_min
+defined above.
+
+For example if
+
+       sched_util_clamp_min_rt_default = 800
+       sched_util_clamp_min = 600
+
+Then the boost will be clamped to 600 because 800 is outside of the permissible
+range of [0:600]. This could happen, for instance, if a powersave mode
+restricts all boosts temporarily by modifying sched_util_clamp_min. As soon as
+this restriction is lifted, the requested sched_util_clamp_min_rt_default
+will take effect.
 
 seccomp
 =======
index 0ab747e..0f1fded 100644 (file)
@@ -85,21 +85,21 @@ smp_store_release() respectively. Therefore, if you find yourself only using
 the Non-RMW operations of atomic_t, you do not in fact need atomic_t at all
 and are doing it wrong.
 
-A subtle detail of atomic_set{}() is that it should be observable to the RMW
-ops. That is:
+A note for the implementation of atomic_set{}() is that it must not break the
+atomicity of the RMW ops. That is:
 
-  C atomic-set
+  C Atomic-RMW-ops-are-atomic-WRT-atomic_set
 
   {
-    atomic_set(v, 1);
+    atomic_t v = ATOMIC_INIT(1);
   }
 
-  P1(atomic_t *v)
+  P0(atomic_t *v)
   {
-    atomic_add_unless(v, 1, 0);
+    (void)atomic_add_unless(v, 1, 0);
   }
 
-  P2(atomic_t *v)
+  P1(atomic_t *v)
   {
     atomic_set(v, 0);
   }
@@ -233,19 +233,19 @@ as well. Similarly, something like:
 is an ACQUIRE pattern (though very much not typical), but again the barrier is
 strictly stronger than ACQUIRE. As illustrated:
 
-  C strong-acquire
+  C Atomic-RMW+mb__after_atomic-is-stronger-than-acquire
 
   {
   }
 
-  P1(int *x, atomic_t *y)
+  P0(int *x, atomic_t *y)
   {
     r0 = READ_ONCE(*x);
     smp_rmb();
     r1 = atomic_read(y);
   }
 
-  P2(int *x, atomic_t *y)
+  P1(int *x, atomic_t *y)
   {
     atomic_inc(y);
     smp_mb__after_atomic();
@@ -253,14 +253,14 @@ strictly stronger than ACQUIRE. As illustrated:
   }
 
   exists
-  (r0=1 /\ r1=0)
+  (0:r0=1 /\ 0:r1=0)
 
 This should not happen; but a hypothetical atomic_inc_acquire() --
 (void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
 because it would not order the W part of the RMW against the following
 WRITE_ONCE.  Thus:
 
-  P1                   P2
+  P0                   P1
 
                        t = LL.acq *y (0)
                        t++;
index b964796..afda5e3 100644 (file)
@@ -1036,7 +1036,7 @@ Now the generic block layer performs partition-remapping early and thus
 provides drivers with a sector number relative to whole device, rather than
 having to take partition number into account in order to arrive at the true
 sector number. The routine blk_partition_remap() is invoked by
-generic_make_request even before invoking the queue specific make_request_fn,
+submit_bio_noacct even before invoking the queue specific ->submit_bio,
 so the i/o scheduler also gets to operate on whole disk sector numbers. This
 should typically not require changes to block drivers, it just never gets
 to invoke its own partition sector offset calculations since all bios
index 2c752c5..b208488 100644 (file)
@@ -47,7 +47,7 @@ the Forced Unit Access is implemented.  The REQ_PREFLUSH and REQ_FUA flags
 may both be set on a single bio.
 
 
-Implementation details for make_request_fn based block drivers
+Implementation details for bio based block drivers
 --------------------------------------------------------------
 
 These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
index dde4f7f..2de9058 100644 (file)
@@ -157,7 +157,6 @@ with the kernel as a block device by registering the following general
                cdrom_release,          /∗ release ∗/
                NULL,                   /∗ fsync ∗/
                NULL,                   /∗ fasync ∗/
-               cdrom_media_changed,    /∗ media change ∗/
                NULL                    /∗ revalidate ∗/
        };
 
@@ -366,19 +365,6 @@ which may or may not be in the drive). If the drive is not a changer,
        CDS_DRIVE_NOT_READY     /* something is wrong, tray is moving? */
        CDS_DISC_OK             /* a disc is loaded and everything is fine */
 
-::
-
-       int media_changed(struct cdrom_device_info *cdi, int disc_nr)
-
-This function is very similar to the original function in $struct
-file_operations*. It returns 1 if the medium of the device *cdi->dev*
-has changed since the last call, and 0 otherwise. The parameter
-*disc_nr* identifies a specific slot in a juke-box, it should be
-ignored for single-disc drives. Note that by `re-routing` this
-function through *cdrom_media_changed()*, we can implement separate
-queues for the VFS and a new *ioctl()* function that can report device
-changes to software (e. g., an auto-mounting daemon).
-
 ::
 
        int tray_move(struct cdrom_device_info *cdi, int position)
@@ -917,9 +903,7 @@ commands can be identified by the underscores in their names.
        maximum number of discs in the juke-box found in the *cdrom_dops*.
 `CDROM_MEDIA_CHANGED`
        Returns 1 if a disc has been changed since the last call.
-       Note that calls to *cdrom_media_changed* by the VFS are treated
-       by an independent queue, so both mechanisms will detect a
-       media change once. For juke-boxes, an extra argument *arg*
+       For juke-boxes, an extra argument *arg*
        specifies the slot for which the information is given. The special
        value *CDSL_CURRENT* requests that information about the currently
        selected slot be returned.
index 0830e5b..3517571 100644 (file)
@@ -27,22 +27,11 @@ padata_instance structure for overall control of how jobs are to be run::
 
     #include <linux/padata.h>
 
-    struct padata_instance *padata_alloc_possible(const char *name);
+    struct padata_instance *padata_alloc(const char *name);
 
 'name' simply identifies the instance.
 
-There are functions for enabling and disabling the instance::
-
-    int padata_start(struct padata_instance *pinst);
-    void padata_stop(struct padata_instance *pinst);
-
-These functions are setting or clearing the "PADATA_INIT" flag; if that flag is
-not set, other functions will refuse to work.  padata_start() returns zero on
-success (flag set) or -EINVAL if the padata cpumask contains no active CPU
-(flag not set).  padata_stop() clears the flag and blocks until the padata
-instance is unused.
-
-Finally, complete padata initialization by allocating a padata_shell::
+Then, complete padata initialization by allocating a padata_shell::
 
    struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
 
@@ -155,11 +144,10 @@ submitted.
 Destroying
 ----------
 
-Cleaning up a padata instance predictably involves calling the three free
+Cleaning up a padata instance predictably involves calling the two free
 functions that correspond to the allocation in reverse::
 
     void padata_free_shell(struct padata_shell *ps);
-    void padata_stop(struct padata_instance *pinst);
     void padata_free(struct padata_instance *pinst);
 
 It is the user's responsibility to ensure all outstanding jobs are complete
index 45d943f..40137f9 100644 (file)
@@ -169,7 +169,7 @@ Portions of this API were derived from the following projects:
 
 and;
   
-  Nettle (http://www.lysator.liu.se/~nisse/nettle/)
+  Nettle (https://www.lysator.liu.se/~nisse/nettle/)
     Niels Möller
 
 Original developers of the crypto algorithms:
index ff86bef..52019e9 100644 (file)
@@ -23,7 +23,7 @@ user space, however. This includes the difference between synchronous
 and asynchronous invocations. The user space API call is fully
 synchronous.
 
-[1] http://www.chronox.de/libkcapi.html
+[1] https://www.chronox.de/libkcapi.html
 
 User Space API General Remarks
 ------------------------------
@@ -384,4 +384,4 @@ Please see [1] for libkcapi which provides an easy-to-use wrapper around
 the aforementioned Netlink kernel interface. [1] also contains a test
 application that invokes all libkcapi API calls.
 
-[1] http://www.chronox.de/libkcapi.html
+[1] https://www.chronox.de/libkcapi.html
index b38379f..be7a0b0 100644 (file)
@@ -8,7 +8,8 @@ approach to detect races. KCSAN's primary purpose is to detect `data races`_.
 Usage
 -----
 
-KCSAN requires Clang version 11 or later.
+KCSAN is supported by both GCC and Clang. With GCC we require version 11 or
+later, and with Clang we also require version 11 or later.
 
 To enable KCSAN configure the kernel with::
 
diff --git a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
new file mode 100644 (file)
index 0000000..85ef69f
--- /dev/null
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/ti,sa2ul.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: K3 SoC SA2UL crypto module
+
+maintainers:
+  - Tero Kristo <t-kristo@ti.com>
+
+properties:
+  compatible:
+    enum:
+      - ti,j721e-sa2ul
+      - ti,am654-sa2ul
+
+  reg:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  dmas:
+    items:
+      - description: TX DMA Channel
+      - description: "RX DMA Channel #1"
+      - description: "RX DMA Channel #2"
+
+  dma-names:
+    items:
+      - const: tx
+      - const: rx1
+      - const: rx2
+
+  dma-coherent: true
+
+  "#address-cells":
+    const: 2
+
+  "#size-cells":
+    const: 2
+
+  ranges:
+    description:
+      Address translation for the possible RNG child node for SA2UL
+
+patternProperties:
+  "^rng@[a-f0-9]+$":
+    type: object
+    description:
+      Child RNG node for SA2UL
+
+required:
+  - compatible
+  - reg
+  - power-domains
+  - dmas
+  - dma-names
+  - dma-coherent
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/soc/ti,sci_pm_domain.h>
+
+    main_crypto: crypto@4e00000 {
+        compatible = "ti,j721e-sa2ul";
+        reg = <0x0 0x4e00000 0x0 0x1200>;
+        power-domains = <&k3_pds 264 TI_SCI_PD_EXCLUSIVE>;
+        dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>,
+               <&main_udmap 0x4001>;
+        dma-names = "tx", "rx1", "rx2";
+        dma-coherent;
+    };
index 526593c..4cc1a67 100644 (file)
@@ -47,6 +47,9 @@ properties:
     $ref: /schemas/types.yaml#/definitions/phandle-array
     description: Phandle to the device SRAM
 
+  iommus:
+    maxItems: 1
+
   memory-region:
     description:
       CMA pool to use for buffers allocation instead of the default
index 9134e9b..ebd3291 100644 (file)
@@ -28,6 +28,16 @@ Documentation/devicetree/bindings/iommu/iommu.txt.
 For arm-smmu binding, see:
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml.
 
+The MSI writes are accompanied by sideband data which is derived from the ICID.
+The msi-map property is used to associate the devices with both the ITS
+controller and the sideband data which accompanies the writes.
+
+For generic MSI bindings, see
+Documentation/devicetree/bindings/interrupt-controller/msi.txt.
+
+For GICv3 and GIC ITS bindings, see:
+Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml.
+
 Required properties:
 
     - compatible
@@ -49,11 +59,6 @@ Required properties:
                         region may not be present in some scenarios, such
                         as in the device tree presented to a virtual machine.
 
-    - msi-parent
-        Value type: <phandle>
-        Definition: Must be present and point to the MSI controller node
-                    handling message interrupts for the MC.
-
     - ranges
         Value type: <prop-encoded-array>
         Definition: A standard property.  Defines the mapping between the child
@@ -119,6 +124,28 @@ Optional properties:
   associated with the listed IOMMU, with the iommu-specifier
   (i - icid-base + iommu-base).
 
+- msi-map: Maps an ICID to a GIC ITS and associated msi-specifier
+  data.
+
+  The property is an arbitrary number of tuples of
+  (icid-base,gic-its,msi-base,length).
+
+  Any ICID i in the interval [icid-base, icid-base + length) is
+  associated with the listed GIC ITS, with the msi-specifier
+  (i - icid-base + msi-base).
+
+Deprecated properties:
+
+    - msi-parent
+        Value type: <phandle>
+        Definition: Describes the MSI controller node handling message
+                    interrupts for the MC. When there is no translation
+                    between the ICID and deviceID this property can be used
+                    to describe the MSI controller used by the devices on the
+                    mc-bus.
+                    The use of this property for mc-bus is deprecated. Please
+                    use msi-map.
+
 Example:
 
         smmu: iommu@5000000 {
@@ -128,13 +155,24 @@ Example:
                ...
         };
 
+        gic: interrupt-controller@6000000 {
+               compatible = "arm,gic-v3";
+               ...
+        };
+        its: gic-its@6020000 {
+               compatible = "arm,gic-v3-its";
+               msi-controller;
+               ...
+        };
+
         fsl_mc: fsl-mc@80c000000 {
                 compatible = "fsl,qoriq-mc";
                 reg = <0x00000008 0x0c000000 0 0x40>,    /* MC portal base */
                       <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
-                msi-parent = <&its>;
                 /* define map for ICIDs 23-64 */
                 iommu-map = <23 &smmu 23 41>;
+                /* define msi map for ICIDs 23-64 */
+                msi-map = <23 &its 23 41>;
                 #address-cells = <3>;
                 #size-cells = <1>;
 
index 405c2b0..659d4ef 100644 (file)
@@ -5,6 +5,9 @@ Required properties:
                "fsl,imx21-rnga"
                "fsl,imx31-rnga" (backward compatible with "fsl,imx21-rnga")
                "fsl,imx25-rngb"
+               "fsl,imx6sl-rngb" (backward compatible with "fsl,imx25-rngb")
+               "fsl,imx6sll-rngb" (backward compatible with "fsl,imx25-rngb")
+               "fsl,imx6ull-rngb" (backward compatible with "fsl,imx25-rngb")
                "fsl,imx35-rngc"
 - reg : offset and length of the register set of this block
 - interrupts : the interrupt number for the RNG block
diff --git a/Documentation/devicetree/bindings/rng/ingenic,rng.yaml b/Documentation/devicetree/bindings/rng/ingenic,rng.yaml
new file mode 100644 (file)
index 0000000..b2e4a6a
--- /dev/null
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rng/ingenic,rng.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bindings for RNG in Ingenic SoCs
+
+maintainers:
+  - 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+
+description:
+  The Random Number Generator in Ingenic SoCs.
+
+properties:
+  compatible:
+    enum:
+      - ingenic,jz4780-rng
+      - ingenic,x1000-rng
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    rng: rng@d8 {
+        compatible = "ingenic,jz4780-rng";
+        reg = <0xd8 0x8>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml b/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml
new file mode 100644 (file)
index 0000000..48ab82a
--- /dev/null
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rng/silex-insight,ba431-rng.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silex Insight BA431 RNG bindings
+
+description: |
+  The BA431 hardware random number generator is an IP that is FIPS-140-2/3
+  certified.
+
+maintainers:
+  - Olivier Sobrie <olivier.sobrie@silexinsight.com>
+
+properties:
+  compatible:
+    const: silex-insight,ba431-rng
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    rng@42800000 {
+      compatible = "silex-insight,ba431-rng";
+      reg = <0x42800000 0x1000>;
+    };
+
+...
index ef9519c..e361fc9 100644 (file)
@@ -55,6 +55,7 @@
 *.ver
 *.xml
 *.xz
+*.zst
 *_MODULES
 *_vga16.c
 *~
index f51bb21..f850ad0 100644 (file)
@@ -24,7 +24,7 @@ Available fault injection capabilities
 
   injects disk IO errors on devices permitted by setting
   /sys/block/<device>/make-it-fail or
-  /sys/block/<device>/<partition>/make-it-fail. (generic_make_request())
+  /sys/block/<device>/<partition>/make-it-fail. (submit_bio_noacct())
 
 - fail_mmc_request
 
index 8620c38..3999356 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: | TODO |
     |      xtensa: | TODO |
     -----------------------
index 9ed964f..79409bf 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 365df2c..9ea60e4 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index 632a1c7..f8ec5c1 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index 9646670..cd3510e 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index 6ff3854..c3fe9b2 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index c527d05..ca6bacb 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 210256f..7563a49 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 38c40cf..4b0a1d0 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 97cd7aa..6225cfe 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 8b316c6..371f0ac 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index b805aad..38e9525 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index fb297a8..7f4a20e 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 12410f6..3db4763 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index be8acbb..43cac6e 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 6bfa36b..d636ed0 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 895c3b0..dfc93d0 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index 242ff5a..1815c7f 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 98cb9d8..4f844ec 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: |  ok  |
-    |   unicore32: |  ok  |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index ee92274..5c6bcfc 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index c52116c..b55e420 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index 518f352..04c17c2 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index c22cd6f..e7450fb 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 527fe4d..98e79d1 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 8a521a6..68658a6 100644 (file)
@@ -51,7 +51,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 3508236..964457a 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ..  |
     |       sparc: | TODO |
     |          um: |  ..  |
-    |   unicore32: |  ..  |
     |         x86: |  ok  |
     |      xtensa: |  ..  |
     -----------------------
index c7b837f..f54ddfc 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: |  ok  |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 593536f..4d11cbb 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: | TODO |
     |      xtensa: | TODO |
     -----------------------
index 7a27157..8287b6a 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: |  ok  |
-    |   unicore32: |  ok  |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index 048bfb6..a71f3a9 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index a14bbad..d9082b9 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ..  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index 1d46da1..a84c3b9 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: |  ok  |
-    |   unicore32: |  ok  |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
index fb0d0ca..56b372d 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index adc2587..eccda07 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index f05588f..c74e3f8 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index cdfe892..1c0b95f 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ..  |
     |       sparc: |  ok  |
     |          um: |  ..  |
-    |   unicore32: |  ..  |
     |         x86: |  ok  |
     |      xtensa: |  ..  |
     -----------------------
index 2bdd3b6..30f75a7 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: |  ..  |
-    |   unicore32: |  ..  |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 8525f19..c5ff3a4 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 3a6b87d..1cb7406 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: | TODO |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 2e01738..13d0e1e 100644 (file)
@@ -28,7 +28,6 @@
     |          sh: |  ok  |
     |       sparc: |  ok  |
     |          um: | TODO |
-    |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: | TODO |
     -----------------------
index 099d45a..8b4fac4 100644 (file)
@@ -258,6 +258,13 @@ compress_extension=%s  Support adding specified extension, so that f2fs can enab
                        on compression extension list and enable compression on
                        these file by default rather than to enable it via ioctl.
                        For other files, we can still enable compression via ioctl.
+inlinecrypt
+                       When possible, encrypt/decrypt the contents of encrypted
+                       files using the blk-crypto framework rather than
+                       filesystem-layer encryption. This allows the use of
+                       inline encryption hardware. The on-disk format is
+                       unaffected. For more details, see
+                       Documentation/block/inline-encryption.rst.
 ====================== ============================================================
 
 Debugfs Entries
index f517af8..423c5a0 100644 (file)
@@ -1158,7 +1158,7 @@ setxattr() because of the special semantics of the encryption xattr.
 were to be added to or removed from anything other than an empty
 directory.)  These structs are defined as follows::
 
-    #define FS_KEY_DERIVATION_NONCE_SIZE 16
+    #define FSCRYPT_FILE_NONCE_SIZE 16
 
     #define FSCRYPT_KEY_DESCRIPTOR_SIZE  8
     struct fscrypt_context_v1 {
@@ -1167,7 +1167,7 @@ directory.)  These structs are defined as follows::
             u8 filenames_encryption_mode;
             u8 flags;
             u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
-            u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+            u8 nonce[FSCRYPT_FILE_NONCE_SIZE];
     };
 
     #define FSCRYPT_KEY_IDENTIFIER_SIZE  16
@@ -1178,7 +1178,7 @@ directory.)  These structs are defined as follows::
             u8 flags;
             u8 __reserved[4];
             u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
-            u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+            u8 nonce[FSCRYPT_FILE_NONCE_SIZE];
     };
 
 The context structs contain the same information as the corresponding
@@ -1204,6 +1204,18 @@ buffer.  Some filesystems, such as UBIFS, already use temporary
 buffers regardless of encryption.  Other filesystems, such as ext4 and
 F2FS, have to allocate bounce pages specially for encryption.
 
+Fscrypt is also able to use inline encryption hardware instead of the
+kernel crypto API for en/decryption of file contents.  When possible,
+and if directed to do so (by specifying the 'inlinecrypt' mount option
+for an ext4/F2FS filesystem), it adds encryption contexts to bios and
+uses blk-crypto to perform the en/decryption instead of making use of
+the above read/write path changes.  Of course, even if directed to
+make use of inline encryption, fscrypt will only be able to do so if
+either hardware inline encryption support is available for the
+selected encryption algorithm or CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
+is selected.  If neither is the case, fscrypt will fall back to using
+the above mentioned read/write path changes for en/decryption.
+
 Filename hashing and encoding
 -----------------------------
 
@@ -1250,11 +1262,14 @@ Tests
 
 To test fscrypt, use xfstests, which is Linux's de facto standard
 filesystem test suite.  First, run all the tests in the "encrypt"
-group on the relevant filesystem(s).  For example, to test ext4 and
+group on the relevant filesystem(s).  One can also run the tests
+with the 'inlinecrypt' mount option to test the implementation for
+inline encryption support.  For example, to test ext4 and
 f2fs encryption using `kvm-xfstests
 <https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
 
     kvm-xfstests -c ext4,f2fs -g encrypt
+    kvm-xfstests -c ext4,f2fs -g encrypt -m inlinecrypt
 
 UBIFS encryption can also be tested this way, but it should be done in
 a separate command, and it takes some time for kvm-xfstests to set up
@@ -1276,6 +1291,7 @@ This tests the encrypted I/O paths more thoroughly.  To do this with
 kvm-xfstests, use the "encrypt" filesystem configuration::
 
     kvm-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
+    kvm-xfstests -c ext4/encrypt,f2fs/encrypt -g auto -m inlinecrypt
 
 Because this runs many more tests than "-g encrypt" does, it takes
 much longer to run; so also consider using `gce-xfstests
@@ -1283,3 +1299,4 @@ much longer to run; so also consider using `gce-xfstests
 instead of kvm-xfstests::
 
     gce-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
+    gce-xfstests -c ext4/encrypt,f2fs/encrypt -g auto -m inlinecrypt
index 318605d..17bea12 100644 (file)
@@ -467,7 +467,6 @@ prototypes::
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*direct_access) (struct block_device *, sector_t, void **,
                                unsigned long *);
-       int (*media_changed) (struct gendisk *);
        void (*unlock_native_capacity) (struct gendisk *);
        int (*revalidate_disk) (struct gendisk *);
        int (*getgeo)(struct block_device *, struct hd_geometry *);
@@ -483,14 +482,13 @@ release:          yes
 ioctl:                 no
 compat_ioctl:          no
 direct_access:         no
-media_changed:         no
 unlock_native_capacity:        no
 revalidate_disk:       no
 getgeo:                        no
 swap_slot_free_notify: no      (see below)
 ======================= ===================
 
-media_changed, unlock_native_capacity and revalidate_disk are called only from
+unlock_native_capacity and revalidate_disk are called only from
 check_disk_change().
 
 swap_slot_free_notify is called with swap_lock and sometimes the page lock
diff --git a/Documentation/litmus-tests/README b/Documentation/litmus-tests/README
new file mode 100644 (file)
index 0000000..7f5c6c3
--- /dev/null
@@ -0,0 +1,35 @@
+============
+LITMUS TESTS
+============
+
+Each subdirectory contains litmus tests that are typical to describe the
+semantics of respective kernel APIs.
+For more information about how to "run" a litmus test or how to generate
+a kernel test module based on a litmus test, please see
+tools/memory-model/README.
+
+
+atomic (/atomic directory)
+--------------------------
+
+Atomic-RMW+mb__after_atomic-is-stronger-than-acquire.litmus
+    Test that an atomic RMW followed by a smp_mb__after_atomic() is
+    stronger than a normal acquire: both the read and write parts of
+    the RMW are ordered before the subsequent memory accesses.
+
+Atomic-RMW-ops-are-atomic-WRT-atomic_set.litmus
+    Test that atomic_set() cannot break the atomicity of atomic RMWs.
+    NOTE: Requires herd7 7.56 or later which supports "(void)expr".
+
+
+RCU (/rcu directory)
+--------------------
+
+MP+onceassign+derefonce.litmus (under tools/memory-model/litmus-tests/)
+    Demonstrates the use of rcu_assign_pointer() and rcu_dereference() to
+    ensure that an RCU reader will not see pre-initialization garbage.
+
+RCU+sync+read.litmus
+RCU+sync+free.litmus
+    Both the above litmus tests demonstrate the RCU grace period guarantee
+    that an RCU read-side critical section can never span a grace period.
diff --git a/Documentation/litmus-tests/atomic/Atomic-RMW+mb__after_atomic-is-stronger-than-acquire.litmus b/Documentation/litmus-tests/atomic/Atomic-RMW+mb__after_atomic-is-stronger-than-acquire.litmus
new file mode 100644 (file)
index 0000000..9a8e31a
--- /dev/null
@@ -0,0 +1,32 @@
+C Atomic-RMW+mb__after_atomic-is-stronger-than-acquire
+
+(*
+ * Result: Never
+ *
+ * Test that an atomic RMW followed by a smp_mb__after_atomic() is
+ * stronger than a normal acquire: both the read and write parts of
+ * the RMW are ordered before the subsequent memory accesses.
+ *)
+
+{
+}
+
+P0(int *x, atomic_t *y)
+{
+       int r0;
+       int r1;
+
+       r0 = READ_ONCE(*x);
+       smp_rmb();
+       r1 = atomic_read(y);
+}
+
+P1(int *x, atomic_t *y)
+{
+       atomic_inc(y);
+       smp_mb__after_atomic();
+       WRITE_ONCE(*x, 1);
+}
+
+exists
+(0:r0=1 /\ 0:r1=0)
diff --git a/Documentation/litmus-tests/atomic/Atomic-RMW-ops-are-atomic-WRT-atomic_set.litmus b/Documentation/litmus-tests/atomic/Atomic-RMW-ops-are-atomic-WRT-atomic_set.litmus
new file mode 100644 (file)
index 0000000..ffd4d3e
--- /dev/null
@@ -0,0 +1,25 @@
+C Atomic-RMW-ops-are-atomic-WRT-atomic_set
+
+(*
+ * Result: Never
+ *
+ * Test that atomic_set() cannot break the atomicity of atomic RMWs.
+ * NOTE: This requires herd7 7.56 or later which supports "(void)expr".
+ *)
+
+{
+       atomic_t v = ATOMIC_INIT(1);
+}
+
+P0(atomic_t *v)
+{
+       (void)atomic_add_unless(v, 1, 0);
+}
+
+P1(atomic_t *v)
+{
+       atomic_set(v, 0);
+}
+
+exists
+(v=2)
diff --git a/Documentation/litmus-tests/rcu/RCU+sync+free.litmus b/Documentation/litmus-tests/rcu/RCU+sync+free.litmus
new file mode 100644 (file)
index 0000000..4ee67e1
--- /dev/null
@@ -0,0 +1,42 @@
+C RCU+sync+free
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that an RCU reader can never see a write that
+ * follows a grace period, if it did not see writes that precede that grace
+ * period.
+ *
+ * This is a typical pattern of RCU usage, where the write before the grace
+ * period assigns a pointer, and the writes following the grace period destroy
+ * the object that the pointer used to point to.
+ *
+ * This is one implication of the RCU grace-period guarantee, which says (among
+ * other things) that an RCU read-side critical section cannot span a grace period.
+ *)
+
+{
+int x = 1;
+int *y = &x;
+int z = 1;
+}
+
+P0(int *x, int *z, int **y)
+{
+       int *r0;
+       int r1;
+
+       rcu_read_lock();
+       r0 = rcu_dereference(*y);
+       r1 = READ_ONCE(*r0);
+       rcu_read_unlock();
+}
+
+P1(int *x, int *z, int **y)
+{
+       rcu_assign_pointer(*y, z);
+       synchronize_rcu();
+       WRITE_ONCE(*x, 0);
+}
+
+exists (0:r0=x /\ 0:r1=0)
diff --git a/Documentation/litmus-tests/rcu/RCU+sync+read.litmus b/Documentation/litmus-tests/rcu/RCU+sync+read.litmus
new file mode 100644 (file)
index 0000000..f341767
--- /dev/null
@@ -0,0 +1,37 @@
+C RCU+sync+read
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that after a grace period, an RCU updater always
+ * sees all stores done in prior RCU read-side critical sections. Such
+ * read-side critical sections would have ended before the grace period ended.
+ *
+ * This is one implication of the RCU grace-period guarantee, which says (among
+ * other things) that an RCU read-side critical section cannot span a grace period.
+ *)
+
+{
+int x = 0;
+int y = 0;
+}
+
+P0(int *x, int *y)
+{
+       rcu_read_lock();
+       WRITE_ONCE(*x, 1);
+       WRITE_ONCE(*y, 1);
+       rcu_read_unlock();
+}
+
+P1(int *x, int *y)
+{
+       int r0;
+       int r1;
+
+       r0 = READ_ONCE(*x);
+       synchronize_rcu();
+       r1 = READ_ONCE(*y);
+}
+
+exists (1:r0=1 /\ 1:r1=0)
index d785878..7003bd5 100644 (file)
@@ -14,6 +14,7 @@ locking
     mutex-design
     rt-mutex-design
     rt-mutex
+    seqlock
     spinlocks
     ww-mutex-design
     preempt-locking
index 8012a74..dfaf9fc 100644 (file)
@@ -166,4 +166,4 @@ checked for such errors.  The "rmmod" command forces a "SUCCESS",
 two are self-explanatory, while the last indicates that while there
 were no locking failures, CPU-hotplug problems were detected.
 
-Also see: Documentation/RCU/torture.txt
+Also see: Documentation/RCU/torture.rst
index 4d8236b..8f3e9a5 100644 (file)
@@ -18,7 +18,7 @@ as an alternative to these. This new data structure provided a number
 of advantages, including simpler interfaces, and at that time smaller
 code (see Disadvantages).
 
-[1] http://lwn.net/Articles/164802/
+[1] https://lwn.net/Articles/164802/
 
 Implementation
 --------------
diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
new file mode 100644 (file)
index 0000000..366dd36
--- /dev/null
@@ -0,0 +1,170 @@
+======================================
+Sequence counters and sequential locks
+======================================
+
+Introduction
+============
+
+Sequence counters are a reader-writer consistency mechanism with
+lockless readers (read-only retry loops), and no writer starvation. They
+are used for data that's rarely written to (e.g. system time), where the
+reader wants a consistent set of information and is willing to retry if
+that information changes.
+
+A data set is consistent when the sequence count at the beginning of the
+read side critical section is even and the same sequence count value is
+read again at the end of the critical section. The data in the set must
+be copied out inside the read side critical section. If the sequence
+count has changed between the start and the end of the critical section,
+the reader must retry.
+
+Writers increment the sequence count at the start and the end of their
+critical section. After starting the critical section the sequence count
+is odd and indicates to the readers that an update is in progress. At
+the end of the write side critical section the sequence count becomes
+even again which lets readers make progress.
+
+A sequence counter write side critical section must never be preempted
+or interrupted by read side sections. Otherwise the reader will spin for
+the entire scheduler tick due to the odd sequence count value and the
+interrupted writer. If that reader belongs to a real-time scheduling
+class, it can spin forever and the kernel will livelock.
+
+This mechanism cannot be used if the protected data contains pointers,
+as the writer can invalidate a pointer that the reader is following.
+
+
+.. _seqcount_t:
+
+Sequence counters (``seqcount_t``)
+==================================
+
+This is the raw counting mechanism, which does not protect against
+multiple writers.  Write side critical sections must thus be serialized
+by an external lock.
+
+If the write serialization primitive is not implicitly disabling
+preemption, preemption must be explicitly disabled before entering the
+write side section. If the read section can be invoked from hardirq or
+softirq contexts, interrupts or bottom halves must also be respectively
+disabled before entering the write section.
+
+If it's desired to automatically handle the sequence counter
+requirements of writer serialization and non-preemptibility, use
+:ref:`seqlock_t` instead.
+
+Initialization::
+
+       /* dynamic */
+       seqcount_t foo_seqcount;
+       seqcount_init(&foo_seqcount);
+
+       /* static */
+       static seqcount_t foo_seqcount = SEQCNT_ZERO(foo_seqcount);
+
+       /* C99 struct init */
+       struct {
+               .seq   = SEQCNT_ZERO(foo.seq),
+       } foo;
+
+Write path::
+
+       /* Serialized context with disabled preemption */
+
+       write_seqcount_begin(&foo_seqcount);
+
+       /* ... [[write-side critical section]] ... */
+
+       write_seqcount_end(&foo_seqcount);
+
+Read path::
+
+       do {
+               seq = read_seqcount_begin(&foo_seqcount);
+
+               /* ... [[read-side critical section]] ... */
+
+       } while (read_seqcount_retry(&foo_seqcount, seq));
+
+
+.. _seqlock_t:
+
+Sequential locks (``seqlock_t``)
+================================
+
+This contains the :ref:`seqcount_t` mechanism earlier discussed, plus an
+embedded spinlock for writer serialization and non-preemptibility.
+
+If the read side section can be invoked from hardirq or softirq context,
+use the write side function variants which disable interrupts or bottom
+halves respectively.
+
+Initialization::
+
+       /* dynamic */
+       seqlock_t foo_seqlock;
+       seqlock_init(&foo_seqlock);
+
+       /* static */
+       static DEFINE_SEQLOCK(foo_seqlock);
+
+       /* C99 struct init */
+       struct {
+               .seql   = __SEQLOCK_UNLOCKED(foo.seql)
+       } foo;
+
+Write path::
+
+       write_seqlock(&foo_seqlock);
+
+       /* ... [[write-side critical section]] ... */
+
+       write_sequnlock(&foo_seqlock);
+
+Read path, three categories:
+
+1. Normal Sequence readers which never block a writer but they must
+   retry if a writer is in progress by detecting change in the sequence
+   number.  Writers do not wait for a sequence reader::
+
+       do {
+               seq = read_seqbegin(&foo_seqlock);
+
+               /* ... [[read-side critical section]] ... */
+
+       } while (read_seqretry(&foo_seqlock, seq));
+
+2. Locking readers which will wait if a writer or another locking reader
+   is in progress. A locking reader in progress will also block a writer
+   from entering its critical section. This read lock is
+   exclusive. Unlike rwlock_t, only one locking reader can acquire it::
+
+       read_seqlock_excl(&foo_seqlock);
+
+       /* ... [[read-side critical section]] ... */
+
+       read_sequnlock_excl(&foo_seqlock);
+
+3. Conditional lockless reader (as in 1), or locking reader (as in 2),
+   according to a passed marker. This is used to avoid lockless readers
+   starvation (too many retry loops) in case of a sharp spike in write
+   activity. First, a lockless read is tried (even marker passed). If
+   that trial fails (odd sequence counter is returned, which is used as
+   the next iteration marker), the lockless read is transformed to a
+   full locking read and no retry loop is necessary::
+
+       /* marker; even initialization */
+       int seq = 0;
+       do {
+               read_seqbegin_or_lock(&foo_seqlock, &seq);
+
+               /* ... [[read-side critical section]] ... */
+
+       } while (need_seqretry(&foo_seqlock, seq));
+       done_seqretry(&foo_seqlock, seq);
+
+
+API documentation
+=================
+
+.. kernel-doc:: include/linux/seqlock.h
index eaabc31..4e55aba 100644 (file)
@@ -553,12 +553,12 @@ There are certain things that the Linux kernel memory barriers do not guarantee:
 DATA DEPENDENCY BARRIERS (HISTORICAL)
 -------------------------------------
 
-As of v4.15 of the Linux kernel, an smp_read_barrier_depends() was
-added to READ_ONCE(), which means that about the only people who
-need to pay attention to this section are those working on DEC Alpha
-architecture-specific code and those working on READ_ONCE() itself.
-For those who need it, and for those who are interested in the history,
-here is the story of data-dependency barriers.
+As of v4.15 of the Linux kernel, an smp_mb() was added to READ_ONCE() for
+DEC Alpha, which means that about the only people who need to pay attention
+to this section are those working on DEC Alpha architecture-specific code
+and those working on READ_ONCE() itself.  For those who need it, and for
+those who are interested in the history, here is the story of
+data-dependency barriers.
 
 The usage requirements of data dependency barriers are a little subtle, and
 it's not always obvious that they're needed.  To illustrate, consider the
@@ -2708,144 +2708,6 @@ the properties of the memory window through which devices are accessed and/or
 the use of any special device communication instructions the CPU may have.
 
 
-CACHE COHERENCY
----------------
-
-Life isn't quite as simple as it may appear above, however: for while the
-caches are expected to be coherent, there's no guarantee that that coherency
-will be ordered.  This means that while changes made on one CPU will
-eventually become visible on all CPUs, there's no guarantee that they will
-become apparent in the same order on those other CPUs.
-
-
-Consider dealing with a system that has a pair of CPUs (1 & 2), each of which
-has a pair of parallel data caches (CPU 1 has A/B, and CPU 2 has C/D):
-
-                   :
-                   :                          +--------+
-                   :      +---------+         |        |
-       +--------+  : +--->| Cache A |<------->|        |
-       |        |  : |    +---------+         |        |
-       |  CPU 1 |<---+                        |        |
-       |        |  : |    +---------+         |        |
-       +--------+  : +--->| Cache B |<------->|        |
-                   :      +---------+         |        |
-                   :                          | Memory |
-                   :      +---------+         | System |
-       +--------+  : +--->| Cache C |<------->|        |
-       |        |  : |    +---------+         |        |
-       |  CPU 2 |<---+                        |        |
-       |        |  : |    +---------+         |        |
-       +--------+  : +--->| Cache D |<------->|        |
-                   :      +---------+         |        |
-                   :                          +--------+
-                   :
-
-Imagine the system has the following properties:
-
- (*) an odd-numbered cache line may be in cache A, cache C or it may still be
-     resident in memory;
-
- (*) an even-numbered cache line may be in cache B, cache D or it may still be
-     resident in memory;
-
- (*) while the CPU core is interrogating one cache, the other cache may be
-     making use of the bus to access the rest of the system - perhaps to
-     displace a dirty cacheline or to do a speculative load;
-
- (*) each cache has a queue of operations that need to be applied to that cache
-     to maintain coherency with the rest of the system;
-
- (*) the coherency queue is not flushed by normal loads to lines already
-     present in the cache, even though the contents of the queue may
-     potentially affect those loads.
-
-Imagine, then, that two writes are made on the first CPU, with a write barrier
-between them to guarantee that they will appear to reach that CPU's caches in
-the requisite order:
-
-       CPU 1           CPU 2           COMMENT
-       =============== =============== =======================================
-                                       u == 0, v == 1 and p == &u, q == &u
-       v = 2;
-       smp_wmb();                      Make sure change to v is visible before
-                                        change to p
-       <A:modify v=2>                  v is now in cache A exclusively
-       p = &v;
-       <B:modify p=&v>                 p is now in cache B exclusively
-
-The write memory barrier forces the other CPUs in the system to perceive that
-the local CPU's caches have apparently been updated in the correct order.  But
-now imagine that the second CPU wants to read those values:
-
-       CPU 1           CPU 2           COMMENT
-       =============== =============== =======================================
-       ...
-                       q = p;
-                       x = *q;
-
-The above pair of reads may then fail to happen in the expected order, as the
-cacheline holding p may get updated in one of the second CPU's caches while
-the update to the cacheline holding v is delayed in the other of the second
-CPU's caches by some other cache event:
-
-       CPU 1           CPU 2           COMMENT
-       =============== =============== =======================================
-                                       u == 0, v == 1 and p == &u, q == &u
-       v = 2;
-       smp_wmb();
-       <A:modify v=2>  <C:busy>
-                       <C:queue v=2>
-       p = &v;         q = p;
-                       <D:request p>
-       <B:modify p=&v> <D:commit p=&v>
-                       <D:read p>
-                       x = *q;
-                       <C:read *q>     Reads from v before v updated in cache
-                       <C:unbusy>
-                       <C:commit v=2>
-
-Basically, while both cachelines will be updated on CPU 2 eventually, there's
-no guarantee that, without intervention, the order of update will be the same
-as that committed on CPU 1.
-
-
-To intervene, we need to interpolate a data dependency barrier or a read
-barrier between the loads (which as of v4.15 is supplied unconditionally
-by the READ_ONCE() macro).  This will force the cache to commit its
-coherency queue before processing any further requests:
-
-       CPU 1           CPU 2           COMMENT
-       =============== =============== =======================================
-                                       u == 0, v == 1 and p == &u, q == &u
-       v = 2;
-       smp_wmb();
-       <A:modify v=2>  <C:busy>
-                       <C:queue v=2>
-       p = &v;         q = p;
-                       <D:request p>
-       <B:modify p=&v> <D:commit p=&v>
-                       <D:read p>
-                       smp_read_barrier_depends()
-                       <C:unbusy>
-                       <C:commit v=2>
-                       x = *q;
-                       <C:read *q>     Reads from v after v updated in cache
-
-
-This sort of problem can be encountered on DEC Alpha processors as they have a
-split cache that improves performance by making better use of the data bus.
-While most CPUs do imply a data dependency barrier on the read when a memory
-access depends on a read, not all do, so it may not be relied on.
-
-Other CPUs may also have split caches, but must coordinate between the various
-cachelets for normal memory accesses.  The semantics of the Alpha removes the
-need for hardware coordination in the absence of memory barriers, which
-permitted Alpha to sport higher CPU clock rates back in the day.  However,
-please note that (again, as of v4.15) smp_read_barrier_depends() should not
-be used except in Alpha arch-specific code and within the READ_ONCE() macro.
-
-
 CACHE COHERENCY VS DMA
 ----------------------
 
@@ -3009,10 +2871,8 @@ caches with the memory coherence system, thus making it seem like pointer
 changes vs new data occur in the right order.
 
 The Alpha defines the Linux kernel's memory model, although as of v4.15
-the Linux kernel's addition of smp_read_barrier_depends() to READ_ONCE()
-greatly reduced Alpha's impact on the memory model.
-
-See the subsection on "Cache Coherency" above.
+the Linux kernel's addition of smp_mb() to READ_ONCE() on Alpha greatly
+reduced its impact on the memory model.
 
 
 VIRTUAL MACHINE GUESTS
index ff40656..b9d04ee 100644 (file)
@@ -8,9 +8,8 @@ There are various L3 encapsulation standards using UDP being discussed to
 leverage the UDP based load balancing capability of different networks.
 MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them.
 
-The Bareudp tunnel module provides a generic L3 encapsulation tunnelling
-support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside
-a UDP tunnel.
+The Bareudp tunnel module provides a generic L3 encapsulation support for
+tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel.
 
 Special Handling
 ----------------
index 1e3f3ff..2014307 100644 (file)
@@ -486,6 +486,10 @@ narrow. The description of these groups must be added to the following table:
      - Contains packet traps for packets that should be locally delivered after
        routing, but do not match more specific packet traps (e.g.,
        ``ipv4_bgp``)
+   * - ``external_delivery``
+     - Contains packet traps for packets that should be routed through an
+       external interface (e.g., management interface) that does not belong to
+       the same device (e.g., switch ASIC) as the ingress interface
    * - ``ipv6``
      - Contains packet traps for various IPv6 control packets (e.g., Router
        Advertisements)
index cdb3684..af8bdc3 100644 (file)
@@ -67,7 +67,7 @@ corresponding component. The debugfs normally should be mounted to
 The content of the directories are files which represent different views
 to the debug log. Each component can decide which views should be
 used through registering them with the function :c:func:`debug_register_view()`.
-Predefined views for hex/ascii, sprintf and raw binary data are provided.
+Predefined views for hex/ascii and sprintf data are provided.
 It is also possible to define other views. The content of
 a view can be inspected simply by reading the corresponding debugfs file.
 
@@ -119,8 +119,6 @@ Predefined views:
 
   extern struct debug_view debug_hex_ascii_view;
 
-  extern struct debug_view debug_raw_view;
-
   extern struct debug_view debug_sprintf_view;
 
 Examples
@@ -129,7 +127,7 @@ Examples
 .. code-block:: c
 
   /*
-   * hex_ascii- + raw-view Example
+   * hex_ascii-view Example
    */
 
   #include <linux/init.h>
@@ -143,7 +141,6 @@ Examples
 
       debug_info = debug_register("test", 1, 4, 4 );
       debug_register_view(debug_info, &debug_hex_ascii_view);
-      debug_register_view(debug_info, &debug_raw_view);
 
       debug_text_event(debug_info, 4 , "one ");
       debug_int_exception(debug_info, 4, 4711);
@@ -201,7 +198,7 @@ debugfs-files:
 Example::
 
   > ls /sys/kernel/debug/s390dbf/dasd
-  flush  hex_ascii  level pages raw
+  flush  hex_ascii  level pages
   > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
   00 00974733272:680099 2 - 02 0006ad7e  07 ea 4a 90 | ....
   00 00974733272:682210 2 - 02 0006ade6  46 52 45 45 | FREE
@@ -298,10 +295,9 @@ order to see the debug entries well formatted.
 Predefined Views
 ----------------
 
-There are three predefined views: hex_ascii, raw and sprintf.
+There are two predefined views: hex_ascii and sprintf.
 The hex_ascii view shows the data field in hex and ascii representation
 (e.g. ``45 43 4b 44 | ECKD``).
-The raw view returns a bytestream as the debug areas are stored in memory.
 
 The sprintf view formats the debug entries in the same way as the sprintf
 function would do. The sprintf event/exception functions write to the
@@ -334,11 +330,6 @@ The format of the hex_ascii and sprintf view is as follows:
 - Return Address to caller
 - data field
 
-The format of the raw view is:
-
-- Header as described in debug.h
-- datafield
-
 A typical line of the hex_ascii view will look like the following (first line
 is only for explanation and will not be displayed when 'cating' the view)::
 
index 69074e5..88900aa 100644 (file)
@@ -12,6 +12,7 @@ Linux Scheduler
     sched-deadline
     sched-design-CFS
     sched-domains
+    sched-capacity
     sched-energy
     sched-nice-design
     sched-rt-group
diff --git a/Documentation/scheduler/sched-capacity.rst b/Documentation/scheduler/sched-capacity.rst
new file mode 100644 (file)
index 0000000..00bf0d0
--- /dev/null
@@ -0,0 +1,439 @@
+=========================
+Capacity Aware Scheduling
+=========================
+
+1. CPU Capacity
+===============
+
+1.1 Introduction
+----------------
+
+Conventional, homogeneous SMP platforms are composed of purely identical
+CPUs. Heterogeneous platforms on the other hand are composed of CPUs with
+different performance characteristics - on such platforms, not all CPUs can be
+considered equal.
+
+CPU capacity is a measure of the performance a CPU can reach, normalized against
+the most performant CPU in the system. Heterogeneous systems are also called
+asymmetric CPU capacity systems, as they contain CPUs of different capacities.
+
+Disparity in maximum attainable performance (IOW in maximum CPU capacity) stems
+from two factors:
+
+- not all CPUs may have the same microarchitecture (µarch).
+- with Dynamic Voltage and Frequency Scaling (DVFS), not all CPUs may be
+  physically able to attain the higher Operating Performance Points (OPP).
+
+Arm big.LITTLE systems are an example of both. The big CPUs are more
+performance-oriented than the LITTLE ones (more pipeline stages, bigger caches,
+smarter predictors, etc), and can usually reach higher OPPs than the LITTLE ones
+can.
+
+CPU performance is usually expressed in Millions of Instructions Per Second
+(MIPS), which can also be expressed as a given amount of instructions attainable
+per Hz, leading to::
+
+  capacity(cpu) = work_per_hz(cpu) * max_freq(cpu)
+
+1.2 Scheduler terms
+-------------------
+
+Two different capacity values are used within the scheduler. A CPU's
+``capacity_orig`` is its maximum attainable capacity, i.e. its maximum
+attainable performance level. A CPU's ``capacity`` is its ``capacity_orig``
+from which some loss of available performance (e.g. time spent handling IRQs)
+is subtracted.
+
+Note that a CPU's ``capacity`` is solely intended to be used by the CFS class,
+while ``capacity_orig`` is class-agnostic. The rest of this document will use
+the term ``capacity`` interchangeably with ``capacity_orig`` for the sake of
+brevity.
+
+1.3 Platform examples
+---------------------
+
+1.3.1 Identical OPPs
+~~~~~~~~~~~~~~~~~~~~
+
+Consider a hypothetical dual-core asymmetric CPU capacity system where
+
+- work_per_hz(CPU0) = W
+- work_per_hz(CPU1) = W/2
+- all CPUs are running at the same fixed frequency
+
+By the above definition of capacity:
+
+- capacity(CPU0) = C
+- capacity(CPU1) = C/2
+
+To draw the parallel with Arm big.LITTLE, CPU0 would be a big while CPU1 would
+be a LITTLE.
+
+With a workload that periodically does a fixed amount of work, you will get an
+execution trace like so::
+
+ CPU0 work ^
+           |     ____                ____                ____
+           |    |    |              |    |              |    |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+
+ CPU1 work ^
+           |     _________           _________           ____
+           |    |         |         |         |         |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+
+CPU0 has the highest capacity in the system (C), and completes a fixed amount of
+work W in T units of time. On the other hand, CPU1 has half the capacity of
+CPU0, and thus only completes W/2 in T.
+
+1.3.2 Different max OPPs
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Usually, CPUs of different capacity values also have different maximum
+OPPs. Consider the same CPUs as above (i.e. same work_per_hz()) with:
+
+- max_freq(CPU0) = F
+- max_freq(CPU1) = 2/3 * F
+
+This yields:
+
+- capacity(CPU0) = C
+- capacity(CPU1) = C/3
+
+Executing the same workload as described in 1.3.1, with each CPU running at its
+maximum frequency results in::
+
+ CPU0 work ^
+           |     ____                ____                ____
+           |    |    |              |    |              |    |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+
+                            workload on CPU1
+ CPU1 work ^
+           |     ______________      ______________      ____
+           |    |              |    |              |    |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+
+1.4 Representation caveat
+-------------------------
+
+It should be noted that having a *single* value to represent differences in CPU
+performance is somewhat of a contentious point. The relative performance
+difference between two different µarchs could be X% on integer operations, Y% on
+floating point operations, Z% on branches, and so on. Still, results using this
+simple approach have been satisfactory for now.
+
+2. Task utilization
+===================
+
+2.1 Introduction
+----------------
+
+Capacity aware scheduling requires an expression of a task's requirements with
+regards to CPU capacity. Each scheduler class can express this differently, and
+while task utilization is specific to CFS, it is convenient to describe it here
+in order to introduce more generic concepts.
+
+Task utilization is a percentage meant to represent the throughput requirements
+of a task. A simple approximation of it is the task's duty cycle, i.e.::
+
+  task_util(p) = duty_cycle(p)
+
+On an SMP system with fixed frequencies, 100% utilization suggests the task is a
+busy loop. Conversely, 10% utilization hints it is a small periodic task that
+spends more time sleeping than executing. Variable CPU frequencies and
+asymmetric CPU capacities complexify this somewhat; the following sections will
+expand on these.
+
+2.2 Frequency invariance
+------------------------
+
+One issue that needs to be taken into account is that a workload's duty cycle is
+directly impacted by the current OPP the CPU is running at. Consider running a
+periodic workload at a given frequency F::
+
+  CPU work ^
+           |     ____                ____                ____
+           |    |    |              |    |              |    |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+
+This yields duty_cycle(p) == 25%.
+
+Now, consider running the *same* workload at frequency F/2::
+
+  CPU work ^
+           |     _________           _________           ____
+           |    |         |         |         |         |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+
+This yields duty_cycle(p) == 50%, despite the task having the exact same
+behaviour (i.e. executing the same amount of work) in both executions.
+
+The task utilization signal can be made frequency invariant using the following
+formula::
+
+  task_util_freq_inv(p) = duty_cycle(p) * (curr_frequency(cpu) / max_frequency(cpu))
+
+Applying this formula to the two examples above yields a frequency invariant
+task utilization of 25%.
+
+2.3 CPU invariance
+------------------
+
+CPU capacity has a similar effect on task utilization in that running an
+identical workload on CPUs of different capacity values will yield different
+duty cycles.
+
+Consider the system described in 1.3.2., i.e.:
+
+- capacity(CPU0) = C
+- capacity(CPU1) = C/3
+
+Executing a given periodic workload on each CPU at their maximum frequency would
+result in::
+
+ CPU0 work ^
+           |     ____                ____                ____
+           |    |    |              |    |              |    |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+
+ CPU1 work ^
+           |     ______________      ______________      ____
+           |    |              |    |              |    |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+
+IOW,
+
+- duty_cycle(p) == 25% if p runs on CPU0 at its maximum frequency
+- duty_cycle(p) == 75% if p runs on CPU1 at its maximum frequency
+
+The task utilization signal can be made CPU invariant using the following
+formula::
+
+  task_util_cpu_inv(p) = duty_cycle(p) * (capacity(cpu) / max_capacity)
+
+with ``max_capacity`` being the highest CPU capacity value in the
+system. Applying this formula to the example above yields a CPU
+invariant task utilization of 25%.
+
+2.4 Invariant task utilization
+------------------------------
+
+Both frequency and CPU invariance need to be applied to task utilization in
+order to obtain a truly invariant signal. The pseudo-formula for a task
+utilization that is both CPU and frequency invariant is thus, for a given
+task p::
+
+                                     curr_frequency(cpu)   capacity(cpu)
+  task_util_inv(p) = duty_cycle(p) * ------------------- * -------------
+                                     max_frequency(cpu)    max_capacity
+
+In other words, invariant task utilization describes the behaviour of a task as
+if it were running on the highest-capacity CPU in the system, running at its
+maximum frequency.
+
+Any mention of task utilization in the following sections will imply its
+invariant form.
+
+2.5 Utilization estimation
+--------------------------
+
+Without a crystal ball, task behaviour (and thus task utilization) cannot
+accurately be predicted the moment a task first becomes runnable. The CFS class
+maintains a handful of CPU and task signals based on the Per-Entity Load
+Tracking (PELT) mechanism, one of those yielding an *average* utilization (as
+opposed to instantaneous).
+
+This means that while the capacity aware scheduling criteria will be written
+considering a "true" task utilization (using a crystal ball), the implementation
+will only ever be able to use an estimator thereof.
+
+3. Capacity aware scheduling requirements
+=========================================
+
+3.1 CPU capacity
+----------------
+
+Linux cannot currently figure out CPU capacity on its own, this information thus
+needs to be handed to it. Architectures must define arch_scale_cpu_capacity()
+for that purpose.
+
+The arm and arm64 architectures directly map this to the arch_topology driver
+CPU scaling data, which is derived from the capacity-dmips-mhz CPU binding; see
+Documentation/devicetree/bindings/arm/cpu-capacity.txt.
+
+3.2 Frequency invariance
+------------------------
+
+As stated in 2.2, capacity-aware scheduling requires a frequency-invariant task
+utilization. Architectures must define arch_scale_freq_capacity(cpu) for that
+purpose.
+
+Implementing this function requires figuring out at which frequency each CPU
+has been running. One way to implement this is to leverage hardware counters
+whose increment rate scales with a CPU's current frequency (APERF/MPERF on x86,
+AMU on arm64). Another is to directly hook into cpufreq frequency transitions,
+when the kernel is aware of the switched-to frequency (also employed by
+arm/arm64).
+
+4. Scheduler topology
+=====================
+
+During the construction of the sched domains, the scheduler will figure out
+whether the system exhibits asymmetric CPU capacities. Should that be the
+case:
+
+- The sched_asym_cpucapacity static key will be enabled.
+- The SD_ASYM_CPUCAPACITY flag will be set at the lowest sched_domain level that
+  spans all unique CPU capacity values.
+
+The sched_asym_cpucapacity static key is intended to guard sections of code that
+cater to asymmetric CPU capacity systems. Do note however that said key is
+*system-wide*. Imagine the following setup using cpusets::
+
+  capacity    C/2          C
+            ________    ________
+           /        \  /        \
+  CPUs     0  1  2  3  4  5  6  7
+           \__/  \______________/
+  cpusets   cs0         cs1
+
+Which could be created via:
+
+.. code-block:: sh
+
+  mkdir /sys/fs/cgroup/cpuset/cs0
+  echo 0-1 > /sys/fs/cgroup/cpuset/cs0/cpuset.cpus
+  echo 0 > /sys/fs/cgroup/cpuset/cs0/cpuset.mems
+
+  mkdir /sys/fs/cgroup/cpuset/cs1
+  echo 2-7 > /sys/fs/cgroup/cpuset/cs1/cpuset.cpus
+  echo 0 > /sys/fs/cgroup/cpuset/cs1/cpuset.mems
+
+  echo 0 > /sys/fs/cgroup/cpuset/cpuset.sched_load_balance
+
+Since there *is* CPU capacity asymmetry in the system, the
+sched_asym_cpucapacity static key will be enabled. However, the sched_domain
+hierarchy of CPUs 0-1 spans a single capacity value: SD_ASYM_CPUCAPACITY isn't
+set in that hierarchy, it describes an SMP island and should be treated as such.
+
+Therefore, the 'canonical' pattern for protecting codepaths that cater to
+asymmetric CPU capacities is to:
+
+- Check the sched_asym_cpucapacity static key
+- If it is enabled, then also check for the presence of SD_ASYM_CPUCAPACITY in
+  the sched_domain hierarchy (if relevant, i.e. the codepath targets a specific
+  CPU or group thereof)
+
+5. Capacity aware scheduling implementation
+===========================================
+
+5.1 CFS
+-------
+
+5.1.1 Capacity fitness
+~~~~~~~~~~~~~~~~~~~~~~
+
+The main capacity scheduling criterion of CFS is::
+
+  task_util(p) < capacity(task_cpu(p))
+
+This is commonly called the capacity fitness criterion, i.e. CFS must ensure a
+task "fits" on its CPU. If it is violated, the task will need to achieve more
+work than what its CPU can provide: it will be CPU-bound.
+
+Furthermore, uclamp lets userspace specify a minimum and a maximum utilization
+value for a task, either via sched_setattr() or via the cgroup interface (see
+Documentation/admin-guide/cgroup-v2.rst). As its name implies, this can be used to
+clamp task_util() in the previous criterion.
+
+5.1.2 Wakeup CPU selection
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+CFS task wakeup CPU selection follows the capacity fitness criterion described
+above. On top of that, uclamp is used to clamp the task utilization values,
+which lets userspace have more leverage over the CPU selection of CFS
+tasks. IOW, CFS wakeup CPU selection searches for a CPU that satisfies::
+
+  clamp(task_util(p), task_uclamp_min(p), task_uclamp_max(p)) < capacity(cpu)
+
+By using uclamp, userspace can e.g. allow a busy loop (100% utilization) to run
+on any CPU by giving it a low uclamp.max value. Conversely, it can force a small
+periodic task (e.g. 10% utilization) to run on the highest-performance CPUs by
+giving it a high uclamp.min value.
+
+.. note::
+
+  Wakeup CPU selection in CFS can be eclipsed by Energy Aware Scheduling
+  (EAS), which is described in Documentation/scheduler/sched-energy.rst.
+
+5.1.3 Load balancing
+~~~~~~~~~~~~~~~~~~~~
+
+A pathological case in the wakeup CPU selection occurs when a task rarely
+sleeps, if at all - it thus rarely wakes up, if at all. Consider::
+
+  w == wakeup event
+
+  capacity(CPU0) = C
+  capacity(CPU1) = C / 3
+
+                           workload on CPU0
+  CPU work ^
+           |     _________           _________           ____
+           |    |         |         |         |         |
+           +----+----+----+----+----+----+----+----+----+----+-> time
+                w                   w                   w
+
+                           workload on CPU1
+  CPU work ^
+           |     ____________________________________________
+           |    |
+           +----+----+----+----+----+----+----+----+----+----+->
+                w
+
+This workload should run on CPU0, but if the task either:
+
+- was improperly scheduled from the start (inaccurate initial
+  utilization estimation)
+- was properly scheduled from the start, but suddenly needs more
+  processing power
+
+then it might become CPU-bound, IOW ``task_util(p) > capacity(task_cpu(p))``;
+the CPU capacity scheduling criterion is violated, and there may not be any more
+wakeup events to fix this up via wakeup CPU selection.
+
+Tasks that are in this situation are dubbed "misfit" tasks, and the mechanism
+put in place to handle this shares the same name. Misfit task migration
+leverages the CFS load balancer, more specifically the active load balance part
+(which caters to migrating currently running tasks). When load balance happens,
+a misfit active load balance will be triggered if a misfit task can be migrated
+to a CPU with more capacity than its current one.
+
+5.2 RT
+------
+
+5.2.1 Wakeup CPU selection
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+RT task wakeup CPU selection searches for a CPU that satisfies::
+
+  task_uclamp_min(p) <= capacity(task_cpu(cpu))
+
+while still following the usual priority constraints. If none of the candidate
+CPUs can satisfy this capacity criterion, then strict priority based scheduling
+is followed and CPU capacities are ignored.
+
+5.3 DL
+------
+
+5.3.1 Wakeup CPU selection
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+DL task wakeup CPU selection searches for a CPU that satisfies::
+
+  task_bandwidth(p) < capacity(task_cpu(p))
+
+while still respecting the usual bandwidth and deadline constraints. If
+none of the candidate CPUs can satisfy this capacity criterion, then the
+task will remain on its current CPU.
index 9580c57..78f8507 100644 (file)
@@ -331,16 +331,8 @@ asymmetric CPU topologies for now. This requirement is checked at run-time by
 looking for the presence of the SD_ASYM_CPUCAPACITY flag when the scheduling
 domains are built.
 
-The flag is set/cleared automatically by the scheduler topology code whenever
-there are CPUs with different capacities in a root domain. The capacities of
-CPUs are provided by arch-specific code through the arch_scale_cpu_capacity()
-callback. As an example, arm and arm64 share an implementation of this callback
-which uses a combination of CPUFreq data and device-tree bindings to compute the
-capacity of CPUs (see drivers/base/arch_topology.c for more details).
-
-So, in order to use EAS on your platform your architecture must implement the
-arch_scale_cpu_capacity() callback, and some of the CPUs must have a lower
-capacity than others.
+See Documentation/sched/sched-capacity.rst for requirements to be met for this
+flag to be set in the sched_domain hierarchy.
 
 Please note that EAS is not fundamentally incompatible with SMP, but no
 significant savings on SMP platforms have been observed yet. This restriction
index 430a162..80ba765 100644 (file)
@@ -1453,7 +1453,7 @@ function-trace, we get a much larger output::
    => __blk_run_queue_uncond
    => __blk_run_queue
    => blk_queue_bio
-   => generic_make_request
+   => submit_bio_noacct
    => submit_bio
    => submit_bh
    => __ext3_get_inode_loc
@@ -1738,7 +1738,7 @@ tracers.
    => __blk_run_queue_uncond
    => __blk_run_queue
    => blk_queue_bio
-   => generic_make_request
+   => submit_bio_noacct
    => submit_bio
    => submit_bh
    => ext3_bread
index 34d041d..a1f772e 100644 (file)
@@ -577,7 +577,7 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE 
 데이터 의존성 배리어 (역사적)
 -----------------------------
 
-리눅스 커널 v4.15 기준으로, smp_read_barrier_depends() 가 READ_ONCE() 
+리눅스 커널 v4.15 기준으로, smp_mb() 가 DEC Alpha 용 READ_ONCE() 코드에
 추가되었는데, 이는 이 섹션에 주의를 기울여야 하는 사람들은 DEC Alpha 아키텍쳐
 전용 코드를 만드는 사람들과 READ_ONCE() 자체를 만드는 사람들 뿐임을 의미합니다.
 그런 분들을 위해, 그리고 역사에 관심 있는 분들을 위해, 여기 데이터 의존성
@@ -2664,144 +2664,6 @@ CPU 코어는 프로그램의 인과성이 유지된다고만 여겨진다면 
 수도 있습니다.
 
 
-캐시 일관성
------------
-
-하지만 삶은 앞에서 이야기한 것처럼 단순하지 않습니다: 캐시들은 일관적일 것으로
-기대되지만, 그 일관성이 순서에도 적용될 거라는 보장은 없습니다.  한 CPU 에서
-만들어진 변경 사항은 최종적으로는 시스템의 모든 CPU 에게 보여지게 되지만, 다른
-CPU 들에게도 같은 순서로 보이게 될 거라는 보장은 없다는 뜻입니다.
-
-
-두개의 CPU (1 & 2) 가 달려 있고, 각 CPU 에 두개의 데이터 캐시(CPU 1 은 A/B 를,
-CPU 2 는 C/D 를 갖습니다)가 병렬로 연결되어 있는 시스템을 다룬다고 생각해
-봅시다:
-
-                   :
-                   :                          +--------+
-                   :      +---------+         |        |
-       +--------+  : +--->| Cache A |<------->|        |
-       |        |  : |    +---------+         |        |
-       |  CPU 1 |<---+                        |        |
-       |        |  : |    +---------+         |        |
-       +--------+  : +--->| Cache B |<------->|        |
-                   :      +---------+         |        |
-                   :                          | Memory |
-                   :      +---------+         | System |
-       +--------+  : +--->| Cache C |<------->|        |
-       |        |  : |    +---------+         |        |
-       |  CPU 2 |<---+                        |        |
-       |        |  : |    +---------+         |        |
-       +--------+  : +--->| Cache D |<------->|        |
-                   :      +---------+         |        |
-                   :                          +--------+
-                   :
-
-이 시스템이 다음과 같은 특성을 갖는다 생각해 봅시다:
-
- (*) 홀수번 캐시라인은 캐시 A, 캐시 C 또는 메모리에 위치할 수 있음;
-
- (*) 짝수번 캐시라인은 캐시 B, 캐시 D 또는 메모리에 위치할 수 있음;
-
- (*) CPU 코어가 한개의 캐시에 접근하는 동안, 다른 캐시는 - 더티 캐시라인을
-     메모리에 내리거나 추측성 로드를 하거나 하기 위해 - 시스템의 다른 부분에
-     액세스 하기 위해 버스를 사용할 수 있음;
-
- (*) 각 캐시는 시스템의 나머지 부분들과 일관성을 맞추기 위해 해당 캐시에
-     적용되어야 할 오퍼레이션들의 큐를 가짐;
-
- (*) 이 일관성 큐는 캐시에 이미 존재하는 라인에 가해지는 평범한 로드에 의해서는
-     비워지지 않는데, 큐의 오퍼레이션들이 이 로드의 결과에 영향을 끼칠 수 있다
-     할지라도 그러함.
-
-이제, 첫번째 CPU 에서 두개의 쓰기 오퍼레이션을 만드는데, 해당 CPU 의 캐시에
-요청된 순서로 오퍼레이션이 도달됨을 보장하기 위해 두 오퍼레이션 사이에 쓰기
-배리어를 사용하는 상황을 상상해 봅시다:
-
-       CPU 1           CPU 2           COMMENT
-       =============== =============== =======================================
-                                       u == 0, v == 1 and p == &u, q == &u
-       v = 2;
-       smp_wmb();                      v 의 변경이 p 의 변경 전에 보일 것을
-                                        분명히 함
-       <A:modify v=2>                  v 는 이제 캐시 A 에 독점적으로 존재함
-       p = &v;
-       <B:modify p=&v>                 p 는 이제 캐시 B 에 독점적으로 존재함
-
-여기서의 쓰기 메모리 배리어는 CPU 1 의 캐시가 올바른 순서로 업데이트 된 것으로
-시스템의 다른 CPU 들이 인지하게 만듭니다.  하지만, 이제 두번째 CPU 가 그 값들을
-읽으려 하는 상황을 생각해 봅시다:
-
-       CPU 1           CPU 2           COMMENT
-       =============== =============== =======================================
-       ...
-                       q = p;
-                       x = *q;
-
-위의 두개의 읽기 오퍼레이션은 예상된 순서로 일어나지 못할 수 있는데, 두번째 CPU
-의 한 캐시에 다른 캐시 이벤트가 발생해 v 를 담고 있는 캐시라인의 해당 캐시에의
-업데이트가 지연되는 사이, p 를 담고 있는 캐시라인은 두번째 CPU 의 다른 캐시에
-업데이트 되어버렸을 수 있기 때문입니다.
-
-       CPU 1           CPU 2           COMMENT
-       =============== =============== =======================================
-                                       u == 0, v == 1 and p == &u, q == &u
-       v = 2;
-       smp_wmb();
-       <A:modify v=2>  <C:busy>
-                       <C:queue v=2>
-       p = &v;         q = p;
-                       <D:request p>
-       <B:modify p=&v> <D:commit p=&v>
-                       <D:read p>
-                       x = *q;
-                       <C:read *q>     캐시에 업데이트 되기 전의 v 를 읽음
-                       <C:unbusy>
-                       <C:commit v=2>
-
-기본적으로, 두개의 캐시라인 모두 CPU 2 에 최종적으로는 업데이트 될 것이지만,
-별도의 개입 없이는, 업데이트의 순서가 CPU 1 에서 만들어진 순서와 동일할
-것이라는 보장이 없습니다.
-
-
-여기에 개입하기 위해선, 데이터 의존성 배리어나 읽기 배리어를 로드 오퍼레이션들
-사이에 넣어야 합니다 (v4.15 부터는 READ_ONCE() 매크로에 의해 무조건적으로
-그렇게 됩니다).  이렇게 함으로써 캐시가 다음 요청을 처리하기 전에 일관성 큐를
-처리하도록 강제하게 됩니다.
-
-       CPU 1           CPU 2           COMMENT
-       =============== =============== =======================================
-                                       u == 0, v == 1 and p == &u, q == &u
-       v = 2;
-       smp_wmb();
-       <A:modify v=2>  <C:busy>
-                       <C:queue v=2>
-       p = &v;         q = p;
-                       <D:request p>
-       <B:modify p=&v> <D:commit p=&v>
-                       <D:read p>
-                       smp_read_barrier_depends()
-                       <C:unbusy>
-                       <C:commit v=2>
-                       x = *q;
-                       <C:read *q>     캐시에 업데이트 된 v 를 읽음
-
-
-이런 부류의 문제는 DEC Alpha 계열 프로세서들에서 발견될 수 있는데, 이들은
-데이터 버스를 좀 더 잘 사용해 성능을 개선할 수 있는, 분할된 캐시를 가지고 있기
-때문입니다.  대부분의 CPU 는 하나의 읽기 오퍼레이션의 메모리 액세스가 다른 읽기
-오퍼레이션에 의존적이라면 데이터 의존성 배리어를 내포시킵니다만, 모두가 그런건
-아니기 때문에 이점에 의존해선 안됩니다.
-
-다른 CPU 들도 분할된 캐시를 가지고 있을 수 있지만, 그런 CPU 들은 평범한 메모리
-액세스를 위해서도 이 분할된 캐시들 사이의 조정을 해야만 합니다.  Alpha 는 가장
-약한 메모리 순서 시맨틱 (semantic) 을 선택함으로써 메모리 배리어가 명시적으로
-사용되지 않았을 때에는 그런 조정이 필요하지 않게 했으며, 이는 Alpha 가 당시에
-더 높은 CPU 클락 속도를 가질 수 있게 했습니다.  하지만, (다시 말하건대, v4.15
-이후부터는) Alpha 아키텍쳐 전용 코드와 READ_ONCE() 매크로 내부에서를 제외하고는
-smp_read_barrier_depends() 가 사용되지 않아야 함을 알아두시기 바랍니다.
-
-
 캐시 일관성 VS DMA
 ------------------
 
@@ -2962,10 +2824,8 @@ Alpha CPU 의 일부 버전은 분할된 데이터 캐시를 가지고 있어서
 데이터의 발견을 올바른 순서로 일어나게 하기 때문입니다.
 
 리눅스 커널의 메모리 배리어 모델은 Alpha 에 기초해서 정의되었습니다만, v4.15
-부터는 리눅스 커널이 READ_ONCE() 내에 smp_read_barrier_depends() 를 추가해서
-Alpha 의 메모리 모델로의 영향력이 크게 줄어들긴 했습니다.
-
-위의 "캐시 일관성" 서브섹션을 참고하세요.
+부터는 Alpha 용 READ_ONCE() 코드 내에 smp_mb() 가 추가되어서 메모리 모델로의
+Alpha 의 영향력이 크게 줄어들었습니다.
 
 
 가상 머신 게스트
index 5325c71..7fafc7a 100644 (file)
@@ -782,9 +782,9 @@ Protocol:   2.08+
   uncompressed data should be determined using the standard magic
   numbers.  The currently supported compression formats are gzip
   (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A), LZMA
-  (magic number 5D 00), XZ (magic number FD 37), and LZ4 (magic number
-  02 21).  The uncompressed payload is currently always ELF (magic
-  number 7F 45 4C 46).
+  (magic number 5D 00), XZ (magic number FD 37), LZ4 (magic number
+  02 21) and ZSTD (magic number 28 B5). The uncompressed payload is
+  currently always ELF (magic number 7F 45 4C 46).
 
 ============   ==============
 Field name:    payload_length
index f0569cf..13e323b 100644 (file)
@@ -782,7 +782,7 @@ F:  include/dt-bindings/reset/altr,rst-mgr-a10sr.h
 F:     include/linux/mfd/altera-a10sr.h
 
 ALTERA TRIPLE SPEED ETHERNET DRIVER
-M:     Thor Thayer <thor.thayer@linux.intel.com>
+M:     Joyce Ooi <joyce.ooi@intel.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/ethernet/altera/
@@ -830,11 +830,20 @@ F:        include/uapi/rdma/efa-abi.h
 
 AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
 M:     Tom Lendacky <thomas.lendacky@amd.com>
+M:     John Allen <john.allen@amd.com>
 L:     linux-crypto@vger.kernel.org
 S:     Supported
 F:     drivers/crypto/ccp/
 F:     include/linux/ccp.h
 
+AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER - SEV SUPPORT
+M:     Brijesh Singh <brijesh.singh@amd.com>
+M:     Tom Lendacky <thomas.lendacky@amd.com>
+L:     linux-crypto@vger.kernel.org
+S:     Supported
+F:     drivers/crypto/ccp/sev*
+F:     include/uapi/linux/psp-sev.h
+
 AMD DISPLAY CORE
 M:     Harry Wentland <harry.wentland@amd.com>
 M:     Leo Li <sunpeng.li@amd.com>
@@ -1425,7 +1434,7 @@ F:        arch/arm*/include/asm/perf_event.h
 F:     arch/arm*/kernel/hw_breakpoint.c
 F:     arch/arm*/kernel/perf_*
 F:     arch/arm/oprofile/common.c
-F:     drivers/perf/*
+F:     drivers/perf/
 F:     include/linux/perf/arm_pmu.h
 
 ARM PORT
@@ -9972,6 +9981,7 @@ M:        Luc Maranget <luc.maranget@inria.fr>
 M:     "Paul E. McKenney" <paulmck@kernel.org>
 R:     Akira Yokosawa <akiyks@gmail.com>
 R:     Daniel Lustig <dlustig@nvidia.com>
+R:     Joel Fernandes <joel@joelfernandes.org>
 L:     linux-kernel@vger.kernel.org
 L:     linux-arch@vger.kernel.org
 S:     Supported
@@ -9980,6 +9990,7 @@ F:        Documentation/atomic_bitops.txt
 F:     Documentation/atomic_t.txt
 F:     Documentation/core-api/atomic_ops.rst
 F:     Documentation/core-api/refcount-vs-atomic.rst
+F:     Documentation/litmus-tests/
 F:     Documentation/memory-barriers.txt
 F:     tools/memory-model/
 
@@ -13592,16 +13603,6 @@ F:     drivers/block/pktcdvd.c
 F:     include/linux/pktcdvd.h
 F:     include/uapi/linux/pktcdvd.h
 
-PKUNITY SOC DRIVERS
-M:     Guan Xuetao <gxt@pku.edu.cn>
-S:     Maintained
-W:     http://mprc.pku.edu.cn/~guanxuetao/linux
-T:     git git://github.com/gxt/linux.git
-F:     drivers/i2c/busses/i2c-puv3.c
-F:     drivers/input/serio/i8042-unicore32io.h
-F:     drivers/rtc/rtc-puv3.c
-F:     drivers/video/fbdev/fb-puv3.c
-
 PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER
 M:     Tomasz Duszynski <tduszyns@gmail.com>
 S:     Maintained
@@ -14188,7 +14189,8 @@ F:      Documentation/devicetree/bindings/net/qcom,ethqos.txt
 F:     drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
 
 QUALCOMM GENERIC INTERFACE I2C DRIVER
-M:     Alok Chauhan <alokc@codeaurora.org>
+M:     Akash Asthana <akashast@codeaurora.org>
+M:     Mukesh Savaliya <msavaliy@codeaurora.org>
 L:     linux-i2c@vger.kernel.org
 L:     linux-arm-msm@vger.kernel.org
 S:     Supported
@@ -14449,7 +14451,7 @@ T:      git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
 F:     Documentation/RCU/
 F:     include/linux/rcu*
 F:     kernel/rcu/
-X:     Documentation/RCU/torture.txt
+X:     Documentation/RCU/torture.rst
 X:     include/linux/srcu*.h
 X:     kernel/rcu/srcu*.c
 
@@ -17301,7 +17303,7 @@ M:      Josh Triplett <josh@joshtriplett.org>
 L:     linux-kernel@vger.kernel.org
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
-F:     Documentation/RCU/torture.txt
+F:     Documentation/RCU/torture.rst
 F:     kernel/locking/locktorture.c
 F:     kernel/rcu/rcuperf.c
 F:     kernel/rcu/rcutorture.c
@@ -17545,13 +17547,6 @@ L:     linux-fsdevel@vger.kernel.org
 S:     Supported
 F:     fs/unicode/
 
-UNICORE32 ARCHITECTURE
-M:     Guan Xuetao <gxt@pku.edu.cn>
-S:     Maintained
-W:     http://mprc.pku.edu.cn/~guanxuetao/linux
-T:     git git://github.com/gxt/linux.git
-F:     arch/unicore32/
-
 UNIFDEF
 M:     Tony Finch <dot@dotat.at>
 S:     Maintained
index 229e67f..aef9ca6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*
@@ -464,6 +464,7 @@ KLZOP               = lzop
 LZMA           = lzma
 LZ4            = lz4c
 XZ             = xz
+ZSTD           = zstd
 
 CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
                  -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF)
@@ -512,7 +513,7 @@ CLANG_FLAGS :=
 export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
 export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE LEX YACC AWK INSTALLKERNEL
 export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
-export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ
+export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
 export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
 
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS
index 2144530..e41c113 100644 (file)
 
 /*
  * To ensure dependency ordering is preserved for the _relaxed and
- * _release atomics, an smp_read_barrier_depends() is unconditionally
- * inserted into the _relaxed variants, which are used to build the
- * barriered versions. Avoid redundant back-to-back fences in the
- * _acquire and _fence versions.
+ * _release atomics, an smp_mb() is unconditionally inserted into the
+ * _relaxed variants, which are used to build the barriered versions.
+ * Avoid redundant back-to-back fences in the _acquire and _fence
+ * versions.
  */
 #define __atomic_acquire_fence()
 #define __atomic_post_full_fence()
 
-#define ATOMIC_INIT(i)         { (i) }
 #define ATOMIC64_INIT(i)       { (i) }
 
 #define atomic_read(v)         READ_ONCE((v)->counter)
@@ -70,7 +69,7 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)    \
        ".previous"                                                     \
        :"=&r" (temp), "=m" (v->counter), "=&r" (result)                \
        :"Ir" (i), "m" (v->counter) : "memory");                        \
-       smp_read_barrier_depends();                                     \
+       smp_mb();                                                       \
        return result;                                                  \
 }
 
@@ -88,7 +87,7 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)     \
        ".previous"                                                     \
        :"=&r" (temp), "=m" (v->counter), "=&r" (result)                \
        :"Ir" (i), "m" (v->counter) : "memory");                        \
-       smp_read_barrier_depends();                                     \
+       smp_mb();                                                       \
        return result;                                                  \
 }
 
@@ -123,7 +122,7 @@ static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
        ".previous"                                                     \
        :"=&r" (temp), "=m" (v->counter), "=&r" (result)                \
        :"Ir" (i), "m" (v->counter) : "memory");                        \
-       smp_read_barrier_depends();                                     \
+       smp_mb();                                                       \
        return result;                                                  \
 }
 
@@ -141,7 +140,7 @@ static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v)  \
        ".previous"                                                     \
        :"=&r" (temp), "=m" (v->counter), "=&r" (result)                \
        :"Ir" (i), "m" (v->counter) : "memory");                        \
-       smp_read_barrier_depends();                                     \
+       smp_mb();                                                       \
        return result;                                                  \
 }
 
index 92ec486..c56bfff 100644 (file)
@@ -2,64 +2,15 @@
 #ifndef __BARRIER_H
 #define __BARRIER_H
 
-#include <asm/compiler.h>
-
 #define mb()   __asm__ __volatile__("mb": : :"memory")
 #define rmb()  __asm__ __volatile__("mb": : :"memory")
 #define wmb()  __asm__ __volatile__("wmb": : :"memory")
 
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *     CPU 0                           CPU 1
- *
- *     b = 2;
- *     memory_barrier();
- *     p = &b;                         q = p;
- *                                     read_barrier_depends();
- *                                     d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *     CPU 0                           CPU 1
- *
- *     a = 2;
- *     memory_barrier();
- *     b = 3;                          y = b;
- *                                     read_barrier_depends();
- *                                     x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- */
-#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
+#define __smp_load_acquire(p)                                          \
+({                                                                     \
+       compiletime_assert_atomic_type(*p);                             \
+       __READ_ONCE(*p);                                                \
+})
 
 #ifdef CONFIG_SMP
 #define __ASM_SMP_MB   "\tmb\n"
index 162c17b..660b14c 100644 (file)
@@ -277,9 +277,9 @@ extern inline pte_t pte_mkdirty(pte_t pte)  { pte_val(pte) |= __DIRTY_BITS; retur
 extern inline pte_t pte_mkyoung(pte_t pte)     { pte_val(pte) |= __ACCESS_BITS; return pte; }
 
 /*
- * The smp_read_barrier_depends() in the following functions are required to
- * order the load of *dir (the pointer in the top level page table) with any
- * subsequent load of the returned pmd_t *ret (ret is data dependent on *dir).
+ * The smp_rmb() calls in the following functions are required to order the
+ * load of *dir (the pointer in the top level page table) with any subsequent
+ * load of the returned pmd_t *ret (ret is data dependent on *dir).
  *
  * If this ordering is not enforced, the CPU might load an older value of
  * *ret, which may be uninitialized data. See mm/memory.c:__pte_alloc for
@@ -293,7 +293,7 @@ extern inline pte_t pte_mkyoung(pte_t pte)  { pte_val(pte) |= __ACCESS_BITS; retu
 extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address)
 {
        pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
-       smp_read_barrier_depends(); /* see above */
+       smp_rmb(); /* see above */
        return ret;
 }
 #define pmd_offset pmd_offset
@@ -303,7 +303,7 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address)
 {
        pte_t *ret = (pte_t *) pmd_page_vaddr(*dir)
                + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1));
-       smp_read_barrier_depends(); /* see above */
+       smp_rmb(); /* see above */
        return ret;
 }
 #define pte_offset_kernel pte_offset_kernel
diff --git a/arch/alpha/include/asm/rwonce.h b/arch/alpha/include/asm/rwonce.h
new file mode 100644 (file)
index 0000000..35542bc
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#ifdef CONFIG_SMP
+
+#include <asm/barrier.h>
+
+/*
+ * Alpha is apparently daft enough to reorder address-dependent loads
+ * on some CPU implementations. Knock some common sense into it with
+ * a memory barrier in READ_ONCE().
+ *
+ * For the curious, more information about this unusual reordering is
+ * available in chapter 15 of the "perfbook":
+ *
+ *  https://kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook.html
+ *
+ */
+#define __READ_ONCE(x)                                                 \
+({                                                                     \
+       __unqual_scalar_typeof(x) __x =                                 \
+               (*(volatile typeof(__x) *)(&(x)));                      \
+       mb();                                                           \
+       (typeof(x))__x;                                                 \
+})
+
+#endif /* CONFIG_SMP */
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_RWONCE_H */
index 7298ce8..c614857 100644 (file)
@@ -14,8 +14,6 @@
 #include <asm/barrier.h>
 #include <asm/smp.h>
 
-#define ATOMIC_INIT(i) { (i) }
-
 #ifndef CONFIG_ARC_PLAT_EZNPS
 
 #define atomic_read(v)  READ_ONCE((v)->counter)
index 3481165..9b1a24c 100644 (file)
 
                        comphy: phy@18300 {
                                compatible = "marvell,armada-380-comphy";
-                               reg = <0x18300 0x100>;
+                               reg-names = "comphy", "conf";
+                               reg = <0x18300 0x100>, <0x18460 4>;
                                #address-cells = <1>;
                                #size-cells = <0>;
 
index 756f3a9..12997da 100644 (file)
 
        pinctrl_usbotg: usbotggrp {
                fsl,pins = <
-                       MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x17059
+                       MX6QDL_PAD_ENET_RX_ER__USB_OTG_ID 0x17059
                >;
        };
 
                        MX6QDL_PAD_SD1_DAT1__SD1_DATA1 0x17070
                        MX6QDL_PAD_SD1_DAT2__SD1_DATA2 0x17070
                        MX6QDL_PAD_SD1_DAT3__SD1_DATA3 0x17070
+                       MX6QDL_PAD_GPIO_1__GPIO1_IO01  0x1b0b0
                >;
        };
 
index 8259244..14fd1de 100644 (file)
@@ -99,7 +99,7 @@
 &fec2 {
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet2>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-handle = <&ethphy0>;
        fsl,magic-packet;
        status = "okay";
index 3e5fb72..c99aa27 100644 (file)
 &fec2 {
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet2>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-handle = <&ethphy2>;
        status = "okay";
 };
index db640ba..8b3d64c 100644 (file)
 
 &gbe0 {
        phy-handle = <&ethphy0>;
-       phy-mode = "rgmii-id";
+       phy-mode = "rgmii-rxid";
        status = "okay";
 };
 
index bf531ef..0f95a6e 100644 (file)
                default-pool {
                        compatible = "shared-dma-pool";
                        size = <0x6000000>;
-                       alloc-ranges = <0x4a000000 0x6000000>;
+                       alloc-ranges = <0x40000000 0x10000000>;
                        reusable;
                        linux,cma-default;
                };
index e6b0367..c2b4fbf 100644 (file)
                default-pool {
                        compatible = "shared-dma-pool";
                        size = <0x6000000>;
-                       alloc-ranges = <0x4a000000 0x6000000>;
+                       alloc-ranges = <0x40000000 0x10000000>;
                        reusable;
                        linux,cma-default;
                };
index ffe1d10..6d6a379 100644 (file)
                default-pool {
                        compatible = "shared-dma-pool";
                        size = <0x6000000>;
-                       alloc-ranges = <0x4a000000 0x6000000>;
+                       alloc-ranges = <0x40000000 0x10000000>;
                        reusable;
                        linux,cma-default;
                };
index 5cbd4a6..3f13a76 100644 (file)
@@ -39,7 +39,7 @@
  * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
  * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
  * at:
- * http://www.intel.com/products/processor/manuals/
+ * https://www.intel.com/products/processor/manuals/
  * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
  * Volume 2B: Instruction Set Reference, N-Z
  *
index a00fd32..f13401f 100644 (file)
@@ -16,6 +16,7 @@
 #include <crypto/gf128mul.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
+#include <linux/jump_label.h>
 #include <linux/module.h>
 
 MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
@@ -27,12 +28,8 @@ MODULE_ALIAS_CRYPTO("ghash");
 #define GHASH_DIGEST_SIZE      16
 
 struct ghash_key {
-       u64     h[2];
-       u64     h2[2];
-       u64     h3[2];
-       u64     h4[2];
-
        be128   k;
+       u64     h[][2];
 };
 
 struct ghash_desc_ctx {
@@ -46,16 +43,12 @@ struct ghash_async_ctx {
 };
 
 asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
-                                      struct ghash_key const *k,
-                                      const char *head);
+                                      u64 const h[][2], const char *head);
 
 asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
-                                     struct ghash_key const *k,
-                                     const char *head);
+                                     u64 const h[][2], const char *head);
 
-static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
-                                 struct ghash_key const *k,
-                                 const char *head);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -70,7 +63,10 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
 {
        if (likely(crypto_simd_usable())) {
                kernel_neon_begin();
-               pmull_ghash_update(blocks, dg, src, key, head);
+               if (static_branch_likely(&use_p64))
+                       pmull_ghash_update_p64(blocks, dg, src, key->h, head);
+               else
+                       pmull_ghash_update_p8(blocks, dg, src, key->h, head);
                kernel_neon_end();
        } else {
                be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -161,25 +157,26 @@ static int ghash_setkey(struct crypto_shash *tfm,
                        const u8 *inkey, unsigned int keylen)
 {
        struct ghash_key *key = crypto_shash_ctx(tfm);
-       be128 h;
 
        if (keylen != GHASH_BLOCK_SIZE)
                return -EINVAL;
 
        /* needed for the fallback */
        memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
-       ghash_reflect(key->h, &key->k);
+       ghash_reflect(key->h[0], &key->k);
 
-       h = key->k;
-       gf128mul_lle(&h, &key->k);
-       ghash_reflect(key->h2, &h);
+       if (static_branch_likely(&use_p64)) {
+               be128 h = key->k;
 
-       gf128mul_lle(&h, &key->k);
-       ghash_reflect(key->h3, &h);
+               gf128mul_lle(&h, &key->k);
+               ghash_reflect(key->h[1], &h);
 
-       gf128mul_lle(&h, &key->k);
-       ghash_reflect(key->h4, &h);
+               gf128mul_lle(&h, &key->k);
+               ghash_reflect(key->h[2], &h);
 
+               gf128mul_lle(&h, &key->k);
+               ghash_reflect(key->h[3], &h);
+       }
        return 0;
 }
 
@@ -195,7 +192,7 @@ static struct shash_alg ghash_alg = {
        .base.cra_driver_name   = "ghash-ce-sync",
        .base.cra_priority      = 300 - 1,
        .base.cra_blocksize     = GHASH_BLOCK_SIZE,
-       .base.cra_ctxsize       = sizeof(struct ghash_key),
+       .base.cra_ctxsize       = sizeof(struct ghash_key) + sizeof(u64[2]),
        .base.cra_module        = THIS_MODULE,
 };
 
@@ -354,10 +351,10 @@ static int __init ghash_ce_mod_init(void)
        if (!(elf_hwcap & HWCAP_NEON))
                return -ENODEV;
 
-       if (elf_hwcap2 & HWCAP2_PMULL)
-               pmull_ghash_update = pmull_ghash_update_p64;
-       else
-               pmull_ghash_update = pmull_ghash_update_p8;
+       if (elf_hwcap2 & HWCAP2_PMULL) {
+               ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
+               static_branch_enable(&use_p64);
+       }
 
        err = crypto_register_shash(&ghash_alg);
        if (err)
index f82cd8c..1c8b685 100644 (file)
@@ -13,7 +13,7 @@
 @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
 @ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see http://www.openssl.org/~appro/cryptogams/.
+@ details see https://www.openssl.org/~appro/cryptogams/.
 @ ====================================================================
 
 @ sha1_block procedure for ARMv4.
index a03cf4d..9f96ff4 100644 (file)
@@ -13,7 +13,7 @@
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
+# details see https://www.openssl.org/~appro/cryptogams/.
 # ====================================================================
 
 # SHA256 block procedure for ARMv4. May 2007.
index 054aae0..ea04b2a 100644 (file)
@@ -12,7 +12,7 @@
 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
 @ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see http://www.openssl.org/~appro/cryptogams/.
+@ details see https://www.openssl.org/~appro/cryptogams/.
 @ ====================================================================
 
 @ SHA256 block procedure for ARMv4. May 2007.
index 788c17b..69df689 100644 (file)
@@ -13,7 +13,7 @@
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
+# details see https://www.openssl.org/~appro/cryptogams/.
 # ====================================================================
 
 # SHA512 block procedure for ARMv4. September 2007.
@@ -43,7 +43,7 @@
 # terms it's 22.6 cycles per byte, which is disappointing result.
 # Technical writers asserted that 3-way S4 pipeline can sustain
 # multiple NEON instructions per cycle, but dual NEON issue could
-# not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+# not be observed, see https://www.openssl.org/~appro/Snapdragon-S4.html
 # for further details. On side note Cortex-A15 processes one byte in
 # 16 cycles.
 
index 710ea30..cb147db 100644 (file)
@@ -12,7 +12,7 @@
 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
 @ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see http://www.openssl.org/~appro/cryptogams/.
+@ details see https://www.openssl.org/~appro/cryptogams/.
 @ ====================================================================
 
 @ SHA512 block procedure for ARMv4. September 2007.
@@ -42,7 +42,7 @@
 @ terms it's 22.6 cycles per byte, which is disappointing result.
 @ Technical writers asserted that 3-way S4 pipeline can sustain
 @ multiple NEON instructions per cycle, but dual NEON issue could
-@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+@ not be observed, see https://www.openssl.org/~appro/Snapdragon-S4.html
 @ for further details. On side note Cortex-A15 processes one byte in
 @ 16 cycles.
 
index 75bb2c5..455eb19 100644 (file)
@@ -15,8 +15,6 @@
 #include <asm/barrier.h>
 #include <asm/cmpxchg.h>
 
-#define ATOMIC_INIT(i) { (i) }
-
 #ifdef __KERNEL__
 
 /*
index f44f448..e2fcb3c 100644 (file)
@@ -5,6 +5,8 @@
 #ifndef _ASM_ARM_PERCPU_H_
 #define _ASM_ARM_PERCPU_H_
 
+register unsigned long current_stack_pointer asm ("sp");
+
 /*
  * Same as asm-generic/percpu.h, except that we store the per cpu offset
  * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
index 3609a69..536b6b9 100644 (file)
@@ -75,11 +75,6 @@ struct thread_info {
        .addr_limit     = KERNEL_DS,                                    \
 }
 
-/*
- * how to get the current stack pointer in C
- */
-register unsigned long current_stack_pointer asm ("sp");
-
 /*
  * how to get the thread information struct from C
  */
index 435aba2..e0593cf 100644 (file)
@@ -16,8 +16,9 @@
 /* Enable topology flag updates */
 #define arch_update_cpu_topology topology_update_cpu_topology
 
-/* Replace task scheduler's default thermal pressure retrieve API */
+/* Replace task scheduler's default thermal pressure API */
 #define arch_scale_thermal_pressure topology_get_thermal_pressure
+#define arch_set_thermal_pressure   topology_set_thermal_pressure
 
 #else
 
index 36dc185..1b207cf 100644 (file)
@@ -7,6 +7,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
 #include <asm/errno.h>
 #include <asm/unistd.h>
 #include <asm/vdso/cp15.h>
index 02ca7ad..7fff88e 100644 (file)
@@ -683,6 +683,12 @@ static void disable_single_step(struct perf_event *bp)
        arch_install_hw_breakpoint(bp);
 }
 
+static int watchpoint_fault_on_uaccess(struct pt_regs *regs,
+                                      struct arch_hw_breakpoint *info)
+{
+       return !user_mode(regs) && info->ctrl.privilege == ARM_BREAKPOINT_USER;
+}
+
 static void watchpoint_handler(unsigned long addr, unsigned int fsr,
                               struct pt_regs *regs)
 {
@@ -742,16 +748,27 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
                }
 
                pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
+
+               /*
+                * If we triggered a user watchpoint from a uaccess routine,
+                * then handle the stepping ourselves since userspace really
+                * can't help us with this.
+                */
+               if (watchpoint_fault_on_uaccess(regs, info))
+                       goto step;
+
                perf_bp_event(wp, regs);
 
                /*
-                * If no overflow handler is present, insert a temporary
-                * mismatch breakpoint so we can single-step over the
-                * watchpoint trigger.
+                * Defer stepping to the overflow handler if one is installed.
+                * Otherwise, insert a temporary mismatch breakpoint so that
+                * we can single-step over the watchpoint trigger.
                 */
-               if (is_default_overflow_handler(wp))
-                       enable_single_step(wp, instruction_pointer(regs));
+               if (!is_default_overflow_handler(wp))
+                       goto unlock;
 
+step:
+               enable_single_step(wp, instruction_pointer(regs));
 unlock:
                rcu_read_unlock();
        }
index 6bfdca4..fddd08a 100644 (file)
@@ -184,6 +184,7 @@ static void __init patch_vdso(void *ehdr)
        if (!cntvct_ok) {
                vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
                vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
+               vdso_nullpatch_one(&einfo, "__vdso_clock_gettime64");
        }
 }
 
index 628028b..bcd8261 100644 (file)
@@ -966,7 +966,7 @@ void __init create_mapping_late(struct mm_struct *mm, struct map_desc *md,
        pud_t *pud;
 
        p4d = p4d_alloc(mm, pgd_offset(mm, md->virtual), md->virtual);
-       if (!WARN_ON(!p4d))
+       if (WARN_ON(!p4d))
                return;
        pud = pud_alloc(mm, p4d, md->virtual);
        if (WARN_ON(!pud))
index 66dc41f..73aee72 100644 (file)
@@ -118,6 +118,7 @@ config ARM64
        select GENERIC_STRNLEN_USER
        select GENERIC_TIME_VSYSCALL
        select GENERIC_GETTIMEOFDAY
+       select GENERIC_VDSO_TIME_NS
        select HANDLE_DOMAIN_IRQ
        select HARDIRQS_SW_RESEND
        select HAVE_PCI
@@ -1327,6 +1328,8 @@ config SWP_EMULATION
          ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that
          they are always undefined. Say Y here to enable software
          emulation of these instructions for userspace using LDXR/STXR.
+         This feature can be controlled at runtime with the abi.swp
+         sysctl which is disabled by default.
 
          In some older versions of glibc [<=2.8] SWP is used during futex
          trylock() operations with the assumption that the code will not
@@ -1353,7 +1356,8 @@ config CP15_BARRIER_EMULATION
          Say Y here to enable software emulation of these
          instructions for AArch32 userspace code. When this option is
          enabled, CP15 barrier usage is traced which can help
-         identify software that needs updating.
+         identify software that needs updating. This feature can be
+         controlled at runtime with the abi.cp15_barrier sysctl.
 
          If unsure, say Y
 
@@ -1364,7 +1368,8 @@ config SETEND_EMULATION
          AArch32 EL0, and is deprecated in ARMv8.
 
          Say Y here to enable software emulation of the instruction
-         for AArch32 userspace code.
+         for AArch32 userspace code. This feature can be controlled
+         at runtime with the abi.setend sysctl.
 
          Note: All the cpus on the system must have mixed endian support at EL0
          for this feature to be enabled. If a new CPU - which doesn't support mixed
@@ -1596,6 +1601,20 @@ config ARM64_AMU_EXTN
          correctly reflect reality. Most commonly, the value read will be 0,
          indicating that the counter is not enabled.
 
+config AS_HAS_ARMV8_4
+       def_bool $(cc-option,-Wa$(comma)-march=armv8.4-a)
+
+config ARM64_TLB_RANGE
+       bool "Enable support for tlbi range feature"
+       default y
+       depends on AS_HAS_ARMV8_4
+       help
+         ARMv8.4-TLBI provides TLBI invalidation instructions that apply to a
+         range of input addresses.
+
+         The feature introduces new assembly instructions, which are only
+         supported by binutils >= 2.30.
+
 endmenu
 
 menu "ARMv8.5 architectural features"
index 70f5905..55bc854 100644 (file)
@@ -82,11 +82,18 @@ endif
 # compiler to generate them and consequently to break the single image contract
 # we pass it only to the assembler. This option is utilized only in case of non
 # integrated assemblers.
+ifneq ($(CONFIG_AS_HAS_ARMV8_4), y)
 branch-prot-flags-$(CONFIG_AS_HAS_PAC) += -Wa,-march=armv8.3-a
 endif
+endif
 
 KBUILD_CFLAGS += $(branch-prot-flags-y)
 
+ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
+# make sure to pass the newest target architecture to -march.
+KBUILD_CFLAGS  += -Wa,-march=armv8.4-a
+endif
+
 ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
 KBUILD_CFLAGS  += -ffixed-x18
 endif
index 78b1361..9ce78a7 100644 (file)
                        resets = <&ccu RST_BUS_VE>;
                        interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
                        allwinner,sram = <&ve_sram 1>;
+                       iommus = <&iommu 3>;
                };
 
                gpu: gpu@1800000 {
index 2ca7ba6..39273b5 100644 (file)
@@ -66,6 +66,7 @@ CONFIG_SCHED_SMT=y
 CONFIG_NUMA=y
 CONFIG_SECCOMP=y
 CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
 CONFIG_CRASH_DUMP=y
 CONFIG_XEN=y
 CONFIG_COMPAT=y
index 22831d3..da10348 100644 (file)
@@ -31,12 +31,8 @@ MODULE_ALIAS_CRYPTO("ghash");
 #define GCM_IV_SIZE            12
 
 struct ghash_key {
-       u64                     h[2];
-       u64                     h2[2];
-       u64                     h3[2];
-       u64                     h4[2];
-
        be128                   k;
+       u64                     h[][2];
 };
 
 struct ghash_desc_ctx {
@@ -51,22 +47,18 @@ struct gcm_aes_ctx {
 };
 
 asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
-                                      struct ghash_key const *k,
-                                      const char *head);
+                                      u64 const h[][2], const char *head);
 
 asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
-                                     struct ghash_key const *k,
-                                     const char *head);
+                                     u64 const h[][2], const char *head);
 
 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
-                                 struct ghash_key const *k, u64 dg[],
-                                 u8 ctr[], u32 const rk[], int rounds,
-                                 u8 tag[]);
+                                 u64 const h[][2], u64 dg[], u8 ctr[],
+                                 u32 const rk[], int rounds, u8 tag[]);
 
 asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
-                                 struct ghash_key const *k, u64 dg[],
-                                 u8 ctr[], u32 const rk[], int rounds,
-                                 u8 tag[]);
+                                 u64 const h[][2], u64 dg[], u8 ctr[],
+                                 u32 const rk[], int rounds, u8 tag[]);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -77,48 +69,51 @@ static int ghash_init(struct shash_desc *desc)
 }
 
 static void ghash_do_update(int blocks, u64 dg[], const char *src,
-                           struct ghash_key *key, const char *head,
-                           void (*simd_update)(int blocks, u64 dg[],
-                                               const char *src,
-                                               struct ghash_key const *k,
-                                               const char *head))
+                           struct ghash_key *key, const char *head)
 {
-       if (likely(crypto_simd_usable() && simd_update)) {
-               kernel_neon_begin();
-               simd_update(blocks, dg, src, key, head);
-               kernel_neon_end();
-       } else {
-               be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+       be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
 
-               do {
-                       const u8 *in = src;
-
-                       if (head) {
-                               in = head;
-                               blocks++;
-                               head = NULL;
-                       } else {
-                               src += GHASH_BLOCK_SIZE;
-                       }
+       do {
+               const u8 *in = src;
+
+               if (head) {
+                       in = head;
+                       blocks++;
+                       head = NULL;
+               } else {
+                       src += GHASH_BLOCK_SIZE;
+               }
 
-                       crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
-                       gf128mul_lle(&dst, &key->k);
-               } while (--blocks);
+               crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+               gf128mul_lle(&dst, &key->k);
+       } while (--blocks);
 
-               dg[0] = be64_to_cpu(dst.b);
-               dg[1] = be64_to_cpu(dst.a);
+       dg[0] = be64_to_cpu(dst.b);
+       dg[1] = be64_to_cpu(dst.a);
+}
+
+static __always_inline
+void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
+                         struct ghash_key *key, const char *head,
+                         void (*simd_update)(int blocks, u64 dg[],
+                                             const char *src,
+                                             u64 const h[][2],
+                                             const char *head))
+{
+       if (likely(crypto_simd_usable())) {
+               kernel_neon_begin();
+               simd_update(blocks, dg, src, key->h, head);
+               kernel_neon_end();
+       } else {
+               ghash_do_update(blocks, dg, src, key, head);
        }
 }
 
 /* avoid hogging the CPU for too long */
 #define MAX_BLOCKS     (SZ_64K / GHASH_BLOCK_SIZE)
 
-static int __ghash_update(struct shash_desc *desc, const u8 *src,
-                         unsigned int len,
-                         void (*simd_update)(int blocks, u64 dg[],
-                                             const char *src,
-                                             struct ghash_key const *k,
-                                             const char *head))
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+                       unsigned int len)
 {
        struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
        unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -143,9 +138,9 @@ static int __ghash_update(struct shash_desc *desc, const u8 *src,
                do {
                        int chunk = min(blocks, MAX_BLOCKS);
 
-                       ghash_do_update(chunk, ctx->digest, src, key,
-                                       partial ? ctx->buf : NULL,
-                                       simd_update);
+                       ghash_do_simd_update(chunk, ctx->digest, src, key,
+                                            partial ? ctx->buf : NULL,
+                                            pmull_ghash_update_p8);
 
                        blocks -= chunk;
                        src += chunk * GHASH_BLOCK_SIZE;
@@ -157,39 +152,7 @@ static int __ghash_update(struct shash_desc *desc, const u8 *src,
        return 0;
 }
 
-static int ghash_update_p8(struct shash_desc *desc, const u8 *src,
-                          unsigned int len)
-{
-       return __ghash_update(desc, src, len, pmull_ghash_update_p8);
-}
-
-static int ghash_update_p64(struct shash_desc *desc, const u8 *src,
-                           unsigned int len)
-{
-       return __ghash_update(desc, src, len, pmull_ghash_update_p64);
-}
-
-static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
-{
-       struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
-       unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
-
-       if (partial) {
-               struct ghash_key *key = crypto_shash_ctx(desc->tfm);
-
-               memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
-
-               ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
-                               pmull_ghash_update_p8);
-       }
-       put_unaligned_be64(ctx->digest[1], dst);
-       put_unaligned_be64(ctx->digest[0], dst + 8);
-
-       *ctx = (struct ghash_desc_ctx){};
-       return 0;
-}
-
-static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
+static int ghash_final(struct shash_desc *desc, u8 *dst)
 {
        struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
        unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -199,8 +162,8 @@ static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
 
                memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
-               ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
-                               pmull_ghash_update_p64);
+               ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL,
+                                    pmull_ghash_update_p8);
        }
        put_unaligned_be64(ctx->digest[1], dst);
        put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -220,29 +183,6 @@ static void ghash_reflect(u64 h[], const be128 *k)
                h[1] ^= 0xc200000000000000UL;
 }
 
-static int __ghash_setkey(struct ghash_key *key,
-                         const u8 *inkey, unsigned int keylen)
-{
-       be128 h;
-
-       /* needed for the fallback */
-       memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
-
-       ghash_reflect(key->h, &key->k);
-
-       h = key->k;
-       gf128mul_lle(&h, &key->k);
-       ghash_reflect(key->h2, &h);
-
-       gf128mul_lle(&h, &key->k);
-       ghash_reflect(key->h3, &h);
-
-       gf128mul_lle(&h, &key->k);
-       ghash_reflect(key->h4, &h);
-
-       return 0;
-}
-
 static int ghash_setkey(struct crypto_shash *tfm,
                        const u8 *inkey, unsigned int keylen)
 {
@@ -251,38 +191,28 @@ static int ghash_setkey(struct crypto_shash *tfm,
        if (keylen != GHASH_BLOCK_SIZE)
                return -EINVAL;
 
-       return __ghash_setkey(key, inkey, keylen);
+       /* needed for the fallback */
+       memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
+
+       ghash_reflect(key->h[0], &key->k);
+       return 0;
 }
 
-static struct shash_alg ghash_alg[] = {{
+static struct shash_alg ghash_alg = {
        .base.cra_name          = "ghash",
        .base.cra_driver_name   = "ghash-neon",
        .base.cra_priority      = 150,
        .base.cra_blocksize     = GHASH_BLOCK_SIZE,
-       .base.cra_ctxsize       = sizeof(struct ghash_key),
-       .base.cra_module        = THIS_MODULE,
-
-       .digestsize             = GHASH_DIGEST_SIZE,
-       .init                   = ghash_init,
-       .update                 = ghash_update_p8,
-       .final                  = ghash_final_p8,
-       .setkey                 = ghash_setkey,
-       .descsize               = sizeof(struct ghash_desc_ctx),
-}, {
-       .base.cra_name          = "ghash",
-       .base.cra_driver_name   = "ghash-ce",
-       .base.cra_priority      = 200,
-       .base.cra_blocksize     = GHASH_BLOCK_SIZE,
-       .base.cra_ctxsize       = sizeof(struct ghash_key),
+       .base.cra_ctxsize       = sizeof(struct ghash_key) + sizeof(u64[2]),
        .base.cra_module        = THIS_MODULE,
 
        .digestsize             = GHASH_DIGEST_SIZE,
        .init                   = ghash_init,
-       .update                 = ghash_update_p64,
-       .final                  = ghash_final_p64,
+       .update                 = ghash_update,
+       .final                  = ghash_final,
        .setkey                 = ghash_setkey,
        .descsize               = sizeof(struct ghash_desc_ctx),
-}};
+};
 
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
@@ -301,6 +231,7 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
 {
        struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
        u8 key[GHASH_BLOCK_SIZE];
+       be128 h;
        int ret;
 
        ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
@@ -309,7 +240,22 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
 
        aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
 
-       return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
+       /* needed for the fallback */
+       memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);
+
+       ghash_reflect(ctx->ghash_key.h[0], &ctx->ghash_key.k);
+
+       h = ctx->ghash_key.k;
+       gf128mul_lle(&h, &ctx->ghash_key.k);
+       ghash_reflect(ctx->ghash_key.h[1], &h);
+
+       gf128mul_lle(&h, &ctx->ghash_key.k);
+       ghash_reflect(ctx->ghash_key.h[2], &h);
+
+       gf128mul_lle(&h, &ctx->ghash_key.k);
+       ghash_reflect(ctx->ghash_key.h[3], &h);
+
+       return 0;
 }
 
 static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
@@ -341,9 +287,9 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
        if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
                int blocks = count / GHASH_BLOCK_SIZE;
 
-               ghash_do_update(blocks, dg, src, &ctx->ghash_key,
-                               *buf_count ? buf : NULL,
-                               pmull_ghash_update_p64);
+               ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key,
+                                    *buf_count ? buf : NULL,
+                                    pmull_ghash_update_p64);
 
                src += blocks * GHASH_BLOCK_SIZE;
                count %= GHASH_BLOCK_SIZE;
@@ -387,8 +333,8 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
 
        if (buf_count) {
                memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
-               ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL,
-                               pmull_ghash_update_p64);
+               ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
+                                    pmull_ghash_update_p64);
        }
 }
 
@@ -433,8 +379,8 @@ static int gcm_encrypt(struct aead_request *req)
                        }
 
                        kernel_neon_begin();
-                       pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
-                                         iv, ctx->aes_key.key_enc, nrounds,
+                       pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
+                                         dg, iv, ctx->aes_key.key_enc, nrounds,
                                          tag);
                        kernel_neon_end();
 
@@ -464,7 +410,7 @@ static int gcm_encrypt(struct aead_request *req)
                        } while (--remaining > 0);
 
                        ghash_do_update(blocks, dg, walk.dst.virt.addr,
-                                       &ctx->ghash_key, NULL, NULL);
+                                       &ctx->ghash_key, NULL);
 
                        err = skcipher_walk_done(&walk,
                                                 walk.nbytes % AES_BLOCK_SIZE);
@@ -483,7 +429,7 @@ static int gcm_encrypt(struct aead_request *req)
 
                tag = (u8 *)&lengths;
                ghash_do_update(1, dg, tag, &ctx->ghash_key,
-                               walk.nbytes ? buf : NULL, NULL);
+                               walk.nbytes ? buf : NULL);
 
                if (walk.nbytes)
                        err = skcipher_walk_done(&walk, 0);
@@ -547,8 +493,8 @@ static int gcm_decrypt(struct aead_request *req)
                        }
 
                        kernel_neon_begin();
-                       pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
-                                         iv, ctx->aes_key.key_enc, nrounds,
+                       pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
+                                         dg, iv, ctx->aes_key.key_enc, nrounds,
                                          tag);
                        kernel_neon_end();
 
@@ -568,7 +514,7 @@ static int gcm_decrypt(struct aead_request *req)
                        u8 *dst = walk.dst.virt.addr;
 
                        ghash_do_update(blocks, dg, walk.src.virt.addr,
-                                       &ctx->ghash_key, NULL, NULL);
+                                       &ctx->ghash_key, NULL);
 
                        do {
                                aes_encrypt(&ctx->aes_key, buf, iv);
@@ -591,7 +537,7 @@ static int gcm_decrypt(struct aead_request *req)
 
                tag = (u8 *)&lengths;
                ghash_do_update(1, dg, tag, &ctx->ghash_key,
-                               walk.nbytes ? buf : NULL, NULL);
+                               walk.nbytes ? buf : NULL);
 
                if (walk.nbytes) {
                        aes_encrypt(&ctx->aes_key, buf, iv);
@@ -635,43 +581,28 @@ static struct aead_alg gcm_aes_alg = {
        .base.cra_driver_name   = "gcm-aes-ce",
        .base.cra_priority      = 300,
        .base.cra_blocksize     = 1,
-       .base.cra_ctxsize       = sizeof(struct gcm_aes_ctx),
+       .base.cra_ctxsize       = sizeof(struct gcm_aes_ctx) +
+                                 4 * sizeof(u64[2]),
        .base.cra_module        = THIS_MODULE,
 };
 
 static int __init ghash_ce_mod_init(void)
 {
-       int ret;
-
        if (!cpu_have_named_feature(ASIMD))
                return -ENODEV;
 
        if (cpu_have_named_feature(PMULL))
-               ret = crypto_register_shashes(ghash_alg,
-                                             ARRAY_SIZE(ghash_alg));
-       else
-               /* only register the first array element */
-               ret = crypto_register_shash(ghash_alg);
+               return crypto_register_aead(&gcm_aes_alg);
 
-       if (ret)
-               return ret;
-
-       if (cpu_have_named_feature(PMULL)) {
-               ret = crypto_register_aead(&gcm_aes_alg);
-               if (ret)
-                       crypto_unregister_shashes(ghash_alg,
-                                                 ARRAY_SIZE(ghash_alg));
-       }
-       return ret;
+       return crypto_register_shash(&ghash_alg);
 }
 
 static void __exit ghash_ce_mod_exit(void)
 {
        if (cpu_have_named_feature(PMULL))
-               crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
+               crypto_unregister_aead(&gcm_aes_alg);
        else
-               crypto_unregister_shash(ghash_alg);
-       crypto_unregister_aead(&gcm_aes_alg);
+               crypto_unregister_shash(&ghash_alg);
 }
 
 static const struct cpu_feature ghash_cpu_feature[] = {
index a45366c..bd68e1b 100644 (file)
 pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
 
 /* ACPI table mapping after acpi_permanent_mmap is set */
-static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
-                                           acpi_size size)
-{
-       /* For normal memory we already have a cacheable mapping. */
-       if (memblock_is_map_memory(phys))
-               return (void __iomem *)__phys_to_virt(phys);
-
-       /*
-        * We should still honor the memory's attribute here because
-        * crash dump kernel possibly excludes some ACPI (reclaim)
-        * regions from memblock list.
-        */
-       return __ioremap(phys, size, __acpi_get_mem_attribute(phys));
-}
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
 #define acpi_os_ioremap acpi_os_ioremap
 
 typedef u64 phys_cpuid_t;
index 12f0eb5..619db9b 100644 (file)
@@ -77,9 +77,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
        "663:\n\t"                                                      \
        newinstr "\n"                                                   \
        "664:\n\t"                                                      \
-       ".previous\n\t"                                                 \
        ".org   . - (664b-663b) + (662b-661b)\n\t"                      \
-       ".org   . - (662b-661b) + (664b-663b)\n"                        \
+       ".org   . - (662b-661b) + (664b-663b)\n\t"                      \
+       ".previous\n"                                                   \
        ".endif\n"
 
 #define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)       \
index a08890d..015ddff 100644 (file)
@@ -99,8 +99,6 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
        return __lse_ll_sc_body(atomic64_dec_if_positive, v);
 }
 
-#define ATOMIC_INIT(i) { (i) }
-
 #define arch_atomic_read(v)                    __READ_ONCE((v)->counter)
 #define arch_atomic_set(v, i)                  __WRITE_ONCE(((v)->counter), (i))
 
index b6f7bc6..93a161b 100644 (file)
@@ -24,16 +24,17 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
        __uint128_t tmp;
        u64 sum;
+       int n = ihl; /* we want it signed */
 
        tmp = *(const __uint128_t *)iph;
        iph += 16;
-       ihl -= 4;
+       n -= 4;
        tmp += ((tmp >> 64) | (tmp << 64));
        sum = tmp >> 64;
        do {
                sum += *(const u32 *)iph;
                iph += 4;
-       } while (--ihl);
+       } while (--n > 0);
 
        sum += ((sum >> 32) | (sum << 32));
        return csum_fold((__force u32)(sum >> 32));
index d7b3bb0..07b643a 100644 (file)
@@ -62,7 +62,9 @@
 #define ARM64_HAS_GENERIC_AUTH                 52
 #define ARM64_HAS_32BIT_EL1                    53
 #define ARM64_BTI                              54
+#define ARM64_HAS_ARMv8_4_TTL                  55
+#define ARM64_HAS_TLB_RANGE                    56
 
-#define ARM64_NCAPS                            55
+#define ARM64_NCAPS                            57
 
 #endif /* __ASM_CPUCAPS_H */
index f7c3d1f..89b4f01 100644 (file)
@@ -692,6 +692,12 @@ static inline bool system_supports_bti(void)
        return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI);
 }
 
+static inline bool system_supports_tlb_range(void)
+{
+       return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) &&
+               cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
+}
+
 #define ARM64_BP_HARDEN_UNKNOWN                -1
 #define ARM64_BP_HARDEN_WA_NEEDED      0
 #define ARM64_BP_HARDEN_NOT_REQUIRED   1
@@ -774,6 +780,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 }
 
 u32 get_kvm_ipa_limit(void);
+void dump_cpu_features(void);
 
 #endif /* __ASSEMBLY__ */
 
index 94ba0c5..5abf91e 100644 (file)
@@ -49,6 +49,8 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
                                 pte_t *ptep, pte_t pte, unsigned long sz);
 #define set_huge_swap_pte_at set_huge_swap_pte_at
 
+void __init arm64_hugetlb_cma_reserve(void);
+
 #include <asm-generic/hugetlb.h>
 
 #endif /* __ASM_HUGETLB_H */
index d683bcb..22f73fe 100644 (file)
@@ -95,6 +95,7 @@
 #define KERNEL_HWCAP_DGH               __khwcap2_feature(DGH)
 #define KERNEL_HWCAP_RNG               __khwcap2_feature(RNG)
 #define KERNEL_HWCAP_BTI               __khwcap2_feature(BTI)
+/* reserved for KERNEL_HWCAP_MTE       __khwcap2_feature(MTE) */
 
 /*
  * This yields a mask that user programs can use to figure out what
index 3bf626f..329fb15 100644 (file)
@@ -8,7 +8,7 @@
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
-#include <linux/pgtable.h>
+#include <asm/pgtable-hwdef.h>
 #include <asm/sparsemem.h>
 
 /*
index c3e6fcc..e21d4a0 100644 (file)
@@ -380,9 +380,14 @@ struct kvm_vcpu_arch {
 #define vcpu_has_sve(vcpu) (system_supports_sve() && \
                            ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
 
-#define vcpu_has_ptrauth(vcpu) ((system_supports_address_auth() || \
-                                 system_supports_generic_auth()) && \
-                                ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH))
+#ifdef CONFIG_ARM64_PTR_AUTH
+#define vcpu_has_ptrauth(vcpu)                                         \
+       ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) ||                \
+         cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) &&               \
+        (vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH)
+#else
+#define vcpu_has_ptrauth(vcpu)         false
+#endif
 
 #define vcpu_gp_regs(v)                (&(v)->arch.ctxt.gp_regs)
 
index a1871bb..afa7225 100644 (file)
 #ifndef __ASM_MEMORY_H
 #define __ASM_MEMORY_H
 
-#include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/sizes.h>
-#include <linux/types.h>
-#include <asm/bug.h>
 #include <asm/page-def.h>
 
 /*
 #endif
 
 #ifndef __ASSEMBLY__
-extern u64                     vabits_actual;
-#define PAGE_END               (_PAGE_END(vabits_actual))
 
 #include <linux/bitops.h>
+#include <linux/compiler.h>
 #include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <asm/bug.h>
+
+extern u64                     vabits_actual;
+#define PAGE_END               (_PAGE_END(vabits_actual))
 
 extern s64                     physvirt_offset;
 extern s64                     memstart_addr;
@@ -322,6 +323,7 @@ static inline void *phys_to_virt(phys_addr_t x)
        __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr));      \
 })
 
+void dump_mem_limit(void);
 #endif /* !ASSEMBLY */
 
 /*
index b0bd9b5..f2d7537 100644 (file)
@@ -175,7 +175,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp)
  * take CPU migration into account.
  */
 #define destroy_context(mm)            do { } while(0)
-void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
+void check_and_switch_context(struct mm_struct *mm);
 
 #define init_new_context(tsk,mm)       ({ atomic64_set(&(mm)->context.id, 0); 0; })
 
@@ -214,8 +214,6 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 
 static inline void __switch_mm(struct mm_struct *next)
 {
-       unsigned int cpu = smp_processor_id();
-
        /*
         * init_mm.pgd does not contain any user mappings and it is always
         * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@@ -225,7 +223,7 @@ static inline void __switch_mm(struct mm_struct *next)
                return;
        }
 
-       check_and_switch_context(next, cpu);
+       check_and_switch_context(next);
 }
 
 static inline void
index e7765b6..2c2d7db 100644 (file)
 #define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD                                0x36
 #define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD                   0x37
 #define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD                   0x38
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD                 0x39
+#define ARMV8_PMUV3_PERFCTR_OP_RETIRED                         0x3A
+#define ARMV8_PMUV3_PERFCTR_OP_SPEC                            0x3B
+#define ARMV8_PMUV3_PERFCTR_STALL                              0x3C
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND                 0x3D
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND                        0x3E
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT                         0x3F
 
 /* Statistical profiling extension microarchitectural events */
 #define        ARMV8_SPE_PERFCTR_SAMPLE_POP                            0x4000
 #define        ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE                       0x4002
 #define        ARMV8_SPE_PERFCTR_SAMPLE_COLLISION                      0x4003
 
+/* AMUv1 architecture events */
+#define        ARMV8_AMU_PERFCTR_CNT_CYCLES                            0x4004
+#define        ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM                     0x4005
+
+/* long-latency read miss events */
+#define        ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS                     0x4006
+#define        ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD                  0x4009
+#define        ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS                     0x400A
+#define        ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD                  0x400B
+
+/* additional latency from alignment events */
+#define        ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT                      0x4020
+#define        ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT                        0x4021
+#define        ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT                        0x4022
+
+/* Armv8.5 Memory Tagging Extension events */
+#define        ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED                    0x4024
+#define        ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD                 0x4025
+#define        ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR                 0x4026
+
 /* ARMv8 recommended implementation defined event types */
 #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD                      0x40
 #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR                      0x41
index 9c91a8f..d400a4d 100644 (file)
@@ -29,7 +29,7 @@
  * Size mapped by an entry at level n ( 0 <= n <= 3)
  * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
  * in the final page. The maximum number of translation levels supported by
- * the architecture is 4. Hence, starting at at level n, we have further
+ * the architecture is 4. Hence, starting at level n, we have further
  * ((4 - n) - 1) levels of translation excluding the offset within the page.
  * So, the total number of bits mapped by an entry at level n is :
  *
  * Contiguous page definitions.
  */
 #ifdef CONFIG_ARM64_64K_PAGES
-#define CONT_PTE_SHIFT         5
-#define CONT_PMD_SHIFT         5
+#define CONT_PTE_SHIFT         (5 + PAGE_SHIFT)
+#define CONT_PMD_SHIFT         (5 + PMD_SHIFT)
 #elif defined(CONFIG_ARM64_16K_PAGES)
-#define CONT_PTE_SHIFT         7
-#define CONT_PMD_SHIFT         5
+#define CONT_PTE_SHIFT         (7 + PAGE_SHIFT)
+#define CONT_PMD_SHIFT         (5 + PMD_SHIFT)
 #else
-#define CONT_PTE_SHIFT         4
-#define CONT_PMD_SHIFT         4
+#define CONT_PTE_SHIFT         (4 + PAGE_SHIFT)
+#define CONT_PMD_SHIFT         (4 + PMD_SHIFT)
 #endif
 
-#define CONT_PTES              (1 << CONT_PTE_SHIFT)
+#define CONT_PTES              (1 << (CONT_PTE_SHIFT - PAGE_SHIFT))
 #define CONT_PTE_SIZE          (CONT_PTES * PAGE_SIZE)
 #define CONT_PTE_MASK          (~(CONT_PTE_SIZE - 1))
-#define CONT_PMDS              (1 << CONT_PMD_SHIFT)
+#define CONT_PMDS              (1 << (CONT_PMD_SHIFT - PMD_SHIFT))
 #define CONT_PMD_SIZE          (CONT_PMDS * PMD_SIZE)
 #define CONT_PMD_MASK          (~(CONT_PMD_SIZE - 1))
-/* the the numerical offset of the PTE within a range of CONT_PTES */
+/* the numerical offset of the PTE within a range of CONT_PTES */
 #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
 
 /*
 #define PTE_S2_RDONLY          (_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR            (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 #define PTE_S2_XN              (_AT(pteval_t, 2) << 53)  /* XN[1:0] */
+#define PTE_S2_SW_RESVD                (_AT(pteval_t, 15) << 55) /* Reserved for SW */
 
 #define PMD_S2_RDONLY          (_AT(pmdval_t, 1) << 6)   /* HAP[2:1] */
 #define PMD_S2_RDWR            (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 #define PMD_S2_XN              (_AT(pmdval_t, 2) << 53)  /* XN[1:0] */
+#define PMD_S2_SW_RESVD                (_AT(pmdval_t, 15) << 55) /* Reserved for SW */
 
 #define PUD_S2_RDONLY          (_AT(pudval_t, 1) << 6)   /* HAP[2:1] */
 #define PUD_S2_RDWR            (_AT(pudval_t, 3) << 6)   /* HAP[2:1] */
 #define TCR_TxSZ(x)            (TCR_T0SZ(x) | TCR_T1SZ(x))
 #define TCR_TxSZ_WIDTH         6
 #define TCR_T0SZ_MASK          (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ_MASK          (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET)
 
 #define TCR_EPD0_SHIFT         7
 #define TCR_EPD0_MASK          (UL(1) << TCR_EPD0_SHIFT)
index 758e2d1..d5d3fbe 100644 (file)
@@ -40,6 +40,16 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
 extern void __pud_error(const char *file, int line, unsigned long val);
 extern void __pgd_error(const char *file, int line, unsigned long val);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+
+/* Set stride and tlb_level in flush_*_tlb_range */
+#define flush_pmd_tlb_range(vma, addr, end)    \
+       __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
+#define flush_pud_tlb_range(vma, addr, end)    \
+       __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
index 953b6a1..966ed30 100644 (file)
@@ -27,7 +27,7 @@
  *
  * Some code sections either automatically switch back to PSR.I or explicitly
  * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
- * in the  the priority mask, it indicates that PSR.I should be set and
+ * in the priority mask, it indicates that PSR.I should be set and
  * interrupt disabling temporarily does not rely on IRQ priorities.
  */
 #define GIC_PRIO_IRQON                 0xe0
index ea268d8..a0c8a0b 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/thread_info.h>
-#include <asm/pointer_auth.h>
 
 DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
 
index b767904..996bf98 100644 (file)
@@ -256,4 +256,13 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
        return (boundary - 1 < end - 1) ? boundary : end;
 }
 
+/*
+ * Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and
+ * the architectural page-table level.
+ */
+#define S2_NO_LEVEL_HINT       0
+#define S2_PUD_LEVEL           1
+#define S2_PMD_LEVEL           2
+#define S2_PTE_LEVEL           3
+
 #endif /* __ARM64_S2_PGTABLE_H_ */
index 463175f..554a7e8 100644 (file)
  */
 
 #define SYS_AMEVCNTR0_EL0(n)           SYS_AM_EL0(4 + ((n) >> 3), (n) & 7)
-#define SYS_AMEVTYPE0_EL0(n)           SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPER0_EL0(n)          SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
 #define SYS_AMEVCNTR1_EL0(n)           SYS_AM_EL0(12 + ((n) >> 3), (n) & 7)
-#define SYS_AMEVTYPE1_EL0(n)           SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPER1_EL0(n)          SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
 
 /* AMU v1: Fixed (architecturally defined) activity monitors */
 #define SYS_AMEVCNTR0_CORE_EL0         SYS_AMEVCNTR0_EL0(0)
 #define ID_AA64ISAR0_SHA1_SHIFT                8
 #define ID_AA64ISAR0_AES_SHIFT         4
 
+#define ID_AA64ISAR0_TLB_RANGE_NI      0x0
+#define ID_AA64ISAR0_TLB_RANGE         0x2
+
 /* id_aa64isar1 */
 #define ID_AA64ISAR1_I8MM_SHIFT                52
 #define ID_AA64ISAR1_DGH_SHIFT         48
 #define ID_AA64ZFR0_SVEVER_SVE2                0x1
 
 /* id_aa64mmfr0 */
+#define ID_AA64MMFR0_ECV_SHIFT         60
+#define ID_AA64MMFR0_FGT_SHIFT         56
+#define ID_AA64MMFR0_EXS_SHIFT         44
 #define ID_AA64MMFR0_TGRAN4_2_SHIFT    40
 #define ID_AA64MMFR0_TGRAN64_2_SHIFT   36
 #define ID_AA64MMFR0_TGRAN16_2_SHIFT   32
 #endif
 
 /* id_aa64mmfr1 */
+#define ID_AA64MMFR1_ETS_SHIFT         36
+#define ID_AA64MMFR1_TWED_SHIFT                32
+#define ID_AA64MMFR1_XNX_SHIFT         28
+#define ID_AA64MMFR1_SPECSEI_SHIFT     24
 #define ID_AA64MMFR1_PAN_SHIFT         20
 #define ID_AA64MMFR1_LOR_SHIFT         16
 #define ID_AA64MMFR1_HPD_SHIFT         12
 
 /* id_aa64mmfr2 */
 #define ID_AA64MMFR2_E0PD_SHIFT                60
+#define ID_AA64MMFR2_EVT_SHIFT         56
+#define ID_AA64MMFR2_BBM_SHIFT         52
+#define ID_AA64MMFR2_TTL_SHIFT         48
 #define ID_AA64MMFR2_FWB_SHIFT         40
+#define ID_AA64MMFR2_IDS_SHIFT         36
 #define ID_AA64MMFR2_AT_SHIFT          32
+#define ID_AA64MMFR2_ST_SHIFT          28
+#define ID_AA64MMFR2_NV_SHIFT          24
+#define ID_AA64MMFR2_CCIDX_SHIFT       20
 #define ID_AA64MMFR2_LVA_SHIFT         16
 #define ID_AA64MMFR2_IESB_SHIFT                12
 #define ID_AA64MMFR2_LSM_SHIFT         8
 #define ID_AA64MMFR2_CNP_SHIFT         0
 
 /* id_aa64dfr0 */
+#define ID_AA64DFR0_DOUBLELOCK_SHIFT   36
 #define ID_AA64DFR0_PMSVER_SHIFT       32
 #define ID_AA64DFR0_CTX_CMPS_SHIFT     28
 #define ID_AA64DFR0_WRPS_SHIFT         20
 #define ID_ISAR6_DP_SHIFT              4
 #define ID_ISAR6_JSCVT_SHIFT           0
 
+#define ID_MMFR0_INNERSHR_SHIFT                28
+#define ID_MMFR0_FCSE_SHIFT            24
+#define ID_MMFR0_AUXREG_SHIFT          20
+#define ID_MMFR0_TCM_SHIFT             16
+#define ID_MMFR0_SHARELVL_SHIFT                12
+#define ID_MMFR0_OUTERSHR_SHIFT                8
+#define ID_MMFR0_PMSA_SHIFT            4
+#define ID_MMFR0_VMSA_SHIFT            0
+
 #define ID_MMFR4_EVT_SHIFT             28
 #define ID_MMFR4_CCIDX_SHIFT           24
 #define ID_MMFR4_LSM_SHIFT             20
 #define ID_MMFR4_HPDS_SHIFT            16
 #define ID_MMFR4_CNP_SHIFT             12
 #define ID_MMFR4_XNX_SHIFT             8
+#define ID_MMFR4_AC2_SHIFT             4
 #define ID_MMFR4_SPECSEI_SHIFT         0
 
 #define ID_MMFR5_ETS_SHIFT             0
 
 #define ID_PFR0_DIT_SHIFT              24
 #define ID_PFR0_CSV2_SHIFT             16
+#define ID_PFR0_STATE3_SHIFT           12
+#define ID_PFR0_STATE2_SHIFT           8
+#define ID_PFR0_STATE1_SHIFT           4
+#define ID_PFR0_STATE0_SHIFT           0
+
+#define ID_DFR0_PERFMON_SHIFT          24
+#define ID_DFR0_MPROFDBG_SHIFT         20
+#define ID_DFR0_MMAPTRC_SHIFT          16
+#define ID_DFR0_COPTRC_SHIFT           12
+#define ID_DFR0_MMAPDBG_SHIFT          8
+#define ID_DFR0_COPSDBG_SHIFT          4
+#define ID_DFR0_COPDBG_SHIFT           0
 
 #define ID_PFR2_SSBS_SHIFT             4
 #define ID_PFR2_CSV3_SHIFT             0
 #define ID_AA64MMFR0_TGRAN_SUPPORTED   ID_AA64MMFR0_TGRAN64_SUPPORTED
 #endif
 
+#define MVFR2_FPMISC_SHIFT             4
+#define MVFR2_SIMDMISC_SHIFT           0
+
+#define DCZID_DZP_SHIFT                        4
+#define DCZID_BS_SHIFT                 0
 
 /*
  * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which
index b76df82..61c97d3 100644 (file)
@@ -21,11 +21,37 @@ static void tlb_flush(struct mmu_gather *tlb);
 
 #include <asm-generic/tlb.h>
 
+/*
+ * Get the TLBI level hint on arm64.  The default value is 0 if more than
+ * one of cleared_* is set or none of them is set.
+ * Note that arm64 does not support p4ds at present.
+ */
+static inline int tlb_get_level(struct mmu_gather *tlb)
+{
+       if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
+                                  tlb->cleared_puds ||
+                                  tlb->cleared_p4ds))
+               return 3;
+
+       if (tlb->cleared_pmds && !(tlb->cleared_ptes ||
+                                  tlb->cleared_puds ||
+                                  tlb->cleared_p4ds))
+               return 2;
+
+       if (tlb->cleared_puds && !(tlb->cleared_ptes ||
+                                  tlb->cleared_pmds ||
+                                  tlb->cleared_p4ds))
+               return 1;
+
+       return 0;
+}
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
        struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
        bool last_level = !tlb->freed_tables;
        unsigned long stride = tlb_get_unmap_size(tlb);
+       int tlb_level = tlb_get_level(tlb);
 
        /*
         * If we're tearing down the address space then we only care about
@@ -38,7 +64,8 @@ static inline void tlb_flush(struct mmu_gather *tlb)
                return;
        }
 
-       __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level);
+       __flush_tlb_range(&vma, tlb->start, tlb->end, stride,
+                         last_level, tlb_level);
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
index bc39490..d493174 100644 (file)
@@ -10,6 +10,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bitfield.h>
 #include <linux/mm_types.h>
 #include <linux/sched.h>
 #include <asm/cputype.h>
                __ta;                                           \
        })
 
+/*
+ * Get translation granule of the system, which is decided by
+ * PAGE_SIZE.  Used by TTL.
+ *  - 4KB      : 1
+ *  - 16KB     : 2
+ *  - 64KB     : 3
+ */
+#define TLBI_TTL_TG_4K         1
+#define TLBI_TTL_TG_16K                2
+#define TLBI_TTL_TG_64K                3
+
+static inline unsigned long get_trans_granule(void)
+{
+       switch (PAGE_SIZE) {
+       case SZ_4K:
+               return TLBI_TTL_TG_4K;
+       case SZ_16K:
+               return TLBI_TTL_TG_16K;
+       case SZ_64K:
+               return TLBI_TTL_TG_64K;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Level-based TLBI operations.
+ *
+ * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
+ * the level at which the invalidation must take place. If the level is
+ * wrong, no invalidation may take place. In the case where the level
+ * cannot be easily determined, a 0 value for the level parameter will
+ * perform a non-hinted invalidation.
+ *
+ * For Stage-2 invalidation, use the level values provided to that effect
+ * in asm/stage2_pgtable.h.
+ */
+#define TLBI_TTL_MASK          GENMASK_ULL(47, 44)
+
+#define __tlbi_level(op, addr, level) do {                             \
+       u64 arg = addr;                                                 \
+                                                                       \
+       if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&               \
+           level) {                                                    \
+               u64 ttl = level & 3;                                    \
+               ttl |= get_trans_granule() << 2;                        \
+               arg &= ~TLBI_TTL_MASK;                                  \
+               arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);                  \
+       }                                                               \
+                                                                       \
+       __tlbi(op, arg);                                                \
+} while(0)
+
+#define __tlbi_user_level(op, arg, level) do {                         \
+       if (arm64_kernel_unmapped_at_el0())                             \
+               __tlbi_level(op, (arg | USER_ASID_FLAG), level);        \
+} while (0)
+
+/*
+ * This macro creates a properly formatted VA operand for the TLB RANGE.
+ * The value bit assignments are:
+ *
+ * +----------+------+-------+-------+-------+----------------------+
+ * |   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
+ * +----------+------+-------+-------+-------+----------------------+
+ * |63      48|47  46|45   44|43   39|38   37|36                   0|
+ *
+ * The address range is determined by below formula:
+ * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ */
+#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)                \
+       ({                                                      \
+               unsigned long __ta = (addr) >> PAGE_SHIFT;      \
+               __ta &= GENMASK_ULL(36, 0);                     \
+               __ta |= (unsigned long)(ttl) << 37;             \
+               __ta |= (unsigned long)(num) << 39;             \
+               __ta |= (unsigned long)(scale) << 44;           \
+               __ta |= get_trans_granule() << 46;              \
+               __ta |= (unsigned long)(asid) << 48;            \
+               __ta;                                           \
+       })
+
+/* These macros are used by the TLBI RANGE feature. */
+#define __TLBI_RANGE_PAGES(num, scale) \
+       ((unsigned long)((num) + 1) << (5 * (scale) + 1))
+#define MAX_TLBI_RANGE_PAGES           __TLBI_RANGE_PAGES(31, 3)
+
+/*
+ * Generate 'num' values from -1 to 30 with -1 rejected by the
+ * __flush_tlb_range() loop below.
+ */
+#define TLBI_RANGE_MASK                        GENMASK_ULL(4, 0)
+#define __TLBI_RANGE_NUM(pages, scale) \
+       ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+
 /*
  *     TLB Invalidation
  *     ================
@@ -179,34 +276,83 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end,
-                                    unsigned long stride, bool last_level)
+                                    unsigned long stride, bool last_level,
+                                    int tlb_level)
 {
+       int num = 0;
+       int scale = 0;
        unsigned long asid = ASID(vma->vm_mm);
        unsigned long addr;
+       unsigned long pages;
 
        start = round_down(start, stride);
        end = round_up(end, stride);
+       pages = (end - start) >> PAGE_SHIFT;
 
-       if ((end - start) >= (MAX_TLBI_OPS * stride)) {
+       /*
+        * When TLB range ops are not used, we can handle up to
+        * (MAX_TLBI_OPS - 1) pages;
+        * when TLB range ops are used, we can handle up to
+        * (MAX_TLBI_RANGE_PAGES - 1) pages.
+        */
+       if ((!system_supports_tlb_range() &&
+            (end - start) >= (MAX_TLBI_OPS * stride)) ||
+           pages >= MAX_TLBI_RANGE_PAGES) {
                flush_tlb_mm(vma->vm_mm);
                return;
        }
 
-       /* Convert the stride into units of 4k */
-       stride >>= 12;
+       dsb(ishst);
 
-       start = __TLBI_VADDR(start, asid);
-       end = __TLBI_VADDR(end, asid);
+       /*
+        * When the CPU does not support TLB range operations, flush the TLB
+        * entries one by one at the granularity of 'stride'. If the TLB
+        * range ops are supported, then:
+        *
+        * 1. If 'pages' is odd, flush the first page through non-range
+        *    operations;
+        *
+        * 2. For remaining pages: the minimum range granularity is decided
+        *    by 'scale', so multiple range TLBI operations may be required.
+        *    Start from scale = 0, flush the corresponding number of pages
+        *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+        *    until no pages left.
+        *
+        * Note that certain ranges can be represented by either num = 31 and
+        * scale or num = 0 and scale + 1. The loop below favours the latter
+        * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+        */
+       while (pages > 0) {
+               if (!system_supports_tlb_range() ||
+                   pages % 2 == 1) {
+                       addr = __TLBI_VADDR(start, asid);
+                       if (last_level) {
+                               __tlbi_level(vale1is, addr, tlb_level);
+                               __tlbi_user_level(vale1is, addr, tlb_level);
+                       } else {
+                               __tlbi_level(vae1is, addr, tlb_level);
+                               __tlbi_user_level(vae1is, addr, tlb_level);
+                       }
+                       start += stride;
+                       pages -= stride >> PAGE_SHIFT;
+                       continue;
+               }
 
-       dsb(ishst);
-       for (addr = start; addr < end; addr += stride) {
-               if (last_level) {
-                       __tlbi(vale1is, addr);
-                       __tlbi_user(vale1is, addr);
-               } else {
-                       __tlbi(vae1is, addr);
-                       __tlbi_user(vae1is, addr);
+               num = __TLBI_RANGE_NUM(pages, scale);
+               if (num >= 0) {
+                       addr = __TLBI_VADDR_RANGE(start, asid, scale,
+                                                 num, tlb_level);
+                       if (last_level) {
+                               __tlbi(rvale1is, addr);
+                               __tlbi_user(rvale1is, addr);
+                       } else {
+                               __tlbi(rvae1is, addr);
+                               __tlbi_user(rvae1is, addr);
+                       }
+                       start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
+                       pages -= __TLBI_RANGE_PAGES(num, scale);
                }
+               scale++;
        }
        dsb(ish);
 }
@@ -217,8 +363,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
        /*
         * We cannot use leaf-only invalidation here, since we may be invalidating
         * table entries as part of collapsing hugepages or moving page tables.
+        * Set the tlb_level to 0 because we can not get enough information here.
         */
-       __flush_tlb_range(vma, start, end, PAGE_SIZE, false);
+       __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
 }
 
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
index 0cc835d..e042f65 100644 (file)
@@ -34,8 +34,9 @@ void topology_scale_freq_tick(void);
 /* Enable topology flag updates */
 #define arch_update_cpu_topology topology_update_cpu_topology
 
-/* Replace task scheduler's default thermal pressure retrieve API */
+/* Replace task scheduler's default thermal pressure API */
 #define arch_scale_thermal_pressure topology_get_thermal_pressure
+#define arch_set_thermal_pressure   topology_set_thermal_pressure
 
 #include <asm-generic/topology.h>
 
index bc5c7b0..8d7c466 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 
 #include <asm/cpufeature.h>
+#include <asm/mmu.h>
 #include <asm/ptrace.h>
 #include <asm/memory.h>
 #include <asm/extable.h>
index 0746842..f99dcb9 100644 (file)
@@ -12,6 +12,8 @@
  */
 #define VDSO_LBASE     0x0
 
+#define __VVAR_PAGES    2
+
 #ifndef __ASSEMBLY__
 
 #include <generated/vdso-offsets.h>
index 9a625e8..75cbae6 100644 (file)
@@ -7,6 +7,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>
 
@@ -152,6 +153,18 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
        return ret;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+       const struct vdso_data *ret;
+
+       /* See __arch_get_vdso_data(). */
+       asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data));
+
+       return ret;
+}
+#endif
+
 static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
 {
        return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER;
index afba6ba..9c29ad3 100644 (file)
@@ -7,6 +7,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
 #include <asm/unistd.h>
 
 #define VDSO_HAS_CLOCK_GETRES          1
@@ -96,6 +97,14 @@ const struct vdso_data *__arch_get_vdso_data(void)
        return _vdso_data;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+       return _timens_data;
+}
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
index 2d6ba1c..912162f 100644 (file)
@@ -74,5 +74,6 @@
 #define HWCAP2_DGH             (1 << 15)
 #define HWCAP2_RNG             (1 << 16)
 #define HWCAP2_BTI             (1 << 17)
+/* reserved for HWCAP2_MTE     (1 << 18) */
 
 #endif /* _UAPI__ASM_HWCAP_H */
index 8b0ebce..0c796c7 100644 (file)
@@ -179,7 +179,7 @@ struct sve_context {
  * The same convention applies when returning from a signal: a caller
  * will need to remove or resize the sve_context block if it wants to
  * make the SVE registers live when they were previously non-live or
- * vice-versa.  This may require the the caller to allocate fresh
+ * vice-versa.  This may require the caller to allocate fresh
  * memory and/or move other context blocks in the signal frame.
  *
  * Changing the vector length during signal return is not permitted:
index a7586a4..4559664 100644 (file)
@@ -261,6 +261,81 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
        return __pgprot(PROT_DEVICE_nGnRnE);
 }
 
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+{
+       efi_memory_desc_t *md, *region = NULL;
+       pgprot_t prot;
+
+       if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
+               return NULL;
+
+       for_each_efi_memory_desc(md) {
+               u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+
+               if (phys < md->phys_addr || phys >= end)
+                       continue;
+
+               if (phys + size > end) {
+                       pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
+                       return NULL;
+               }
+               region = md;
+               break;
+       }
+
+       /*
+        * It is fine for AML to remap regions that are not represented in the
+        * EFI memory map at all, as it only describes normal memory, and MMIO
+        * regions that require a virtual mapping to make them accessible to
+        * the EFI runtime services.
+        */
+       prot = __pgprot(PROT_DEVICE_nGnRnE);
+       if (region) {
+               switch (region->type) {
+               case EFI_LOADER_CODE:
+               case EFI_LOADER_DATA:
+               case EFI_BOOT_SERVICES_CODE:
+               case EFI_BOOT_SERVICES_DATA:
+               case EFI_CONVENTIONAL_MEMORY:
+               case EFI_PERSISTENT_MEMORY:
+                       pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys);
+                       return NULL;
+
+               case EFI_RUNTIME_SERVICES_CODE:
+                       /*
+                        * This would be unusual, but not problematic per se,
+                        * as long as we take care not to create a writable
+                        * mapping for executable code.
+                        */
+                       prot = PAGE_KERNEL_RO;
+                       break;
+
+               case EFI_ACPI_RECLAIM_MEMORY:
+                       /*
+                        * ACPI reclaim memory is used to pass firmware tables
+                        * and other data that is intended for consumption by
+                        * the OS only, which may decide it wants to reclaim
+                        * that memory and use it for something else. We never
+                        * do that, but we usually add it to the linear map
+                        * anyway, in which case we should use the existing
+                        * mapping.
+                        */
+                       if (memblock_is_map_memory(phys))
+                               return (void __iomem *)__phys_to_virt(phys);
+                       /* fall through */
+
+               default:
+                       if (region->attribute & EFI_MEMORY_WB)
+                               prot = PAGE_KERNEL;
+                       else if (region->attribute & EFI_MEMORY_WT)
+                               prot = __pgprot(PROT_NORMAL_WT);
+                       else if (region->attribute & EFI_MEMORY_WC)
+                               prot = __pgprot(PROT_NORMAL_NC);
+               }
+       }
+       return __ioremap(phys, size, prot);
+}
+
 /*
  * Claim Synchronous External Aborts as a firmware first notification.
  *
index 9fae0ef..a389b99 100644 (file)
@@ -119,25 +119,12 @@ static inline void finalize_system_capabilities(void)
        static_branch_enable(&arm64_const_caps_ready);
 }
 
-static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
+void dump_cpu_features(void)
 {
        /* file-wide pr_fmt adds "CPU features: " prefix */
        pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
-       return 0;
 }
 
-static struct notifier_block cpu_hwcaps_notifier = {
-       .notifier_call = dump_cpu_hwcaps
-};
-
-static int __init register_cpu_hwcaps_dumper(void)
-{
-       atomic_notifier_chain_register(&panic_notifier_list,
-                                      &cpu_hwcaps_notifier);
-       return 0;
-}
-__initcall(register_cpu_hwcaps_dumper);
-
 DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
 EXPORT_SYMBOL(cpu_hwcap_keys);
 
@@ -269,6 +256,9 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0),
        /*
         * Page size not being supported at Stage-2 is not fatal. You
         * just give up KVM if PAGE_SIZE isn't supported there. Go fix
@@ -312,6 +302,10 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_SPECSEI_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
@@ -323,8 +317,15 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EVT_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_BBM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IDS_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_ST_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_NV_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CCIDX_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
@@ -345,7 +346,7 @@ static const struct arm64_ftr_bits ftr_ctr[] = {
         * make use of *minLine.
         * If we have differing I-cache policies, report it as the weakest - VIPT.
         */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT),       /* L1Ip */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT),   /* L1Ip */
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
@@ -356,19 +357,19 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
 };
 
 static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
-       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf),   /* InnerShr */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),       /* FCSE */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),    /* AuxReg */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),       /* TCM */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),       /* ShareLvl */
-       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf),    /* OuterShr */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),        /* PMSA */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),        /* VMSA */
+       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_INNERSHR_SHIFT, 4, 0xf),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_FCSE_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_AUXREG_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_TCM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_SHARELVL_SHIFT, 4, 0),
+       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_OUTERSHR_SHIFT, 4, 0xf),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_PMSA_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_VMSA_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
-       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 36, 4, 0),
+       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DOUBLELOCK_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
@@ -384,14 +385,14 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_mvfr2[] = {
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),                /* FPMisc */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),                /* SIMDMisc */
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_dczid[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1),            /* DZP */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),       /* BS */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
@@ -423,7 +424,8 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),        /* ac2 */
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_AC2_SHIFT, 4, 0),
+
        /*
         * SpecSEI = 1 indicates that the PE might generate an SError on an
         * external abort on speculative read. It is safe to assume that an
@@ -465,10 +467,10 @@ static const struct arm64_ftr_bits ftr_id_isar6[] = {
 static const struct arm64_ftr_bits ftr_id_pfr0[] = {
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),               /* State3 */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),                /* State2 */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),                /* State1 */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),                /* State0 */
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE3_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE2_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE1_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE0_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
@@ -492,13 +494,13 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = {
 
 static const struct arm64_ftr_bits ftr_id_dfr0[] = {
        /* [31:28] TraceFilt */
-       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf),   /* PerfMon */
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPDBG_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPSDBG_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPDBG_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
@@ -697,11 +699,52 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
 
 static void __init sort_ftr_regs(void)
 {
-       int i;
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(arm64_ftr_regs); i++) {
+               const struct arm64_ftr_reg *ftr_reg = arm64_ftr_regs[i].reg;
+               const struct arm64_ftr_bits *ftr_bits = ftr_reg->ftr_bits;
+               unsigned int j = 0;
+
+               /*
+                * Features here must be sorted in descending order with respect
+                * to their shift values and should not overlap with each other.
+                */
+               for (; ftr_bits->width != 0; ftr_bits++, j++) {
+                       unsigned int width = ftr_reg->ftr_bits[j].width;
+                       unsigned int shift = ftr_reg->ftr_bits[j].shift;
+                       unsigned int prev_shift;
+
+                       WARN((shift  + width) > 64,
+                               "%s has invalid feature at shift %d\n",
+                               ftr_reg->name, shift);
+
+                       /*
+                        * Skip the first feature. There is nothing to
+                        * compare against for now.
+                        */
+                       if (j == 0)
+                               continue;
+
+                       prev_shift = ftr_reg->ftr_bits[j - 1].shift;
+                       WARN((shift + width) > prev_shift,
+                               "%s has feature overlap at shift %d\n",
+                               ftr_reg->name, shift);
+               }
 
-       /* Check that the array is sorted so that we can do the binary search */
-       for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++)
+               /*
+                * Skip the first register. There is nothing to
+                * compare against for now.
+                */
+               if (i == 0)
+                       continue;
+               /*
+                * Registers here must be sorted in ascending order with respect
+                * to sys_id for subsequent binary search in get_arm64_ftr_reg()
+                * to work correctly.
+                */
                BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
+       }
 }
 
 /*
@@ -1884,6 +1927,26 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                .cpu_enable = cpu_has_fwb,
        },
+       {
+               .desc = "ARMv8.4 Translation Table Level",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_HAS_ARMv8_4_TTL,
+               .sys_reg = SYS_ID_AA64MMFR2_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64MMFR2_TTL_SHIFT,
+               .min_field_value = 1,
+               .matches = has_cpuid_feature,
+       },
+       {
+               .desc = "TLB range maintenance instructions",
+               .capability = ARM64_HAS_TLB_RANGE,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .matches = has_cpuid_feature,
+               .sys_reg = SYS_ID_AA64ISAR0_EL1,
+               .field_pos = ID_AA64ISAR0_TLB_SHIFT,
+               .sign = FTR_UNSIGNED,
+               .min_field_value = ID_AA64ISAR0_TLB_RANGE,
+       },
 #ifdef CONFIG_ARM64_HW_AFDBM
        {
                /*
index 8663746..393c6fb 100644 (file)
@@ -93,6 +93,7 @@ static const char *const hwcap_str[] = {
        "dgh",
        "rng",
        "bti",
+       /* reserved for "mte" */
        NULL
 };
 
index 1f646b0..314391a 100644 (file)
@@ -7,6 +7,14 @@
 #include <linux/crash_core.h>
 #include <asm/cpufeature.h>
 #include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+
+static inline u64 get_tcr_el1_t1sz(void);
+
+static inline u64 get_tcr_el1_t1sz(void)
+{
+       return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET;
+}
 
 void arch_crash_save_vmcoreinfo(void)
 {
@@ -16,6 +24,8 @@ void arch_crash_save_vmcoreinfo(void)
                                                kimage_voffset);
        vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
                                                PHYS_OFFSET);
+       vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n",
+                                               get_tcr_el1_t1sz());
        vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
        vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n",
                                                system_supports_address_auth() ?
index 35de8ba..2646178 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/asm_pointer_auth.h>
+#include <asm/bug.h>
 #include <asm/cpufeature.h>
 #include <asm/errno.h>
 #include <asm/esr.h>
@@ -226,28 +227,9 @@ alternative_else_nop_endif
        add     x29, sp, #S_STACKFRAME
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
-       /*
-        * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
-        * EL0, there is no need to check the state of TTBR0_EL1 since
-        * accesses are always enabled.
-        * Note that the meaning of this bit differs from the ARMv8.1 PAN
-        * feature as all TTBR0_EL1 accesses are disabled, not just those to
-        * user mappings.
-        */
-alternative_if ARM64_HAS_PAN
-       b       1f                              // skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+       bl      __swpan_entry_el\el
 alternative_else_nop_endif
-
-       .if     \el != 0
-       mrs     x21, ttbr0_el1
-       tst     x21, #TTBR_ASID_MASK            // Check for the reserved ASID
-       orr     x23, x23, #PSR_PAN_BIT          // Set the emulated PAN in the saved SPSR
-       b.eq    1f                              // TTBR0 access already disabled
-       and     x23, x23, #~PSR_PAN_BIT         // Clear the emulated PAN in the saved SPSR
-       .endif
-
-       __uaccess_ttbr0_disable x21
-1:
 #endif
 
        stp     x22, x23, [sp, #S_PC]
@@ -301,34 +283,9 @@ alternative_else_nop_endif
        .endif
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
-       /*
-        * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
-        * PAN bit checking.
-        */
-alternative_if ARM64_HAS_PAN
-       b       2f                              // skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+       bl      __swpan_exit_el\el
 alternative_else_nop_endif
-
-       .if     \el != 0
-       tbnz    x22, #22, 1f                    // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
-       .endif
-
-       __uaccess_ttbr0_enable x0, x1
-
-       .if     \el == 0
-       /*
-        * Enable errata workarounds only if returning to user. The only
-        * workaround currently required for TTBR0_EL1 changes are for the
-        * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
-        * corruption).
-        */
-       bl      post_ttbr_update_workaround
-       .endif
-1:
-       .if     \el != 0
-       and     x22, x22, #~PSR_PAN_BIT         // ARMv8.0 CPUs do not understand this bit
-       .endif
-2:
 #endif
 
        .if     \el == 0
@@ -401,6 +358,49 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
        sb
        .endm
 
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+       /*
+        * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+        * EL0, there is no need to check the state of TTBR0_EL1 since
+        * accesses are always enabled.
+        * Note that the meaning of this bit differs from the ARMv8.1 PAN
+        * feature as all TTBR0_EL1 accesses are disabled, not just those to
+        * user mappings.
+        */
+SYM_CODE_START_LOCAL(__swpan_entry_el1)
+       mrs     x21, ttbr0_el1
+       tst     x21, #TTBR_ASID_MASK            // Check for the reserved ASID
+       orr     x23, x23, #PSR_PAN_BIT          // Set the emulated PAN in the saved SPSR
+       b.eq    1f                              // TTBR0 access already disabled
+       and     x23, x23, #~PSR_PAN_BIT         // Clear the emulated PAN in the saved SPSR
+SYM_INNER_LABEL(__swpan_entry_el0, SYM_L_LOCAL)
+       __uaccess_ttbr0_disable x21
+1:     ret
+SYM_CODE_END(__swpan_entry_el1)
+
+       /*
+        * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+        * PAN bit checking.
+        */
+SYM_CODE_START_LOCAL(__swpan_exit_el1)
+       tbnz    x22, #22, 1f                    // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+       __uaccess_ttbr0_enable x0, x1
+1:     and     x22, x22, #~PSR_PAN_BIT         // ARMv8.0 CPUs do not understand this bit
+       ret
+SYM_CODE_END(__swpan_exit_el1)
+
+SYM_CODE_START_LOCAL(__swpan_exit_el0)
+       __uaccess_ttbr0_enable x0, x1
+       /*
+        * Enable errata workarounds only if returning to user. The only
+        * workaround currently required for TTBR0_EL1 changes are for the
+        * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+        * corruption).
+        */
+       b       post_ttbr_update_workaround
+SYM_CODE_END(__swpan_exit_el0)
+#endif
+
        .macro  irq_stack_entry
        mov     x19, sp                 // preserve the original sp
 #ifdef CONFIG_SHADOW_CALL_STACK
index 65b08a7..0ce3a28 100644 (file)
@@ -253,6 +253,40 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
        return ret;
 }
 
+static bool branch_rela_needs_plt(Elf64_Sym *syms, Elf64_Rela *rela,
+                                 Elf64_Word dstidx)
+{
+
+       Elf64_Sym *s = syms + ELF64_R_SYM(rela->r_info);
+
+       if (s->st_shndx == dstidx)
+               return false;
+
+       return ELF64_R_TYPE(rela->r_info) == R_AARCH64_JUMP26 ||
+              ELF64_R_TYPE(rela->r_info) == R_AARCH64_CALL26;
+}
+
+/* Group branch PLT relas at the front end of the array. */
+static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
+                                     int numrels, Elf64_Word dstidx)
+{
+       int i = 0, j = numrels - 1;
+
+       if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+               return 0;
+
+       while (i < j) {
+               if (branch_rela_needs_plt(syms, &rela[i], dstidx))
+                       i++;
+               else if (branch_rela_needs_plt(syms, &rela[j], dstidx))
+                       swap(rela[i], rela[j]);
+               else
+                       j--;
+       }
+
+       return i;
+}
+
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
                              char *secstrings, struct module *mod)
 {
@@ -290,7 +324,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 
        for (i = 0; i < ehdr->e_shnum; i++) {
                Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
-               int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+               int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
                Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
 
                if (sechdrs[i].sh_type != SHT_RELA)
@@ -300,8 +334,14 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
                if (!(dstsec->sh_flags & SHF_EXECINSTR))
                        continue;
 
-               /* sort by type, symbol index and addend */
-               sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+               /*
+                * sort branch relocations requiring a PLT by type, symbol index
+                * and addend
+                */
+               nents = partition_branch_plt_relas(syms, rels, numrels,
+                                                  sechdrs[i].sh_info);
+               if (nents)
+                       sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL);
 
                if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
                        core_plts += count_plts(syms, rels, numrels,
index 4d78794..462f9a9 100644 (file)
 #include <asm/sysreg.h>
 #include <asm/virt.h>
 
+#include <clocksource/arm_arch_timer.h>
+
 #include <linux/acpi.h>
 #include <linux/clocksource.h>
 #include <linux/kvm_host.h>
 #include <linux/of.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
+#include <linux/sched_clock.h>
 #include <linux/smp.h>
 
 /* ARMv8 Cortex-A53 specific event types. */
@@ -155,7 +158,7 @@ armv8pmu_events_sysfs_show(struct device *dev,
 
        pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
 
-       return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
+       return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
 }
 
 #define ARMV8_EVENT_ATTR(name, config)                                         \
@@ -222,10 +225,29 @@ static struct attribute *armv8_pmuv3_event_attrs[] = {
        ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
        ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
        ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+       ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD),
+       ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED),
+       ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC),
+       ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL),
+       ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND),
+       ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND),
+       ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT),
        ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
        ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
        ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
        ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
+       ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES),
+       ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM),
+       ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS),
+       ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
+       ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
+       ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+       ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
+       ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
+       ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
+       ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED),
+       ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD),
+       ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR),
        NULL,
 };
 
@@ -244,10 +266,13 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,
            test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
                return attr->mode;
 
-       pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
-       if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
-           test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap))
-               return attr->mode;
+       if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) {
+               u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
+
+               if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
+                   test_bit(id, cpu_pmu->pmceid_ext_bitmap))
+                       return attr->mode;
+       }
 
        return 0;
 }
@@ -1165,28 +1190,54 @@ device_initcall(armv8_pmu_driver_init)
 void arch_perf_update_userpage(struct perf_event *event,
                               struct perf_event_mmap_page *userpg, u64 now)
 {
-       u32 freq;
-       u32 shift;
+       struct clock_read_data *rd;
+       unsigned int seq;
+       u64 ns;
 
-       /*
-        * Internal timekeeping for enabled/running/stopped times
-        * is always computed with the sched_clock.
-        */
-       freq = arch_timer_get_rate();
-       userpg->cap_user_time = 1;
+       userpg->cap_user_time = 0;
+       userpg->cap_user_time_zero = 0;
+       userpg->cap_user_time_short = 0;
+
+       do {
+               rd = sched_clock_read_begin(&seq);
+
+               if (rd->read_sched_clock != arch_timer_read_counter)
+                       return;
+
+               userpg->time_mult = rd->mult;
+               userpg->time_shift = rd->shift;
+               userpg->time_zero = rd->epoch_ns;
+               userpg->time_cycles = rd->epoch_cyc;
+               userpg->time_mask = rd->sched_clock_mask;
+
+               /*
+                * Subtract the cycle base, such that software that
+                * doesn't know about cap_user_time_short still 'works'
+                * assuming no wraps.
+                */
+               ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+               userpg->time_zero -= ns;
+
+       } while (sched_clock_read_retry(seq));
+
+       userpg->time_offset = userpg->time_zero - now;
 
-       clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
-                       NSEC_PER_SEC, 0);
        /*
         * time_shift is not expected to be greater than 31 due to
         * the original published conversion algorithm shifting a
         * 32-bit value (now specifies a 64-bit value) - refer
         * perf_event_mmap_page documentation in perf_event.h.
         */
-       if (shift == 32) {
-               shift = 31;
+       if (userpg->time_shift == 32) {
+               userpg->time_shift = 31;
                userpg->time_mult >>= 1;
        }
-       userpg->time_shift = (u16)shift;
-       userpg->time_offset = -now;
+
+       /*
+        * Internal timekeeping for enabled/running/stopped times
+        * is always computed with the sched_clock.
+        */
+       userpg->cap_user_time = 1;
+       userpg->cap_user_time_zero = 1;
+       userpg->cap_user_time_short = 1;
 }
index 93b3844..c793276 100644 (file)
@@ -400,11 +400,7 @@ static int __init topology_init(void)
 }
 subsys_initcall(topology_init);
 
-/*
- * Dump out kernel offset information on panic.
- */
-static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
-                             void *p)
+static void dump_kernel_offset(void)
 {
        const unsigned long offset = kaslr_offset();
 
@@ -415,17 +411,25 @@ static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
        } else {
                pr_emerg("Kernel Offset: disabled\n");
        }
+}
+
+static int arm64_panic_block_dump(struct notifier_block *self,
+                                 unsigned long v, void *p)
+{
+       dump_kernel_offset();
+       dump_cpu_features();
+       dump_mem_limit();
        return 0;
 }
 
-static struct notifier_block kernel_offset_notifier = {
-       .notifier_call = dump_kernel_offset
+static struct notifier_block arm64_panic_block = {
+       .notifier_call = arm64_panic_block_dump
 };
 
-static int __init register_kernel_offset_dumper(void)
+static int __init register_arm64_panic_block(void)
 {
        atomic_notifier_chain_register(&panic_notifier_list,
-                                      &kernel_offset_notifier);
+                                      &arm64_panic_block);
        return 0;
 }
-__initcall(register_kernel_offset_dumper);
+device_initcall(register_arm64_panic_block);
index 139679c..2dd8e3b 100644 (file)
@@ -199,12 +199,12 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
 
        put_task_stack(tsk);
 }
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
        __save_stack_trace(tsk, trace, 1);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 void save_stack_trace(struct stack_trace *trace)
 {
index 47f651d..13ebd5c 100644 (file)
@@ -855,7 +855,7 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
        pr_emerg("Task stack:     [0x%016lx..0x%016lx]\n",
                 tsk_stk, tsk_stk + THREAD_SIZE);
        pr_emerg("IRQ stack:      [0x%016lx..0x%016lx]\n",
-                irq_stk, irq_stk + THREAD_SIZE);
+                irq_stk, irq_stk + IRQ_STACK_SIZE);
        pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
                 ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
 
index e546df0..d4202a3 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/time_namespace.h>
 #include <linux/timekeeper_internal.h>
 #include <linux/vmalloc.h>
 #include <vdso/datapage.h>
@@ -40,6 +41,12 @@ enum vdso_abi {
 #endif /* CONFIG_COMPAT_VDSO */
 };
 
+enum vvar_pages {
+       VVAR_DATA_PAGE_OFFSET,
+       VVAR_TIMENS_PAGE_OFFSET,
+       VVAR_NR_PAGES,
+};
+
 struct vdso_abi_info {
        const char *name;
        const char *vdso_code_start;
@@ -107,25 +114,122 @@ static int __vdso_init(enum vdso_abi abi)
                        vdso_info[abi].vdso_code_start) >>
                        PAGE_SHIFT;
 
-       /* Allocate the vDSO pagelist, plus a page for the data. */
-       vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1,
+       vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
                                sizeof(struct page *),
                                GFP_KERNEL);
        if (vdso_pagelist == NULL)
                return -ENOMEM;
 
-       /* Grab the vDSO data page. */
-       vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-
        /* Grab the vDSO code pages. */
        pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
 
        for (i = 0; i < vdso_info[abi].vdso_pages; i++)
-               vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+               vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+       vdso_info[abi].cm->pages = vdso_pagelist;
+
+       return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+       return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+       struct mm_struct *mm = task->mm;
+       struct vm_area_struct *vma;
+
+       mmap_read_lock(mm);
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               unsigned long size = vma->vm_end - vma->vm_start;
 
-       vdso_info[abi].dm->pages = &vdso_pagelist[0];
-       vdso_info[abi].cm->pages = &vdso_pagelist[1];
+               if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
+                       zap_page_range(vma, vma->vm_start, size);
+#ifdef CONFIG_COMPAT_VDSO
+               if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
+                       zap_page_range(vma, vma->vm_start, size);
+#endif
+       }
+
+       mmap_read_unlock(mm);
+       return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+       if (likely(vma->vm_mm == current->mm))
+               return current->nsproxy->time_ns->vvar_page;
+
+       /*
+        * VM_PFNMAP | VM_IO protect .fault() handler from being called
+        * through interfaces like /proc/$pid/mem or
+        * process_vm_{readv,writev}() as long as there's no .access()
+        * in special_mapping_vmops.
+        * For more details see check_vma_flags() and __access_remote_vm().
+        */
+       WARN(1, "vvar_page accessed remotely");
+
+       return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+       return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+                            struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct page *timens_page = find_timens_vvar_page(vma);
+       unsigned long pfn;
+
+       switch (vmf->pgoff) {
+       case VVAR_DATA_PAGE_OFFSET:
+               if (timens_page)
+                       pfn = page_to_pfn(timens_page);
+               else
+                       pfn = sym_to_pfn(vdso_data);
+               break;
+#ifdef CONFIG_TIME_NS
+       case VVAR_TIMENS_PAGE_OFFSET:
+               /*
+                * If a task belongs to a time namespace then a namespace
+                * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+                * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+                * offset.
+                * See also the comment near timens_setup_vdso_data().
+                */
+               if (!timens_page)
+                       return VM_FAULT_SIGBUS;
+               pfn = sym_to_pfn(vdso_data);
+               break;
+#endif /* CONFIG_TIME_NS */
+       default:
+               return VM_FAULT_SIGBUS;
+       }
+
+       return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static int vvar_mremap(const struct vm_special_mapping *sm,
+                      struct vm_area_struct *new_vma)
+{
+       unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+       if (new_size != VVAR_NR_PAGES * PAGE_SIZE)
+               return -EINVAL;
 
        return 0;
 }
@@ -139,9 +243,11 @@ static int __setup_additional_pages(enum vdso_abi abi,
        unsigned long gp_flags = 0;
        void *ret;
 
+       BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+
        vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
        /* Be sure to map the data page */
-       vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+       vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
 
        vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
        if (IS_ERR_VALUE(vdso_base)) {
@@ -149,8 +255,8 @@ static int __setup_additional_pages(enum vdso_abi abi,
                goto up_fail;
        }
 
-       ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
-                                      VM_READ|VM_MAYREAD,
+       ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
+                                      VM_READ|VM_MAYREAD|VM_PFNMAP,
                                       vdso_info[abi].dm);
        if (IS_ERR(ret))
                goto up_fail;
@@ -158,7 +264,7 @@ static int __setup_additional_pages(enum vdso_abi abi,
        if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
                gp_flags = VM_ARM64_BTI;
 
-       vdso_base += PAGE_SIZE;
+       vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
        mm->context.vdso = (void *)vdso_base;
        ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
                                       VM_READ|VM_EXEC|gp_flags|
@@ -206,6 +312,8 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
 #ifdef CONFIG_COMPAT_VDSO
        [AA32_MAP_VVAR] = {
                .name = "[vvar]",
+               .fault = vvar_fault,
+               .mremap = vvar_mremap,
        },
        [AA32_MAP_VDSO] = {
                .name = "[vdso]",
@@ -371,6 +479,8 @@ enum aarch64_map {
 static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
        [AA64_MAP_VVAR] = {
                .name   = "[vvar]",
+               .fault = vvar_fault,
+               .mremap = vvar_mremap,
        },
        [AA64_MAP_VDSO] = {
                .name   = "[vdso]",
index 7ad2d3a..d808ad3 100644 (file)
@@ -17,7 +17,10 @@ OUTPUT_ARCH(aarch64)
 
 SECTIONS
 {
-       PROVIDE(_vdso_data = . - PAGE_SIZE);
+       PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+       PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
        . = VDSO_LBASE + SIZEOF_HEADERS;
 
        .hash           : { *(.hash) }                  :text
index 337d035..3348ce5 100644 (file)
@@ -17,7 +17,10 @@ OUTPUT_ARCH(arm)
 
 SECTIONS
 {
-       PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+       PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+       PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
        . = VDSO_LBASE + SIZEOF_HEADERS;
 
        .hash           : { *(.hash) }                  :text
index 5423ffe..ec8e894 100644 (file)
@@ -10,7 +10,6 @@
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/cache.h>
 #include <asm/kernel-pgtable.h>
-#include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
 
index e76c0e8..86971fe 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 
+#include <asm/alternative.h>
 #include <asm/assembler.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
index 8c0035c..31058e6 100644 (file)
@@ -1326,7 +1326,7 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
        return true;
 }
 
-static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
+static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz)
 {
        pud_t *pudp;
        pmd_t *pmdp;
@@ -1338,11 +1338,11 @@ static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
                return false;
 
        if (pudp)
-               return kvm_s2pud_exec(pudp);
+               return sz <= PUD_SIZE && kvm_s2pud_exec(pudp);
        else if (pmdp)
-               return kvm_s2pmd_exec(pmdp);
+               return sz <= PMD_SIZE && kvm_s2pmd_exec(pmdp);
        else
-               return kvm_s2pte_exec(ptep);
+               return sz == PAGE_SIZE && kvm_s2pte_exec(ptep);
 }
 
 static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -1958,7 +1958,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         * execute permissions, and we preserve whatever we have.
         */
        needs_exec = exec_fault ||
-               (fault_status == FSC_PERM && stage2_is_exec(kvm, fault_ipa));
+               (fault_status == FSC_PERM &&
+                stage2_is_exec(kvm, fault_ipa, vma_pagesize));
 
        if (vma_pagesize == PUD_SIZE) {
                pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
index baf5ce9..d319667 100644 (file)
@@ -1024,9 +1024,9 @@ static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 /* Macro to expand the AMU counter and type registers*/
 #define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), access_amu }
-#define AMU_AMEVTYPE0_EL0(n) { SYS_DESC(SYS_AMEVTYPE0_EL0(n)), access_amu }
+#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), access_amu }
 #define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), access_amu }
-#define AMU_AMEVTYPE1_EL0(n) { SYS_DESC(SYS_AMEVTYPE1_EL0(n)), access_amu }
+#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), access_amu }
 
 static bool trap_ptrauth(struct kvm_vcpu *vcpu,
                         struct sys_reg_params *p,
@@ -1629,22 +1629,22 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        AMU_AMEVCNTR0_EL0(13),
        AMU_AMEVCNTR0_EL0(14),
        AMU_AMEVCNTR0_EL0(15),
-       AMU_AMEVTYPE0_EL0(0),
-       AMU_AMEVTYPE0_EL0(1),
-       AMU_AMEVTYPE0_EL0(2),
-       AMU_AMEVTYPE0_EL0(3),
-       AMU_AMEVTYPE0_EL0(4),
-       AMU_AMEVTYPE0_EL0(5),
-       AMU_AMEVTYPE0_EL0(6),
-       AMU_AMEVTYPE0_EL0(7),
-       AMU_AMEVTYPE0_EL0(8),
-       AMU_AMEVTYPE0_EL0(9),
-       AMU_AMEVTYPE0_EL0(10),
-       AMU_AMEVTYPE0_EL0(11),
-       AMU_AMEVTYPE0_EL0(12),
-       AMU_AMEVTYPE0_EL0(13),
-       AMU_AMEVTYPE0_EL0(14),
-       AMU_AMEVTYPE0_EL0(15),
+       AMU_AMEVTYPER0_EL0(0),
+       AMU_AMEVTYPER0_EL0(1),
+       AMU_AMEVTYPER0_EL0(2),
+       AMU_AMEVTYPER0_EL0(3),
+       AMU_AMEVTYPER0_EL0(4),
+       AMU_AMEVTYPER0_EL0(5),
+       AMU_AMEVTYPER0_EL0(6),
+       AMU_AMEVTYPER0_EL0(7),
+       AMU_AMEVTYPER0_EL0(8),
+       AMU_AMEVTYPER0_EL0(9),
+       AMU_AMEVTYPER0_EL0(10),
+       AMU_AMEVTYPER0_EL0(11),
+       AMU_AMEVTYPER0_EL0(12),
+       AMU_AMEVTYPER0_EL0(13),
+       AMU_AMEVTYPER0_EL0(14),
+       AMU_AMEVTYPER0_EL0(15),
        AMU_AMEVCNTR1_EL0(0),
        AMU_AMEVCNTR1_EL0(1),
        AMU_AMEVCNTR1_EL0(2),
@@ -1661,22 +1661,22 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        AMU_AMEVCNTR1_EL0(13),
        AMU_AMEVCNTR1_EL0(14),
        AMU_AMEVCNTR1_EL0(15),
-       AMU_AMEVTYPE1_EL0(0),
-       AMU_AMEVTYPE1_EL0(1),
-       AMU_AMEVTYPE1_EL0(2),
-       AMU_AMEVTYPE1_EL0(3),
-       AMU_AMEVTYPE1_EL0(4),
-       AMU_AMEVTYPE1_EL0(5),
-       AMU_AMEVTYPE1_EL0(6),
-       AMU_AMEVTYPE1_EL0(7),
-       AMU_AMEVTYPE1_EL0(8),
-       AMU_AMEVTYPE1_EL0(9),
-       AMU_AMEVTYPE1_EL0(10),
-       AMU_AMEVTYPE1_EL0(11),
-       AMU_AMEVTYPE1_EL0(12),
-       AMU_AMEVTYPE1_EL0(13),
-       AMU_AMEVTYPE1_EL0(14),
-       AMU_AMEVTYPE1_EL0(15),
+       AMU_AMEVTYPER1_EL0(0),
+       AMU_AMEVTYPER1_EL0(1),
+       AMU_AMEVTYPER1_EL0(2),
+       AMU_AMEVTYPER1_EL0(3),
+       AMU_AMEVTYPER1_EL0(4),
+       AMU_AMEVTYPER1_EL0(5),
+       AMU_AMEVTYPER1_EL0(6),
+       AMU_AMEVTYPER1_EL0(7),
+       AMU_AMEVTYPER1_EL0(8),
+       AMU_AMEVTYPER1_EL0(9),
+       AMU_AMEVTYPER1_EL0(10),
+       AMU_AMEVTYPER1_EL0(11),
+       AMU_AMEVTYPER1_EL0(12),
+       AMU_AMEVTYPER1_EL0(13),
+       AMU_AMEVTYPER1_EL0(14),
+       AMU_AMEVTYPER1_EL0(15),
 
        { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
        { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
index d702d60..a206655 100644 (file)
@@ -198,9 +198,10 @@ set_asid:
        return idx2asid(asid) | generation;
 }
 
-void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+void check_and_switch_context(struct mm_struct *mm)
 {
        unsigned long flags;
+       unsigned int cpu;
        u64 asid, old_active_asid;
 
        if (system_supports_cnp())
@@ -222,9 +223,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
         *   relaxed xchg in flush_context will treat us as reserved
         *   because atomic RmWs are totally ordered for a given location.
         */
-       old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+       old_active_asid = atomic64_read(this_cpu_ptr(&active_asids));
        if (old_active_asid && asid_gen_match(asid) &&
-           atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
+           atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_asids),
                                     old_active_asid, asid))
                goto switch_mm_fastpath;
 
@@ -236,10 +237,11 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
                atomic64_set(&mm->context.id, asid);
        }
 
+       cpu = smp_processor_id();
        if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
                local_flush_tlb_all();
 
-       atomic64_set(&per_cpu(active_asids, cpu), asid);
+       atomic64_set(this_cpu_ptr(&active_asids), asid);
        raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 
 switch_mm_fastpath:
index 0a52ce4..aa421bf 100644 (file)
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 
+/*
+ * HugeTLB Support Matrix
+ *
+ * ---------------------------------------------------
+ * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
+ * ---------------------------------------------------
+ * |     4K    |   64K    |   2M  |    32M   |   1G  |
+ * |    16K    |    2M    |  32M  |     1G   |       |
+ * |    64K    |    2M    | 512M  |    16G   |       |
+ * ---------------------------------------------------
+ */
+
+/*
+ * Reserve CMA areas, when requested, for the largest supported gigantic
+ * huge page: PUD with 4K pages, CONT PMD otherwise (CONT_PMD_SHIFT already
+ * includes PMD_SHIFT). Smaller gigantic pages can use those areas too.
+ */
+#ifdef CONFIG_CMA
+void __init arm64_hugetlb_cma_reserve(void)
+{
+       int order;
+
+#ifdef CONFIG_ARM64_4K_PAGES
+       order = PUD_SHIFT - PAGE_SHIFT;
+#else
+       order = CONT_PMD_SHIFT - PAGE_SHIFT;
+#endif
+       /*
+        * HugeTLB CMA reservation is required for gigantic
+        * huge pages which could not be allocated via the
+        * page allocator. Just warn if there is any change
+        * breaking this assumption.
+        */
+       WARN_ON(order <= MAX_ORDER);
+       hugetlb_cma_reserve(order);
+}
+#endif /* CONFIG_CMA */
+
 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
 bool arch_hugetlb_migration_supported(struct hstate *h)
 {
@@ -457,9 +495,9 @@ static int __init hugetlbpage_init(void)
 #ifdef CONFIG_ARM64_4K_PAGES
        hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
 #endif
-       hugetlb_add_hstate((CONT_PMD_SHIFT + PMD_SHIFT) - PAGE_SHIFT);
+       hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
        hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-       hugetlb_add_hstate((CONT_PTE_SHIFT + PAGE_SHIFT) - PAGE_SHIFT);
+       hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);
 
        return 0;
 }
index 1e93cfc..f8c19c6 100644 (file)
@@ -425,8 +425,8 @@ void __init bootmem_init(void)
         * initialize node_online_map that gets used in hugetlb_cma_reserve()
         * while allocating required CMA size across online nodes.
         */
-#ifdef CONFIG_ARM64_4K_PAGES
-       hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
+       arm64_hugetlb_cma_reserve();
 #endif
 
        /*
@@ -563,27 +563,11 @@ void free_initmem(void)
        unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
-/*
- * Dump out memory limit information on panic.
- */
-static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
+void dump_mem_limit(void)
 {
        if (memory_limit != PHYS_ADDR_MAX) {
                pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
        } else {
                pr_emerg("Memory Limit: none\n");
        }
-       return 0;
-}
-
-static struct notifier_block mem_limit_notifier = {
-       .notifier_call = dump_mem_limit,
-};
-
-static int __init register_mem_limit_dumper(void)
-{
-       atomic_notifier_chain_register(&panic_notifier_list,
-                                      &mem_limit_notifier);
-       return 0;
 }
-__initcall(register_mem_limit_dumper);
index c6b6a06..a990d15 100644 (file)
@@ -12,8 +12,6 @@
  * resource counting etc..
  */
 
-#define ATOMIC_INIT(i) { (i) }
-
 #define atomic_read(v)         READ_ONCE((v)->counter)
 #define atomic_set(v, i)       WRITE_ONCE(((v)->counter), (i))
 
index 0231d69..4ab895d 100644 (file)
@@ -12,8 +12,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-#define ATOMIC_INIT(i)         { (i) }
-
 /*  Normal writes in our arch don't clear lock reservations  */
 
 static inline void atomic_set(atomic_t *v, int new)
index 50440f3..f267d95 100644 (file)
@@ -19,7 +19,6 @@
 #include <asm/barrier.h>
 
 
-#define ATOMIC_INIT(i)         { (i) }
 #define ATOMIC64_INIT(i)       { (i) }
 
 #define atomic_read(v)         READ_ONCE((v)->counter)
diff --git a/arch/m68k/Kbuild b/arch/m68k/Kbuild
new file mode 100644 (file)
index 0000000..18abb35
--- /dev/null
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y                          += kernel/ mm/
+obj-$(CONFIG_Q40)              += q40/
+obj-$(CONFIG_AMIGA)            += amiga/
+obj-$(CONFIG_ATARI)            += atari/
+obj-$(CONFIG_MAC)              += mac/
+obj-$(CONFIG_HP300)            += hp300/
+obj-$(CONFIG_APOLLO)           += apollo/
+obj-$(CONFIG_MVME147)          += mvme147/
+obj-$(CONFIG_MVME16x)          += mvme16x/
+obj-$(CONFIG_BVME6000)         += bvme6000/
+obj-$(CONFIG_SUN3X)            += sun3x/ sun3/
+obj-$(CONFIG_SUN3)             += sun3/
+obj-$(CONFIG_NATFEAT)          += emu/
+obj-$(CONFIG_M68040)           += fpsp040/
+obj-$(CONFIG_M68060)           += ifpsp060/
+obj-$(CONFIG_M68KFPU_EMU)      += math-emu/
+obj-$(CONFIG_M68000)           += 68000/
+obj-$(CONFIG_COLDFIRE)         += coldfire/
index 0415d28..4438ffb 100644 (file)
@@ -32,30 +32,33 @@ endif
 #      compiler cpu type flag.
 #
 ifndef CONFIG_M68040
-cpuflags-$(CONFIG_M68060)      := -m68060
+cpuflags-$(CONFIG_M68060)      = -m68060
 endif
 ifndef CONFIG_M68060
-cpuflags-$(CONFIG_M68040)      := -m68040
+cpuflags-$(CONFIG_M68040)      = -m68040
 endif
-cpuflags-$(CONFIG_M68030)      :=
-cpuflags-$(CONFIG_M68020)      :=
-cpuflags-$(CONFIG_M68000)      := -m68000
-cpuflags-$(CONFIG_M5441x)      := $(call cc-option,-mcpu=54455,-mcfv4e)
-cpuflags-$(CONFIG_M54xx)       := $(call cc-option,-mcpu=5475,-m5200)
-cpuflags-$(CONFIG_M5407)       := $(call cc-option,-mcpu=5407,-m5200)
-cpuflags-$(CONFIG_M532x)       := $(call cc-option,-mcpu=532x,-m5307)
-cpuflags-$(CONFIG_M537x)       := $(call cc-option,-mcpu=537x,-m5307)
-cpuflags-$(CONFIG_M5307)       := $(call cc-option,-mcpu=5307,-m5200)
-cpuflags-$(CONFIG_M528x)       := $(call cc-option,-mcpu=528x,-m5307)
-cpuflags-$(CONFIG_M5275)       := $(call cc-option,-mcpu=5275,-m5307)
-cpuflags-$(CONFIG_M5272)       := $(call cc-option,-mcpu=5272,-m5307)
-cpuflags-$(CONFIG_M5271)       := $(call cc-option,-mcpu=5271,-m5307)
-cpuflags-$(CONFIG_M523x)       := $(call cc-option,-mcpu=523x,-m5307)
-cpuflags-$(CONFIG_M525x)       := $(call cc-option,-mcpu=5253,-m5200)
-cpuflags-$(CONFIG_M5249)       := $(call cc-option,-mcpu=5249,-m5200)
-cpuflags-$(CONFIG_M520x)       := $(call cc-option,-mcpu=5208,-m5200)
-cpuflags-$(CONFIG_M5206e)      := $(call cc-option,-mcpu=5206e,-m5200)
-cpuflags-$(CONFIG_M5206)       := $(call cc-option,-mcpu=5206,-m5200)
+cpuflags-$(CONFIG_M68030)      =
+cpuflags-$(CONFIG_M68020)      =
+cpuflags-$(CONFIG_M68000)      = -m68000
+cpuflags-$(CONFIG_M5441x)      = $(call cc-option,-mcpu=54455,-mcfv4e)
+cpuflags-$(CONFIG_M54xx)       = $(call cc-option,-mcpu=5475,-m5200)
+cpuflags-$(CONFIG_M5407)       = $(call cc-option,-mcpu=5407,-m5200)
+cpuflags-$(CONFIG_M532x)       = $(call cc-option,-mcpu=532x,-m5307)
+cpuflags-$(CONFIG_M537x)       = $(call cc-option,-mcpu=537x,-m5307)
+cpuflags-$(CONFIG_M5307)       = $(call cc-option,-mcpu=5307,-m5200)
+cpuflags-$(CONFIG_M528x)       = $(call cc-option,-mcpu=528x,-m5307)
+cpuflags-$(CONFIG_M5275)       = $(call cc-option,-mcpu=5275,-m5307)
+cpuflags-$(CONFIG_M5272)       = $(call cc-option,-mcpu=5272,-m5307)
+cpuflags-$(CONFIG_M5271)       = $(call cc-option,-mcpu=5271,-m5307)
+cpuflags-$(CONFIG_M523x)       = $(call cc-option,-mcpu=523x,-m5307)
+cpuflags-$(CONFIG_M525x)       = $(call cc-option,-mcpu=5253,-m5200)
+cpuflags-$(CONFIG_M5249)       = $(call cc-option,-mcpu=5249,-m5200)
+cpuflags-$(CONFIG_M520x)       = $(call cc-option,-mcpu=5208,-m5200)
+cpuflags-$(CONFIG_M5206e)      = $(call cc-option,-mcpu=5206e,-m5200)
+cpuflags-$(CONFIG_M5206)       = $(call cc-option,-mcpu=5206,-m5200)
+
+# Expand cpuflags-y now so that the cc-option calls are evaluated only once
+cpuflags-y := $(cpuflags-y)
 
 KBUILD_AFLAGS += $(cpuflags-y)
 KBUILD_CFLAGS += $(cpuflags-y)
@@ -67,9 +70,8 @@ ifdef CONFIG_MMU
 KBUILD_CFLAGS += -fno-strength-reduce -ffixed-a2
 else
 # we can use a m68k-linux-gcc toolchain with these in place
-KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\"
-KBUILD_CFLAGS += -D__uClinux__
-KBUILD_AFLAGS += -D__uClinux__
+KBUILD_CPPFLAGS += -DUTS_SYSNAME=\"uClinux\"
+KBUILD_CPPFLAGS += -D__uClinux__
 endif
 
 KBUILD_LDFLAGS := -m m68kelf
@@ -97,27 +99,9 @@ head-$(CONFIG_SUN3)          := arch/m68k/kernel/sun3-head.o
 head-$(CONFIG_M68000)          := arch/m68k/68000/head.o
 head-$(CONFIG_COLDFIRE)                := arch/m68k/coldfire/head.o
 
-core-y                         += arch/m68k/kernel/    arch/m68k/mm/
+core-y                         += arch/m68k/
 libs-y                         += arch/m68k/lib/
 
-core-$(CONFIG_Q40)             += arch/m68k/q40/
-core-$(CONFIG_AMIGA)           += arch/m68k/amiga/
-core-$(CONFIG_ATARI)           += arch/m68k/atari/
-core-$(CONFIG_MAC)             += arch/m68k/mac/
-core-$(CONFIG_HP300)           += arch/m68k/hp300/
-core-$(CONFIG_APOLLO)          += arch/m68k/apollo/
-core-$(CONFIG_MVME147)         += arch/m68k/mvme147/
-core-$(CONFIG_MVME16x)         += arch/m68k/mvme16x/
-core-$(CONFIG_BVME6000)                += arch/m68k/bvme6000/
-core-$(CONFIG_SUN3X)           += arch/m68k/sun3x/     arch/m68k/sun3/
-core-$(CONFIG_SUN3)            += arch/m68k/sun3/      arch/m68k/sun3/prom/
-core-$(CONFIG_NATFEAT)         += arch/m68k/emu/
-core-$(CONFIG_M68040)          += arch/m68k/fpsp040/
-core-$(CONFIG_M68060)          += arch/m68k/ifpsp060/
-core-$(CONFIG_M68KFPU_EMU)     += arch/m68k/math-emu/
-core-$(CONFIG_M68000)          += arch/m68k/68000/
-core-$(CONFIG_COLDFIRE)                += arch/m68k/coldfire/
-
 
 all:   zImage
 
@@ -154,8 +138,7 @@ else
        $(KBZIP2) -1c vmlinux >vmlinux.bz2
 endif
 
-archclean:
-       rm -f vmlinux.gz vmlinux.bz2
+CLEAN_FILES += vmlinux.gz vmlinux.bz2
 
 archheaders:
        $(Q)$(MAKE) $(build)=arch/m68k/kernel/syscalls all
index 888b75e..f9f4fa5 100644 (file)
@@ -594,6 +594,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -615,6 +616,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -643,6 +645,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index 4530384..f4828e8 100644 (file)
@@ -550,6 +550,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -571,6 +572,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -599,6 +601,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index de824c1..e7911f1 100644 (file)
@@ -572,6 +572,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -593,6 +594,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -621,6 +623,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index 071839c..d574e43 100644 (file)
@@ -543,6 +543,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -564,6 +565,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -592,6 +594,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index 37ac7b0..c7ce206 100644 (file)
@@ -552,6 +552,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -573,6 +574,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -601,6 +603,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index 6087798..522dcf6 100644 (file)
@@ -574,6 +574,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -595,6 +596,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -623,6 +625,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index 0abb53c..2433409 100644 (file)
@@ -660,6 +660,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -681,6 +682,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -709,6 +711,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index cb14c23..5568aa7 100644 (file)
@@ -542,6 +542,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -563,6 +564,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -591,6 +593,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index e8a1920..5b1e72c 100644 (file)
@@ -543,6 +543,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -564,6 +565,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -592,6 +594,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index 2cbf416..c3a3dcf 100644 (file)
@@ -561,6 +561,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -582,6 +583,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -610,6 +612,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index fed3cc7..3c00e52 100644 (file)
@@ -545,6 +545,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -566,6 +567,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -593,6 +595,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index 0954fde..241242d 100644 (file)
@@ -544,6 +544,7 @@ CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -565,6 +566,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC32_SELFTEST=m
 CONFIG_CRC64=m
 CONFIG_XZ_DEC_TEST=m
@@ -593,6 +595,7 @@ CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_HASH=m
 CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
index c3a6304..92d26c8 100644 (file)
@@ -59,9 +59,9 @@ struct nfhd_device {
        struct gendisk *disk;
 };
 
-static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio)
+static blk_qc_t nfhd_submit_bio(struct bio *bio)
 {
-       struct nfhd_device *dev = queue->queuedata;
+       struct nfhd_device *dev = bio->bi_disk->private_data;
        struct bio_vec bvec;
        struct bvec_iter iter;
        int dir, len, shift;
@@ -93,6 +93,7 @@ static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 
 static const struct block_device_operations nfhd_ops = {
        .owner  = THIS_MODULE,
+       .submit_bio = nfhd_submit_bio,
        .getgeo = nfhd_getgeo,
 };
 
@@ -118,11 +119,10 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
        dev->bsize = bsize;
        dev->bshift = ffs(bsize) - 10;
 
-       dev->queue = blk_alloc_queue(nfhd_make_request, NUMA_NO_NODE);
+       dev->queue = blk_alloc_queue(NUMA_NO_NODE);
        if (dev->queue == NULL)
                goto free_dev;
 
-       dev->queue->queuedata = dev;
        blk_queue_logical_block_size(dev->queue, bsize);
 
        dev->disk = alloc_disk(16);
index 47228b0..756c5cc 100644 (file)
@@ -16,8 +16,6 @@
  * We do not have SMP m68k systems, so we don't have to deal with that.
  */
 
-#define ATOMIC_INIT(i) { (i) }
-
 #define atomic_read(v)         READ_ONCE((v)->counter)
 #define atomic_set(v, i)       WRITE_ONCE(((v)->counter), (i))
 
index 8a6dc6e..911826e 100644 (file)
        ({ u16 __v = le16_to_cpu(*(__force volatile u16 *) (addr)); __v; })
 
 #define rom_out_8(addr, b)     \
-       ({u8 __w, __v = (b);  u32 _addr = ((u32) (addr)); \
+       ({u8 __maybe_unused __w, __v = (b);  u32 _addr = ((u32) (addr)); \
        __w = ((*(__force volatile u8 *)  ((_addr | 0x10000) + (__v<<1)))); })
 #define rom_out_be16(addr, w)  \
-       ({u16 __w, __v = (w); u32 _addr = ((u32) (addr)); \
+       ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
        __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v & 0xFF)<<1)))); \
        __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v >> 8)<<1)))); })
 #define rom_out_le16(addr, w)  \
-       ({u16 __w, __v = (w); u32 _addr = ((u32) (addr)); \
+       ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
        __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v >> 8)<<1)))); \
        __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v & 0xFF)<<1)))); })
 
index b3ff395..fc034fd 100644 (file)
 #define        FMT4SIZE        0
 #else
 #define        FORMAT          0
-#define        FMT4SIZE        sizeof(((struct frame *)0)->un.fmt4)
+#define        FMT4SIZE        sizeof_field(struct frame, un.fmt4)
 #endif
 
 static const int frame_size_change[16] = {
-  [1]  = -1, /* sizeof(((struct frame *)0)->un.fmt1), */
-  [2]  = sizeof(((struct frame *)0)->un.fmt2),
-  [3]  = sizeof(((struct frame *)0)->un.fmt3),
+  [1]  = -1, /* sizeof_field(struct frame, un.fmt1), */
+  [2]  = sizeof_field(struct frame, un.fmt2),
+  [3]  = sizeof_field(struct frame, un.fmt3),
   [4]  = FMT4SIZE,
-  [5]  = -1, /* sizeof(((struct frame *)0)->un.fmt5), */
-  [6]  = -1, /* sizeof(((struct frame *)0)->un.fmt6), */
-  [7]  = sizeof(((struct frame *)0)->un.fmt7),
-  [8]  = -1, /* sizeof(((struct frame *)0)->un.fmt8), */
-  [9]  = sizeof(((struct frame *)0)->un.fmt9),
-  [10] = sizeof(((struct frame *)0)->un.fmta),
-  [11] = sizeof(((struct frame *)0)->un.fmtb),
-  [12] = -1, /* sizeof(((struct frame *)0)->un.fmtc), */
-  [13] = -1, /* sizeof(((struct frame *)0)->un.fmtd), */
-  [14] = -1, /* sizeof(((struct frame *)0)->un.fmte), */
-  [15] = -1, /* sizeof(((struct frame *)0)->un.fmtf), */
+  [5]  = -1, /* sizeof_field(struct frame, un.fmt5), */
+  [6]  = -1, /* sizeof_field(struct frame, un.fmt6), */
+  [7]  = sizeof_field(struct frame, un.fmt7),
+  [8]  = -1, /* sizeof_field(struct frame, un.fmt8), */
+  [9]  = sizeof_field(struct frame, un.fmt9),
+  [10] = sizeof_field(struct frame, un.fmta),
+  [11] = sizeof_field(struct frame, un.fmtb),
+  [12] = -1, /* sizeof_field(struct frame, un.fmtc), */
+  [13] = -1, /* sizeof_field(struct frame, un.fmtd), */
+  [14] = -1, /* sizeof_field(struct frame, un.fmte), */
+  [15] = -1, /* sizeof_field(struct frame, un.fmtf), */
 };
 
 static inline int frame_extra_sizes(int f)
@@ -651,7 +651,7 @@ static int mangle_kernel_stack(struct pt_regs *regs, int formatvec,
        } else {
                struct switch_stack *sw = (struct switch_stack *)regs - 1;
                /* yes, twice as much as max(sizeof(frame.un.fmt<x>)) */
-               unsigned long buf[sizeof(((struct frame *)0)->un) / 2];
+               unsigned long buf[sizeof_field(struct frame, un) / 2];
 
                /* that'll make sure that expansion won't crap over data */
                if (copy_from_user(buf + fsize / 4, fp, fsize))
index d3775af..c669a76 100644 (file)
@@ -183,7 +183,7 @@ static __inline__ void iop_writeb(volatile struct mac_iop *iop, __u16 addr, __u8
 
 static __inline__ void iop_stop(volatile struct mac_iop *iop)
 {
-       iop->status_ctrl &= ~IOP_RUN;
+       iop->status_ctrl = IOP_AUTOINC;
 }
 
 static __inline__ void iop_start(volatile struct mac_iop *iop)
@@ -191,14 +191,9 @@ static __inline__ void iop_start(volatile struct mac_iop *iop)
        iop->status_ctrl = IOP_RUN | IOP_AUTOINC;
 }
 
-static __inline__ void iop_bypass(volatile struct mac_iop *iop)
-{
-       iop->status_ctrl |= IOP_BYPASS;
-}
-
 static __inline__ void iop_interrupt(volatile struct mac_iop *iop)
 {
-       iop->status_ctrl |= IOP_IRQ;
+       iop->status_ctrl = IOP_IRQ | IOP_RUN | IOP_AUTOINC;
 }
 
 static int iop_alive(volatile struct mac_iop *iop)
@@ -244,7 +239,6 @@ void __init iop_preinit(void)
                } else {
                        iop_base[IOP_NUM_SCC] = (struct mac_iop *) SCC_IOP_BASE_QUADRA;
                }
-               iop_base[IOP_NUM_SCC]->status_ctrl = 0x87;
                iop_scc_present = 1;
        } else {
                iop_base[IOP_NUM_SCC] = NULL;
@@ -256,7 +250,7 @@ void __init iop_preinit(void)
                } else {
                        iop_base[IOP_NUM_ISM] = (struct mac_iop *) ISM_IOP_BASE_QUADRA;
                }
-               iop_base[IOP_NUM_ISM]->status_ctrl = 0;
+               iop_stop(iop_base[IOP_NUM_ISM]);
                iop_ism_present = 1;
        } else {
                iop_base[IOP_NUM_ISM] = NULL;
@@ -353,8 +347,8 @@ void iop_complete_message(struct iop_msg *msg)
        int chan = msg->channel;
        int i,offset;
 
-       iop_pr_debug("msg %p iop_num %d channel %d\n", msg, msg->iop_num,
-                    msg->channel);
+       iop_pr_debug("iop_num %d chan %d reply %*ph\n",
+                    msg->iop_num, msg->channel, IOP_MSG_LEN, msg->reply);
 
        offset = IOP_ADDR_RECV_MSG + (msg->channel * IOP_MSG_LEN);
 
@@ -378,6 +372,9 @@ static void iop_do_send(struct iop_msg *msg)
        volatile struct mac_iop *iop = iop_base[msg->iop_num];
        int i,offset;
 
+       iop_pr_debug("iop_num %d chan %d message %*ph\n",
+                    msg->iop_num, msg->channel, IOP_MSG_LEN, msg->message);
+
        offset = IOP_ADDR_SEND_MSG + (msg->channel * IOP_MSG_LEN);
 
        for (i = 0 ; i < IOP_MSG_LEN ; i++, offset++) {
@@ -400,8 +397,6 @@ static void iop_handle_send(uint iop_num, uint chan)
        struct iop_msg *msg;
        int i,offset;
 
-       iop_pr_debug("iop_num %d chan %d\n", iop_num, chan);
-
        iop_writeb(iop, IOP_ADDR_SEND_STATE + chan, IOP_MSG_IDLE);
 
        if (!(msg = iop_send_queue[iop_num][chan])) return;
@@ -411,11 +406,15 @@ static void iop_handle_send(uint iop_num, uint chan)
        for (i = 0 ; i < IOP_MSG_LEN ; i++, offset++) {
                msg->reply[i] = iop_readb(iop, offset);
        }
+       iop_pr_debug("iop_num %d chan %d reply %*ph\n",
+                    iop_num, chan, IOP_MSG_LEN, msg->reply);
+
        if (msg->handler) (*msg->handler)(msg);
        msg->status = IOP_MSGSTATUS_UNUSED;
        msg = msg->next;
        iop_send_queue[iop_num][chan] = msg;
-       if (msg) iop_do_send(msg);
+       if (msg && iop_readb(iop, IOP_ADDR_SEND_STATE + chan) == IOP_MSG_IDLE)
+               iop_do_send(msg);
 }
 
 /*
@@ -429,8 +428,6 @@ static void iop_handle_recv(uint iop_num, uint chan)
        int i,offset;
        struct iop_msg *msg;
 
-       iop_pr_debug("iop_num %d chan %d\n", iop_num, chan);
-
        msg = iop_get_unused_msg();
        msg->iop_num = iop_num;
        msg->channel = chan;
@@ -442,6 +439,8 @@ static void iop_handle_recv(uint iop_num, uint chan)
        for (i = 0 ; i < IOP_MSG_LEN ; i++, offset++) {
                msg->message[i] = iop_readb(iop, offset);
        }
+       iop_pr_debug("iop_num %d chan %d message %*ph\n",
+                    iop_num, chan, IOP_MSG_LEN, msg->message);
 
        iop_writeb(iop, IOP_ADDR_RECV_STATE + chan, IOP_MSG_RCVD);
 
@@ -451,9 +450,7 @@ static void iop_handle_recv(uint iop_num, uint chan)
        if (msg->handler) {
                (*msg->handler)(msg);
        } else {
-               iop_pr_debug("unclaimed message on iop_num %d chan %d\n",
-                            iop_num, chan);
-               iop_pr_debug("%*ph\n", IOP_MSG_LEN, msg->message);
+               memset(msg->reply, 0, IOP_MSG_LEN);
                iop_complete_message(msg);
        }
 }
@@ -489,16 +486,12 @@ int iop_send_message(uint iop_num, uint chan, void *privdata,
 
        if (!(q = iop_send_queue[iop_num][chan])) {
                iop_send_queue[iop_num][chan] = msg;
+               iop_do_send(msg);
        } else {
                while (q->next) q = q->next;
                q->next = msg;
        }
 
-       if (iop_readb(iop_base[iop_num],
-           IOP_ADDR_SEND_STATE + chan) == IOP_MSG_IDLE) {
-               iop_do_send(msg);
-       }
-
        return 0;
 }
 
@@ -567,35 +560,34 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id)
        int i,state;
        u8 events = iop->status_ctrl & (IOP_INT0 | IOP_INT1);
 
-       iop_pr_debug("status %02X\n", iop->status_ctrl);
-
        do {
+               iop_pr_debug("iop_num %d status %02X\n", iop_num,
+                            iop->status_ctrl);
+
                /* INT0 indicates state change on an outgoing message channel */
                if (events & IOP_INT0) {
                        iop->status_ctrl = IOP_INT0 | IOP_RUN | IOP_AUTOINC;
-                       iop_pr_debug("new status %02X, send states",
-                                    iop->status_ctrl);
                        for (i = 0; i < NUM_IOP_CHAN; i++) {
                                state = iop_readb(iop, IOP_ADDR_SEND_STATE + i);
-                               iop_pr_cont(" %02X", state);
                                if (state == IOP_MSG_COMPLETE)
                                        iop_handle_send(iop_num, i);
+                               else if (state != IOP_MSG_IDLE)
+                                       iop_pr_debug("chan %d send state %02X\n",
+                                                    i, state);
                        }
-                       iop_pr_cont("\n");
                }
 
                /* INT1 for incoming messages */
                if (events & IOP_INT1) {
                        iop->status_ctrl = IOP_INT1 | IOP_RUN | IOP_AUTOINC;
-                       iop_pr_debug("new status %02X, recv states",
-                                    iop->status_ctrl);
                        for (i = 0; i < NUM_IOP_CHAN; i++) {
                                state = iop_readb(iop, IOP_ADDR_RECV_STATE + i);
-                               iop_pr_cont(" %02X", state);
                                if (state == IOP_MSG_NEW)
                                        iop_handle_recv(iop_num, i);
+                               else if (state != IOP_MSG_IDLE)
+                                       iop_pr_debug("chan %d recv state %02X\n",
+                                                    i, state);
                        }
-                       iop_pr_cont("\n");
                }
 
                events = iop->status_ctrl & (IOP_INT0 | IOP_INT1);
index 9960c46..4e99e17 100644 (file)
@@ -5,4 +5,4 @@
 
 obj-y  := sun3ints.o sun3dvma.o idprom.o
 
-obj-$(CONFIG_SUN3) += config.o mmu_emu.o leds.o dvma.o intersil.o
+obj-$(CONFIG_SUN3) += config.o mmu_emu.o leds.o dvma.o intersil.o prom/
index e5ac883..f904084 100644 (file)
@@ -45,7 +45,6 @@ static __always_inline type pfx##_xchg(pfx##_t *v, type n)            \
        return xchg(&v->counter, n);                                    \
 }
 
-#define ATOMIC_INIT(i)         { (i) }
 ATOMIC_OPS(atomic, int)
 
 #ifdef CONFIG_64BIT
index 5958217..9b3cc77 100644 (file)
@@ -728,6 +728,7 @@ err_free_resource:
        pci_free_resource_list(&host->windows);
 err_remove_domain:
        irq_domain_remove(domain);
+       irq_domain_free_fwnode(fn);
        return err;
 }
 
@@ -735,8 +736,10 @@ static int bridge_remove(struct platform_device *pdev)
 {
        struct pci_bus *bus = platform_get_drvdata(pdev);
        struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
+       struct fwnode_handle *fn = bc->domain->fwnode;
 
        irq_domain_remove(bc->domain);
+       irq_domain_free_fwnode(fn);
        pci_lock_rescan_remove();
        pci_stop_root_bus(bus);
        pci_remove_root_bus(bus);
index 6dd4171..0386232 100644 (file)
@@ -136,8 +136,6 @@ ATOMIC_OPS(xor, ^=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define ATOMIC_INIT(i) { (i) }
-
 #ifdef CONFIG_64BIT
 
 #define ATOMIC64_INIT(i) { (i) }
index 498785f..0311c3c 100644 (file)
@@ -11,8 +11,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-#define ATOMIC_INIT(i)         { (i) }
-
 /*
  * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
  * a "bne-" instruction at the end, so an isync is enough as an acquire barrier
diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h
new file mode 100644 (file)
index 0000000..1625888
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef _ASM_POWERPC_DTL_H
+#define _ASM_POWERPC_DTL_H
+
+#include <asm/lppaca.h>
+#include <linux/spinlock_types.h>
+
+/*
+ * Layout of entries in the hypervisor's dispatch trace log buffer.
+ */
+struct dtl_entry {
+       u8      dispatch_reason;
+       u8      preempt_reason;
+       __be16  processor_id;
+       __be32  enqueue_to_dispatch_time;
+       __be32  ready_to_enqueue_time;
+       __be32  waiting_to_ready_time;
+       __be64  timebase;
+       __be64  fault_addr;
+       __be64  srr0;
+       __be64  srr1;
+};
+
+#define DISPATCH_LOG_BYTES     4096    /* bytes per cpu */
+#define N_DISPATCH_LOG         (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+
+/*
+ * Dispatch trace log event enable mask:
+ *   0x1: voluntary virtual processor waits
+ *   0x2: time-slice preempts
+ *   0x4: virtual partition memory page faults
+ */
+#define DTL_LOG_CEDE           0x1
+#define DTL_LOG_PREEMPT                0x2
+#define DTL_LOG_FAULT          0x4
+#define DTL_LOG_ALL            (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
+
+extern struct kmem_cache *dtl_cache;
+extern rwlock_t dtl_access_lock;
+
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * reading from the dispatch trace log.  If other code wants to consume
+ * DTL entries, it can set this pointer to a function that will get
+ * called once for each DTL entry that gets processed.
+ */
+extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+
+extern void register_dtl_buffer(int cpu);
+extern void alloc_dtl_buffers(unsigned long *time_limit);
+extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
+#endif /* _ASM_POWERPC_DTL_H */
index 3b4b305..c390ec3 100644 (file)
@@ -42,7 +42,6 @@
  */
 #include <linux/cache.h>
 #include <linux/threads.h>
-#include <linux/spinlock_types.h>
 #include <asm/types.h>
 #include <asm/mmu.h>
 #include <asm/firmware.h>
@@ -146,49 +145,6 @@ struct slb_shadow {
        } save_area[SLB_NUM_BOLTED];
 } ____cacheline_aligned;
 
-/*
- * Layout of entries in the hypervisor's dispatch trace log buffer.
- */
-struct dtl_entry {
-       u8      dispatch_reason;
-       u8      preempt_reason;
-       __be16  processor_id;
-       __be32  enqueue_to_dispatch_time;
-       __be32  ready_to_enqueue_time;
-       __be32  waiting_to_ready_time;
-       __be64  timebase;
-       __be64  fault_addr;
-       __be64  srr0;
-       __be64  srr1;
-};
-
-#define DISPATCH_LOG_BYTES     4096    /* bytes per cpu */
-#define N_DISPATCH_LOG         (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
-
-/*
- * Dispatch trace log event enable mask:
- *   0x1: voluntary virtual processor waits
- *   0x2: time-slice preempts
- *   0x4: virtual partition memory page faults
- */
-#define DTL_LOG_CEDE           0x1
-#define DTL_LOG_PREEMPT                0x2
-#define DTL_LOG_FAULT          0x4
-#define DTL_LOG_ALL            (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
-
-extern struct kmem_cache *dtl_cache;
-extern rwlock_t dtl_access_lock;
-
-/*
- * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
- * reading from the dispatch trace log.  If other code wants to consume
- * DTL entries, it can set this pointer to a function that will get
- * called once for each DTL entry that gets processed.
- */
-extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
-
-extern void register_dtl_buffer(int cpu);
-extern void alloc_dtl_buffers(unsigned long *time_limit);
 extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
 
 #endif /* CONFIG_PPC_BOOK3S */
index 45a839a..84b2564 100644 (file)
@@ -29,7 +29,6 @@
 #include <asm/hmi.h>
 #include <asm/cpuidle.h>
 #include <asm/atomic.h>
-#include <asm/rtas-types.h>
 
 #include <asm-generic/mmiowb_types.h>
 
@@ -53,6 +52,7 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
 #define get_slb_shadow()       (get_paca()->slb_shadow_ptr)
 
 struct task_struct;
+struct rtas_args;
 
 /*
  * Defines the layout of the paca.
index 0fc8bad..446e54c 100644 (file)
@@ -3072,10 +3072,18 @@ do_hash_page:
        ori     r0,r0,DSISR_BAD_FAULT_64S@l
        and.    r0,r5,r0                /* weird error? */
        bne-    handle_page_fault       /* if not, try to insert a HPTE */
+
+       /*
+        * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
+        * don't call hash_page, just fail the fault. This is required to
+        * prevent re-entrancy problems in the hash code, namely perf
+        * interrupts hitting while something holds H_PAGE_BUSY, and taking a
+        * hash fault. See the comment in hash_preload().
+        */
        ld      r11, PACA_THREAD_INFO(r13)
-       lwz     r0,TI_PREEMPT(r11)      /* If we're in an "NMI" */
-       andis.  r0,r0,NMI_MASK@h        /* (i.e. an irq when soft-disabled) */
-       bne     77f                     /* then don't call hash_page now */
+       lwz     r0,TI_PREEMPT(r11)
+       andis.  r0,r0,NMI_MASK@h
+       bne     77f
 
        /*
         * r3 contains the trap number
index 6fcae43..f85539e 100644 (file)
@@ -183,6 +183,8 @@ static inline unsigned long read_spurr(unsigned long tb)
 
 #ifdef CONFIG_PPC_SPLPAR
 
+#include <asm/dtl.h>
+
 /*
  * Scan the dispatch trace log and count up the stolen time.
  * Should be called with interrupts disabled.
index 6bf6664..ebb04f3 100644 (file)
@@ -74,6 +74,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/kvm_book3s_uvmem.h>
 #include <asm/ultravisor.h>
+#include <asm/dtl.h>
 
 #include "book3s.h"
 
index 468169e..9b9f92a 100644 (file)
@@ -1559,6 +1559,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
        pgd_t *pgdir;
        int rc, ssize, update_flags = 0;
        unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
+       unsigned long flags;
 
        BUG_ON(get_region_id(ea) != USER_REGION_ID);
 
@@ -1592,6 +1593,28 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
                return;
 #endif /* CONFIG_PPC_64K_PAGES */
 
+       /*
+        * __hash_page_* must run with interrupts off, as it sets the
+        * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any
+        * time and may take a hash fault reading the user stack, see
+        * read_user_stack_slow() in the powerpc/perf code.
+        *
+        * If that takes a hash fault on the same page as we lock here, it
+        * will bail out when seeing H_PAGE_BUSY set, and retry the access
+        * leading to an infinite loop.
+        *
+        * Disabling interrupts here does not prevent perf interrupts, but it
+        * will prevent them taking hash faults (see the NMI test in
+        * do_hash_page), then read_user_stack's copy_from_user_nofault will
+        * fail and perf will fall back to read_user_stack_slow(), which
+        * walks the Linux page tables.
+        *
+        * Interrupts must also be off for the duration of the
+        * mm_is_thread_local test and update, to prevent preempt running the
+        * mm on another CPU (XXX: this may be racy vs kthread_use_mm).
+        */
+       local_irq_save(flags);
+
        /* Is that local to this CPU ? */
        if (mm_is_thread_local(mm))
                update_flags |= HPTE_LOCAL_UPDATE;
@@ -1614,6 +1637,8 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
                                   mm_ctx_user_psize(&mm->context),
                                   mm_ctx_user_psize(&mm->context),
                                   pte_val(*ptep));
+
+       local_irq_restore(flags);
 }
 
 /*
index cd6a742..01d7028 100644 (file)
@@ -2179,6 +2179,12 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 
        perf_read_regs(regs);
 
+       /*
+        * If perf interrupts hit in a local_irq_disable (soft-masked) region,
+        * we consider them as NMIs. This is required to prevent hash faults on
+        * user addresses when reading callchains. See the NMI test in
+        * do_hash_page.
+        */
        nmi = perf_intr_is_nmi(regs);
        if (nmi)
                nmi_enter();
index eab8aa2..982f069 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/smp.h>
 #include <linux/uaccess.h>
 #include <asm/firmware.h>
+#include <asm/dtl.h>
 #include <asm/lppaca.h>
 #include <asm/debugfs.h>
 #include <asm/plpar_wrappers.h>
index fd26f3d..f71ff2c 100644 (file)
@@ -40,6 +40,7 @@
 #include <asm/fadump.h>
 #include <asm/asm-prototypes.h>
 #include <asm/debugfs.h>
+#include <asm/dtl.h>
 
 #include "pseries.h"
 
index 2db8469..27094c8 100644 (file)
@@ -70,6 +70,7 @@
 #include <asm/idle.h>
 #include <asm/swiotlb.h>
 #include <asm/svm.h>
+#include <asm/dtl.h>
 
 #include "pseries.h"
 #include "../../../../drivers/pci/pci.h"
index 40c0637..e6d7a34 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/svm.h>
 #include <asm/swiotlb.h>
 #include <asm/ultravisor.h>
+#include <asm/dtl.h>
 
 static int __init init_svm(void)
 {
index 96f95c9..400a8c8 100644 (file)
@@ -19,8 +19,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-#define ATOMIC_INIT(i) { (i) }
-
 #define __atomic_acquire_fence()                                       \
        __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
 
index c8e8186..3099362 100644 (file)
@@ -4,6 +4,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
 #include <asm/unistd.h>
 #include <asm/csr.h>
 #include <uapi/linux/time.h>
index c7d7ede..9cfd8de 100644 (file)
@@ -102,7 +102,6 @@ config S390
        select ARCH_INLINE_WRITE_UNLOCK_BH
        select ARCH_INLINE_WRITE_UNLOCK_IRQ
        select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
-       select ARCH_KEEP_MEMBLOCK
        select ARCH_STACKWALK
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_NUMA_BALANCING
@@ -126,6 +125,7 @@ config S390
        select HAVE_ARCH_JUMP_LABEL_RELATIVE
        select HAVE_ARCH_KASAN
        select HAVE_ARCH_KASAN_VMALLOC
+       select CLOCKSOURCE_VALIDATE_LAST_CYCLE
        select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_SOFT_DIRTY
@@ -145,6 +145,7 @@ config S390
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        select HAVE_FENTRY
        select HAVE_FTRACE_MCOUNT_RECORD
+       select HAVE_FUNCTION_ERROR_INJECTION
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
        select HAVE_FUTEX_CMPXCHG if FUTEX
@@ -626,10 +627,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
        def_bool y
 
-config FORCE_MAX_ZONEORDER
-       int
-       default "9"
-
 config MAX_PHYSMEM_BITS
        int "Maximum size of supported physical memory in bits (42-53)"
        range 42 53
index 5503217..a363d30 100644 (file)
@@ -129,8 +129,7 @@ static void appldata_get_os_data(void *data)
 
        os_data->nr_cpus = j;
 
-       new_size = sizeof(struct appldata_os_data) +
-                  (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu));
+       new_size = struct_size(os_data, os_cpu, os_data->nr_cpus);
        if (ops.size != new_size) {
                if (ops.active) {
                        rc = appldata_diag(APPLDATA_RECORD_OS_ID,
@@ -165,8 +164,7 @@ static int __init appldata_os_init(void)
 {
        int rc, max_size;
 
-       max_size = sizeof(struct appldata_os_data) +
-                  (num_possible_cpus() * sizeof(struct appldata_os_per_cpu));
+       max_size = struct_size(appldata_os_data, os_cpu, num_possible_cpus());
        if (max_size > APPLDATA_MAX_REC_SIZE) {
                pr_err("Maximum OS record size %i exceeds the maximum "
                       "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
new file mode 100644 (file)
index 0000000..11f615e
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ASM_CONST_H
+#define _ASM_S390_ASM_CONST_H
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)  __VA_ARGS__
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...)        #__VA_ARGS__
+#  define stringify_in_c(...)  __stringify_in_c(__VA_ARGS__) " "
+#endif
+#endif /* _ASM_S390_ASM_CONST_H */
index 491ad53..cae473a 100644 (file)
@@ -15,8 +15,6 @@
 #include <asm/barrier.h>
 #include <asm/cmpxchg.h>
 
-#define ATOMIC_INIT(i)  { (i) }
-
 static inline int atomic_read(const atomic_t *v)
 {
        int c;
index 3101340..17a2626 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/time.h>
 #include <linux/refcount.h>
-#include <uapi/asm/debug.h>
+#include <linux/fs.h>
 
 #define DEBUG_MAX_LEVEL                   6  /* debug levels range from 0 to 6 */
 #define DEBUG_OFF_LEVEL                   -1 /* level where debug is switched off */
 #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */
                                              /* the entry information */
 
+#define __DEBUG_FEATURE_VERSION           2  /* version of debug feature */
+
+struct __debug_entry {
+       union {
+               struct {
+                       unsigned long clock     : 52;
+                       unsigned long exception :  1;
+                       unsigned long level     :  3;
+                       unsigned long cpuid     :  8;
+               } fields;
+               unsigned long stck;
+       } id;
+       void *caller;
+} __packed;
+
 typedef struct __debug_entry debug_entry_t;
 
 struct debug_view;
@@ -82,7 +97,6 @@ struct debug_view {
 };
 
 extern struct debug_view debug_hex_ascii_view;
-extern struct debug_view debug_raw_view;
 extern struct debug_view debug_sprintf_view;
 
 /* do NOT use the _common functions */
index ae27f75..3beb294 100644 (file)
@@ -1,12 +1,20 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __S390_EXTABLE_H
 #define __S390_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
 /*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of three addresses:
+ *
+ * - Address of an instruction that is allowed to fault.
+ * - Address at which the program should continue.
+ * - Optional address of handler that takes pt_regs * argument and runs in
+ *   interrupt context.
+ *
+ * No registers are modified, so it is entirely up to the continuation code
+ * to figure out what to do.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
@@ -17,6 +25,7 @@
 struct exception_table_entry
 {
        int insn, fixup;
+       long handler;
 };
 
 extern struct exception_table_entry *__start_dma_ex_table;
@@ -29,6 +38,39 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
        return (unsigned long)&x->fixup + x->fixup;
 }
 
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+                            struct pt_regs *);
+
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
+{
+       if (likely(!x->handler))
+               return NULL;
+       return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+}
+
+static inline bool ex_handle(const struct exception_table_entry *x,
+                            struct pt_regs *regs)
+{
+       ex_handler_t handler = ex_fixup_handler(x);
+
+       if (unlikely(handler))
+               return handler(x, regs);
+       regs->psw.addr = extable_fixup(x);
+       return true;
+}
+
 #define ARCH_HAS_RELATIVE_EXTABLE
 
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+                                      struct exception_table_entry *b,
+                                      struct exception_table_entry tmp,
+                                      int delta)
+{
+       a->fixup = b->fixup + delta;
+       b->fixup = tmp.fixup - delta;
+       a->handler = b->handler ? b->handler + delta : 0;   /* 0 == "no handler" (see  */
+       b->handler = tmp.handler ? tmp.handler - delta : 0; /* ex_fixup_handler): keep */
+}
+
 #endif
index 7f22262..a0a7a2c 100644 (file)
@@ -2,38 +2,27 @@
 #ifndef __ASM_LINKAGE_H
 #define __ASM_LINKAGE_H
 
+#include <asm/asm-const.h>
 #include <linux/stringify.h>
 
 #define __ALIGN .align 4, 0x07
 #define __ALIGN_STR __stringify(__ALIGN)
 
-#ifndef __ASSEMBLY__
-
 /*
  * Helper macro for exception table entries
  */
-#define EX_TABLE(_fault, _target)      \
-       ".section __ex_table,\"a\"\n"   \
-       ".align 4\n"                    \
-       ".long  (" #_fault ") - .\n"    \
-       ".long  (" #_target ") - .\n"   \
-       ".previous\n"
-
-#else /* __ASSEMBLY__ */
 
-#define EX_TABLE(_fault, _target)      \
-       .section __ex_table,"a" ;       \
-       .align  4 ;                     \
-       .long   (_fault) - . ;          \
-       .long   (_target) - . ;         \
-       .previous
+#define __EX_TABLE(_section, _fault, _target)                          \
+       stringify_in_c(.section _section,"a";)                          \
+       stringify_in_c(.align   8;)                                     \
+       stringify_in_c(.long    (_fault) - .;)                          \
+       stringify_in_c(.long    (_target) - .;)                         \
+       stringify_in_c(.quad    0;)                                     \
+       stringify_in_c(.previous)
 
-#define EX_TABLE_DMA(_fault, _target)  \
-       .section .dma.ex_table, "a" ;   \
-       .align  4 ;                     \
-       .long   (_fault) - . ;          \
-       .long   (_target) - . ;         \
-       .previous
+#define EX_TABLE(_fault, _target)                                      \
+       __EX_TABLE(__ex_table, _fault, _target)
+#define EX_TABLE_DMA(_fault, _target)                                  \
+       __EX_TABLE(.dma.ex_table, _fault, _target)
 
-#endif /* __ASSEMBLY__ */
 #endif
index 419fac7..f62cd3e 100644 (file)
@@ -131,12 +131,6 @@ static inline void validate_st_entry(unsigned long *entry)
        *entry |= ZPCI_TABLE_VALID;
 }
 
-static inline void invalidate_table_entry(unsigned long *entry)
-{
-       *entry &= ~ZPCI_TABLE_VALID_MASK;
-       *entry |= ZPCI_TABLE_INVALID;
-}
-
 static inline void invalidate_pt_entry(unsigned long *entry)
 {
        WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
@@ -173,11 +167,6 @@ static inline int pt_entry_isvalid(unsigned long entry)
        return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
 }
 
-static inline int entry_isprotected(unsigned long entry)
-{
-       return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
-}
-
 static inline unsigned long *get_rt_sto(unsigned long entry)
 {
        return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
index 19d603b..7eb01a5 100644 (file)
@@ -1669,7 +1669,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 #define kern_addr_valid(addr)   (1)
 
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
-extern int vmem_remove_mapping(unsigned long start, unsigned long size);
+extern void vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
 extern int s390_enable_skey(void);
 extern void s390_reset_cmma(struct mm_struct *mm);
index f009a13..16b3e43 100644 (file)
@@ -184,5 +184,10 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
        return regs->gprs[15];
 }
 
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+       regs->gprs[2] = rc;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* _S390_PTRACE_H */
index 7326f11..7e155fb 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <asm/sigp.h>
 #include <asm/lowcore.h>
+#include <asm/processor.h>
 
 #define raw_smp_processor_id() (S390_lowcore.cpu_nr)
 
@@ -54,6 +55,10 @@ static inline int smp_get_base_cpu(int cpu)
        return cpu - (cpu % (smp_cpu_mtid + 1));
 }
 
+static inline void smp_cpus_done(unsigned int max_cpus)
+{
+}
+
 extern int smp_rescan_cpus(void);
 extern void __noreturn cpu_die(void);
 extern void __cpu_die(unsigned int cpu);
index 3c3d6fe..1320f42 100644 (file)
@@ -30,7 +30,7 @@
 })
 
 #define __S390_SYS_STUBx(x, name, ...)                                 \
-       asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
+       asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
        ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO);                 \
        asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
        {                                                               \
@@ -46,7 +46,7 @@
 #define COMPAT_SYSCALL_DEFINE0(sname)                                  \
        SYSCALL_METADATA(_##sname, 0);                                  \
        asmlinkage long __s390_compat_sys_##sname(void);                \
-       ALLOW_ERROR_INJECTION(__s390_compat__sys_##sname, ERRNO);       \
+       ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO);        \
        asmlinkage long __s390_compat_sys_##sname(void)
 
 #define SYSCALL_DEFINE0(sname)                                         \
@@ -72,7 +72,7 @@
        asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));        \
        asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
                __attribute__((alias(__stringify(__se_compat_sys##name))));     \
-       ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO);                         \
+       ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO);                  \
        static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
        asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));  \
        asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))   \
index e582fbe..13a04fc 100644 (file)
@@ -24,7 +24,6 @@
 #ifndef __ASSEMBLY__
 #include <asm/lowcore.h>
 #include <asm/page.h>
-#include <asm/processor.h>
 
 #define STACK_INIT_OFFSET \
        (THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
index 6bf3a45..289aaff 100644 (file)
@@ -49,11 +49,6 @@ static inline void set_clock_comparator(__u64 time)
        asm volatile("sckc %0" : : "Q" (time));
 }
 
-static inline void store_clock_comparator(__u64 *time)
-{
-       asm volatile("stckc %0" : "=Q" (*time));
-}
-
 void clock_comparator_work(void);
 
 void __init time_early_init(void);
diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h
deleted file mode 100644 (file)
index c7c564d..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *   S/390 debug facility
- *
- *    Copyright IBM Corp. 1999, 2000
- */
-
-#ifndef _UAPIDEBUG_H
-#define _UAPIDEBUG_H
-
-#include <linux/fs.h>
-
-/* Note:
- * struct __debug_entry must be defined outside of #ifdef __KERNEL__ 
- * in order to allow a user program to analyze the 'raw'-view.
- */
-
-struct __debug_entry{
-        union {
-                struct {
-                        unsigned long long clock:52;
-                        unsigned long long exception:1;
-                        unsigned long long level:3;
-                        unsigned long long cpuid:8;
-                } fields;
-
-                unsigned long long stck;
-        } id;
-        void* caller;
-} __attribute__((packed));
-
-
-#define __DEBUG_FEATURE_VERSION      2  /* version of debug feature */
-
-#endif /* _UAPIDEBUG_H */
index 5a2177e..22fd202 100644 (file)
  * - length(n_modulus) = inputdatalength
  */
 struct ica_rsa_modexpo {
-       char __user  *inputdata;
-       unsigned int  inputdatalength;
-       char __user  *outputdata;
-       unsigned int  outputdatalength;
-       char __user  *b_key;
-       char __user  *n_modulus;
+       __u8 __user  *inputdata;
+       __u32         inputdatalength;
+       __u8 __user  *outputdata;
+       __u32         outputdatalength;
+       __u8 __user  *b_key;
+       __u8 __user  *n_modulus;
 };
 
 /**
@@ -59,15 +59,15 @@ struct ica_rsa_modexpo {
  * - length(u_mult_inv) = inputdatalength/2 + 8
  */
 struct ica_rsa_modexpo_crt {
-       char __user  *inputdata;
-       unsigned int  inputdatalength;
-       char __user  *outputdata;
-       unsigned int  outputdatalength;
-       char __user  *bp_key;
-       char __user  *bq_key;
-       char __user  *np_prime;
-       char __user  *nq_prime;
-       char __user  *u_mult_inv;
+       __u8 __user  *inputdata;
+       __u32         inputdatalength;
+       __u8 __user  *outputdata;
+       __u32         outputdatalength;
+       __u8 __user  *bp_key;
+       __u8 __user  *bq_key;
+       __u8 __user  *np_prime;
+       __u8 __user  *nq_prime;
+       __u8 __user  *u_mult_inv;
 };
 
 /**
@@ -83,67 +83,67 @@ struct ica_rsa_modexpo_crt {
  *         key block
  */
 struct CPRBX {
-       unsigned short  cprb_len;       /* CPRB length        220        */
-       unsigned char   cprb_ver_id;    /* CPRB version id.   0x02       */
-       unsigned char   pad_000[3];     /* Alignment pad bytes           */
-       unsigned char   func_id[2];     /* function id        0x5432     */
-       unsigned char   cprb_flags[4];  /* Flags                         */
-       unsigned int    req_parml;      /* request parameter buffer len  */
-       unsigned int    req_datal;      /* request data buffer           */
-       unsigned int    rpl_msgbl;      /* reply  message block length   */
-       unsigned int    rpld_parml;     /* replied parameter block len   */
-       unsigned int    rpl_datal;      /* reply data block len          */
-       unsigned int    rpld_datal;     /* replied data block len        */
-       unsigned int    req_extbl;      /* request extension block len   */
-       unsigned char   pad_001[4];     /* reserved                      */
-       unsigned int    rpld_extbl;     /* replied extension block len   */
-       unsigned char   padx000[16 - sizeof(char *)];
-       unsigned char  *req_parmb;      /* request parm block 'address'  */
-       unsigned char   padx001[16 - sizeof(char *)];
-       unsigned char  *req_datab;      /* request data block 'address'  */
-       unsigned char   padx002[16 - sizeof(char *)];
-       unsigned char  *rpl_parmb;      /* reply parm block 'address'    */
-       unsigned char   padx003[16 - sizeof(char *)];
-       unsigned char  *rpl_datab;      /* reply data block 'address'    */
-       unsigned char   padx004[16 - sizeof(char *)];
-       unsigned char  *req_extb;       /* request extension block 'addr'*/
-       unsigned char   padx005[16 - sizeof(char *)];
-       unsigned char  *rpl_extb;       /* reply extension block 'address'*/
-       unsigned short  ccp_rtcode;     /* server return code            */
-       unsigned short  ccp_rscode;     /* server reason code            */
-       unsigned int    mac_data_len;   /* Mac Data Length               */
-       unsigned char   logon_id[8];    /* Logon Identifier              */
-       unsigned char   mac_value[8];   /* Mac Value                     */
-       unsigned char   mac_content_flgs;/* Mac content flag byte        */
-       unsigned char   pad_002;        /* Alignment                     */
-       unsigned short  domain;         /* Domain                        */
-       unsigned char   usage_domain[4];/* Usage domain                  */
-       unsigned char   cntrl_domain[4];/* Control domain                */
-       unsigned char   S390enf_mask[4];/* S/390 enforcement mask        */
-       unsigned char   pad_004[36];    /* reserved                      */
+       __u16        cprb_len;          /* CPRB length        220        */
+       __u8         cprb_ver_id;       /* CPRB version id.   0x02       */
+       __u8         pad_000[3];        /* Alignment pad bytes           */
+       __u8         func_id[2];        /* function id        0x5432     */
+       __u8         cprb_flags[4];     /* Flags                         */
+       __u32        req_parml;         /* request parameter buffer len  */
+       __u32        req_datal;         /* request data buffer           */
+       __u32        rpl_msgbl;         /* reply  message block length   */
+       __u32        rpld_parml;        /* replied parameter block len   */
+       __u32        rpl_datal;         /* reply data block len          */
+       __u32        rpld_datal;        /* replied data block len        */
+       __u32        req_extbl;         /* request extension block len   */
+       __u8         pad_001[4];        /* reserved                      */
+       __u32        rpld_extbl;        /* replied extension block len   */
+       __u8         padx000[16 - sizeof(__u8 *)];
+       __u8 __user *req_parmb;         /* request parm block 'address'  */
+       __u8         padx001[16 - sizeof(__u8 *)];
+       __u8 __user *req_datab;         /* request data block 'address'  */
+       __u8         padx002[16 - sizeof(__u8 *)];
+       __u8 __user *rpl_parmb;         /* reply parm block 'address'    */
+       __u8         padx003[16 - sizeof(__u8 *)];
+       __u8 __user *rpl_datab;         /* reply data block 'address'    */
+       __u8         padx004[16 - sizeof(__u8 *)];
+       __u8 __user *req_extb;          /* request extension block 'addr'*/
+       __u8         padx005[16 - sizeof(__u8 *)];
+       __u8 __user *rpl_extb;          /* reply extension block 'address'*/
+       __u16        ccp_rtcode;        /* server return code            */
+       __u16        ccp_rscode;        /* server reason code            */
+       __u32        mac_data_len;      /* Mac Data Length               */
+       __u8         logon_id[8];       /* Logon Identifier              */
+       __u8         mac_value[8];      /* Mac Value                     */
+       __u8         mac_content_flgs;  /* Mac content flag byte         */
+       __u8         pad_002;           /* Alignment                     */
+       __u16        domain;            /* Domain                        */
+       __u8         usage_domain[4];   /* Usage domain                  */
+       __u8         cntrl_domain[4];   /* Control domain                */
+       __u8         S390enf_mask[4];   /* S/390 enforcement mask        */
+       __u8         pad_004[36];       /* reserved                      */
 } __attribute__((packed));
 
 /**
  * xcRB
  */
 struct ica_xcRB {
-       unsigned short  agent_ID;
-       unsigned int    user_defined;
-       unsigned short  request_ID;
-       unsigned int    request_control_blk_length;
-       unsigned char   padding1[16 - sizeof(char *)];
-       char __user    *request_control_blk_addr;
-       unsigned int    request_data_length;
-       char            padding2[16 - sizeof(char *)];
-       char __user    *request_data_address;
-       unsigned int    reply_control_blk_length;
-       char            padding3[16 - sizeof(char *)];
-       char __user    *reply_control_blk_addr;
-       unsigned int    reply_data_length;
-       char            padding4[16 - sizeof(char *)];
-       char __user    *reply_data_addr;
-       unsigned short  priority_window;
-       unsigned int    status;
+       __u16         agent_ID;
+       __u32         user_defined;
+       __u16         request_ID;
+       __u32         request_control_blk_length;
+       __u8          _padding1[16 - sizeof(__u8 *)];
+       __u8 __user  *request_control_blk_addr;
+       __u32         request_data_length;
+       __u8          _padding2[16 - sizeof(__u8 *)];
+       __u8 __user  *request_data_address;
+       __u32         reply_control_blk_length;
+       __u8          _padding3[16 - sizeof(__u8 *)];
+       __u8 __user  *reply_control_blk_addr;
+       __u32         reply_data_length;
+       __u8          __padding4[16 - sizeof(__u8 *)];
+       __u8 __user  *reply_data_addr;
+       __u16         priority_window;
+       __u32         status;
 } __attribute__((packed));
 
 /**
index f96a585..c42ce34 100644 (file)
@@ -549,8 +549,7 @@ static int get_mem_chunk_cnt(void)
        int cnt = 0;
        u64 idx;
 
-       for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
-                          MEMBLOCK_NONE, NULL, NULL, NULL)
+       for_each_physmem_range(idx, &oldmem_type, NULL, NULL)
                cnt++;
        return cnt;
 }
@@ -563,8 +562,7 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
        phys_addr_t start, end;
        u64 idx;
 
-       for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
-                          MEMBLOCK_NONE, &start, &end, NULL) {
+       for_each_physmem_range(idx, &oldmem_type, &start, &end) {
                phdr->p_filesz = end - start;
                phdr->p_type = PT_LOAD;
                phdr->p_offset = start;
index 263075a..beb4b44 100644 (file)
@@ -90,27 +90,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
                                size_t user_buf_size, loff_t *offset);
 static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
                                     char *out_buf, const char *in_buf);
-static int debug_raw_format_fn(debug_info_t *id,
-                              struct debug_view *view, char *out_buf,
-                              const char *in_buf);
-static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
-                              int area, debug_entry_t *entry, char *out_buf);
-
 static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
                                   char *out_buf, debug_sprintf_entry_t *curr_event);
 
 /* globals */
 
-struct debug_view debug_raw_view = {
-       "raw",
-       NULL,
-       &debug_raw_header_fn,
-       &debug_raw_format_fn,
-       NULL,
-       NULL
-};
-EXPORT_SYMBOL(debug_raw_view);
-
 struct debug_view debug_hex_ascii_view = {
        "hex_ascii",
        NULL,
@@ -1385,32 +1369,6 @@ out:
        return rc;              /* number of input characters */
 }
 
-/*
- * prints debug header in raw format
- */
-static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
-                              int area, debug_entry_t *entry, char *out_buf)
-{
-       int rc;
-
-       rc = sizeof(debug_entry_t);
-       memcpy(out_buf, entry, sizeof(debug_entry_t));
-       return rc;
-}
-
-/*
- * prints debug data in raw format
- */
-static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view,
-                              char *out_buf, const char *in_buf)
-{
-       int rc;
-
-       rc = id->buf_size;
-       memcpy(out_buf, in_buf, id->buf_size);
-       return rc;
-}
-
 /*
  * prints debug data in hex/ascii format
  */
index 969b35b..23edf19 100644 (file)
@@ -370,7 +370,7 @@ EXPORT_SYMBOL(sie_exit)
 
 /*
  * SVC interrupt handler routine. System calls are synchronous events and
- * are executed with interrupts enabled.
+ * are entered with interrupts disabled.
  */
 
 ENTRY(system_call)
index 0d7fbdf..88bb42c 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/sched/cputime.h>
+#include <trace/events/power.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include "entry.h"
@@ -32,11 +33,12 @@ void enabled_wait(void)
                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
        clear_cpu_flag(CIF_NOHZ_DELAY);
 
+       trace_cpu_idle_rcuidle(1, smp_processor_id());
        local_irq_save(flags);
        /* Call the assembler magic in entry.S */
        psw_idle(idle, psw_mask);
        local_irq_restore(flags);
-
+       trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 
        /* Account time spent with enabled wait psw loaded as idle time. */
        write_seqcount_begin(&idle->seqcount);
index 548d0ea..d2a71d8 100644 (file)
@@ -523,10 +523,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
                 * zero, try to fix up.
                 */
                entry = s390_search_extables(regs->psw.addr);
-               if (entry) {
-                       regs->psw.addr = extable_fixup(entry);
+               if (entry && ex_handle(entry, regs))
                        return 1;
-               }
 
                /*
                 * fixup_exception() could not handle it,
index 452502f..3b89597 100644 (file)
@@ -167,7 +167,7 @@ static struct timer_list lgr_timer;
  */
 static void lgr_timer_set(void)
 {
-       mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+       mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
 }
 
 /*
index 07aa15b..0c4194d 100644 (file)
@@ -1127,14 +1127,6 @@ void __init setup_arch(char **cmdline_p)
        free_mem_detect_info();
        remove_oldmem();
 
-       /*
-        * Make sure all chunks are MAX_ORDER aligned so we don't need the
-        * extra checks that HOLES_IN_ZONE would require.
-        *
-        * Is this still required?
-        */
-       memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
-
        if (is_prot_virt_host())
                setup_uv();
        setup_memory_end();
index e6be63f..f685a38 100644 (file)
@@ -1012,10 +1012,6 @@ void __init smp_prepare_boot_cpu(void)
        smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
 }
 
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-}
-
 void __init smp_setup_processor_id(void)
 {
        pcpu_devices[0].address = stap();
@@ -1145,6 +1141,7 @@ static int smp_cpu_online(unsigned int cpu)
 
        return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
 }
+
 static int smp_cpu_pre_down(unsigned int cpu)
 {
        struct device *s = &per_cpu(cpu_device, cpu)->dev;
index b1113b5..513e59d 100644 (file)
@@ -237,7 +237,7 @@ static u64 read_tod_clock(struct clocksource *cs)
        preempt_disable(); /* protect from changes to steering parameters */
        now = get_tod_clock();
        adj = tod_steering_end - now;
-       if (unlikely((s64) adj >= 0))
+       if (unlikely((s64) adj > 0))
                /*
                 * manually steer by 1 cycle every 2^16 cycles. This
                 * corresponds to shifting the tod delta by 15. 1s is
@@ -253,7 +253,7 @@ static struct clocksource clocksource_tod = {
        .name           = "tod",
        .rating         = 400,
        .read           = read_tod_clock,
-       .mask           = -1ULL,
+       .mask           = CLOCKSOURCE_MASK(64),
        .mult           = 1000,
        .shift          = 12,
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -669,7 +669,7 @@ static void stp_work_fn(struct work_struct *work)
                 * There is a usable clock but the synchronization failed.
                 * Retry after a second.
                 */
-               mod_timer(&stp_timer, jiffies + HZ);
+               mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
 
 out_unlock:
        mutex_unlock(&stp_work_mutex);
@@ -683,7 +683,7 @@ static struct bus_type stp_subsys = {
        .dev_name       = "stp",
 };
 
-static ssize_t stp_ctn_id_show(struct device *dev,
+static ssize_t ctn_id_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
@@ -693,9 +693,9 @@ static ssize_t stp_ctn_id_show(struct device *dev,
                       *(unsigned long long *) stp_info.ctnid);
 }
 
-static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+static DEVICE_ATTR_RO(ctn_id);
 
-static ssize_t stp_ctn_type_show(struct device *dev,
+static ssize_t ctn_type_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
@@ -704,9 +704,9 @@ static ssize_t stp_ctn_type_show(struct device *dev,
        return sprintf(buf, "%i\n", stp_info.ctn);
 }
 
-static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+static DEVICE_ATTR_RO(ctn_type);
 
-static ssize_t stp_dst_offset_show(struct device *dev,
+static ssize_t dst_offset_show(struct device *dev,
                                   struct device_attribute *attr,
                                   char *buf)
 {
@@ -715,9 +715,9 @@ static ssize_t stp_dst_offset_show(struct device *dev,
        return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
 }
 
-static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+static DEVICE_ATTR_RO(dst_offset);
 
-static ssize_t stp_leap_seconds_show(struct device *dev,
+static ssize_t leap_seconds_show(struct device *dev,
                                        struct device_attribute *attr,
                                        char *buf)
 {
@@ -726,9 +726,9 @@ static ssize_t stp_leap_seconds_show(struct device *dev,
        return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
 }
 
-static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+static DEVICE_ATTR_RO(leap_seconds);
 
-static ssize_t stp_stratum_show(struct device *dev,
+static ssize_t stratum_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
@@ -737,9 +737,9 @@ static ssize_t stp_stratum_show(struct device *dev,
        return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
 }
 
-static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+static DEVICE_ATTR_RO(stratum);
 
-static ssize_t stp_time_offset_show(struct device *dev,
+static ssize_t time_offset_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
@@ -748,9 +748,9 @@ static ssize_t stp_time_offset_show(struct device *dev,
        return sprintf(buf, "%i\n", (int) stp_info.tto);
 }
 
-static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+static DEVICE_ATTR_RO(time_offset);
 
-static ssize_t stp_time_zone_offset_show(struct device *dev,
+static ssize_t time_zone_offset_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
@@ -759,10 +759,9 @@ static ssize_t stp_time_zone_offset_show(struct device *dev,
        return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
 }
 
-static DEVICE_ATTR(time_zone_offset, 0400,
-                        stp_time_zone_offset_show, NULL);
+static DEVICE_ATTR_RO(time_zone_offset);
 
-static ssize_t stp_timing_mode_show(struct device *dev,
+static ssize_t timing_mode_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
@@ -771,9 +770,9 @@ static ssize_t stp_timing_mode_show(struct device *dev,
        return sprintf(buf, "%i\n", stp_info.tmd);
 }
 
-static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+static DEVICE_ATTR_RO(timing_mode);
 
-static ssize_t stp_timing_state_show(struct device *dev,
+static ssize_t timing_state_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
@@ -782,16 +781,16 @@ static ssize_t stp_timing_state_show(struct device *dev,
        return sprintf(buf, "%i\n", stp_info.tst);
 }
 
-static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+static DEVICE_ATTR_RO(timing_state);
 
-static ssize_t stp_online_show(struct device *dev,
+static ssize_t online_show(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
 {
        return sprintf(buf, "%i\n", stp_online);
 }
 
-static ssize_t stp_online_store(struct device *dev,
+static ssize_t online_store(struct device *dev,
                                struct device_attribute *attr,
                                const char *buf, size_t count)
 {
@@ -817,18 +816,14 @@ static ssize_t stp_online_store(struct device *dev,
  * Can't use DEVICE_ATTR because the attribute should be named
  * stp/online but dev_attr_online already exists in this file ..
  */
-static struct device_attribute dev_attr_stp_online = {
-       .attr = { .name = "online", .mode = 0600 },
-       .show   = stp_online_show,
-       .store  = stp_online_store,
-};
+static DEVICE_ATTR_RW(online);
 
 static struct device_attribute *stp_attributes[] = {
        &dev_attr_ctn_id,
        &dev_attr_ctn_type,
        &dev_attr_dst_offset,
        &dev_attr_leap_seconds,
-       &dev_attr_stp_online,
+       &dev_attr_online,
        &dev_attr_stratum,
        &dev_attr_time_offset,
        &dev_attr_time_zone_offset,
index 332b542..ca47141 100644 (file)
@@ -356,9 +356,9 @@ static atomic_t topology_poll = ATOMIC_INIT(0);
 static void set_topology_timer(void)
 {
        if (atomic_add_unless(&topology_poll, -1, 0))
-               mod_timer(&topology_timer, jiffies + HZ / 10);
+               mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
        else
-               mod_timer(&topology_timer, jiffies + HZ * 60);
+               mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
 }
 
 void topology_expect_change(void)
index ff9cc4c..8d1e8a1 100644 (file)
@@ -50,11 +50,8 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
         } else {
                 const struct exception_table_entry *fixup;
                fixup = s390_search_extables(regs->psw.addr);
-                if (fixup)
-                       regs->psw.addr = extable_fixup(fixup);
-               else {
+               if (!fixup || !ex_handle(fixup, regs))
                        die(regs, str);
-               }
         }
 }
 
@@ -251,7 +248,7 @@ void monitor_event_exception(struct pt_regs *regs)
        case BUG_TRAP_TYPE_NONE:
                fixup = s390_search_extables(regs->psw.addr);
                if (fixup)
-                       regs->psw.addr = extable_fixup(fixup);
+                       ex_handle(fixup, regs);
                break;
        case BUG_TRAP_TYPE_WARN:
                break;
index 28fd66d..6783339 100644 (file)
@@ -14,3 +14,5 @@ KASAN_SANITIZE_uaccess.o := n
 
 obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
 CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+
+lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c
new file mode 100644 (file)
index 0000000..8c9d4da
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <asm/ptrace.h>
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+       /*
+        * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some
+        * kernel function.
+        */
+       regs->psw.addr = regs->gprs[14];
+}
+NOKPROBE_SYMBOL(override_function_with_return);
index 36bce72..5c15ae3 100644 (file)
@@ -189,7 +189,7 @@ static void cmm_set_timer(void)
                        del_timer(&cmm_timer);
                return;
        }
-       mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ);
+       mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC));
 }
 
 static void cmm_timer_fn(struct timer_list *unused)
index 9e0aa7a..5060956 100644 (file)
@@ -313,15 +313,10 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
                goto out_free;
        }
 
-       rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
-
-       if (rc)
-               goto out_free;
-
        seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
        if (seg->res == NULL) {
                rc = -ENOMEM;
-               goto out_shared;
+               goto out_free;
        }
        seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
        seg->res->start = seg->start_addr;
@@ -335,12 +330,17 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
        if (rc == SEG_TYPE_SC ||
            ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
                seg->res->flags |= IORESOURCE_READONLY;
+
+       /* Check for overlapping resources before adding the mapping. */
        if (request_resource(&iomem_resource, seg->res)) {
                rc = -EBUSY;
-               kfree(seg->res);
-               goto out_shared;
+               goto out_free_resource;
        }
 
+       rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+       if (rc)
+               goto out_resource;
+
        if (do_nonshared)
                diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
                                &start_addr, &end_addr);
@@ -351,14 +351,14 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
                dcss_diag(&purgeseg_scode, seg->dcss_name,
                                &dummy, &dummy);
                rc = diag_cc;
-               goto out_resource;
+               goto out_mapping;
        }
        if (diag_cc > 1) {
                pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
                rc = dcss_diag_translate_rc(end_addr);
                dcss_diag(&purgeseg_scode, seg->dcss_name,
                                &dummy, &dummy);
-               goto out_resource;
+               goto out_mapping;
        }
        seg->start_addr = start_addr;
        seg->end = end_addr;
@@ -377,11 +377,12 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
                        (void*) seg->end, segtype_string[seg->vm_segtype]);
        }
        goto out;
+ out_mapping:
+       vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
  out_resource:
        release_resource(seg->res);
+ out_free_resource:
        kfree(seg->res);
- out_shared:
-       vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
  out_free:
        kfree(seg);
  out:
@@ -400,8 +401,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
  * -EIO     : could not perform query or load diagnose
  * -ENOENT  : no such segment
  * -EOPNOTSUPP: multi-part segment cannot be used with linux
- * -ENOSPC  : segment cannot be used (overlaps with storage)
- * -EBUSY   : segment can temporarily not be used (overlaps with dcss)
+ * -EBUSY   : segment cannot be used (overlaps with dcss or storage)
  * -ERANGE  : segment cannot be used (exceeds kernel mapping range)
  * -EPERM   : segment is currently loaded with incompatible permissions
  * -ENOMEM  : out of memory
@@ -626,10 +626,6 @@ void segment_warning(int rc, char *seg_name)
                pr_err("DCSS %s has multiple page ranges and cannot be "
                       "loaded or queried\n", seg_name);
                break;
-       case -ENOSPC:
-               pr_err("DCSS %s overlaps with used storage and cannot "
-                      "be loaded\n", seg_name);
-               break;
        case -EBUSY:
                pr_err("%s needs used memory resources and cannot be "
                       "loaded or queried\n", seg_name);
index d53c2e2..aebf918 100644 (file)
@@ -255,10 +255,8 @@ static noinline void do_no_context(struct pt_regs *regs)
 
        /* Are we prepared to handle this kernel fault?  */
        fixup = s390_search_extables(regs->psw.addr);
-       if (fixup) {
-               regs->psw.addr = extable_fixup(fixup);
+       if (fixup && ex_handle(fixup, regs))
                return;
-       }
 
        /*
         * Oops. The kernel tried to access some bad page. We'll have to
@@ -376,7 +374,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access,
  * routines.
  *
  * interruption code (int_code):
- *   04       Protection           ->  Write-Protection  (suprression)
+ *   04       Protection           ->  Write-Protection  (suppression)
  *   10       Segment translation  ->  Not present       (nullification)
  *   11       Page translation     ->  Not present       (nullification)
  *   3b       Region third trans.  ->  Not present       (nullification)
index 8b6282c..1aed1a4 100644 (file)
 
 static DEFINE_MUTEX(vmem_mutex);
 
-struct memory_segment {
-       struct list_head list;
-       unsigned long start;
-       unsigned long size;
-};
-
-static LIST_HEAD(mem_segs);
-
 static void __ref *vmem_alloc_pages(unsigned int order)
 {
        unsigned long size = PAGE_SIZE << order;
@@ -37,6 +29,15 @@ static void __ref *vmem_alloc_pages(unsigned int order)
        return (void *) memblock_phys_alloc(size, size);
 }
 
+static void vmem_free_pages(unsigned long addr, int order)
+{
+       /* We don't expect boot memory to be removed ever. */
+       if (!slab_is_available() ||
+           WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+               return;
+       free_pages(addr, order);
+}
+
 void *vmem_crst_alloc(unsigned long val)
 {
        unsigned long *table;
@@ -62,332 +63,486 @@ pte_t __ref *vmem_pte_alloc(void)
        return pte;
 }
 
+/*
+ * Release a PTE table through page_table_free() on init_mm.
+ *
+ * Mirrors vmem_free_pages(): nothing is freed while the slab allocator is
+ * not yet available, or when the page holding the table is marked reserved
+ * (one-time warning in that case).
+ */
+static void vmem_pte_free(unsigned long *table)
+{
+       /* We don't expect boot memory to be removed ever. */
+       if (!slab_is_available() ||
+           WARN_ON_ONCE(PageReserved(virt_to_page(table))))
+               return;
+       page_table_free(&init_mm, table);
+}
+
+#define PAGE_UNUSED 0xFD
+
 /*
- * Add a physical memory range to the 1:1 mapping.
+ * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges
+ * from unused_pmd_start to next PMD_SIZE boundary.
  */
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static unsigned long unused_pmd_start;
+
+static void vmemmap_flush_unused_pmd(void)
 {
-       unsigned long pgt_prot, sgt_prot, r3_prot;
-       unsigned long pages4k, pages1m, pages2g;
-       unsigned long end = start + size;
-       unsigned long address = start;
-       pgd_t *pg_dir;
-       p4d_t *p4_dir;
-       pud_t *pu_dir;
-       pmd_t *pm_dir;
-       pte_t *pt_dir;
-       int ret = -ENOMEM;
+       if (!unused_pmd_start)
+               return;
+       memset(__va(unused_pmd_start), PAGE_UNUSED,
+              ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
+       unused_pmd_start = 0;
+}
+
+/*
+ * Mark the memmap range beginning at start as used by zeroing the first
+ * struct page of it, so that the area no longer consists purely of
+ * PAGE_UNUSED bytes. The end argument is not evaluated here.
+ */
+static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+       /*
+        * As we expect to add in the same granularity as we remove, it's
+        * sufficient to mark only some piece used to block the memmap page from
+        * getting removed (just in case the memmap never gets initialized,
+        * e.g., because the memory block never gets onlined).
+        */
+       memset(__va(start), 0, sizeof(struct page));
+}
 
-       pgt_prot = pgprot_val(PAGE_KERNEL);
-       sgt_prot = pgprot_val(SEGMENT_KERNEL);
-       r3_prot = pgprot_val(REGION3_KERNEL);
-       if (!MACHINE_HAS_NX) {
-               pgt_prot &= ~_PAGE_NOEXEC;
-               sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-               r3_prot &= ~_REGION_ENTRY_NOEXEC;
+static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+       /*
+        * We only optimize if the new used range directly follows the
+        * previously unused range (esp., when populating consecutive sections).
+        */
+       if (unused_pmd_start == start) {
+               unused_pmd_start = end;
+               if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
+                       unused_pmd_start = 0;
+               return;
        }
-       pages4k = pages1m = pages2g = 0;
-       while (address < end) {
-               pg_dir = pgd_offset_k(address);
-               if (pgd_none(*pg_dir)) {
-                       p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-                       if (!p4_dir)
-                               goto out;
-                       pgd_populate(&init_mm, pg_dir, p4_dir);
-               }
-               p4_dir = p4d_offset(pg_dir, address);
-               if (p4d_none(*p4_dir)) {
-                       pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-                       if (!pu_dir)
-                               goto out;
-                       p4d_populate(&init_mm, p4_dir, pu_dir);
-               }
-               pu_dir = pud_offset(p4_dir, address);
-               if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
-                   !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
-                    !debug_pagealloc_enabled()) {
-                       pud_val(*pu_dir) = address | r3_prot;
-                       address += PUD_SIZE;
-                       pages2g++;
-                       continue;
-               }
-               if (pud_none(*pu_dir)) {
-                       pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-                       if (!pm_dir)
-                               goto out;
-                       pud_populate(&init_mm, pu_dir, pm_dir);
-               }
-               pm_dir = pmd_offset(pu_dir, address);
-               if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
-                   !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
-                   !debug_pagealloc_enabled()) {
-                       pmd_val(*pm_dir) = address | sgt_prot;
-                       address += PMD_SIZE;
-                       pages1m++;
+       vmemmap_flush_unused_pmd();
+       __vmemmap_use_sub_pmd(start, end);
+}
+
+/*
+ * Account for a range [start, end) that only partially covers a newly
+ * populated PMD-sized memmap page.
+ *
+ * Any pending unused range is flushed first, then the new range is marked
+ * used. The part of the page in front of start is filled with PAGE_UNUSED
+ * right away; the part behind end is only remembered in unused_pmd_start
+ * and filled later by vmemmap_flush_unused_pmd() if it stays unused.
+ */
+static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+       void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+       vmemmap_flush_unused_pmd();
+
+       /* Could be our memmap page is filled with PAGE_UNUSED already ... */
+       __vmemmap_use_sub_pmd(start, end);
+
+       /* Mark the unused parts of the new memmap page PAGE_UNUSED. */
+       if (!IS_ALIGNED(start, PMD_SIZE))
+               memset(page, PAGE_UNUSED, start - __pa(page));
+       /*
+        * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
+        * consecutive sections. Remember for the last added PMD the last
+        * unused range in the populated PMD.
+        */
+       if (!IS_ALIGNED(end, PMD_SIZE))
+               unused_pmd_start = end;
+}
+
+/*
+ * Mark [start, end) of a PMD-sized memmap page as unused.
+ *
+ * A pending unused range is flushed first so the whole page carries
+ * up-to-date markers, then the range is filled with PAGE_UNUSED and the
+ * complete PMD_SIZE area containing start is scanned.
+ *
+ * Returns true if the PMD is completely unused (every byte equals
+ * PAGE_UNUSED) and can be freed.
+ */
+static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
+{
+       void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+       vmemmap_flush_unused_pmd();
+       memset(__va(start), PAGE_UNUSED, end - start);
+       return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
+                                 unsigned long end, bool add, bool direct)
+{
+       unsigned long prot, pages = 0;
+       int ret = -ENOMEM;
+       pte_t *pte;
+
+       prot = pgprot_val(PAGE_KERNEL);
+       if (!MACHINE_HAS_NX)
+               prot &= ~_PAGE_NOEXEC;
+
+       pte = pte_offset_kernel(pmd, addr);
+       for (; addr < end; addr += PAGE_SIZE, pte++) {
+               if (!add) {
+                       if (pte_none(*pte))
+                               continue;
+                       if (!direct)
+                               vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+                       pte_clear(&init_mm, addr, pte);
+               } else if (pte_none(*pte)) {
+                       if (!direct) {
+                               void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+
+                               if (!new_page)
+                                       goto out;
+                               pte_val(*pte) = __pa(new_page) | prot;
+                       } else {
+                               pte_val(*pte) = addr | prot;
+                       }
+               } else {
                        continue;
                }
-               if (pmd_none(*pm_dir)) {
-                       pt_dir = vmem_pte_alloc();
-                       if (!pt_dir)
-                               goto out;
-                       pmd_populate(&init_mm, pm_dir, pt_dir);
-               }
-
-               pt_dir = pte_offset_kernel(pm_dir, address);
-               pte_val(*pt_dir) = address | pgt_prot;
-               address += PAGE_SIZE;
-               pages4k++;
+               pages++;
        }
        ret = 0;
 out:
-       update_page_count(PG_DIRECT_MAP_4K, pages4k);
-       update_page_count(PG_DIRECT_MAP_1M, pages1m);
-       update_page_count(PG_DIRECT_MAP_2G, pages2g);
+       if (direct)
+               update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
        return ret;
 }
 
-/*
- * Remove a physical memory range from the 1:1 mapping.
- * Currently only invalidates page table entries.
- */
-static void vmem_remove_range(unsigned long start, unsigned long size)
+static void try_free_pte_table(pmd_t *pmd, unsigned long start)
 {
-       unsigned long pages4k, pages1m, pages2g;
-       unsigned long end = start + size;
-       unsigned long address = start;
-       pgd_t *pg_dir;
-       p4d_t *p4_dir;
-       pud_t *pu_dir;
-       pmd_t *pm_dir;
-       pte_t *pt_dir;
-
-       pages4k = pages1m = pages2g = 0;
-       while (address < end) {
-               pg_dir = pgd_offset_k(address);
-               if (pgd_none(*pg_dir)) {
-                       address += PGDIR_SIZE;
-                       continue;
-               }
-               p4_dir = p4d_offset(pg_dir, address);
-               if (p4d_none(*p4_dir)) {
-                       address += P4D_SIZE;
-                       continue;
-               }
-               pu_dir = pud_offset(p4_dir, address);
-               if (pud_none(*pu_dir)) {
-                       address += PUD_SIZE;
-                       continue;
-               }
-               if (pud_large(*pu_dir)) {
-                       pud_clear(pu_dir);
-                       address += PUD_SIZE;
-                       pages2g++;
-                       continue;
-               }
-               pm_dir = pmd_offset(pu_dir, address);
-               if (pmd_none(*pm_dir)) {
-                       address += PMD_SIZE;
-                       continue;
-               }
-               if (pmd_large(*pm_dir)) {
-                       pmd_clear(pm_dir);
-                       address += PMD_SIZE;
-                       pages1m++;
-                       continue;
-               }
-               pt_dir = pte_offset_kernel(pm_dir, address);
-               pte_clear(&init_mm, address, pt_dir);
-               address += PAGE_SIZE;
-               pages4k++;
+       pte_t *pte;
+       int i;
+
+       /* We can safely assume this is fully in 1:1 mapping & vmemmap area */
+       pte = pte_offset_kernel(pmd, start);
+       for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+               if (!pte_none(*pte))
+                       return;
        }
-       flush_tlb_kernel_range(start, end);
-       update_page_count(PG_DIRECT_MAP_4K, -pages4k);
-       update_page_count(PG_DIRECT_MAP_1M, -pages1m);
-       update_page_count(PG_DIRECT_MAP_2G, -pages2g);
+       vmem_pte_free(__va(pmd_deref(*pmd)));
+       pmd_clear(pmd);
 }
 
-/*
- * Add a backed mem_map array to the virtual mem_map array.
- */
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
-               struct vmem_altmap *altmap)
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
+                                 unsigned long end, bool add, bool direct)
 {
-       unsigned long pgt_prot, sgt_prot;
-       unsigned long address = start;
-       pgd_t *pg_dir;
-       p4d_t *p4_dir;
-       pud_t *pu_dir;
-       pmd_t *pm_dir;
-       pte_t *pt_dir;
+       unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
+       pmd_t *pmd;
+       pte_t *pte;
 
-       pgt_prot = pgprot_val(PAGE_KERNEL);
-       sgt_prot = pgprot_val(SEGMENT_KERNEL);
-       if (!MACHINE_HAS_NX) {
-               pgt_prot &= ~_PAGE_NOEXEC;
-               sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-       }
-       for (address = start; address < end;) {
-               pg_dir = pgd_offset_k(address);
-               if (pgd_none(*pg_dir)) {
-                       p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-                       if (!p4_dir)
-                               goto out;
-                       pgd_populate(&init_mm, pg_dir, p4_dir);
-               }
+       prot = pgprot_val(SEGMENT_KERNEL);
+       if (!MACHINE_HAS_NX)
+               prot &= ~_SEGMENT_ENTRY_NOEXEC;
 
-               p4_dir = p4d_offset(pg_dir, address);
-               if (p4d_none(*p4_dir)) {
-                       pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-                       if (!pu_dir)
-                               goto out;
-                       p4d_populate(&init_mm, p4_dir, pu_dir);
-               }
+       pmd = pmd_offset(pud, addr);
+       for (; addr < end; addr = next, pmd++) {
+               next = pmd_addr_end(addr, end);
+               if (!add) {
+                       if (pmd_none(*pmd))
+                               continue;
+                       if (pmd_large(*pmd) && !add) {
+                               if (IS_ALIGNED(addr, PMD_SIZE) &&
+                                   IS_ALIGNED(next, PMD_SIZE)) {
+                                       if (!direct)
+                                               vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+                                       pmd_clear(pmd);
+                                       pages++;
+                               } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
+                                       vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+                                       pmd_clear(pmd);
+                               }
+                               continue;
+                       }
+               } else if (pmd_none(*pmd)) {
+                       if (IS_ALIGNED(addr, PMD_SIZE) &&
+                           IS_ALIGNED(next, PMD_SIZE) &&
+                           MACHINE_HAS_EDAT1 && addr && direct &&
+                           !debug_pagealloc_enabled()) {
+                               pmd_val(*pmd) = addr | prot;
+                               pages++;
+                               continue;
+                       } else if (!direct && MACHINE_HAS_EDAT1) {
+                               void *new_page;
 
-               pu_dir = pud_offset(p4_dir, address);
-               if (pud_none(*pu_dir)) {
-                       pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-                       if (!pm_dir)
+                               /*
+                                * Use 1MB frames for vmemmap if available. We
+                                * always use large frames even if they are only
+                                * partially used. Otherwise we would have also
+                                * page tables since vmemmap_populate gets
+                                * called for each section separately.
+                                */
+                               new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+                               if (new_page) {
+                                       pmd_val(*pmd) = __pa(new_page) | prot;
+                                       if (!IS_ALIGNED(addr, PMD_SIZE) ||
+                                           !IS_ALIGNED(next, PMD_SIZE)) {
+                                               vmemmap_use_new_sub_pmd(addr, next);
+                                       }
+                                       continue;
+                               }
+                       }
+                       pte = vmem_pte_alloc();
+                       if (!pte)
                                goto out;
-                       pud_populate(&init_mm, pu_dir, pm_dir);
+                       pmd_populate(&init_mm, pmd, pte);
+               } else if (pmd_large(*pmd)) {
+                       if (!direct)
+                               vmemmap_use_sub_pmd(addr, next);
+                       continue;
                }
+               ret = modify_pte_table(pmd, addr, next, add, direct);
+               if (ret)
+                       goto out;
+               if (!add)
+                       try_free_pte_table(pmd, addr & PMD_MASK);
+       }
+       ret = 0;
+out:
+       if (direct)
+               update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
+       return ret;
+}
 
-               pm_dir = pmd_offset(pu_dir, address);
-               if (pmd_none(*pm_dir)) {
-                       /* Use 1MB frames for vmemmap if available. We always
-                        * use large frames even if they are only partially
-                        * used.
-                        * Otherwise we would have also page tables since
-                        * vmemmap_populate gets called for each section
-                        * separately. */
-                       if (MACHINE_HAS_EDAT1) {
-                               void *new_page;
+/*
+ * Free the PMD table referenced by *pud and clear the PUD entry, provided
+ * that none of the table's PTRS_PER_PMD entries is in use any more.
+ *
+ * start is the beginning of the PUD_SIZE range covered by the table. The
+ * table is left alone if that range reaches beyond VMALLOC_START or
+ * overlaps the KASAN shadow area.
+ */
+static void try_free_pmd_table(pud_t *pud, unsigned long start)
+{
+       const unsigned long end = start + PUD_SIZE;
+       pmd_t *pmd;
+       int i;
+
+       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+       if (end > VMALLOC_START)
+               return;
+#ifdef CONFIG_KASAN
+       /* [start, end) overlaps the shadow area: not ours to free. */
+       if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+               return;
+#endif
+       pmd = pmd_offset(pud, start);
+       for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
+               if (!pmd_none(*pmd))
+                       return;
+       vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+       pud_clear(pud);
+}
 
-                               new_page = vmemmap_alloc_block(PMD_SIZE, node);
-                               if (!new_page)
-                                       goto out;
-                               pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
-                               address = (address + PMD_SIZE) & PMD_MASK;
+static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+                           bool add, bool direct)
+{
+       unsigned long next, prot, pages = 0;
+       int ret = -ENOMEM;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       prot = pgprot_val(REGION3_KERNEL);
+       if (!MACHINE_HAS_NX)
+               prot &= ~_REGION_ENTRY_NOEXEC;
+       pud = pud_offset(p4d, addr);
+       for (; addr < end; addr = next, pud++) {
+               next = pud_addr_end(addr, end);
+               if (!add) {
+                       if (pud_none(*pud))
+                               continue;
+                       if (pud_large(*pud)) {
+                               if (IS_ALIGNED(addr, PUD_SIZE) &&
+                                   IS_ALIGNED(next, PUD_SIZE)) {
+                                       pud_clear(pud);
+                                       pages++;
+                               }
+                               continue;
+                       }
+               } else if (pud_none(*pud)) {
+                       if (IS_ALIGNED(addr, PUD_SIZE) &&
+                           IS_ALIGNED(next, PUD_SIZE) &&
+                           MACHINE_HAS_EDAT2 && addr && direct &&
+                           !debug_pagealloc_enabled()) {
+                               pud_val(*pud) = addr | prot;
+                               pages++;
                                continue;
                        }
-                       pt_dir = vmem_pte_alloc();
-                       if (!pt_dir)
+                       pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+                       if (!pmd)
                                goto out;
-                       pmd_populate(&init_mm, pm_dir, pt_dir);
-               } else if (pmd_large(*pm_dir)) {
-                       address = (address + PMD_SIZE) & PMD_MASK;
+                       pud_populate(&init_mm, pud, pmd);
+               } else if (pud_large(*pud)) {
                        continue;
                }
+               ret = modify_pmd_table(pud, addr, next, add, direct);
+               if (ret)
+                       goto out;
+               if (!add)
+                       try_free_pmd_table(pud, addr & PUD_MASK);
+       }
+       ret = 0;
+out:
+       if (direct)
+               update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
+       return ret;
+}
 
-               pt_dir = pte_offset_kernel(pm_dir, address);
-               if (pte_none(*pt_dir)) {
-                       void *new_page;
+/*
+ * Free the PUD table referenced by *p4d and clear the P4D entry, provided
+ * that none of the table's PTRS_PER_PUD entries is in use any more.
+ *
+ * start is the beginning of the P4D_SIZE range covered by the table. The
+ * table is left alone if that range reaches beyond VMALLOC_START or
+ * overlaps the KASAN shadow area.
+ */
+static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+{
+       const unsigned long end = start + P4D_SIZE;
+       pud_t *pud;
+       int i;
+
+       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+       if (end > VMALLOC_START)
+               return;
+#ifdef CONFIG_KASAN
+       /* [start, end) overlaps the shadow area: not ours to free. */
+       if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+               return;
+#endif
+
+       pud = pud_offset(p4d, start);
+       for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+               if (!pud_none(*pud))
+                       return;
+       }
+       vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+       p4d_clear(p4d);
+}
 
-                       new_page = vmemmap_alloc_block(PAGE_SIZE, node);
-                       if (!new_page)
+/*
+ * Walk the P4D entries covering [addr, end) below *pgd and add or remove
+ * mappings by descending into modify_pud_table().
+ *
+ * add:    true to populate, false to tear down.
+ * direct: passed through unchanged to the lower levels.
+ *
+ * When adding, an empty P4D entry gets a new region-third table which is
+ * installed with p4d_populate() before descending. When removing, empty
+ * entries are skipped and the PUD table is released afterwards if it
+ * became empty.
+ *
+ * Returns 0 on success, -ENOMEM if a table allocation fails, or the error
+ * reported by modify_pud_table().
+ */
+static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
+                           bool add, bool direct)
+{
+       unsigned long next;
+       int ret = -ENOMEM;
+       p4d_t *p4d;
+       pud_t *pud;
+
+       p4d = p4d_offset(pgd, addr);
+       for (; addr < end; addr = next, p4d++) {
+               next = p4d_addr_end(addr, end);
+               if (!add) {
+                       if (p4d_none(*p4d))
+                               continue;
+               } else if (p4d_none(*p4d)) {
+                       pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+                       if (!pud)
                                goto out;
-                       pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
+                       /* Install the new table; otherwise it would leak. */
+                       p4d_populate(&init_mm, p4d, pud);
                }
-               address += PAGE_SIZE;
+               ret = modify_pud_table(p4d, addr, next, add, direct);
+               if (ret)
+                       goto out;
+               if (!add)
+                       try_free_pud_table(p4d, addr & P4D_MASK);
        }
        ret = 0;
 out:
        return ret;
 }
 
-void vmemmap_free(unsigned long start, unsigned long end,
-               struct vmem_altmap *altmap)
+/*
+ * Free the P4D table referenced by *pgd and clear the PGD entry, provided
+ * that none of the table's PTRS_PER_P4D entries is in use any more.
+ *
+ * start is the beginning of the PGDIR_SIZE range covered by the table. The
+ * table is left alone if that range reaches beyond VMALLOC_START or
+ * overlaps the KASAN shadow area.
+ */
+static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
 {
+       const unsigned long end = start + PGDIR_SIZE;
+       p4d_t *p4d;
+       int i;
+
+       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+       if (end > VMALLOC_START)
+               return;
+#ifdef CONFIG_KASAN
+       /* [start, end) overlaps the shadow area: not ours to free. */
+       if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+               return;
+#endif
+
+       p4d = p4d_offset(pgd, start);
+       for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
+               if (!p4d_none(*p4d))
+                       return;
+       }
+       vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+       pgd_clear(pgd);
 }
 
-/*
- * Add memory segment to the segment list if it doesn't overlap with
- * an already present segment.
- */
-static int insert_memory_segment(struct memory_segment *seg)
+static int modify_pagetable(unsigned long start, unsigned long end, bool add,
+                           bool direct)
 {
-       struct memory_segment *tmp;
+       unsigned long addr, next;
+       int ret = -ENOMEM;
+       pgd_t *pgd;
+       p4d_t *p4d;
 
-       if (seg->start + seg->size > VMEM_MAX_PHYS ||
-           seg->start + seg->size < seg->start)
-               return -ERANGE;
+       if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
+               return -EINVAL;
+       for (addr = start; addr < end; addr = next) {
+               next = pgd_addr_end(addr, end);
+               pgd = pgd_offset_k(addr);
 
-       list_for_each_entry(tmp, &mem_segs, list) {
-               if (seg->start >= tmp->start + tmp->size)
-                       continue;
-               if (seg->start + seg->size <= tmp->start)
-                       continue;
-               return -ENOSPC;
+               if (!add) {
+                       if (pgd_none(*pgd))
+                               continue;
+               } else if (pgd_none(*pgd)) {
+                       p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+                       if (!p4d)
+                               goto out;
+                       pgd_populate(&init_mm, pgd, p4d);
+               }
+               ret = modify_p4d_table(pgd, addr, next, add, direct);
+               if (ret)
+                       goto out;
+               if (!add)
+                       try_free_p4d_table(pgd, addr & PGDIR_MASK);
        }
-       list_add(&seg->list, &mem_segs);
-       return 0;
+       ret = 0;
+out:
+       if (!add)
+               flush_tlb_kernel_range(start, end);
+       return ret;
+}
+
+/*
+ * Populate the kernel page tables for [start, end): thin wrapper around
+ * modify_pagetable() with add == true. Returns its result unchanged.
+ */
+static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+       return modify_pagetable(start, end, true, direct);
+}
+
+/*
+ * Tear down the kernel page tables for [start, end): thin wrapper around
+ * modify_pagetable() with add == false. Returns its result unchanged.
+ */
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+       return modify_pagetable(start, end, false, direct);
 }
 
 /*
- * Remove memory segment from the segment list.
+ * Add a physical memory range to the 1:1 mapping.
  */
-static void remove_memory_segment(struct memory_segment *seg)
+static int vmem_add_range(unsigned long start, unsigned long size)
 {
-       list_del(&seg->list);
+       return add_pagetable(start, start + size, true);
 }
 
-static void __remove_shared_memory(struct memory_segment *seg)
+/*
+ * Remove a physical memory range from the 1:1 mapping.
+ */
+static void vmem_remove_range(unsigned long start, unsigned long size)
 {
-       remove_memory_segment(seg);
-       vmem_remove_range(seg->start, seg->size);
+       remove_pagetable(start, start + size, true);
 }
 
-int vmem_remove_mapping(unsigned long start, unsigned long size)
+/*
+ * Add a backed mem_map array to the virtual mem_map array.
+ */
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+                              struct vmem_altmap *altmap)
 {
-       struct memory_segment *seg;
        int ret;
 
        mutex_lock(&vmem_mutex);
+       /* We don't care about the node, just use NUMA_NO_NODE on allocations */
+       ret = add_pagetable(start, end, false);
+       if (ret)
+               remove_pagetable(start, end, false);
+       mutex_unlock(&vmem_mutex);
+       return ret;
+}
 
-       ret = -ENOENT;
-       list_for_each_entry(seg, &mem_segs, list) {
-               if (seg->start == start && seg->size == size)
-                       break;
-       }
-
-       if (seg->start != start || seg->size != size)
-               goto out;
+void vmemmap_free(unsigned long start, unsigned long end,
+                 struct vmem_altmap *altmap)
+{
+       mutex_lock(&vmem_mutex);
+       remove_pagetable(start, end, false);
+       mutex_unlock(&vmem_mutex);
+}
 
-       ret = 0;
-       __remove_shared_memory(seg);
-       kfree(seg);
-out:
+void vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+       mutex_lock(&vmem_mutex);
+       vmem_remove_range(start, size);
        mutex_unlock(&vmem_mutex);
-       return ret;
 }
 
 int vmem_add_mapping(unsigned long start, unsigned long size)
 {
-       struct memory_segment *seg;
        int ret;
 
-       mutex_lock(&vmem_mutex);
-       ret = -ENOMEM;
-       seg = kzalloc(sizeof(*seg), GFP_KERNEL);
-       if (!seg)
-               goto out;
-       seg->start = start;
-       seg->size = size;
-
-       ret = insert_memory_segment(seg);
-       if (ret)
-               goto out_free;
+       if (start + size > VMEM_MAX_PHYS ||
+           start + size < start)
+               return -ERANGE;
 
-       ret = vmem_add_mem(start, size);
+       mutex_lock(&vmem_mutex);
+       ret = vmem_add_range(start, size);
        if (ret)
-               goto out_remove;
-       goto out;
-
-out_remove:
-       __remove_shared_memory(seg);
-out_free:
-       kfree(seg);
-out:
+               vmem_remove_range(start, size);
        mutex_unlock(&vmem_mutex);
        return ret;
 }
@@ -402,7 +557,7 @@ void __init vmem_map_init(void)
        struct memblock_region *reg;
 
        for_each_memblock(memory, reg)
-               vmem_add_mem(reg->base, reg->size);
+               vmem_add_range(reg->base, reg->size);
        __set_memory((unsigned long)_stext,
                     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
@@ -421,27 +576,3 @@ void __init vmem_map_init(void)
        pr_info("Write protected kernel read-only data: %luk\n",
                (unsigned long)(__end_rodata - _stext) >> 10);
 }
-
-/*
- * Convert memblock.memory  to a memory segment list so there is a single
- * list that contains all memory segments.
- */
-static int __init vmem_convert_memory_chunk(void)
-{
-       struct memblock_region *reg;
-       struct memory_segment *seg;
-
-       mutex_lock(&vmem_mutex);
-       for_each_memblock(memory, reg) {
-               seg = kzalloc(sizeof(*seg), GFP_KERNEL);
-               if (!seg)
-                       panic("Out of memory...\n");
-               seg->start = reg->base;
-               seg->size = reg->size;
-               insert_memory_segment(seg);
-       }
-       mutex_unlock(&vmem_mutex);
-       return 0;
-}
-
-core_initcall(vmem_convert_memory_chunk);
index f4242b8..8fe7bdf 100644 (file)
@@ -49,6 +49,7 @@ struct bpf_jit {
        int r1_thunk_ip;        /* Address of expoline thunk for 'br %r1' */
        int r14_thunk_ip;       /* Address of expoline thunk for 'br %r14' */
        int tail_call_start;    /* Tail call start offset */
+       int excnt;              /* Number of exception table entries */
        int labels[1];          /* Labels for local jumps */
 };
 
@@ -588,6 +589,84 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
        }
 }
 
+/*
+ * Extract the destination register number from a probe-load instruction.
+ *
+ * insn points to the 6 instruction bytes. Byte 0 must be 0xe3 and byte 5
+ * must be one of the four accepted opcode suffixes; the register number is
+ * the upper nibble of byte 1.
+ *
+ * Returns the register number, or -1 if the bytes are not one of the
+ * accepted instructions.
+ */
+static int get_probe_mem_regno(const u8 *insn)
+{
+       /*
+        * insn must point to llgc, llgh, llgf or lg, which have destination
+        * register at the same position.
+        */
+       if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
+               return -1;
+       if (insn[5] != 0x90 && /* llgc */
+           insn[5] != 0x91 && /* llgh */
+           insn[5] != 0x16 && /* llgf */
+           insn[5] != 0x04) /* lg */
+               return -1;
+       return insn[1] >> 4;
+}
+
+/*
+ * Exception table handler for faulting BPF probe loads.
+ *
+ * Redirects execution to the fixup address of entry x, locates the probe
+ * instruction by rewinding the PSW address, and zeroes that instruction's
+ * destination register.
+ *
+ * Returns true if the fault was handled, false (with a one-time warning)
+ * if the instruction found is not a recognized probe load.
+ */
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+                          struct pt_regs *regs)
+{
+       int regno;
+       u8 *insn;
+
+       regs->psw.addr = extable_fixup(x);
+       /* NOTE(review): int_code >> 16 is presumably the instruction length - confirm. */
+       insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16);
+       regno = get_probe_mem_regno(insn);
+       if (WARN_ON_ONCE(regno < 0))
+               /* JIT bug - unexpected instruction. */
+               return false;
+       regs->gprs[regno] = 0;
+       return true;
+}
+
+/*
+ * Create the exception table entries for one probe load.
+ *
+ * probe_prg and nop_prg are offsets into jit->prg_buf of the probe
+ * instruction and of the nop expected to follow it directly. Two entries
+ * are written, one keyed on each instruction; both use the nop as landing
+ * pad and ex_handler_bpf() as handler. All three fields are stored as
+ * offsets relative to the address of the field itself.
+ *
+ * Returns 0 on success, or when fp->aux->extable is not set yet; returns
+ * -1 (with a one-time warning) if any consistency check fails.
+ */
+static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
+                            int probe_prg, int nop_prg)
+{
+       struct exception_table_entry *ex;
+       s64 delta;
+       u8 *insn;
+       int prg;
+       int i;
+
+       if (!fp->aux->extable)
+               /* Do nothing during early JIT passes. */
+               return 0;
+       insn = jit->prg_buf + probe_prg;
+       if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0))
+               /* JIT bug - unexpected probe instruction. */
+               return -1;
+       if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
+               /* JIT bug - gap between probe and nop instructions. */
+               return -1;
+       for (i = 0; i < 2; i++) {
+               if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
+                       /* Verifier bug - not enough entries. */
+                       return -1;
+               ex = &fp->aux->extable[jit->excnt];
+               /* Add extable entries for probe and nop instructions. */
+               prg = i == 0 ? probe_prg : nop_prg;
+               delta = jit->prg_buf + prg - (u8 *)&ex->insn;
+               if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+                       /* JIT bug - code and extable must be close. */
+                       return -1;
+               ex->insn = delta;
+               /*
+                * Always land on the nop. Note that extable infrastructure
+                * ignores fixup field, it is handled by ex_handler_bpf().
+                */
+               delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
+               if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+                       /* JIT bug - landing pad and extable must be close. */
+                       return -1;
+               ex->fixup = delta;
+               ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+               jit->excnt++;
+       }
+       return 0;
+}
+
 /*
  * Compile one eBPF instruction into s390x code
  *
@@ -604,7 +683,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
        u32 *addrs = jit->addrs;
        s32 imm = insn->imm;
        s16 off = insn->off;
+       int probe_prg = -1;
        unsigned int mask;
+       int nop_prg;
+       int err;
+
+       if (BPF_CLASS(insn->code) == BPF_LDX &&
+           BPF_MODE(insn->code) == BPF_PROBE_MEM)
+               probe_prg = jit->prg;
 
        switch (insn->code) {
        /*
@@ -1119,6 +1205,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
         * BPF_LDX
         */
        case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+       case BPF_LDX | BPF_PROBE_MEM | BPF_B:
                /* llgc %dst,0(off,%src) */
                EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
                jit->seen |= SEEN_MEM;
@@ -1126,6 +1213,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
                        insn_count = 2;
                break;
        case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+       case BPF_LDX | BPF_PROBE_MEM | BPF_H:
                /* llgh %dst,0(off,%src) */
                EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
                jit->seen |= SEEN_MEM;
@@ -1133,6 +1221,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
                        insn_count = 2;
                break;
        case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+       case BPF_LDX | BPF_PROBE_MEM | BPF_W:
                /* llgf %dst,off(%src) */
                jit->seen |= SEEN_MEM;
                EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
@@ -1140,6 +1229,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
                        insn_count = 2;
                break;
        case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+       case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
                /* lg %dst,0(off,%src) */
                jit->seen |= SEEN_MEM;
                EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
@@ -1485,6 +1575,23 @@ branch_oc:
                pr_err("Unknown opcode %02x\n", insn->code);
                return -1;
        }
+
+       if (probe_prg != -1) {
+               /*
+                * Handlers of certain exceptions leave psw.addr pointing to
+                * the instruction directly after the failing one. Therefore,
+                * create two exception table entries and also add a nop in
+                * case two probing instructions come directly after each
+                * other.
+                */
+               nop_prg = jit->prg;
+               /* bcr 0,%0 */
+               _EMIT2(0x0700);
+               err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
+               if (err < 0)
+                       return err;
+       }
+
        return insn_count;
 }
 
@@ -1527,6 +1634,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
        jit->lit32 = jit->lit32_start;
        jit->lit64 = jit->lit64_start;
        jit->prg = 0;
+       jit->excnt = 0;
 
        bpf_jit_prologue(jit, stack_depth);
        if (bpf_set_addr(jit, 0) < 0)
@@ -1551,6 +1659,12 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
                jit->lit64_start = ALIGN(jit->lit64_start, 8);
        jit->size = jit->lit64_start + lit64_size;
        jit->size_prg = jit->prg;
+
+       if (WARN_ON_ONCE(fp->aux->extable &&
+                        jit->excnt != fp->aux->num_exentries))
+               /* Verifier bug - too many entries. */
+               return -1;
+
        return 0;
 }
 
@@ -1565,6 +1679,29 @@ struct s390_jit_data {
        int pass;
 };
 
+static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
+                                              struct bpf_prog *fp)
+{
+       struct bpf_binary_header *header;
+       u32 extable_size;
+       u32 code_size;
+
+       /* We need two entries per insn. */
+       fp->aux->num_exentries *= 2;
+
+       code_size = roundup(jit->size,
+                           __alignof__(struct exception_table_entry));
+       extable_size = fp->aux->num_exentries *
+               sizeof(struct exception_table_entry);
+       header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
+                                     8, jit_fill_hole);
+       if (!header)
+               return NULL;
+       fp->aux->extable = (struct exception_table_entry *)
+               (jit->prg_buf + code_size);
+       return header;
+}
+
 /*
  * Compile eBPF program "fp"
  */
@@ -1631,7 +1768,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
        /*
         * Final pass: Allocate and generate program
         */
-       header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole);
+       header = bpf_jit_alloc(&jit, fp);
        if (!header) {
                fp = orig_fp;
                goto free_addrs;
index 38efa3e..401cf67 100644 (file)
@@ -155,10 +155,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
                return -EINVAL;
 
        /*
-        * Only support read access to MIO capable devices on a MIO enabled
-        * system. Otherwise we would have to check for every address if it is
-        * a special ZPCI_ADDR and we would have to do a get_pfn() which we
-        * don't need for MIO capable devices.
+        * We only support write access to MIO capable devices if we are on
+        * a MIO enabled system. Otherwise we would have to check for every
+        * address if it is a special ZPCI_ADDR and would have to do
+        * a get_pfn() which we don't need for MIO capable devices.  Currently
+        * ISM devices are the only devices without MIO support and there is no
+        * known need for accessing these from userspace.
         */
        if (static_branch_likely(&have_mio)) {
                ret = __memcpy_toio_inuser((void  __iomem *) mmio_addr,
@@ -282,10 +284,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
                return -EINVAL;
 
        /*
-        * Only support write access to MIO capable devices on a MIO enabled
-        * system. Otherwise we would have to check for every address if it is
-        * a special ZPCI_ADDR and we would have to do a get_pfn() which we
-        * don't need for MIO capable devices.
+        * We only support read access to MIO capable devices if we are on
+        * a MIO enabled system. Otherwise we would have to check for every
+        * address if it is a special ZPCI_ADDR and would have to do
+        * a get_pfn() which we don't need for MIO capable devices.  Currently
+        * ISM devices are the only devices without MIO support and there is no
+        * known need for accessing these from userspace.
         */
        if (static_branch_likely(&have_mio)) {
                ret = __memcpy_fromio_inuser(
index f37b95a..7c2a8a7 100644 (file)
@@ -19,8 +19,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-#define ATOMIC_INIT(i) { (i) }
-
 #define atomic_read(v)         READ_ONCE((v)->counter)
 #define atomic_set(v,i)                WRITE_ONCE((v)->counter, (i))
 
index 22d968b..d770da3 100644 (file)
@@ -12,6 +12,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
 extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address);
 extern void pmd_free(struct mm_struct *mm, pmd_t *pmd);
+#define __pmd_free_tlb(tlb, pmdp, addr)                pmd_free((tlb)->mm, (pmdp))
 #endif
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
@@ -33,13 +34,4 @@ do {                                                 \
        tlb_remove_page((tlb), (pte));                  \
 } while (0)
 
-#if CONFIG_PGTABLE_LEVELS > 2
-#define __pmd_free_tlb(tlb, pmdp, addr)                        \
-do {                                                   \
-       struct page *page = virt_to_page(pmdp);         \
-       pgtable_pmd_page_dtor(page);                    \
-       tlb_remove_page((tlb), page);                   \
-} while (0);
-#endif
-
 #endif /* __ASM_SH_PGALLOC_H */
index 956a7a0..9bac5bb 100644 (file)
@@ -199,7 +199,7 @@ syscall_trace_entry:
        mov.l   @(OFF_R7,r15), r7   ! arg3
        mov.l   @(OFF_R3,r15), r3   ! syscall_nr
        !
-       mov.l   2f, r10                 ! Number of syscalls
+       mov.l   6f, r10                 ! Number of syscalls
        cmp/hs  r10, r3
        bf      syscall_call
        mov     #-ENOSYS, r0
@@ -353,7 +353,7 @@ ENTRY(system_call)
        tst     r9, r8
        bf      syscall_trace_entry
        !
-       mov.l   2f, r8                  ! Number of syscalls
+       mov.l   6f, r8                  ! Number of syscalls
        cmp/hs  r8, r3
        bt      syscall_badsys
        !
@@ -392,7 +392,7 @@ syscall_exit:
 #if !defined(CONFIG_CPU_SH2)
 1:     .long   TRA
 #endif
-2:     .long   NR_syscalls
+6:     .long   NR_syscalls
 3:     .long   sys_call_table
 7:     .long   do_syscall_trace_enter
 8:     .long   do_syscall_trace_leave
index 286bc8e..ca2547d 100644 (file)
@@ -156,7 +156,7 @@ static int sha256_sparc64_import(struct shash_desc *desc, const void *in)
        return 0;
 }
 
-static struct shash_alg sha256 = {
+static struct shash_alg sha256_alg = {
        .digestsize     =       SHA256_DIGEST_SIZE,
        .init           =       sha256_sparc64_init,
        .update         =       sha256_sparc64_update,
@@ -174,7 +174,7 @@ static struct shash_alg sha256 = {
        }
 };
 
-static struct shash_alg sha224 = {
+static struct shash_alg sha224_alg = {
        .digestsize     =       SHA224_DIGEST_SIZE,
        .init           =       sha224_sparc64_init,
        .update         =       sha256_sparc64_update,
@@ -206,13 +206,13 @@ static bool __init sparc64_has_sha256_opcode(void)
 static int __init sha256_sparc64_mod_init(void)
 {
        if (sparc64_has_sha256_opcode()) {
-               int ret = crypto_register_shash(&sha224);
+               int ret = crypto_register_shash(&sha224_alg);
                if (ret < 0)
                        return ret;
 
-               ret = crypto_register_shash(&sha256);
+               ret = crypto_register_shash(&sha256_alg);
                if (ret < 0) {
-                       crypto_unregister_shash(&sha224);
+                       crypto_unregister_shash(&sha224_alg);
                        return ret;
                }
 
@@ -225,8 +225,8 @@ static int __init sha256_sparc64_mod_init(void)
 
 static void __exit sha256_sparc64_mod_fini(void)
 {
-       crypto_unregister_shash(&sha224);
-       crypto_unregister_shash(&sha256);
+       crypto_unregister_shash(&sha224_alg);
+       crypto_unregister_shash(&sha256_alg);
 }
 
 module_init(sha256_sparc64_mod_init);
index 94c930f..efad553 100644 (file)
@@ -18,8 +18,6 @@
 #include <asm/barrier.h>
 #include <asm-generic/atomic64.h>
 
-#define ATOMIC_INIT(i)  { (i) }
-
 int atomic_add_return(int, atomic_t *);
 int atomic_fetch_add(int, atomic_t *);
 int atomic_fetch_and(int, atomic_t *);
index b604483..6b235d3 100644 (file)
@@ -12,7 +12,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-#define ATOMIC_INIT(i)         { (i) }
 #define ATOMIC64_INIT(i)       { (i) }
 
 #define atomic_read(v)         READ_ONCE((v)->counter)
index 32ef6f0..a8786a4 100644 (file)
@@ -4,7 +4,9 @@
 
 #include <linux/compiler.h>
 
+#ifndef BUILD_VDSO
 register unsigned long __local_per_cpu_offset asm("g5");
+#endif
 
 #ifdef CONFIG_SMP
 
index 0f6d0c4..ace0d48 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef _SPARC_TRAP_BLOCK_H
 #define _SPARC_TRAP_BLOCK_H
 
+#include <linux/threads.h>
+
 #include <asm/hypervisor.h>
 #include <asm/asi.h>
 
diff --git a/arch/unicore32/.gitignore b/arch/unicore32/.gitignore
deleted file mode 100644 (file)
index e82f3fb..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Generated include files
-#
-include/generated
-#
-# Generated ld script file
-#
-kernel/vmlinux.lds
-#
-# Generated images in boot
-#
-boot/Image
-boot/zImage
-boot/uImage
-#
-# Generated files in boot/compressed
-#
-boot/compressed/piggy.S
-boot/compressed/piggy.gzip
-boot/compressed/vmlinux
-boot/compressed/vmlinux.lds
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
deleted file mode 100644 (file)
index 11ba183..0000000
+++ /dev/null
@@ -1,200 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-config UNICORE32
-       def_bool y
-       select ARCH_32BIT_OFF_T
-       select ARCH_HAS_DEVMEM_IS_ALLOWED
-       select ARCH_HAS_KEEPINITRD
-       select ARCH_MIGHT_HAVE_PC_PARPORT
-       select ARCH_MIGHT_HAVE_PC_SERIO
-       select HAVE_KERNEL_GZIP
-       select HAVE_KERNEL_BZIP2
-       select GENERIC_ATOMIC64
-       select HAVE_KERNEL_LZO
-       select HAVE_KERNEL_LZMA
-       select HAVE_PCI
-       select VIRT_TO_BUS
-       select ARCH_HAVE_CUSTOM_GPIO_H
-       select GENERIC_FIND_FIRST_BIT
-       select GENERIC_IRQ_PROBE
-       select GENERIC_IRQ_SHOW
-       select ARCH_WANT_FRAME_POINTERS
-       select GENERIC_IOMAP
-       select MODULES_USE_ELF_REL
-       select NEED_DMA_MAP_STATE
-       select MMU_GATHER_NO_RANGE if MMU
-       help
-         UniCore-32 is 32-bit Instruction Set Architecture,
-         including a series of low-power-consumption RISC chip
-         designs licensed by PKUnity Ltd.
-         Please see web page at <http://www.pkunity.com/>.
-
-config GENERIC_CSUM
-       def_bool y
-
-config NO_IOPORT_MAP
-       bool
-
-config STACKTRACE_SUPPORT
-       def_bool y
-
-config LOCKDEP_SUPPORT
-       def_bool y
-
-config ARCH_HAS_ILOG2_U32
-       bool
-
-config ARCH_HAS_ILOG2_U64
-       bool
-
-config GENERIC_HWEIGHT
-       def_bool y
-
-config GENERIC_CALIBRATE_DELAY
-       def_bool y
-
-config ARCH_MAY_HAVE_PC_FDC
-       bool
-
-config ZONE_DMA
-       def_bool y
-
-menu "System Type"
-
-config MMU
-       def_bool y
-
-config ARCH_FPGA
-       bool
-
-config ARCH_PUV3
-       def_bool y
-       select CPU_UCV2
-       select GENERIC_CLOCKEVENTS
-       select HAVE_LEGACY_CLK
-       select GPIOLIB
-
-# CONFIGs for ARCH_PUV3
-
-if ARCH_PUV3
-
-choice
-       prompt "Board Selection"
-       default PUV3_DB0913
-
-config PUV3_FPGA_DLX200
-       select ARCH_FPGA
-       bool "FPGA board"
-
-config PUV3_DB0913
-       bool "DEBUG board (0913)"
-
-config PUV3_NB0916
-       bool "NetBook board (0916)"
-       select PWM
-       select PWM_PUV3
-
-config PUV3_SMW0919
-       bool "Security Mini-Workstation board (0919)"
-
-endchoice
-
-config PUV3_PM
-       def_bool y if !ARCH_FPGA
-
-endif
-
-source "arch/unicore32/mm/Kconfig"
-
-comment "Floating point support"
-
-config UNICORE_FPU_F64
-       def_bool y if !ARCH_FPGA
-
-endmenu
-
-menu "Kernel Features"
-
-source "kernel/Kconfig.hz"
-
-config LEDS
-       def_bool y
-       depends on GPIOLIB
-
-config ALIGNMENT_TRAP
-       def_bool y
-       help
-         Unicore processors can not fetch/store information which is not
-         naturally aligned on the bus, i.e., a 4 byte fetch must start at an
-         address divisible by 4. On 32-bit Unicore processors, these non-aligned
-         fetch/store instructions will be emulated in software if you say
-         here, which has a severe performance impact. This is necessary for
-         correct operation of some network protocols. With an IP-only
-         configuration it is safe to say N, otherwise say Y.
-
-endmenu
-
-menu "Boot options"
-
-config CMDLINE
-       string "Default kernel command string"
-       default ""
-
-config CMDLINE_FORCE
-       bool "Always use the default kernel command string"
-       depends on CMDLINE != ""
-       help
-         Always use the default kernel command string, even if the boot
-         loader passes other arguments to the kernel.
-         This is useful if you cannot or don't want to change the
-         command-line options your boot loader passes to the kernel.
-
-         If unsure, say N.
-
-endmenu
-
-menu "Power management options"
-
-source "kernel/power/Kconfig"
-
-source "drivers/cpufreq/Kconfig"
-
-config ARCH_SUSPEND_POSSIBLE
-       def_bool y if !ARCH_FPGA
-
-config ARCH_HIBERNATION_POSSIBLE
-       def_bool y if !ARCH_FPGA
-
-endmenu
-
-if ARCH_PUV3
-
-config PUV3_GPIO
-       bool
-       depends on !ARCH_FPGA
-       select GPIO_SYSFS
-       default y
-
-if PUV3_NB0916
-
-menu "PKUnity NetBook-0916 Features"
-
-config I2C_BATTERY_BQ27200
-       tristate "I2C Battery BQ27200 Support"
-       select I2C_PUV3
-       select POWER_SUPPLY
-       select BATTERY_BQ27XXX
-
-config I2C_EEPROM_AT24
-       tristate "I2C EEPROMs AT24 support"
-       select I2C_PUV3
-       select EEPROM_AT24
-
-config LCD_BACKLIGHT
-       tristate "LCD Backlight support"
-       select BACKLIGHT_PWM
-
-endmenu
-
-endif
-
-endif
diff --git a/arch/unicore32/Kconfig.debug b/arch/unicore32/Kconfig.debug
deleted file mode 100644 (file)
index ca0ff97..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-config EARLY_PRINTK
-       def_bool DEBUG_OCD
-       help
-         Write kernel log output directly into the ocd or to a serial port.
-
-         This is useful for kernel debugging when your machine crashes very
-         early before the console code is initialized. For normal operation
-         it is not recommended because it looks ugly and doesn't cooperate
-         with klogd/syslogd or the X server. You should normally N here,
-         unless you want to debug such a crash.
-
-# These options are only for real kernel hackers who want to get their hands dirty.
-config DEBUG_LL
-       bool "Kernel low-level debugging functions"
-       depends on DEBUG_KERNEL
-       help
-         Say Y here to include definitions of printascii, printch, printhex
-         in the kernel.  This is helpful if you are debugging code that
-         executes before the console is initialized.
-
-config DEBUG_OCD
-       bool "Kernel low-level debugging via On-Chip-Debugger"
-       depends on DEBUG_LL
-       default y
-       help
-         Say Y here if you want the debug print routines to direct their
-         output to the UniCore On-Chip-Debugger channel using CP #1.
diff --git a/arch/unicore32/Makefile b/arch/unicore32/Makefile
deleted file mode 100644 (file)
index 3908199..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-#
-# arch/unicore32/Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies.
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 2002~2010 by Guan Xue-tao
-#
-ifneq ($(SUBARCH),$(ARCH))
-       ifeq ($(CROSS_COMPILE),)
-               CROSS_COMPILE := $(call cc-cross-prefix, unicore32-linux-)
-       endif
-endif
-
-LDFLAGS_vmlinux                := -p --no-undefined -X
-
-OBJCOPYFLAGS           := -O binary -R .note -R .note.gnu.build-id -R .comment -S
-
-# Never generate .eh_frame
-KBUILD_CFLAGS          += $(call cc-option,-fno-dwarf2-cfi-asm)
-
-# Never use hard float in kernel
-KBUILD_CFLAGS          += -msoft-float
-
-ifeq ($(CONFIG_FRAME_POINTER),y)
-KBUILD_CFLAGS          += -mno-sched-prolog
-endif
-
-CHECKFLAGS             += -D__unicore32__
-
-head-y                 := arch/unicore32/kernel/head.o
-
-core-y                 += arch/unicore32/kernel/
-core-y                 += arch/unicore32/mm/
-
-libs-y                 += arch/unicore32/lib/
-
-boot                   := arch/unicore32/boot
-
-# Default target when executing plain make
-KBUILD_IMAGE           := $(boot)/zImage
-
-all:   zImage
-
-zImage Image uImage: vmlinux
-       $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-
-archclean:
-       $(Q)$(MAKE) $(clean)=$(boot)
-
-define archhelp
-  echo  '* zImage        - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
-  echo  '  Image         - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
-  echo  '  uImage        - U-Boot wrapped zImage'
-endef
diff --git a/arch/unicore32/boot/Makefile b/arch/unicore32/boot/Makefile
deleted file mode 100644 (file)
index 8288550..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# arch/unicore32/boot/Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies.
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 2001~2010 GUAN Xue-tao
-#
-
-targets := Image zImage uImage
-
-$(obj)/Image: vmlinux FORCE
-       $(call if_changed,objcopy)
-       @echo '  Kernel: $@ is ready'
-
-$(obj)/compressed/vmlinux: $(obj)/Image FORCE
-       $(Q)$(MAKE) $(build)=$(obj)/compressed $@
-
-$(obj)/zImage: $(obj)/compressed/vmlinux FORCE
-       $(call if_changed,objcopy)
-       @echo '  Kernel: $@ is ready'
-
-UIMAGE_ARCH = unicore
-UIMAGE_LOADADDR = 0x0
-
-$(obj)/uImage: $(obj)/zImage FORCE
-       $(call if_changed,uimage)
-       @echo '  Image $@ is ready'
-
-PHONY += initrd
-initrd:
-       @test "$(INITRD)" != "" || \
-       (echo You must specify INITRD; exit -1)
-
-subdir- := compressed
diff --git a/arch/unicore32/boot/compressed/Makefile b/arch/unicore32/boot/compressed/Makefile
deleted file mode 100644 (file)
index 150fafc..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-#
-# linux/arch/unicore32/boot/compressed/Makefile
-#
-# create a compressed vmlinuz image from the original vmlinux
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 2001~2010 GUAN Xue-tao
-#
-
-ccflags-y      := -fpic -fno-builtin
-asflags-y      := -Wa,-march=all
-
-OBJS           := misc.o
-
-# font.c and font.o
-CFLAGS_font.o  := -Dstatic=
-$(obj)/font.c: $(srctree)/lib/fonts/font_8x8.c
-       $(call cmd,shipped)
-
-# piggy.S and piggy.o
-suffix_$(CONFIG_KERNEL_GZIP)   := gzip
-suffix_$(CONFIG_KERNEL_BZIP2)  := bz2
-suffix_$(CONFIG_KERNEL_LZO)    := lzo
-suffix_$(CONFIG_KERNEL_LZMA)   := lzma
-
-$(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE
-       $(call if_changed,$(suffix_y))
-
-SEDFLAGS_piggy = s/DECOMP_SUFFIX/$(suffix_y)/
-$(obj)/piggy.S: $(obj)/piggy.S.in
-       @sed "$(SEDFLAGS_piggy)" < $< > $@
-
-$(obj)/piggy.o:  $(obj)/piggy.$(suffix_y) $(obj)/piggy.S FORCE
-
-targets                := vmlinux vmlinux.lds font.o font.c head.o misc.o \
-                       piggy.$(suffix_y) piggy.o piggy.S \
-
-# Make sure files are removed during clean
-extra-y                += piggy.gzip piggy.bz2 piggy.lzo piggy.lzma
-
-# ?
-LDFLAGS_vmlinux += -p
-# Report unresolved symbol references
-LDFLAGS_vmlinux += --no-undefined
-# Delete all temporary local symbols
-LDFLAGS_vmlinux += -X
-# Next argument is a linker script
-LDFLAGS_vmlinux += -T
-
-# For uidivmod
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head.o $(obj)/piggy.o \
-               $(obj)/misc.o FORCE
-       $(call if_changed,ld)
-
-# We now have a PIC decompressor implementation.  Decompressors running
-# from RAM should not define ZTEXTADDR.  Decompressors running directly
-# from ROM or Flash must define ZTEXTADDR (preferably via the config)
-ZTEXTADDR      := 0x03000000
-ZBSSADDR       := ALIGN(4)
-
-CPPFLAGS_vmlinux.lds = -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"
diff --git a/arch/unicore32/boot/compressed/head.S b/arch/unicore32/boot/compressed/head.S
deleted file mode 100644 (file)
index 5f72662..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/boot/compressed/head.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/linkage.h>
-#include <mach/memory.h>
-
-#define csub   cmpsub
-#define cand   cmpand
-#define nop8   nop; nop; nop; nop; nop; nop; nop; nop
-
-               .section ".start", #alloc, #execinstr
-               .text
-start:
-               .type   start,#function
-
-               /* Initialize ASR, PRIV mode and INTR off */
-               mov     r0, #0xD3
-               mov.a   asr, r0
-
-               adr     r0, LC0
-               ldm     (r1, r2, r3, r5, r6, r7, r8), [r0]+
-               ldw     sp, [r0+], #28
-               sub.a   r0, r0, r1              @ calculate the delta offset
-
-               /*
-                * if delta is zero, we are running at the address
-                * we were linked at.
-                */
-               beq     not_relocated
-
-               /*
-                * We're running at a different address.  We need to fix
-                * up various pointers:
-                *   r5 - zImage base address (_start)
-                *   r7 - GOT start
-                *   r8 - GOT end
-                */
-               add     r5, r5, r0
-               add     r7, r7, r0
-               add     r8, r8, r0
-
-               /*
-                * we need to fix up pointers into the BSS region.
-                *   r2 - BSS start
-                *   r3 - BSS end
-                *   sp - stack pointer
-                */
-               add     r2, r2, r0
-               add     r3, r3, r0
-               add     sp, sp, r0
-
-               /*
-                * Relocate all entries in the GOT table.
-                * This fixes up the C references.
-                *   r7 - GOT start
-                *   r8 - GOT end
-                */
-1001:          ldw     r1, [r7+], #0
-               add     r1, r1, r0
-               stw.w   r1, [r7]+, #4
-               csub.a  r7, r8
-               bub     1001b
-
-not_relocated:
-               /*
-                * Clear BSS region.
-                *   r2 - BSS start
-                *   r3 - BSS end
-                */
-               mov     r0, #0
-1002:          stw.w   r0, [r2]+, #4
-               csub.a  r2, r3
-               bub     1002b
-
-               /*
-                * Turn on the cache.
-                */
-                mov     r0, #0
-                movc    p0.c5, r0, #28         @ cache invalidate all
-                nop8
-                movc    p0.c6, r0, #6          @ tlb invalidate all
-                nop8
-
-                mov     r0, #0x1c              @ en icache and wb dcache
-                movc    p0.c1, r0, #0
-                nop8
-
-               /*
-                * Set up some pointers, for starting decompressing.
-                */
-
-               mov     r1, sp                  @ malloc space above stack
-               add     r2, sp, #0x10000        @ 64k max
-
-               /*
-                * Check to see if we will overwrite ourselves.
-                *   r4 = final kernel address
-                *   r5 = start of this image
-                *   r6 = size of decompressed image
-                *   r2 = end of malloc space (and therefore this image)
-                * We basically want:
-                *   r4 >= r2 -> OK
-                *   r4 + image length <= r5 -> OK
-                */
-               ldw     r4, =KERNEL_IMAGE_START
-               csub.a  r4, r2
-               bea     wont_overwrite
-               add     r0, r4, r6
-               csub.a  r0, r5
-               beb     wont_overwrite
-
-               /*
-                * If overwrite, just print error message
-                */
-               b       __error_overwrite
-
-               /*
-                * We're not in danger of overwriting ourselves.
-                * Do this the simple way.
-                */
-wont_overwrite:
-               /*
-                * decompress_kernel:
-                *   r0: output_start
-                *   r1: free_mem_ptr_p
-                *   r2: free_mem_ptr_end_p
-                */
-               mov     r0, r4
-               b.l     decompress_kernel       @ C functions
-
-               /*
-                * Clean and flush the cache to maintain consistency.
-                */
-               mov     r0, #0
-                movc    p0.c5, r0, #14         @ flush dcache
-               nop8
-                movc    p0.c5, r0, #20         @ icache invalidate all
-                nop8
-
-               /*
-                * Turn off the Cache and MMU.
-                */
-               mov     r0, #0                  @ disable i/d cache and MMU
-               movc    p0.c1, r0, #0
-                nop8
-
-               mov     r0, #0                  @ must be zero
-               ldw     r4, =KERNEL_IMAGE_START
-               mov     pc, r4                  @ call kernel
-
-
-               .align  2
-               .type   LC0, #object
-LC0:           .word   LC0                     @ r1
-               .word   __bss_start             @ r2
-               .word   _end                    @ r3
-               .word   _start                  @ r5
-               .word   _image_size             @ r6
-               .word   _got_start              @ r7
-               .word   _got_end                @ r8
-               .word   decompress_stack_end    @ sp
-               .size   LC0, . - LC0
-
-print_string:
-#ifdef CONFIG_DEBUG_OCD
-2001:          ldb.w   r1, [r0]+, #1
-               csub.a  r1, #0
-               bne     2002f
-               mov     pc, lr
-2002:
-               movc    r2, p1.c0, #0
-               cand.a  r2, #2
-               bne     2002b
-               movc    p1.c1, r1, #1
-               csub.a  r1, #'\n'
-               cmoveq  r1, #'\r'
-               beq     2002b
-               b       2001b
-#else
-               mov     pc, lr
-#endif
-
-__error_overwrite:
-               adr     r0, str_error
-               b.l     print_string
-2001:          nop8
-               b       2001b
-str_error:     .asciz  "\nError: Kernel address OVERWRITE\n"
-               .align
-
-               .ltorg
-
-               .align  4
-               .section ".stack", "aw", %nobits
-decompress_stack:      .space  4096
-decompress_stack_end:
diff --git a/arch/unicore32/boot/compressed/misc.c b/arch/unicore32/boot/compressed/misc.c
deleted file mode 100644 (file)
index 450d335..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/boot/compressed/misc.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#include <asm/unaligned.h>
-#include <mach/uncompress.h>
-
-/*
- * gzip delarations
- */
-unsigned char *output_data;
-unsigned long output_ptr;
-
-unsigned int free_mem_ptr;
-unsigned int free_mem_end_ptr;
-
-#define STATIC static
-#define STATIC_RW_DATA /* non-static please */
-
-/*
- * arch-dependent implementations
- */
-#ifndef ARCH_HAVE_DECOMP_ERROR
-#define arch_decomp_error(x)
-#endif
-
-#ifndef ARCH_HAVE_DECOMP_SETUP
-#define arch_decomp_setup()
-#endif
-
-#ifndef ARCH_HAVE_DECOMP_PUTS
-#define arch_decomp_puts(p)
-#endif
-
-void *memcpy(void *dest, const void *src, size_t n)
-{
-       int i = 0;
-       unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src;
-
-       for (i = n >> 3; i > 0; i--) {
-               *d++ = *s++;
-               *d++ = *s++;
-               *d++ = *s++;
-               *d++ = *s++;
-               *d++ = *s++;
-               *d++ = *s++;
-               *d++ = *s++;
-               *d++ = *s++;
-       }
-
-       if (n & 1 << 2) {
-               *d++ = *s++;
-               *d++ = *s++;
-               *d++ = *s++;
-               *d++ = *s++;
-       }
-
-       if (n & 1 << 1) {
-               *d++ = *s++;
-               *d++ = *s++;
-       }
-
-       if (n & 1)
-               *d++ = *s++;
-
-       return dest;
-}
-
-void error(char *x)
-{
-       arch_decomp_puts("\n\n");
-       arch_decomp_puts(x);
-       arch_decomp_puts("\n\n -- System halted");
-
-       arch_decomp_error(x);
-
-       for (;;)
-               ; /* Halt */
-}
-
-/* Heap size should be adjusted for different decompress method */
-#ifdef CONFIG_KERNEL_GZIP
-#include "../../../../lib/decompress_inflate.c"
-#endif
-
-#ifdef CONFIG_KERNEL_BZIP2
-#include "../../../../lib/decompress_bunzip2.c"
-#endif
-
-#ifdef CONFIG_KERNEL_LZO
-#include "../../../../lib/decompress_unlzo.c"
-#endif
-
-#ifdef CONFIG_KERNEL_LZMA
-#include "../../../../lib/decompress_unlzma.c"
-#endif
-
-unsigned long decompress_kernel(unsigned long output_start,
-               unsigned long free_mem_ptr_p,
-               unsigned long free_mem_ptr_end_p)
-{
-       unsigned char *tmp;
-
-       output_data             = (unsigned char *)output_start;
-       free_mem_ptr            = free_mem_ptr_p;
-       free_mem_end_ptr        = free_mem_ptr_end_p;
-
-       arch_decomp_setup();
-
-       tmp = (unsigned char *) (((unsigned long)input_data_end) - 4);
-       output_ptr = get_unaligned_le32(tmp);
-
-       arch_decomp_puts("Uncompressing Linux...");
-       __decompress(input_data, input_data_end - input_data, NULL, NULL,
-                       output_data, 0, NULL, error);
-       arch_decomp_puts(" done, booting the kernel.\n");
-       return output_ptr;
-}
diff --git a/arch/unicore32/boot/compressed/piggy.S.in b/arch/unicore32/boot/compressed/piggy.S.in
deleted file mode 100644 (file)
index b79704d..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-       .section .piggydata,#alloc
-       .globl  input_data
-input_data:
-       .incbin "arch/unicore32/boot/compressed/piggy.DECOMP_SUFFIX"
-       .globl  input_data_end
-input_data_end:
diff --git a/arch/unicore32/boot/compressed/vmlinux.lds.S b/arch/unicore32/boot/compressed/vmlinux.lds.S
deleted file mode 100644 (file)
index edda4dd..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore/boot/compressed/vmlinux.lds.in
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-OUTPUT_ARCH(unicore32)
-ENTRY(_start)
-SECTIONS
-{
-  /DISCARD/ : {
-    /*
-     * Discard any r/w data - this produces a link error if we have any,
-     * which is required for PIC decompression.  Local data generates
-     * GOTOFF relocations, which prevents it being relocated independently
-     * of the text/got segments.
-     */
-    *(.data)
-  }
-
-  . = TEXT_START;
-  _text = .;
-
-  .text : {
-    _start = .;
-    *(.start)
-    *(.text)
-    *(.text.*)
-    *(.fixup)
-    *(.gnu.warning)
-    *(.rodata)
-    *(.rodata.*)
-    *(.piggydata)
-    . = ALIGN(4);
-  }
-
-  _etext = .;
-
-  /* Assume size of decompressed image is 4x the compressed image */
-  _image_size = (_etext - _text) * 4;
-
-  _got_start = .;
-  .got                 : { *(.got) }
-  _got_end = .;
-  .got.plt             : { *(.got.plt) }
-  _edata = .;
-
-  . = BSS_START;
-  __bss_start = .;
-  .bss                 : { *(.bss) }
-  _end = .;
-
-  .stack               : { *(.stack) }
-  .comment 0           : { *(.comment) }
-}
-
diff --git a/arch/unicore32/configs/defconfig b/arch/unicore32/configs/defconfig
deleted file mode 100644 (file)
index 360cc9a..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-### General setup
-CONFIG_EXPERIMENTAL=y
-CONFIG_LOCALVERSION="-unicore32"
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_HOTPLUG=y
-#      Initial RAM filesystem and RAM disk (initramfs/initrd) support
-#CONFIG_BLK_DEV_INITRD=y
-#CONFIG_INITRAMFS_SOURCE="arch/unicore/ramfs/ramfs_config"
-
-### Enable loadable module support
-CONFIG_MODULES=n
-CONFIG_MODULE_UNLOAD=y
-
-### System Type
-CONFIG_ARCH_PUV3=y
-#      Board Selection
-CONFIG_PUV3_NB0916=y
-#      Processor Features
-CONFIG_CPU_DCACHE_LINE_DISABLE=y
-CONFIG_CPU_TLB_SINGLE_ENTRY_DISABLE=n
-
-### Bus support
-CONFIG_PCI=y
-CONFIG_PCI_LEGACY=n
-
-### Boot options
-#      for debug, adding: earlyprintk=ocd,keep initcall_debug
-#      others support: test_suspend=mem root=/dev/sda
-#      hibernate support: resume=/dev/sda3
-CONFIG_CMDLINE="earlyprintk=ocd,keep ignore_loglevel"
-# TODO: mem=512M video=unifb:1024x600-16@75
-# for nfs: root=/dev/nfs rw nfsroot=192.168.10.88:/home/udb/nfs/,rsize=1024,wsize=1024
-#      ip=192.168.10.83:192.168.10.88:192.168.10.1:255.255.255.0::eth0:off
-CONFIG_CMDLINE_FORCE=y
-
-### Power management options
-CONFIG_PM=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_STD_PARTITION="/dev/sda3"
-CONFIG_CPU_FREQ=n
-CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
-
-### Networking support
-CONFIG_NET=y
-#      Networking options
-CONFIG_PACKET=m
-CONFIG_UNIX=m
-#      TCP/IP networking
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IPV6=n
-#      Wireless
-CONFIG_WIRELESS=y
-CONFIG_WIRELESS_EXT=y
-CONFIG_MAC80211=m
-
-### PKUnity SoC Features
-CONFIG_USB_WLAN_HED_AQ3=n
-CONFIG_USB_CMMB_INNOFIDEI=n
-CONFIG_I2C_BATTERY_BQ27200=n
-CONFIG_I2C_EEPROM_AT24=n
-CONFIG_LCD_BACKLIGHT=n
-
-CONFIG_PUV3_UMAL=y
-CONFIG_PUV3_MUSB=n
-CONFIG_PUV3_AC97=n
-CONFIG_PUV3_NAND=n
-CONFIG_PUV3_MMC=n
-CONFIG_PUV3_UART=n
-
-### Device Drivers
-#      Memory Technology Device (MTD) support
-CONFIG_MTD=m
-CONFIG_MTD_UBI=m
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=m
-CONFIG_MTD_BLKDEVS=m
-#      RAM/ROM/Flash chip drivers
-CONFIG_MTD_CFI=m
-CONFIG_MTD_JEDECPROBE=m
-CONFIG_MTD_CFI_AMDSTD=m
-#      Mapping drivers for chip access
-CONFIG_MTD_PHYSMAP=m
-
-#      Block devices
-CONFIG_BLK_DEV_LOOP=m
-
-#      SCSI device support
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_BLK_DEV_SR=m
-CONFIG_CHR_DEV_SG=m
-
-#      Serial ATA (prod) and Parallel ATA (experimental) drivers
-CONFIG_ATA=y
-CONFIG_SATA_VIA=y
-
-#      Network device support
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NETDEV_1000=y
-#      Wireless LAN
-CONFIG_WLAN_80211=n
-CONFIG_RT2X00=n
-CONFIG_RT73USB=n
-
-#      Input device support
-CONFIG_INPUT_EVDEV=m
-#      Keyboards
-CONFIG_KEYBOARD_GPIO=m
-
-#      I2C support
-CONFIG_I2C=y
-CONFIG_I2C_PUV3=y
-
-#      Hardware Monitoring support
-#CONFIG_SENSORS_LM75=m
-#      Generic Thermal sysfs driver
-#CONFIG_THERMAL=y
-#CONFIG_THERMAL_HWMON=y
-
-#      Multimedia support
-CONFIG_MEDIA_SUPPORT=n
-CONFIG_VIDEO_DEV=n
-CONFIG_USB_VIDEO_CLASS=n
-
-#      Graphics support
-CONFIG_FB=y
-CONFIG_FB_PUV3_UNIGFX=y
-#      Console display driver support
-CONFIG_VGA_CONSOLE=n
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-#      Bootup logo
-CONFIG_LOGO=n
-
-#      Sound card support
-CONFIG_SOUND=m
-#      Advanced Linux Sound Architecture
-CONFIG_SND=m
-CONFIG_SND_MIXER_OSS=m
-CONFIG_SND_PCM_OSS=m
-
-#      USB support
-CONFIG_USB_ARCH_HAS_HCD=n
-CONFIG_USB=n
-CONFIG_USB_PRINTER=n
-CONFIG_USB_STORAGE=n
-#      Inventra Highspeed Dual Role Controller
-CONFIG_USB_MUSB_HDRC=n
-
-#      LED Support
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_GPIO=y
-#      LED Triggers
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_TIMER=y
-CONFIG_LEDS_TRIGGER_DISK=y
-CONFIG_LEDS_TRIGGER_HEARTBEAT=y
-
-#      Real Time Clock
-CONFIG_RTC_LIB=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_PUV3=y
-
-### File systems
-CONFIG_EXT2_FS=m
-CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
-CONFIG_FUSE_FS=m
-#      CD-ROM/DVD Filesystems
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_UDF_FS=m
-#      DOS/FAT/NT Filesystems
-CONFIG_VFAT_FS=m
-#      Pseudo filesystems
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-CONFIG_TMPFS=y
-#      Miscellaneous filesystems
-CONFIG_MISC_FILESYSTEMS=y
-CONFIG_JFFS2_FS=m
-CONFIG_UBIFS_FS=m
-#      Network File Systems
-CONFIG_NETWORK_FILESYSTEMS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-#      Partition Types
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MSDOS_PARTITION=y
-#      Native language support
-CONFIG_NLS=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_UTF8=m
-
-### Kernel hacking
-CONFIG_FRAME_WARN=8096
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_PROVE_LOCKING=n
-CONFIG_DEBUG_BUGVERBOSE=y
-CONFIG_FRAME_POINTER=y
-CONFIG_DEBUG_LL=y
-
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
deleted file mode 100644 (file)
index 55026e8..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += extable.h
-generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
-generic-y += parport.h
-generic-y += syscalls.h
-generic-y += user.h
diff --git a/arch/unicore32/include/asm/assembler.h b/arch/unicore32/include/asm/assembler.h
deleted file mode 100644 (file)
index 3de843d..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/assembler.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  Do not include any C declarations in this file - it is included by
- *  assembler source.
- */
-#ifndef __ASSEMBLY__
-#error "Only include this from assembly code"
-#endif
-
-#include <asm/ptrace.h>
-
-/*
- * Little Endian independent macros for shifting bytes within registers.
- */
-#define pull            >>
-#define push            <<
-#define get_byte_0      << #0
-#define get_byte_1     >> #8
-#define get_byte_2     >> #16
-#define get_byte_3     >> #24
-#define put_byte_0      << #0
-#define put_byte_1     << #8
-#define put_byte_2     << #16
-#define put_byte_3     << #24
-
-#define cadd           cmpadd
-#define cand           cmpand
-#define csub           cmpsub
-#define cxor           cmpxor
-
-/*
- * Enable and disable interrupts
- */
-       .macro disable_irq, temp
-       mov     \temp, asr
-       andn     \temp, \temp, #0xFF
-       or      \temp, \temp, #PSR_I_BIT | PRIV_MODE
-       mov.a   asr, \temp
-       .endm
-
-       .macro enable_irq, temp
-       mov     \temp, asr
-       andn     \temp, \temp, #0xFF
-       or      \temp, \temp, #PRIV_MODE
-       mov.a   asr, \temp
-       .endm
-
-#define USER(x...)                             \
-9999:  x;                                      \
-       .pushsection __ex_table, "a";           \
-       .align  3;                              \
-       .long   9999b, 9001f;                   \
-       .popsection
-
-       .macro  notcond, cond, nexti = .+8
-       .ifc    \cond, eq
-               bne     \nexti
-       .else;  .ifc    \cond, ne
-               beq     \nexti
-       .else;  .ifc    \cond, ea
-               bub     \nexti
-       .else;  .ifc    \cond, ub
-               bea     \nexti
-       .else;  .ifc    \cond, fs
-               bns     \nexti
-       .else;  .ifc    \cond, ns
-               bfs     \nexti
-       .else;  .ifc    \cond, fv
-               bnv     \nexti
-       .else;  .ifc    \cond, nv
-               bfv     \nexti
-       .else;  .ifc    \cond, ua
-               beb     \nexti
-       .else;  .ifc    \cond, eb
-               bua     \nexti
-       .else;  .ifc    \cond, eg
-               bsl     \nexti
-       .else;  .ifc    \cond, sl
-               beg     \nexti
-       .else;  .ifc    \cond, sg
-               bel     \nexti
-       .else;  .ifc    \cond, el
-               bsg     \nexti
-       .else;  .ifnc   \cond, al
-               .error  "Unknown cond in notcond macro argument"
-       .endif; .endif; .endif; .endif; .endif; .endif; .endif
-       .endif; .endif; .endif; .endif; .endif; .endif; .endif
-       .endif
-       .endm
-
-       .macro  usracc, instr, reg, ptr, inc, cond, rept, abort
-       .rept   \rept
-       notcond \cond, .+8
-9999 :
-       .if     \inc == 1
-       \instr\()b.u \reg, [\ptr], #\inc
-       .elseif \inc == 4
-       \instr\()w.u \reg, [\ptr], #\inc
-       .else
-       .error  "Unsupported inc macro argument"
-       .endif
-
-       .pushsection __ex_table, "a"
-       .align  3
-       .long   9999b, \abort
-       .popsection
-       .endr
-       .endm
-
-       .macro  strusr, reg, ptr, inc, cond = al, rept = 1, abort = 9001f
-       usracc  st, \reg, \ptr, \inc, \cond, \rept, \abort
-       .endm
-
-       .macro  ldrusr, reg, ptr, inc, cond = al, rept = 1, abort = 9001f
-       usracc  ld, \reg, \ptr, \inc, \cond, \rept, \abort
-       .endm
-
-       .macro  nop8
-       .rept   8
-               nop
-       .endr
-       .endm
diff --git a/arch/unicore32/include/asm/barrier.h b/arch/unicore32/include/asm/barrier.h
deleted file mode 100644 (file)
index efb81de..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Memory barrier implementations for PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2012 GUAN Xue-tao
- */
-#ifndef __UNICORE_BARRIER_H__
-#define __UNICORE_BARRIER_H__
-
-#define isb() __asm__ __volatile__ ("" : : : "memory")
-#define dsb() __asm__ __volatile__ ("" : : : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
-
-#include <asm-generic/barrier.h>
-
-#endif /* __UNICORE_BARRIER_H__ */
diff --git a/arch/unicore32/include/asm/bitops.h b/arch/unicore32/include/asm/bitops.h
deleted file mode 100644 (file)
index deeb216..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/bitops.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_BITOPS_H__
-#define __UNICORE_BITOPS_H__
-
-#define _ASM_GENERIC_BITOPS_FLS_H_
-#define _ASM_GENERIC_BITOPS___FLS_H_
-#define _ASM_GENERIC_BITOPS_FFS_H_
-#define _ASM_GENERIC_BITOPS___FFS_H_
-/*
- * On UNICORE, those functions can be implemented around
- * the cntlz instruction for much better code efficiency.
- */
-
-static inline int fls(unsigned int x)
-{
-       int ret;
-
-       asm("cntlz\t%0, %1" : "=r" (ret) : "r" (x) : "cc");
-       ret = 32 - ret;
-
-       return ret;
-}
-
-#define __fls(x) (fls(x) - 1)
-#define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); })
-#define __ffs(x) (ffs(x) - 1)
-
-#include <asm-generic/bitops.h>
-
-/* following definitions: to avoid using codes in lib/find_*.c */
-#define find_next_bit          find_next_bit
-#define find_next_zero_bit     find_next_zero_bit
-#define find_first_bit         find_first_bit
-#define find_first_zero_bit    find_first_zero_bit
-
-#include <asm-generic/bitops/find.h>
-
-#endif /* __UNICORE_BITOPS_H__ */
diff --git a/arch/unicore32/include/asm/bug.h b/arch/unicore32/include/asm/bug.h
deleted file mode 100644 (file)
index 99acea8..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Bug handling for PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2012 GUAN Xue-tao
- */
-#ifndef __UNICORE_BUG_H__
-#define __UNICORE_BUG_H__
-
-#include <asm-generic/bug.h>
-
-struct pt_regs;
-struct siginfo;
-
-extern void die(const char *msg, struct pt_regs *regs, int err);
-extern void uc32_notify_die(const char *str, struct pt_regs *regs,
-               int sig, int code, void __user *addr,
-               unsigned long err, unsigned long trap);
-
-#endif /* __UNICORE_BUG_H__ */
diff --git a/arch/unicore32/include/asm/cache.h b/arch/unicore32/include/asm/cache.h
deleted file mode 100644 (file)
index 44ecd1f..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/cache.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_CACHE_H__
-#define __UNICORE_CACHE_H__
-
-#define L1_CACHE_SHIFT         (5)
-#define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
-
-/*
- * Memory returned by kmalloc() may be used for DMA, so we must make
- * sure that all such allocations are cache aligned. Otherwise,
- * unrelated code may cause parts of the buffer to be read into the
- * cache before the transfer is done, causing old data to be seen by
- * the CPU.
- */
-#define ARCH_DMA_MINALIGN      L1_CACHE_BYTES
-
-#endif
diff --git a/arch/unicore32/include/asm/cacheflush.h b/arch/unicore32/include/asm/cacheflush.h
deleted file mode 100644 (file)
index ff0be92..0000000
+++ /dev/null
@@ -1,186 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/cacheflush.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_CACHEFLUSH_H__
-#define __UNICORE_CACHEFLUSH_H__
-
-#include <linux/mm.h>
-
-#include <asm/shmparam.h>
-
-#define CACHE_COLOUR(vaddr)    ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
-
-/*
- * This flag is used to indicate that the page pointed to by a pte is clean
- * and does not require cleaning before returning it to the user.
- */
-#define PG_dcache_clean PG_arch_1
-
-/*
- *     MM Cache Management
- *     ===================
- *
- *     The arch/unicore32/mm/cache.S files implement these methods.
- *
- *     Start addresses are inclusive and end addresses are exclusive;
- *     start addresses should be rounded down, end addresses up.
- *
- *     See Documentation/core-api/cachetlb.rst for more information.
- *     Please note that the implementation of these, and the required
- *     effects are cache-type (VIVT/VIPT/PIPT) specific.
- *
- *     flush_icache_all()
- *
- *             Unconditionally clean and invalidate the entire icache.
- *             Currently only needed for cache-v6.S and cache-v7.S, see
- *             __flush_icache_all for the generic implementation.
- *
- *     flush_kern_all()
- *
- *             Unconditionally clean and invalidate the entire cache.
- *
- *     flush_user_all()
- *
- *             Clean and invalidate all user space cache entries
- *             before a change of page tables.
- *
- *     flush_user_range(start, end, flags)
- *
- *             Clean and invalidate a range of cache entries in the
- *             specified address space before a change of page tables.
- *             - start - user start address (inclusive, page aligned)
- *             - end   - user end address   (exclusive, page aligned)
- *             - flags - vma->vm_flags field
- *
- *     coherent_kern_range(start, end)
- *
- *             Ensure coherency between the Icache and the Dcache in the
- *             region described by start, end.  If you have non-snooping
- *             Harvard caches, you need to implement this function.
- *             - start  - virtual start address
- *             - end    - virtual end address
- *
- *     coherent_user_range(start, end)
- *
- *             Ensure coherency between the Icache and the Dcache in the
- *             region described by start, end.  If you have non-snooping
- *             Harvard caches, you need to implement this function.
- *             - start  - virtual start address
- *             - end    - virtual end address
- *
- *     flush_kern_dcache_area(kaddr, size)
- *
- *             Ensure that the data held in page is written back.
- *             - kaddr  - page address
- *             - size   - region size
- *
- *     DMA Cache Coherency
- *     ===================
- *
- *     dma_flush_range(start, end)
- *
- *             Clean and invalidate the specified virtual address range.
- *             - start  - virtual start address
- *             - end    - virtual end address
- */
-
-extern void __cpuc_flush_icache_all(void);
-extern void __cpuc_flush_kern_all(void);
-extern void __cpuc_flush_user_all(void);
-extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int);
-extern void __cpuc_coherent_kern_range(unsigned long, unsigned long);
-extern void __cpuc_coherent_user_range(unsigned long, unsigned long);
-extern void __cpuc_flush_dcache_area(void *, size_t);
-extern void __cpuc_flush_kern_dcache_area(void *addr, size_t size);
-
-/*
- * Copy user data from/to a page which is mapped into a different
- * processes address space.  Really, we want to allow our "user
- * space" model to handle this.
- */
-extern void copy_to_user_page(struct vm_area_struct *, struct page *,
-       unsigned long, void *, const void *, unsigned long);
-#define copy_from_user_page(vma, page, vaddr, dst, src, len)   \
-       do {                                                    \
-               memcpy(dst, src, len);                          \
-       } while (0)
-
-/*
- * Convert calls to our calling convention.
- */
-/* Invalidate I-cache */
-static inline void __flush_icache_all(void)
-{
-       asm("movc       p0.c5, %0, #20;\n"
-           "nop; nop; nop; nop; nop; nop; nop; nop\n"
-           :
-           : "r" (0));
-}
-
-#define flush_cache_all()              __cpuc_flush_kern_all()
-
-extern void flush_cache_mm(struct mm_struct *mm);
-extern void flush_cache_range(struct vm_area_struct *vma,
-               unsigned long start, unsigned long end);
-extern void flush_cache_page(struct vm_area_struct *vma,
-               unsigned long user_addr, unsigned long pfn);
-
-#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
-
-/*
- * Perform necessary cache operations to ensure that data previously
- * stored within this range of addresses can be executed by the CPU.
- */
-#define flush_icache_range(s, e)       __cpuc_coherent_kern_range(s, e)
-
-/*
- * Perform necessary cache operations to ensure that the TLB will
- * see data written in the specified area.
- */
-#define clean_dcache_area(start, size) cpu_dcache_clean_area(start, size)
-
-/*
- * flush_dcache_page is used when the kernel has written to the page
- * cache page at virtual address page->virtual.
- *
- * If this page isn't mapped (ie, page_mapping == NULL), or it might
- * have userspace mappings, then we _must_ always clean + invalidate
- * the dcache entries associated with the kernel mapping.
- *
- * Otherwise we can defer the operation, and clean the cache when we are
- * about to change to user space.  This is the same method as used on SPARC64.
- * See update_mmu_cache for the user space part.
- */
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *);
-
-#define flush_dcache_mmap_lock(mapping)                do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)      do { } while (0)
-
-/*
- * We don't appear to need to do anything here.  In fact, if we did, we'd
- * duplicate cache flushing elsewhere performed by flush_dcache_page().
- */
-#define flush_icache_page(vma, page)   do { } while (0)
-
-/*
- * flush_cache_vmap() is used when creating mappings (eg, via vmap,
- * vmalloc, ioremap etc) in kernel space for pages.  On non-VIPT
- * caches, since the direct-mappings of these pages may contain cached
- * data, we need to do a full cache flush to ensure that writebacks
- * don't corrupt data placed into these pages via the new mappings.
- */
-static inline void flush_cache_vmap(unsigned long start, unsigned long end)
-{
-}
-
-static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
-{
-}
-
-#endif
diff --git a/arch/unicore32/include/asm/checksum.h b/arch/unicore32/include/asm/checksum.h
deleted file mode 100644 (file)
index e774ca2..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/checksum.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * IP checksum routines
- */
-#ifndef __UNICORE_CHECKSUM_H__
-#define __UNICORE_CHECKSUM_H__
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
-                  __u8 proto, __wsum sum)
-{
-       __asm__(
-       "add.a  %0, %1, %2\n"
-       "addc.a %0, %0, %3\n"
-       "addc.a %0, %0, %4 << #8\n"
-       "addc.a %0, %0, %5\n"
-       "addc   %0, %0, #0\n"
-       : "=&r"(sum)
-       : "r" (sum), "r" (daddr), "r" (saddr), "r" (len), "Ir" (htons(proto))
-       : "cc");
-       return sum;
-}
-#define csum_tcpudp_nofold     csum_tcpudp_nofold
-
-#include <asm-generic/checksum.h>
-
-#endif
diff --git a/arch/unicore32/include/asm/cmpxchg.h b/arch/unicore32/include/asm/cmpxchg.h
deleted file mode 100644 (file)
index 87f960a..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Atomics xchg/cmpxchg for PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2012 GUAN Xue-tao
- */
-#ifndef __UNICORE_CMPXCHG_H__
-#define __UNICORE_CMPXCHG_H__
-
-/*
- * Generate a link failure on undefined symbol if the pointer points to a value
- * of unsupported size.
- */
-extern void __xchg_bad_pointer(void);
-
-static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
-               int size)
-{
-       unsigned long ret;
-
-       switch (size) {
-       case 1:
-               asm volatile("swapb     %0, %1, [%2]"
-                       : "=&r" (ret)
-                       : "r" (x), "r" (ptr)
-                       : "memory", "cc");
-               break;
-       case 4:
-               asm volatile("swapw     %0, %1, [%2]"
-                       : "=&r" (ret)
-                       : "r" (x), "r" (ptr)
-                       : "memory", "cc");
-               break;
-       default:
-               __xchg_bad_pointer();
-       }
-
-       return ret;
-}
-
-#define xchg(ptr, x) \
-       ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
-
-#include <asm-generic/cmpxchg-local.h>
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)                                       \
-               ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr),     \
-               (unsigned long)(o), (unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n)                                     \
-               __cmpxchg64_local_generic((ptr), (o), (n))
-
-#include <asm-generic/cmpxchg.h>
-
-#endif /* __UNICORE_CMPXCHG_H__ */
diff --git a/arch/unicore32/include/asm/cpu-single.h b/arch/unicore32/include/asm/cpu-single.h
deleted file mode 100644 (file)
index 1b419d6..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/cpu-single.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_CPU_SINGLE_H__
-#define __UNICORE_CPU_SINGLE_H__
-
-#include <asm/page.h>
-#include <asm/memory.h>
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-#define cpu_switch_mm(pgd, mm) cpu_do_switch_mm(virt_to_phys(pgd), mm)
-
-#define cpu_get_pgd()                                  \
-       ({                                              \
-               unsigned long pg;                       \
-               __asm__("movc   %0, p0.c2, #0"          \
-                        : "=r" (pg) : : "cc");         \
-               pg &= ~0x0fff;                          \
-               (pgd_t *)phys_to_virt(pg);              \
-       })
-
-struct mm_struct;
-
-/* declare all the functions as extern */
-extern void cpu_proc_fin(void);
-extern int cpu_do_idle(void);
-extern void cpu_dcache_clean_area(void *, int);
-extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
-extern void cpu_set_pte(pte_t *ptep, pte_t pte);
-extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
-
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
-#endif /* __UNICORE_CPU_SINGLE_H__ */
diff --git a/arch/unicore32/include/asm/cputype.h b/arch/unicore32/include/asm/cputype.h
deleted file mode 100644 (file)
index 08a47e3..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/cputype.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_CPUTYPE_H__
-#define __UNICORE_CPUTYPE_H__
-
-#include <linux/stringify.h>
-
-#define CPUID_CPUID    0
-#define CPUID_CACHETYPE        1
-
-#define read_cpuid(reg)                                                        \
-       ({                                                              \
-               unsigned int __val;                                     \
-               asm("movc       %0, p0.c0, #" __stringify(reg)          \
-                   : "=r" (__val)                                      \
-                   :                                                   \
-                   : "cc");                                            \
-               __val;                                                  \
-       })
-
-#define uc32_cpuid             read_cpuid(CPUID_CPUID)
-#define uc32_cachetype         read_cpuid(CPUID_CACHETYPE)
-
-#endif
diff --git a/arch/unicore32/include/asm/delay.h b/arch/unicore32/include/asm/delay.h
deleted file mode 100644 (file)
index 934193e..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/delay.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * Delay routines, using a pre-computed "loops_per_second" value.
- */
-#ifndef __UNICORE_DELAY_H__
-#define __UNICORE_DELAY_H__
-
-#include <asm/param.h> /* HZ */
-
-extern void __delay(int loops);
-
-/*
- * This function intentionally does not exist; if you see references to
- * it, it means that you're calling udelay() with an out of range value.
- *
- * With currently imposed limits, this means that we support a max delay
- * of 2000us. Further limits: HZ<=1000 and bogomips<=3355
- */
-extern void __bad_udelay(void);
-
-/*
- * division by multiplication: you don't have to worry about
- * loss of precision.
- *
- * Use only for very small delays ( < 1 msec).  Should probably use a
- * lookup table, really, as the multiplications take much too long with
- * short delays.  This is a "reasonable" implementation, though (and the
- * first constant multiplications gets optimized away if the delay is
- * a constant)
- */
-extern void __udelay(unsigned long usecs);
-extern void __const_udelay(unsigned long);
-
-#define MAX_UDELAY_MS 2
-
-#define udelay(n)                                                      \
-       (__builtin_constant_p(n) ?                                      \
-         ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() :              \
-                       __const_udelay((n) * ((2199023U*HZ)>>11))) :    \
-         __udelay(n))
-
-#endif /* __UNICORE_DELAY_H__ */
-
diff --git a/arch/unicore32/include/asm/dma.h b/arch/unicore32/include/asm/dma.h
deleted file mode 100644 (file)
index 1326310..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/dma.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_DMA_H__
-#define __UNICORE_DMA_H__
-
-#include <asm/memory.h>
-#include <asm-generic/dma.h>
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#endif
-
-#endif /* __UNICORE_DMA_H__ */
diff --git a/arch/unicore32/include/asm/elf.h b/arch/unicore32/include/asm/elf.h
deleted file mode 100644 (file)
index a464ed5..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/elf.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_ELF_H__
-#define __UNICORE_ELF_H__
-
-#include <asm/hwcap.h>
-
-/*
- * ELF register definitions..
- */
-#include <asm/ptrace.h>
-#include <linux/elf-em.h>
-
-typedef unsigned long elf_greg_t;
-typedef unsigned long elf_freg_t[3];
-
-#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct fp_state elf_fpregset_t;
-
-#define R_UNICORE_NONE         0
-#define R_UNICORE_PC24         1
-#define R_UNICORE_ABS32                2
-#define R_UNICORE_CALL         28
-#define R_UNICORE_JUMP24       29
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS      ELFCLASS32
-#define ELF_DATA       ELFDATA2LSB
-#define ELF_ARCH       EM_UNICORE
-
-/*
- * This yields a string that ld.so will use to load implementation
- * specific libraries for optimization.  This is more specific in
- * intent than poking at uname or /proc/cpuinfo.
- *
- */
-#define ELF_PLATFORM_SIZE 8
-#define ELF_PLATFORM   (elf_platform)
-
-extern char elf_platform[];
-
-struct elf32_hdr;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-extern int elf_check_arch(const struct elf32_hdr *);
-#define elf_check_arch elf_check_arch
-
-struct task_struct;
-int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
-#define ELF_CORE_COPY_TASK_REGS dump_task_regs
-
-#define ELF_EXEC_PAGESIZE      4096
-
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE        (2 * TASK_SIZE / 3)
-
-/* When the program starts, a1 contains a pointer to a function to be
-   registered with atexit, as per the SVR4 ABI.  A value of 0 means we
-   have no such handler.  */
-#define ELF_PLAT_INIT(_r, load_addr)   {(_r)->UCreg_00 = 0; }
-
-extern void elf_set_personality(const struct elf32_hdr *);
-#define SET_PERSONALITY(ex)    elf_set_personality(&(ex))
-
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
-extern int vectors_user_mapping(void);
-#define arch_setup_additional_pages(bprm, uses_interp) vectors_user_mapping()
-#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
-
-#endif
diff --git a/arch/unicore32/include/asm/fpstate.h b/arch/unicore32/include/asm/fpstate.h
deleted file mode 100644 (file)
index 5811293..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/fpstate.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_FPSTATE_H__
-#define __UNICORE_FPSTATE_H__
-
-#ifndef __ASSEMBLY__
-
-#define FP_REGS_NUMBER         33
-
-struct fp_state {
-       unsigned int regs[FP_REGS_NUMBER];
-} __attribute__((aligned(8)));
-
-#endif
-
-#endif
diff --git a/arch/unicore32/include/asm/fpu-ucf64.h b/arch/unicore32/include/asm/fpu-ucf64.h
deleted file mode 100644 (file)
index 7a0c8a9..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/fpu-ucf64.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-#define FPSCR                  s31
-
-/* FPSCR bits */
-#define FPSCR_DEFAULT_NAN      (1<<25)
-
-#define FPSCR_CMPINSTR_BIT     (1<<31)
-
-#define FPSCR_CON              (1<<29)
-#define FPSCR_TRAP             (1<<27)
-
-/* RND mode */
-#define FPSCR_ROUND_NEAREST    (0<<0)
-#define FPSCR_ROUND_PLUSINF    (2<<0)
-#define FPSCR_ROUND_MINUSINF   (3<<0)
-#define FPSCR_ROUND_TOZERO     (1<<0)
-#define FPSCR_RMODE_BIT                (0)
-#define FPSCR_RMODE_MASK       (7 << FPSCR_RMODE_BIT)
-
-/* trap enable */
-#define FPSCR_IOE              (1<<16)
-#define FPSCR_OFE              (1<<14)
-#define FPSCR_UFE              (1<<13)
-#define FPSCR_IXE              (1<<12)
-#define FPSCR_HIE              (1<<11)
-#define FPSCR_NDE              (1<<10) /* non denormal */
-
-/* flags */
-#define FPSCR_IDC              (1<<24)
-#define FPSCR_HIC              (1<<23)
-#define FPSCR_IXC              (1<<22)
-#define FPSCR_OFC              (1<<21)
-#define FPSCR_UFC              (1<<20)
-#define FPSCR_IOC              (1<<19)
-
-/* sticky bits */
-#define FPSCR_IOS              (1<<9)
-#define FPSCR_OFS              (1<<7)
-#define FPSCR_UFS              (1<<6)
-#define FPSCR_IXS              (1<<5)
-#define FPSCR_HIS              (1<<4)
-#define FPSCR_NDS              (1<<3)  /* non denormal */
diff --git a/arch/unicore32/include/asm/gpio.h b/arch/unicore32/include/asm/gpio.h
deleted file mode 100644 (file)
index dfad04c..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/gpio.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_GPIO_H__
-#define __UNICORE_GPIO_H__
-
-#include <linux/io.h>
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <asm-generic/gpio.h>
-
-#define GPI_OTP_INT             0
-#define GPI_PCI_INTA            1
-#define GPI_PCI_INTB            2
-#define GPI_PCI_INTC            3
-#define GPI_PCI_INTD            4
-#define GPI_BAT_DET             5
-#define GPI_SD_CD               6
-#define GPI_SOFF_REQ            7
-#define GPI_SD_WP               8
-#define GPI_LCD_CASE_OFF        9
-#define GPO_WIFI_EN             10
-#define GPO_HDD_LED             11
-#define GPO_VGA_EN              12
-#define GPO_LCD_EN              13
-#define GPO_LED_DATA            14
-#define GPO_LED_CLK             15
-#define GPO_CAM_PWR_EN          16
-#define GPO_LCD_VCC_EN          17
-#define GPO_SOFT_OFF            18
-#define GPO_BT_EN               19
-#define GPO_FAN_ON              20
-#define GPO_SPKR                21
-#define GPO_SET_V1              23
-#define GPO_SET_V2              24
-#define GPO_CPU_HEALTH          25
-#define GPO_LAN_SEL             26
-
-#ifdef CONFIG_PUV3_NB0916
-#define GPI_BTN_TOUCH          14
-#define GPIO_IN                        0x000043ff /* 1 for input */
-#define GPIO_OUT               0x0fffbc00 /* 1 for output */
-#endif /* CONFIG_PUV3_NB0916 */
-
-#ifdef CONFIG_PUV3_SMW0919
-#define GPIO_IN                        0x000003ff /* 1 for input */
-#define GPIO_OUT               0x0ffffc00 /* 1 for output */
-#endif  /* CONFIG_PUV3_SMW0919 */
-
-#ifdef CONFIG_PUV3_DB0913
-#define GPIO_IN                        0x000001df /* 1 for input */
-#define GPIO_OUT               0x03fee800 /* 1 for output */
-#endif  /* CONFIG_PUV3_DB0913 */
-
-#define GPIO_DIR                (~((GPIO_IN) | 0xf0000000))
-                               /* 0 input, 1 output */
-
-static inline int gpio_get_value(unsigned gpio)
-{
-       if (__builtin_constant_p(gpio) && (gpio <= GPIO_MAX))
-               return readl(GPIO_GPLR) & GPIO_GPIO(gpio);
-       else
-               return __gpio_get_value(gpio);
-}
-
-static inline void gpio_set_value(unsigned gpio, int value)
-{
-       if (__builtin_constant_p(gpio) && (gpio <= GPIO_MAX))
-               if (value)
-                       writel(GPIO_GPIO(gpio), GPIO_GPSR);
-               else
-                       writel(GPIO_GPIO(gpio), GPIO_GPCR);
-       else
-               __gpio_set_value(gpio, value);
-}
-
-#define gpio_cansleep  __gpio_cansleep
-
-static inline unsigned gpio_to_irq(unsigned gpio)
-{
-       if ((gpio < IRQ_GPIOHIGH) && (FIELD(1, 1, gpio) & readl(GPIO_GPIR)))
-               return IRQ_GPIOLOW0 + gpio;
-       else
-               return IRQ_GPIO0 + gpio;
-}
-
-static inline unsigned irq_to_gpio(unsigned irq)
-{
-       if (irq < IRQ_GPIOHIGH)
-               return irq - IRQ_GPIOLOW0;
-       else
-               return irq - IRQ_GPIO0;
-}
-
-#endif /* __UNICORE_GPIO_H__ */
diff --git a/arch/unicore32/include/asm/hwcap.h b/arch/unicore32/include/asm/hwcap.h
deleted file mode 100644 (file)
index 2e15ffb..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/hwcap.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_HWCAP_H__
-#define __UNICORE_HWCAP_H__
-
-/*
- * HWCAP flags
- */
-#define HWCAP_MSP              1
-#define HWCAP_UNICORE16                2
-#define HWCAP_CMOV             4
-#define HWCAP_UNICORE_F64       8
-#define HWCAP_TLS              0x80
-
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
-/*
- * This yields a mask that user programs can use to figure out what
- * instruction set this cpu supports.
- */
-#define ELF_HWCAP              (HWCAP_CMOV | HWCAP_UNICORE_F64)
-#endif
-
-#endif
diff --git a/arch/unicore32/include/asm/hwdef-copro.h b/arch/unicore32/include/asm/hwdef-copro.h
deleted file mode 100644 (file)
index 2db8cf8..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Co-processor register definitions for PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2012 GUAN Xue-tao
- */
-#ifndef __UNICORE_HWDEF_COPRO_H__
-#define __UNICORE_HWDEF_COPRO_H__
-
-/*
- * Control Register bits (CP#0 CR1)
- */
-#define CR_M   (1 << 0)        /* MMU enable                           */
-#define CR_A   (1 << 1)        /* Alignment abort enable               */
-#define CR_D   (1 << 2)        /* Dcache enable                        */
-#define CR_I   (1 << 3)        /* Icache enable                        */
-#define CR_B   (1 << 4)        /* Dcache write mechanism: write back   */
-#define CR_T   (1 << 5)        /* Burst enable                         */
-#define CR_V   (1 << 13)       /* Vectors relocated to 0xffff0000      */
-
-#ifndef __ASSEMBLY__
-
-#define vectors_high()         (cr_alignment & CR_V)
-
-extern unsigned long cr_no_alignment;  /* defined in entry.S */
-extern unsigned long cr_alignment;     /* defined in entry.S */
-
-static inline unsigned int get_cr(void)
-{
-       unsigned int val;
-       asm("movc %0, p0.c1, #0" : "=r" (val) : : "cc");
-       return val;
-}
-
-static inline void set_cr(unsigned int val)
-{
-       asm volatile("movc p0.c1, %0, #0" : : "r" (val) : "cc");
-       isb();
-}
-
-extern void adjust_cr(unsigned long mask, unsigned long set);
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* __UNICORE_HWDEF_COPRO_H__ */
diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h
deleted file mode 100644 (file)
index bd4e7c3..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/io.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_IO_H__
-#define __UNICORE_IO_H__
-
-#ifdef __KERNEL__
-
-#include <asm/byteorder.h>
-#include <asm/memory.h>
-
-#define PCI_IOBASE     PKUNITY_PCILIO_BASE
-#include <asm-generic/io.h>
-
-/*
- * __uc32_ioremap takes CPU physical address.
- */
-extern void __iomem *__uc32_ioremap(unsigned long, size_t);
-extern void __uc32_iounmap(volatile void __iomem *addr);
-
-/*
- * ioremap and friends.
- *
- * ioremap takes a PCI memory address, as specified in
- * Documentation/driver-api/io-mapping.rst.
- *
- */
-#define ioremap(cookie, size)          __uc32_ioremap(cookie, size)
-#define iounmap(cookie)                        __uc32_iounmap(cookie)
-
-#define readb_relaxed readb
-#define readw_relaxed readw
-#define readl_relaxed readl
-
-#define HAVE_ARCH_PIO_SIZE
-#define PIO_OFFSET             (unsigned int)(PCI_IOBASE)
-#define PIO_MASK               (unsigned int)(IO_SPACE_LIMIT)
-#define PIO_RESERVED           (PIO_OFFSET + PIO_MASK + 1)
-
-#ifdef CONFIG_STRICT_DEVMEM
-
-#include <linux/ioport.h>
-#include <linux/mm.h>
-
-/*
- * devmem_is_allowed() checks to see if /dev/mem access to a certain
- * address is valid. The argument is a physical page number.
- * We mimic x86 here by disallowing access to system RAM as well as
- * device-exclusive MMIO regions. This effectively disables read()/write()
- * on /dev/mem.
- */
-static inline int devmem_is_allowed(unsigned long pfn)
-{
-       if (iomem_is_exclusive(pfn << PAGE_SHIFT))
-               return 0;
-       if (!page_is_ram(pfn))
-               return 1;
-       return 0;
-}
-
-#endif /* CONFIG_STRICT_DEVMEM */
-
-#endif /* __KERNEL__ */
-#endif /* __UNICORE_IO_H__ */
diff --git a/arch/unicore32/include/asm/irq.h b/arch/unicore32/include/asm/irq.h
deleted file mode 100644 (file)
index 3f7f07c..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/irq.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_IRQ_H__
-#define __UNICORE_IRQ_H__
-
-#include <asm-generic/irq.h>
-
-#define        IRQ_GPIOLOW0            0x00
-#define        IRQ_GPIOLOW1            0x01
-#define        IRQ_GPIOLOW2            0x02
-#define        IRQ_GPIOLOW3            0x03
-#define        IRQ_GPIOLOW4            0x04
-#define        IRQ_GPIOLOW5            0x05
-#define        IRQ_GPIOLOW6            0x06
-#define        IRQ_GPIOLOW7            0x07
-#define IRQ_GPIOHIGH           0x08
-#define IRQ_USB                        0x09
-#define IRQ_SDC                        0x0a
-#define IRQ_AC97               0x0b
-#define IRQ_SATA               0x0c
-#define IRQ_MME                        0x0d
-#define IRQ_PCI_BRIDGE         0x0e
-#define        IRQ_DDR                 0x0f
-#define        IRQ_SPI                 0x10
-#define        IRQ_UNIGFX              0x11
-#define        IRQ_I2C                 0x11
-#define        IRQ_UART1               0x12
-#define        IRQ_UART0               0x13
-#define IRQ_UMAL               0x14
-#define IRQ_NAND               0x15
-#define IRQ_PS2_KBD            0x16
-#define IRQ_PS2_AUX            0x17
-#define IRQ_DMA                        0x18
-#define IRQ_DMAERR             0x19
-#define        IRQ_TIMER0              0x1a
-#define        IRQ_TIMER1              0x1b
-#define        IRQ_TIMER2              0x1c
-#define        IRQ_TIMER3              0x1d
-#define        IRQ_RTC                 0x1e
-#define        IRQ_RTCAlarm            0x1f
-
-#define        IRQ_GPIO0               0x20
-#define        IRQ_GPIO1               0x21
-#define        IRQ_GPIO2               0x22
-#define        IRQ_GPIO3               0x23
-#define        IRQ_GPIO4               0x24
-#define        IRQ_GPIO5               0x25
-#define        IRQ_GPIO6               0x26
-#define        IRQ_GPIO7               0x27
-#define IRQ_GPIO8              0x28
-#define IRQ_GPIO9              0x29
-#define IRQ_GPIO10             0x2a
-#define IRQ_GPIO11             0x2b
-#define IRQ_GPIO12             0x2c
-#define IRQ_GPIO13             0x2d
-#define IRQ_GPIO14             0x2e
-#define IRQ_GPIO15             0x2f
-#define IRQ_GPIO16             0x30
-#define IRQ_GPIO17             0x31
-#define IRQ_GPIO18             0x32
-#define IRQ_GPIO19             0x33
-#define IRQ_GPIO20             0x34
-#define IRQ_GPIO21             0x35
-#define IRQ_GPIO22             0x36
-#define IRQ_GPIO23             0x37
-#define IRQ_GPIO24             0x38
-#define IRQ_GPIO25             0x39
-#define IRQ_GPIO26             0x3a
-#define IRQ_GPIO27             0x3b
-
-#ifdef CONFIG_ARCH_FPGA
-#define IRQ_PCIINTA             IRQ_GPIOLOW2
-#define IRQ_PCIINTB             IRQ_GPIOLOW1
-#define IRQ_PCIINTC             IRQ_GPIOLOW0
-#define IRQ_PCIINTD             IRQ_GPIOLOW6
-#endif
-
-#if defined(CONFIG_PUV3_DB0913) || defined(CONFIG_PUV3_NB0916) \
-       || defined(CONFIG_PUV3_SMW0919)
-#define IRQ_PCIINTA             IRQ_GPIOLOW1
-#define IRQ_PCIINTB             IRQ_GPIOLOW2
-#define IRQ_PCIINTC             IRQ_GPIOLOW3
-#define IRQ_PCIINTD             IRQ_GPIOLOW4
-#endif
-
-#define IRQ_SD_CD               IRQ_GPIO6 /* falling or rising trigger */
-
-#ifndef __ASSEMBLY__
-struct pt_regs;
-
-extern void asm_do_IRQ(unsigned int, struct pt_regs *);
-
-#endif
-
-#endif
-
diff --git a/arch/unicore32/include/asm/irqflags.h b/arch/unicore32/include/asm/irqflags.h
deleted file mode 100644 (file)
index f64c82e..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/irqflags.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_IRQFLAGS_H__
-#define __UNICORE_IRQFLAGS_H__
-
-#ifdef __KERNEL__
-
-#include <asm/ptrace.h>
-
-#define ARCH_IRQ_DISABLED      (PRIV_MODE | PSR_I_BIT)
-#define ARCH_IRQ_ENABLED       (PRIV_MODE)
-
-/*
- * Save the current interrupt enable state.
- */
-static inline unsigned long arch_local_save_flags(void)
-{
-       unsigned long temp;
-
-       asm volatile("mov %0, asr" : "=r" (temp) : : "memory", "cc");
-
-       return temp & PSR_c;
-}
-
-/*
- * restore saved IRQ state
- */
-static inline void arch_local_irq_restore(unsigned long flags)
-{
-       unsigned long temp;
-
-       asm volatile(
-               "mov    %0, asr\n"
-               "mov.a  asr, %1\n"
-               "mov.f  asr, %0"
-               : "=&r" (temp)
-               : "r" (flags)
-               : "memory", "cc");
-}
-
-#include <asm-generic/irqflags.h>
-
-#endif
-#endif
diff --git a/arch/unicore32/include/asm/linkage.h b/arch/unicore32/include/asm/linkage.h
deleted file mode 100644 (file)
index 8e341ba..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/linkage.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_LINKAGE_H__
-#define __UNICORE_LINKAGE_H__
-
-#define __ALIGN .align 0
-#define __ALIGN_STR ".align 0"
-
-#define ENDPROC(name) \
-       .type name, %function; \
-       END(name)
-
-#endif
diff --git a/arch/unicore32/include/asm/memblock.h b/arch/unicore32/include/asm/memblock.h
deleted file mode 100644 (file)
index eb56a6d..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/memblock.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_MEMBLOCK_H__
-#define __UNICORE_MEMBLOCK_H__
-
-/*
- * Memory map description
- */
-# define NR_BANKS 8
-
-struct membank {
-       unsigned long start;
-       unsigned long size;
-       unsigned int highmem;
-};
-
-struct meminfo {
-       int nr_banks;
-       struct membank bank[NR_BANKS];
-};
-
-extern struct meminfo meminfo;
-
-#define for_each_bank(iter, mi)                                \
-       for (iter = 0; iter < (mi)->nr_banks; iter++)
-
-#define bank_pfn_start(bank)   __phys_to_pfn((bank)->start)
-#define bank_pfn_end(bank)     __phys_to_pfn((bank)->start + (bank)->size)
-#define bank_pfn_size(bank)    ((bank)->size >> PAGE_SHIFT)
-#define bank_phys_start(bank)  ((bank)->start)
-#define bank_phys_end(bank)    ((bank)->start + (bank)->size)
-#define bank_phys_size(bank)   ((bank)->size)
-
-extern void uc32_memblock_init(struct meminfo *);
-
-#endif
diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h
deleted file mode 100644 (file)
index 6628517..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/memory.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  Note: this file should not be included by non-asm/.h files
- */
-#ifndef __UNICORE_MEMORY_H__
-#define __UNICORE_MEMORY_H__
-
-#include <linux/compiler.h>
-#include <linux/const.h>
-#include <linux/sizes.h>
-#include <mach/memory.h>
-
-/*
- * PAGE_OFFSET - the virtual address of the start of the kernel image
- * TASK_SIZE - the maximum size of a user space task.
- * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
- */
-#define PAGE_OFFSET            UL(0xC0000000)
-#define TASK_SIZE              (PAGE_OFFSET - UL(0x41000000))
-#define TASK_UNMAPPED_BASE     (PAGE_OFFSET / 3)
-
-/*
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 32MB of the kernel text.
- */
-#define MODULES_VADDR          (PAGE_OFFSET - 16*1024*1024)
-#if TASK_SIZE > MODULES_VADDR
-#error Top of user space clashes with start of module space
-#endif
-
-#define MODULES_END            (PAGE_OFFSET)
-
-/*
- * Allow 16MB-aligned ioremap pages
- */
-#define IOREMAP_MAX_ORDER      24
-
-/*
- * Physical vs virtual RAM address space conversion.  These are
- * private definitions which should NOT be used outside memory.h
- * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
- */
-#ifndef __virt_to_phys
-#define __virt_to_phys(x)      ((x) - PAGE_OFFSET + PHYS_OFFSET)
-#define __phys_to_virt(x)      ((x) - PHYS_OFFSET + PAGE_OFFSET)
-#endif
-
-/*
- * Convert a page to/from a physical address
- */
-#define page_to_phys(page)     (__pfn_to_phys(page_to_pfn(page)))
-#define phys_to_page(phys)     (pfn_to_page(__phys_to_pfn(phys)))
-
-#ifndef __ASSEMBLY__
-
-#ifndef arch_adjust_zones
-#define arch_adjust_zones(max_zone_pfn) do { } while (0)
-#endif
-
-/*
- * PFNs are used to describe any physical page; this means
- * PFN 0 == physical address 0.
- *
- * This is the PFN of the first RAM page in the kernel
- * direct-mapped view.  We assume this is the first page
- * of RAM in the mem_map as well.
- */
-#define PHYS_PFN_OFFSET        (PHYS_OFFSET >> PAGE_SHIFT)
-
-/*
- * Drivers should NOT use these either.
- */
-#define __pa(x)                        __virt_to_phys((unsigned long)(x))
-#define __va(x)                        ((void *)__phys_to_virt((unsigned long)(x)))
-#define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
-
-/*
- * Conversion between a struct page and a physical address.
- *
- *  page_to_pfn(page)  convert a struct page * to a PFN number
- *  pfn_to_page(pfn)   convert a _valid_ PFN number to struct page *
- *
- *  virt_to_page(k)    convert a _valid_ virtual address to struct page *
- *  virt_addr_valid(k) indicates whether a virtual address is valid
- */
-#define ARCH_PFN_OFFSET                PHYS_PFN_OFFSET
-
-#define virt_to_page(kaddr)    pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr) ((unsigned long)(kaddr) >= PAGE_OFFSET && \
-               (unsigned long)(kaddr) < (unsigned long)high_memory)
-
-#endif
-
-#include <asm-generic/memory_model.h>
-
-#endif
diff --git a/arch/unicore32/include/asm/mmu.h b/arch/unicore32/include/asm/mmu.h
deleted file mode 100644 (file)
index 8ad4e7e..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/mmu.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_MMU_H__
-#define __UNICORE_MMU_H__
-
-typedef        unsigned long mm_context_t;
-
-#endif
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
deleted file mode 100644 (file)
index 388c0c8..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/mmu_context.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_MMU_CONTEXT_H__
-#define __UNICORE_MMU_CONTEXT_H__
-
-#include <linux/compiler.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmacache.h>
-#include <linux/io.h>
-
-#include <asm/cacheflush.h>
-#include <asm/cpu-single.h>
-
-#define init_new_context(tsk, mm)      0
-
-#define destroy_context(mm)            do { } while (0)
-
-/*
- * This is called when "tsk" is about to enter lazy TLB mode.
- *
- * mm:  describes the currently active mm context
- * tsk: task which is entering lazy tlb
- * cpu: cpu number which is entering lazy tlb
- *
- * tsk->mm will be NULL
- */
-static inline void
-enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
-/*
- * This is the actual mm switch as far as the scheduler
- * is concerned.  No registers are touched.  We avoid
- * calling the CPU specific function when the mm hasn't
- * actually changed.
- */
-static inline void
-switch_mm(struct mm_struct *prev, struct mm_struct *next,
-         struct task_struct *tsk)
-{
-       unsigned int cpu = smp_processor_id();
-
-       if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
-               cpu_switch_mm(next->pgd, next);
-}
-
-#define deactivate_mm(tsk, mm) do { } while (0)
-#define activate_mm(prev, next)        switch_mm(prev, next, NULL)
-
-/*
- * We are inserting a "fake" vma for the user-accessible vector page so
- * gdb and friends can get to it through ptrace and /proc/<pid>/mem.
- * But we also want to remove it before the generic code gets to see it
- * during process exit or the unmapping of it would cause total havoc.
- * (the macro is used as remove_vma() is static to mm/mmap.c)
- */
-#define arch_exit_mmap(mm) \
-do { \
-       struct vm_area_struct *high_vma = find_vma(mm, 0xffff0000); \
-       if (high_vma) { \
-               BUG_ON(high_vma->vm_next);  /* it should be last */ \
-               if (high_vma->vm_prev) \
-                       high_vma->vm_prev->vm_next = NULL; \
-               else \
-                       mm->mmap = NULL; \
-               rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
-               vmacache_invalidate(mm); \
-               mm->map_count--; \
-               remove_vma(high_vma); \
-       } \
-} while (0)
-
-static inline int arch_dup_mmap(struct mm_struct *oldmm,
-                               struct mm_struct *mm)
-{
-       return 0;
-}
-
-static inline void arch_unmap(struct mm_struct *mm,
-                       unsigned long start, unsigned long end)
-{
-}
-
-static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-               bool write, bool execute, bool foreign)
-{
-       /* by default, allow everything */
-       return true;
-}
-#endif
diff --git a/arch/unicore32/include/asm/page.h b/arch/unicore32/include/asm/page.h
deleted file mode 100644 (file)
index 96d6bdf..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/page.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_PAGE_H__
-#define __UNICORE_PAGE_H__
-
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT             12
-#define PAGE_SIZE              (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK              (~(PAGE_SIZE-1))
-
-#ifndef __ASSEMBLY__
-
-struct page;
-struct vm_area_struct;
-
-#define clear_page(page)       memset((void *)(page), 0, PAGE_SIZE)
-extern void copy_page(void *to, const void *from);
-
-#define clear_user_page(page, vaddr, pg)       clear_page(page)
-#define copy_user_page(to, from, vaddr, pg)    copy_page(to, from)
-
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pgd; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-
-#define pte_val(x)      ((x).pte)
-#define pgd_val(x)     ((x).pgd)
-#define pgprot_val(x)   ((x).pgprot)
-
-#define __pte(x)        ((pte_t) { (x) })
-#define __pgd(x)       ((pgd_t) { (x) })
-#define __pgprot(x)     ((pgprot_t) { (x) })
-
-#else
-/*
- * .. while these make it easier on the compiler
- */
-typedef unsigned long pte_t;
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
-
-#define pte_val(x)      (x)
-#define pgd_val(x)      (x)
-#define pgprot_val(x)   (x)
-
-#define __pte(x)        (x)
-#define __pgd(x)       (x)
-#define __pgprot(x)     (x)
-
-#endif /* STRICT_MM_TYPECHECKS */
-
-typedef struct page *pgtable_t;
-
-extern int pfn_valid(unsigned long);
-
-#include <asm/memory.h>
-
-#endif /* !__ASSEMBLY__ */
-
-#include <asm-generic/getorder.h>
-
-#endif
diff --git a/arch/unicore32/include/asm/pci.h b/arch/unicore32/include/asm/pci.h
deleted file mode 100644 (file)
index 3efa8ee..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/pci.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_PCI_H__
-#define __UNICORE_PCI_H__
-
-#ifdef __KERNEL__
-#include <asm-generic/pci.h>
-#include <mach/hardware.h> /* for PCIBIOS_MIN_* */
-
-#define HAVE_PCI_MMAP
-#define ARCH_GENERIC_PCI_MMAP_RESOURCE
-
-#endif /* __KERNEL__ */
-#endif
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
deleted file mode 100644 (file)
index ba1c9a7..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/pgalloc.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_PGALLOC_H__
-#define __UNICORE_PGALLOC_H__
-
-#include <asm/pgtable-hwdef.h>
-#include <asm/processor.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
-#define __HAVE_ARCH_PTE_ALLOC_ONE
-#include <asm-generic/pgalloc.h>
-
-#define _PAGE_USER_TABLE       (PMD_TYPE_TABLE | PMD_PRESENT)
-#define _PAGE_KERNEL_TABLE     (PMD_TYPE_TABLE | PMD_PRESENT)
-
-extern pgd_t *get_pgd_slow(struct mm_struct *mm);
-extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
-
-#define pgd_alloc(mm)                  get_pgd_slow(mm)
-#define pgd_free(mm, pgd)              free_pgd_slow(mm, pgd)
-
-/*
- * Allocate one PTE table.
- */
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm)
-{
-       pte_t *pte = __pte_alloc_one_kernel(mm);
-
-       if (pte)
-               clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t));
-
-       return pte;
-}
-
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
-       struct page *pte;
-
-       pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
-       if (!pte)
-               return NULL;
-       if (!PageHighMem(pte))
-               clean_pte_table(page_address(pte));
-       return pte;
-}
-
-static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval)
-{
-       set_pmd(pmdp, __pmd(pmdval));
-       flush_pmd_entry(pmdp);
-}
-
-/*
- * Populate the pmdp entry with a pointer to the pte.  This pmd is part
- * of the mm address space.
- */
-static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
-{
-       unsigned long pte_ptr = (unsigned long)ptep;
-
-       /*
-        * The pmd must be loaded with the physical
-        * address of the PTE table
-        */
-       __pmd_populate(pmdp, __pa(pte_ptr) | _PAGE_KERNEL_TABLE);
-}
-
-static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
-{
-       __pmd_populate(pmdp,
-                       page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE);
-}
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
-#endif
diff --git a/arch/unicore32/include/asm/pgtable-hwdef.h b/arch/unicore32/include/asm/pgtable-hwdef.h
deleted file mode 100644 (file)
index f28b58c..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/pgtable-hwdef.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_PGTABLE_HWDEF_H__
-#define __UNICORE_PGTABLE_HWDEF_H__
-
-/*
- * Hardware page table definitions.
- *
- * + Level 1 descriptor (PMD)
- *   - common
- */
-#define PMD_TYPE_MASK          (3 << 0)
-#define PMD_TYPE_TABLE         (0 << 0)
-/*#define PMD_TYPE_LARGE       (1 << 0) */
-#define PMD_TYPE_INVALID       (2 << 0)
-#define PMD_TYPE_SECT          (3 << 0)
-
-#define PMD_PRESENT            (1 << 2)
-#define PMD_YOUNG              (1 << 3)
-
-/*#define PMD_SECT_DIRTY       (1 << 4) */
-#define PMD_SECT_CACHEABLE     (1 << 5)
-#define PMD_SECT_EXEC          (1 << 6)
-#define PMD_SECT_WRITE         (1 << 7)
-#define PMD_SECT_READ          (1 << 8)
-
-/*
- * + Level 2 descriptor (PTE)
- *   - common
- */
-#define PTE_TYPE_MASK          (3 << 0)
-#define PTE_TYPE_SMALL         (0 << 0)
-#define PTE_TYPE_MIDDLE                (1 << 0)
-#define PTE_TYPE_LARGE         (2 << 0)
-#define PTE_TYPE_INVALID       (3 << 0)
-
-#define PTE_PRESENT            (1 << 2)
-#define PTE_YOUNG              (1 << 3)
-#define PTE_DIRTY              (1 << 4)
-#define PTE_CACHEABLE          (1 << 5)
-#define PTE_EXEC               (1 << 6)
-#define PTE_WRITE              (1 << 7)
-#define PTE_READ               (1 << 8)
-
-#endif
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
deleted file mode 100644 (file)
index 97f564c..0000000
+++ /dev/null
@@ -1,267 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/pgtable.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_PGTABLE_H__
-#define __UNICORE_PGTABLE_H__
-
-#include <asm-generic/pgtable-nopmd.h>
-#include <asm/cpu-single.h>
-
-#include <asm/memory.h>
-#include <asm/pgtable-hwdef.h>
-
-/*
- * Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts.  That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- *
- * Note that platforms may override VMALLOC_START, but they must provide
- * VMALLOC_END.  VMALLOC_END defines the (exclusive) limit of this space,
- * which may not overlap IO space.
- */
-#ifndef VMALLOC_START
-#define VMALLOC_OFFSET         SZ_8M
-#define VMALLOC_START          (((unsigned long)high_memory + VMALLOC_OFFSET) \
-                                       & ~(VMALLOC_OFFSET-1))
-#define VMALLOC_END            (0xff000000UL)
-#endif
-
-#define PTRS_PER_PTE           1024
-#define PTRS_PER_PGD           1024
-
-/*
- * PGDIR_SHIFT determines what a third-level page table entry can map
- */
-#define PGDIR_SHIFT            22
-
-#ifndef __ASSEMBLY__
-extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pgd_error(const char *file, int line, unsigned long val);
-
-#define pte_ERROR(pte)         __pte_error(__FILE__, __LINE__, pte_val(pte))
-#define pgd_ERROR(pgd)         __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
-#endif /* !__ASSEMBLY__ */
-
-#define PGDIR_SIZE             (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK             (~(PGDIR_SIZE-1))
-
-/*
- * This is the lowest virtual address we can permit any user space
- * mapping to be mapped at.  This is particularly important for
- * non-high vector CPUs.
- */
-#define FIRST_USER_ADDRESS     PAGE_SIZE
-
-#define FIRST_USER_PGD_NR      1
-#define USER_PTRS_PER_PGD      ((TASK_SIZE/PGDIR_SIZE) - FIRST_USER_PGD_NR)
-
-/*
- * section address mask and size definitions.
- */
-#define SECTION_SHIFT          22
-#define SECTION_SIZE           (1UL << SECTION_SHIFT)
-#define SECTION_MASK           (~(SECTION_SIZE-1))
-
-#ifndef __ASSEMBLY__
-
-/*
- * The pgprot_* and protection_map entries will be fixed up in runtime
- * to include the cachable bits based on memory policy, as well as any
- * architecture dependent bits.
- */
-#define _PTE_DEFAULT           (PTE_PRESENT | PTE_YOUNG | PTE_CACHEABLE)
-
-extern pgprot_t pgprot_user;
-extern pgprot_t pgprot_kernel;
-
-#define PAGE_NONE              pgprot_user
-#define PAGE_SHARED            __pgprot(pgprot_val(pgprot_user | PTE_READ \
-                                                               | PTE_WRITE))
-#define PAGE_SHARED_EXEC       __pgprot(pgprot_val(pgprot_user | PTE_READ \
-                                                               | PTE_WRITE \
-                                                               | PTE_EXEC))
-#define PAGE_COPY              __pgprot(pgprot_val(pgprot_user | PTE_READ)
-#define PAGE_COPY_EXEC         __pgprot(pgprot_val(pgprot_user | PTE_READ \
-                                                               | PTE_EXEC))
-#define PAGE_READONLY          __pgprot(pgprot_val(pgprot_user | PTE_READ))
-#define PAGE_READONLY_EXEC     __pgprot(pgprot_val(pgprot_user | PTE_READ \
-                                                               | PTE_EXEC))
-#define PAGE_KERNEL            pgprot_kernel
-#define PAGE_KERNEL_EXEC       __pgprot(pgprot_val(pgprot_kernel | PTE_EXEC))
-
-#define __PAGE_NONE            __pgprot(_PTE_DEFAULT)
-#define __PAGE_SHARED          __pgprot(_PTE_DEFAULT | PTE_READ \
-                                                       | PTE_WRITE)
-#define __PAGE_SHARED_EXEC     __pgprot(_PTE_DEFAULT | PTE_READ \
-                                                       | PTE_WRITE \
-                                                       | PTE_EXEC)
-#define __PAGE_COPY            __pgprot(_PTE_DEFAULT | PTE_READ)
-#define __PAGE_COPY_EXEC       __pgprot(_PTE_DEFAULT | PTE_READ \
-                                                       | PTE_EXEC)
-#define __PAGE_READONLY                __pgprot(_PTE_DEFAULT | PTE_READ)
-#define __PAGE_READONLY_EXEC   __pgprot(_PTE_DEFAULT | PTE_READ \
-                                                       | PTE_EXEC)
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * The table below defines the page protection levels that we insert into our
- * Linux page table version.  These get translated into the best that the
- * architecture can perform.  Note that on UniCore hardware:
- *  1) We cannot do execute protection
- *  2) If we could do execute protection, then read is implied
- *  3) write implies read permissions
- */
-#define __P000  __PAGE_NONE
-#define __P001  __PAGE_READONLY
-#define __P010  __PAGE_COPY
-#define __P011  __PAGE_COPY
-#define __P100  __PAGE_READONLY_EXEC
-#define __P101  __PAGE_READONLY_EXEC
-#define __P110  __PAGE_COPY_EXEC
-#define __P111  __PAGE_COPY_EXEC
-
-#define __S000  __PAGE_NONE
-#define __S001  __PAGE_READONLY
-#define __S010  __PAGE_SHARED
-#define __S011  __PAGE_SHARED
-#define __S100  __PAGE_READONLY_EXEC
-#define __S101  __PAGE_READONLY_EXEC
-#define __S110  __PAGE_SHARED_EXEC
-#define __S111  __PAGE_SHARED_EXEC
-
-#ifndef __ASSEMBLY__
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern struct page *empty_zero_page;
-#define ZERO_PAGE(vaddr)               (empty_zero_page)
-
-#define pte_pfn(pte)                   (pte_val(pte) >> PAGE_SHIFT)
-#define pfn_pte(pfn, prot)             (__pte(((pfn) << PAGE_SHIFT) \
-                                               | pgprot_val(prot)))
-
-#define pte_none(pte)                  (!pte_val(pte))
-#define pte_clear(mm, addr, ptep)      set_pte(ptep, __pte(0))
-#define pte_page(pte)                  (pfn_to_page(pte_pfn(pte)))
-
-#define set_pte(ptep, pte)     cpu_set_pte(ptep, pte)
-
-#define set_pte_at(mm, addr, ptep, pteval)     \
-       do {                                    \
-               set_pte(ptep, pteval);          \
-       } while (0)
-
-/*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-#define pte_present(pte)       (pte_val(pte) & PTE_PRESENT)
-#define pte_write(pte)         (pte_val(pte) & PTE_WRITE)
-#define pte_dirty(pte)         (pte_val(pte) & PTE_DIRTY)
-#define pte_young(pte)         (pte_val(pte) & PTE_YOUNG)
-#define pte_exec(pte)          (pte_val(pte) & PTE_EXEC)
-
-#define PTE_BIT_FUNC(fn, op) \
-static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
-
-PTE_BIT_FUNC(wrprotect, &= ~PTE_WRITE);
-PTE_BIT_FUNC(mkwrite,   |= PTE_WRITE);
-PTE_BIT_FUNC(mkclean,   &= ~PTE_DIRTY);
-PTE_BIT_FUNC(mkdirty,   |= PTE_DIRTY);
-PTE_BIT_FUNC(mkold,     &= ~PTE_YOUNG);
-PTE_BIT_FUNC(mkyoung,   |= PTE_YOUNG);
-
-/*
- * Mark the prot value as uncacheable.
- */
-#define pgprot_noncached(prot)         \
-       __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
-#define pgprot_writecombine(prot)      \
-       __pgprot(pgprot_val(prot) & ~PTE_CACHEABLE)
-
-#define pmd_none(pmd)          (!pmd_val(pmd))
-#define pmd_present(pmd)       (pmd_val(pmd) & PMD_PRESENT)
-#define pmd_bad(pmd)           (((pmd_val(pmd) &               \
-                               (PMD_PRESENT | PMD_TYPE_MASK))  \
-                               != (PMD_PRESENT | PMD_TYPE_TABLE)))
-
-#define set_pmd(pmdpd, pmdval)         \
-       do {                            \
-               *(pmdpd) = pmdval;      \
-       } while (0)
-
-#define pmd_clear(pmdp)                        \
-       do {                            \
-               set_pmd(pmdp, __pmd(0));\
-               clean_pmd_entry(pmdp);  \
-       } while (0)
-
-#define pmd_page_vaddr(pmd) ((pte_t *)__va(pmd_val(pmd) & PAGE_MASK))
-#define pmd_page(pmd)          pfn_to_page(__phys_to_pfn(pmd_val(pmd)))
-
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page, prot)     pfn_pte(page_to_pfn(page), prot)
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-       const unsigned long mask = PTE_EXEC | PTE_WRITE | PTE_READ;
-       pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
-       return pte;
-}
-
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-
-/*
- * Encode and decode a swap entry.  Swap entries are stored in the Linux
- * page tables as follows:
- *
- *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *   <--------------- offset --------------> <--- type --> 0 0 0 0 0
- *
- * This gives us up to 127 swap files and 32GB per swap file.  Note that
- * the offset field is always non-zero.
- */
-#define __SWP_TYPE_SHIFT       5
-#define __SWP_TYPE_BITS                7
-#define __SWP_TYPE_MASK                ((1 << __SWP_TYPE_BITS) - 1)
-#define __SWP_OFFSET_SHIFT     (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
-
-#define __swp_type(x)          (((x).val >> __SWP_TYPE_SHIFT)          \
-                               & __SWP_TYPE_MASK)
-#define __swp_offset(x)                ((x).val >> __SWP_OFFSET_SHIFT)
-#define __swp_entry(type, offset) ((swp_entry_t) {                     \
-                               ((type) << __SWP_TYPE_SHIFT) |          \
-                               ((offset) << __SWP_OFFSET_SHIFT) })
-
-#define __pte_to_swp_entry(pte)        ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(swp)        ((pte_t) { (swp).val })
-
-/*
- * It is an error for the kernel to have more swap files than we can
- * encode in the PTEs.  This ensures that we know when MAX_SWAPFILES
- * is increased beyond what we presently support.
- */
-#define MAX_SWAPFILES_CHECK()  \
-       BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
-
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-/* FIXME: this is not correct */
-#define kern_addr_valid(addr)  (1)
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __UNICORE_PGTABLE_H__ */
diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h
deleted file mode 100644 (file)
index 6f01620..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/processor.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_PROCESSOR_H__
-#define __UNICORE_PROCESSOR_H__
-
-#ifdef __KERNEL__
-
-#include <asm/ptrace.h>
-#include <asm/types.h>
-
-#ifdef __KERNEL__
-#define STACK_TOP      TASK_SIZE
-#define STACK_TOP_MAX  TASK_SIZE
-#endif
-
-struct debug_entry {
-       u32                     address;
-       u32                     insn;
-};
-
-struct debug_info {
-       int                     nsaved;
-       struct debug_entry      bp[2];
-};
-
-struct thread_struct {
-                                                       /* fault info     */
-       unsigned long           address;
-       unsigned long           trap_no;
-       unsigned long           error_code;
-                                                       /* debugging      */
-       struct debug_info       debug;
-};
-
-#define INIT_THREAD  { }
-
-#define start_thread(regs, pc, sp)                                     \
-({                                                                     \
-       unsigned long *stack = (unsigned long *)sp;                     \
-       memset(regs->uregs, 0, sizeof(regs->uregs));                    \
-       regs->UCreg_asr = USER_MODE;                                    \
-       regs->UCreg_pc = pc & ~1;       /* pc */                        \
-       regs->UCreg_sp = sp;            /* sp */                        \
-       regs->UCreg_02 = stack[2];      /* r2 (envp) */                 \
-       regs->UCreg_01 = stack[1];      /* r1 (argv) */                 \
-       regs->UCreg_00 = stack[0];      /* r0 (argc) */                 \
-})
-
-/* Forward declaration, a strange C thing */
-struct task_struct;
-
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
-unsigned long get_wchan(struct task_struct *p);
-
-#define cpu_relax()                    barrier()
-
-#define task_pt_regs(p) \
-       ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
-
-#define KSTK_EIP(tsk)  (task_pt_regs(tsk)->UCreg_pc)
-#define KSTK_ESP(tsk)  (task_pt_regs(tsk)->UCreg_sp)
-
-#endif
-
-#endif /* __UNICORE_PROCESSOR_H__ */
diff --git a/arch/unicore32/include/asm/ptrace.h b/arch/unicore32/include/asm/ptrace.h
deleted file mode 100644 (file)
index bb4cbc4..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/ptrace.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_PTRACE_H__
-#define __UNICORE_PTRACE_H__
-
-#include <uapi/asm/ptrace.h>
-
-#ifndef __ASSEMBLY__
-
-#define user_mode(regs)        \
-       (processor_mode(regs) == USER_MODE)
-
-#define processor_mode(regs) \
-       ((regs)->UCreg_asr & MODE_MASK)
-
-#define interrupts_enabled(regs) \
-       (!((regs)->UCreg_asr & PSR_I_BIT))
-
-#define fast_interrupts_enabled(regs) \
-       (!((regs)->UCreg_asr & PSR_R_BIT))
-
-/* Are the current registers suitable for user mode?
- * (used to maintain security in signal handlers)
- */
-static inline int valid_user_regs(struct pt_regs *regs)
-{
-       unsigned long mode = regs->UCreg_asr & MODE_MASK;
-
-       /*
-        * Always clear the R (REAL) bits
-        */
-       regs->UCreg_asr &= ~(PSR_R_BIT);
-
-       if ((regs->UCreg_asr & PSR_I_BIT) == 0) {
-               if (mode == USER_MODE)
-                       return 1;
-       }
-
-       /*
-        * Force ASR to something logical...
-        */
-       regs->UCreg_asr &= PSR_f | USER_MODE;
-
-       return 0;
-}
-
-#define instruction_pointer(regs)      ((regs)->UCreg_pc)
-#define user_stack_pointer(regs)       ((regs)->UCreg_sp)
-#define profile_pc(regs)               instruction_pointer(regs)
-
-#endif /* __ASSEMBLY__ */
-#endif
diff --git a/arch/unicore32/include/asm/stacktrace.h b/arch/unicore32/include/asm/stacktrace.h
deleted file mode 100644 (file)
index 3e59f9d..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/stacktrace.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_STACKTRACE_H__
-#define __UNICORE_STACKTRACE_H__
-
-struct stackframe {
-       unsigned long fp;
-       unsigned long sp;
-       unsigned long lr;
-       unsigned long pc;
-};
-
-#ifdef CONFIG_FRAME_POINTER
-extern int unwind_frame(struct stackframe *frame);
-#else
-#define unwind_frame(f) (-EINVAL)
-#endif
-extern void walk_stackframe(struct stackframe *frame,
-                           int (*fn)(struct stackframe *, void *), void *data);
-
-#endif /* __UNICORE_STACKTRACE_H__ */
diff --git a/arch/unicore32/include/asm/string.h b/arch/unicore32/include/asm/string.h
deleted file mode 100644 (file)
index 1649b0e..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/string.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_STRING_H__
-#define __UNICORE_STRING_H__
-
-/*
- * We don't do inline string functions, since the
- * optimised inline asm versions are not small.
- */
-
-#define __HAVE_ARCH_STRRCHR
-extern char *strrchr(const char *s, int c);
-
-#define __HAVE_ARCH_STRCHR
-extern char *strchr(const char *s, int c);
-
-#define __HAVE_ARCH_MEMCPY
-extern void *memcpy(void *, const void *, __kernel_size_t);
-
-#define __HAVE_ARCH_MEMMOVE
-extern void *memmove(void *, const void *, __kernel_size_t);
-
-#define __HAVE_ARCH_MEMCHR
-extern void *memchr(const void *, int, __kernel_size_t);
-
-#define __HAVE_ARCH_MEMSET
-extern void *memset(void *, int, __kernel_size_t);
-
-#endif
diff --git a/arch/unicore32/include/asm/suspend.h b/arch/unicore32/include/asm/suspend.h
deleted file mode 100644 (file)
index 72bd89c..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/suspend.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_SUSPEND_H__
-#define __UNICORE_SUSPEND_H__
-
-#ifndef __ASSEMBLY__
-
-#include <asm/ptrace.h>
-
-struct swsusp_arch_regs {
-       struct cpu_context_save cpu_context;    /* cpu context */
-#ifdef CONFIG_UNICORE_FPU_F64
-       struct fp_state         fpstate __attribute__((aligned(8)));
-#endif
-};
-#endif
-
-#endif /* __UNICORE_SUSPEND_H__ */
-
diff --git a/arch/unicore32/include/asm/switch_to.h b/arch/unicore32/include/asm/switch_to.h
deleted file mode 100644 (file)
index 12e534b..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Task switching for PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2012 GUAN Xue-tao
- */
-#ifndef __UNICORE_SWITCH_TO_H__
-#define __UNICORE_SWITCH_TO_H__
-
-struct task_struct;
-struct thread_info;
-
-/*
- * switch_to(prev, next) should switch from task `prev' to `next'
- * `prev' will never be the same as `next'.  schedule() itself
- * contains the memory barrier to tell GCC not to cache `current'.
- */
-extern struct task_struct *__switch_to(struct task_struct *,
-               struct thread_info *, struct thread_info *);
-
-#define switch_to(prev, next, last)                                    \
-       do {                                                            \
-               last = __switch_to(prev, task_thread_info(prev),        \
-                                       task_thread_info(next));        \
-       } while (0)
-
-#endif /* __UNICORE_SWITCH_TO_H__ */
diff --git a/arch/unicore32/include/asm/syscall.h b/arch/unicore32/include/asm/syscall.h
deleted file mode 100644 (file)
index 6079617..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_UNICORE_SYSCALL_H
-#define _ASM_UNICORE_SYSCALL_H
-
-#include <uapi/linux/audit.h>
-
-static inline int syscall_get_arch(struct task_struct *task)
-{
-       return AUDIT_ARCH_UNICORE;
-}
-
-#endif /* _ASM_UNICORE_SYSCALL_H */
diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h
deleted file mode 100644 (file)
index d8a6d6b..0000000
+++ /dev/null
@@ -1,133 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/thread_info.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_THREAD_INFO_H__
-#define __UNICORE_THREAD_INFO_H__
-
-#ifdef __KERNEL__
-
-#include <linux/compiler.h>
-#include <asm/fpstate.h>
-
-#define THREAD_SIZE_ORDER      1
-#define THREAD_SIZE            8192
-#define THREAD_START_SP                (THREAD_SIZE - 8)
-
-#ifndef __ASSEMBLY__
-
-struct task_struct;
-
-#include <asm/types.h>
-
-typedef struct {
-       unsigned long seg;
-} mm_segment_t;
-
-struct cpu_context_save {
-       __u32   r4;
-       __u32   r5;
-       __u32   r6;
-       __u32   r7;
-       __u32   r8;
-       __u32   r9;
-       __u32   r10;
-       __u32   r11;
-       __u32   r12;
-       __u32   r13;
-       __u32   r14;
-       __u32   r15;
-       __u32   r16;
-       __u32   r17;
-       __u32   r18;
-       __u32   r19;
-       __u32   r20;
-       __u32   r21;
-       __u32   r22;
-       __u32   r23;
-       __u32   r24;
-       __u32   r25;
-       __u32   r26;
-       __u32   fp;
-       __u32   sp;
-       __u32   pc;
-};
-
-/*
- * low level task data that entry.S needs immediate access to.
- * __switch_to() assumes cpu_context follows immediately after cpu_domain.
- */
-struct thread_info {
-       unsigned long           flags;          /* low level flags */
-       int                     preempt_count;  /* 0 => preemptable */
-                                               /* <0 => bug */
-       mm_segment_t            addr_limit;     /* address limit */
-       struct task_struct      *task;          /* main task structure */
-       __u32                   cpu;            /* cpu */
-       struct cpu_context_save cpu_context;    /* cpu context */
-       __u32                   syscall;        /* syscall number */
-       __u8                    used_cp[16];    /* thread used copro */
-#ifdef CONFIG_UNICORE_FPU_F64
-       struct fp_state         fpstate __attribute__((aligned(8)));
-#endif
-};
-
-#define INIT_THREAD_INFO(tsk)                                          \
-{                                                                      \
-       .task           = &tsk,                                         \
-       .flags          = 0,                                            \
-       .preempt_count  = INIT_PREEMPT_COUNT,                           \
-       .addr_limit     = KERNEL_DS,                                    \
-}
-
-/*
- * how to get the thread information struct from C
- */
-static inline struct thread_info *current_thread_info(void) __attribute_const__;
-
-static inline struct thread_info *current_thread_info(void)
-{
-       register unsigned long sp asm ("sp");
-       return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
-}
-
-#define thread_saved_pc(tsk)   \
-       ((unsigned long)(task_thread_info(tsk)->cpu_context.pc))
-#define thread_saved_sp(tsk)   \
-       ((unsigned long)(task_thread_info(tsk)->cpu_context.sp))
-#define thread_saved_fp(tsk)   \
-       ((unsigned long)(task_thread_info(tsk)->cpu_context.fp))
-
-#endif
-
-/*
- * thread information flags:
- *  TIF_SYSCALL_TRACE  - syscall trace active
- *  TIF_SIGPENDING     - signal pending
- *  TIF_NEED_RESCHED   - rescheduling necessary
- *  TIF_NOTIFY_RESUME  - callback before returning to user
- */
-#define TIF_SIGPENDING         0
-#define TIF_NEED_RESCHED       1
-#define TIF_NOTIFY_RESUME      2       /* callback before returning to user */
-#define TIF_SYSCALL_TRACE      8
-#define TIF_MEMDIE             18
-#define TIF_RESTORE_SIGMASK    20
-
-#define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
-#define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
-
-/*
- * Change these and you break ASM code in entry-common.S
- */
-#define _TIF_WORK_MASK \
-       (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_NOTIFY_RESUME)
-
-#endif /* __KERNEL__ */
-#endif /* __UNICORE_THREAD_INFO_H__ */
diff --git a/arch/unicore32/include/asm/timex.h b/arch/unicore32/include/asm/timex.h
deleted file mode 100644 (file)
index d714af3..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/timex.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __UNICORE_TIMEX_H__
-#define __UNICORE_TIMEX_H__
-
-#ifdef CONFIG_ARCH_FPGA
-
-/* in FPGA, APB clock is 33M, and OST clock is 32K, */
-/* so, 1M is selected for timer interrupt correctly */
-#define        CLOCK_TICK_RATE         (32*1024)
-
-#endif
-
-#if defined(CONFIG_PUV3_DB0913)                \
-       || defined(CONFIG_PUV3_NB0916)  \
-       || defined(CONFIG_PUV3_SMW0919)
-
-#define  CLOCK_TICK_RATE         (14318000)
-
-#endif
-
-#include <asm-generic/timex.h>
-
-#endif
diff --git a/arch/unicore32/include/asm/tlb.h b/arch/unicore32/include/asm/tlb.h
deleted file mode 100644 (file)
index 4663d8c..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/tlb.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_TLB_H__
-#define __UNICORE_TLB_H__
-
-/*
- * unicore32 lacks an efficient flush_tlb_range(), use flush_tlb_mm().
- */
-
-#define __pte_free_tlb(tlb, pte, addr)                         \
-       do {                                                    \
-               pgtable_pte_page_dtor(pte);                     \
-               tlb_remove_page((tlb), (pte));                  \
-       } while (0)
-
-#include <asm-generic/tlb.h>
-
-#endif
diff --git a/arch/unicore32/include/asm/tlbflush.h b/arch/unicore32/include/asm/tlbflush.h
deleted file mode 100644 (file)
index 1cf18ef..0000000
+++ /dev/null
@@ -1,192 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/tlbflush.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_TLBFLUSH_H__
-#define __UNICORE_TLBFLUSH_H__
-
-#ifndef __ASSEMBLY__
-
-#include <linux/sched.h>
-
-extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long,
-                                       struct vm_area_struct *);
-extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long);
-
-/*
- *     TLB Management
- *     ==============
- *
- *     The arch/unicore/mm/tlb-*.S files implement these methods.
- *
- *     The TLB specific code is expected to perform whatever tests it
- *     needs to determine if it should invalidate the TLB for each
- *     call.  Start addresses are inclusive and end addresses are
- *     exclusive; it is safe to round these addresses down.
- *
- *     flush_tlb_all()
- *
- *             Invalidate the entire TLB.
- *
- *     flush_tlb_mm(mm)
- *
- *             Invalidate all TLB entries in a particular address
- *             space.
- *             - mm    - mm_struct describing address space
- *
- *     flush_tlb_range(mm,start,end)
- *
- *             Invalidate a range of TLB entries in the specified
- *             address space.
- *             - mm    - mm_struct describing address space
- *             - start - start address (may not be aligned)
- *             - end   - end address (exclusive, may not be aligned)
- *
- *     flush_tlb_page(vaddr,vma)
- *
- *             Invalidate the specified page in the specified address range.
- *             - vaddr - virtual address (may not be aligned)
- *             - vma   - vma_struct describing address range
- *
- *     flush_kern_tlb_page(kaddr)
- *
- *             Invalidate the TLB entry for the specified page.  The address
- *             will be in the kernels virtual memory space.  Current uses
- *             only require the D-TLB to be invalidated.
- *             - kaddr - Kernel virtual memory address
- */
-
-static inline void local_flush_tlb_all(void)
-{
-       const int zero = 0;
-
-       /* TLB invalidate all */
-       asm("movc p0.c6, %0, #6; nop; nop; nop; nop; nop; nop; nop; nop"
-               : : "r" (zero) : "cc");
-}
-
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
-{
-       const int zero = 0;
-
-       if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) {
-               /* TLB invalidate all */
-               asm("movc p0.c6, %0, #6; nop; nop; nop; nop; nop; nop; nop; nop"
-                       : : "r" (zero) : "cc");
-       }
-       put_cpu();
-}
-
-static inline void
-local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
-{
-       if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
-#ifndef CONFIG_CPU_TLB_SINGLE_ENTRY_DISABLE
-               /* iTLB invalidate page */
-               asm("movc p0.c6, %0, #5; nop; nop; nop; nop; nop; nop; nop; nop"
-                       : : "r" (uaddr & PAGE_MASK) : "cc");
-               /* dTLB invalidate page */
-               asm("movc p0.c6, %0, #3; nop; nop; nop; nop; nop; nop; nop; nop"
-                       : : "r" (uaddr & PAGE_MASK) : "cc");
-#else
-               /* TLB invalidate all */
-               asm("movc p0.c6, %0, #6; nop; nop; nop; nop; nop; nop; nop; nop"
-                       : : "r" (uaddr & PAGE_MASK) : "cc");
-#endif
-       }
-}
-
-static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
-{
-#ifndef CONFIG_CPU_TLB_SINGLE_ENTRY_DISABLE
-       /* iTLB invalidate page */
-       asm("movc p0.c6, %0, #5; nop; nop; nop; nop; nop; nop; nop; nop"
-               : : "r" (kaddr & PAGE_MASK) : "cc");
-       /* dTLB invalidate page */
-       asm("movc p0.c6, %0, #3; nop; nop; nop; nop; nop; nop; nop; nop"
-               : : "r" (kaddr & PAGE_MASK) : "cc");
-#else
-       /* TLB invalidate all */
-       asm("movc p0.c6, %0, #6; nop; nop; nop; nop; nop; nop; nop; nop"
-               : : "r" (kaddr & PAGE_MASK) : "cc");
-#endif
-}
-
-/*
- *     flush_pmd_entry
- *
- *     Flush a PMD entry (word aligned, or double-word aligned) to
- *     RAM if the TLB for the CPU we are running on requires this.
- *     This is typically used when we are creating PMD entries.
- *
- *     clean_pmd_entry
- *
- *     Clean (but don't drain the write buffer) if the CPU requires
- *     these operations.  This is typically used when we are removing
- *     PMD entries.
- */
-static inline void flush_pmd_entry(pmd_t *pmd)
-{
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-       /* flush dcache line, see dcacheline_flush in proc-macros.S */
-       asm("mov        r1, %0 << #20\n"
-               "ldw    r2, =_stext\n"
-               "add    r2, r2, r1 >> #20\n"
-               "ldw    r1, [r2+], #0x0000\n"
-               "ldw    r1, [r2+], #0x1000\n"
-               "ldw    r1, [r2+], #0x2000\n"
-               "ldw    r1, [r2+], #0x3000\n"
-               : : "r" (pmd) : "r1", "r2");
-#else
-       /* flush dcache all */
-       asm("movc p0.c5, %0, #14; nop; nop; nop; nop; nop; nop; nop; nop"
-               : : "r" (pmd) : "cc");
-#endif
-}
-
-static inline void clean_pmd_entry(pmd_t *pmd)
-{
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-       /* clean dcache line */
-       asm("movc p0.c5, %0, #11; nop; nop; nop; nop; nop; nop; nop; nop"
-               : : "r" (__pa(pmd) & ~(L1_CACHE_BYTES - 1)) : "cc");
-#else
-       /* clean dcache all */
-       asm("movc p0.c5, %0, #10; nop; nop; nop; nop; nop; nop; nop; nop"
-               : : "r" (pmd) : "cc");
-#endif
-}
-
-/*
- * Convert calls to our calling convention.
- */
-#define local_flush_tlb_range(vma, start, end) \
-       __cpu_flush_user_tlb_range(start, end, vma)
-#define local_flush_tlb_kernel_range(s, e)     \
-       __cpu_flush_kern_tlb_range(s, e)
-
-#define flush_tlb_all          local_flush_tlb_all
-#define flush_tlb_mm           local_flush_tlb_mm
-#define flush_tlb_page         local_flush_tlb_page
-#define flush_tlb_kernel_page  local_flush_tlb_kernel_page
-#define flush_tlb_range                local_flush_tlb_range
-#define flush_tlb_kernel_range local_flush_tlb_kernel_range
-
-/*
- * if PG_dcache_clean is not set for the page, we need to ensure that any
- * cache entries for the kernels virtual memory range are written
- * back to the page.
- */
-extern void update_mmu_cache(struct vm_area_struct *vma,
-               unsigned long addr, pte_t *ptep);
-
-extern void do_bad_area(unsigned long addr, unsigned int fsr,
-               struct pt_regs *regs);
-
-#endif
-
-#endif
diff --git a/arch/unicore32/include/asm/traps.h b/arch/unicore32/include/asm/traps.h
deleted file mode 100644 (file)
index ad1508a..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/traps.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_TRAP_H__
-#define __UNICORE_TRAP_H__
-
-extern void __init early_trap_init(void);
-extern void dump_backtrace_entry(unsigned long where,
-               unsigned long from, unsigned long frame);
-
-extern void do_DataAbort(unsigned long addr, unsigned int fsr,
-                struct pt_regs *regs);
-#endif
diff --git a/arch/unicore32/include/asm/uaccess.h b/arch/unicore32/include/asm/uaccess.h
deleted file mode 100644 (file)
index 33c24f4..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/asm/uaccess.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_UACCESS_H__
-#define __UNICORE_UACCESS_H__
-
-#include <asm/memory.h>
-
-#define __strncpy_from_user    __strncpy_from_user
-#define __strnlen_user         __strnlen_user
-#define __clear_user           __clear_user
-
-#define __kernel_ok            (uaccess_kernel())
-#define __user_ok(addr, size)  (((size) <= TASK_SIZE)                  \
-                               && ((addr) <= TASK_SIZE - (size)))
-#define __access_ok(addr, size)        (__kernel_ok || __user_ok((addr), (size)))
-
-extern unsigned long __must_check
-raw_copy_from_user(void *to, const void __user *from, unsigned long n);
-extern unsigned long __must_check
-raw_copy_to_user(void __user *to, const void *from, unsigned long n);
-extern unsigned long __must_check
-__clear_user(void __user *addr, unsigned long n);
-extern unsigned long __must_check
-__strncpy_from_user(char *to, const char __user *from, unsigned long count);
-extern unsigned long
-__strnlen_user(const char __user *s, long n);
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#include <asm-generic/uaccess.h>
-
-#endif /* __UNICORE_UACCESS_H__ */
diff --git a/arch/unicore32/include/asm/vmalloc.h b/arch/unicore32/include/asm/vmalloc.h
deleted file mode 100644 (file)
index 0544358..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef _ASM_UNICORE32_VMALLOC_H
-#define _ASM_UNICORE32_VMALLOC_H
-
-#endif /* _ASM_UNICORE32_VMALLOC_H */
diff --git a/arch/unicore32/include/mach/PKUnity.h b/arch/unicore32/include/mach/PKUnity.h
deleted file mode 100644 (file)
index 78f7751..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/PKUnity.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-/* Be sure that virtual mapping is defined right */
-#ifndef __MACH_PUV3_HARDWARE_H__
-#error You must include hardware.h not PKUnity.h
-#endif
-
-#include <mach/bitfield.h>
-
-/*
- * Memory Definitions
- */
-#define PKUNITY_SDRAM_BASE             0x00000000 /* 0x00000000 - 0x7FFFFFFF 2GB */
-#define PKUNITY_MMIO_BASE              0x80000000 /* 0x80000000 - 0xFFFFFFFF 2GB */
-
-/*
- * PKUNITY System Bus Addresses (PCI): 0x80000000 - 0xBFFFFFFF (1GB)
- * 0x80000000 - 0x8000000B 12B    PCI Configuration regs
- * 0x80010000 - 0x80010250 592B   PCI Bridge Base
- * 0x80030000 - 0x8003FFFF 64KB   PCI Legacy IO
- * 0x90000000 - 0x97FFFFFF 128MB  PCI AHB-PCI MEM-mapping
- * 0x98000000 - 0x9FFFFFFF 128MB  PCI PCI-AHB MEM-mapping
- */
-#define PKUNITY_PCI_BASE               io_p2v(0x80000000) /* 0x80000000 - 0xBFFFFFFF 1GB */
-#include <mach/regs-pci.h>
-
-#define PKUNITY_PCICFG_BASE            (PKUNITY_PCI_BASE + 0x0)
-#define PKUNITY_PCIBRI_BASE            (PKUNITY_PCI_BASE + 0x00010000)
-#define PKUNITY_PCILIO_BASE            (PKUNITY_PCI_BASE + 0x00030000)
-#define PKUNITY_PCIMEM_BASE            (PKUNITY_PCI_BASE + 0x10000000)
-#define PKUNITY_PCIAHB_BASE            (PKUNITY_PCI_BASE + 0x18000000)
-
-/*
- * PKUNITY System Bus Addresses (AHB): 0xC0000000 - 0xEDFFFFFF (736MB)
- */
-#define PKUNITY_AHB_BASE               io_p2v(0xC0000000)
-
-/* AHB-0 is DDR2 SDRAM */
-/* AHB-1 is PCI Space */
-#define PKUNITY_ARBITER_BASE           (PKUNITY_AHB_BASE + 0x000000) /* AHB-2 */
-#define PKUNITY_DDR2CTRL_BASE          (PKUNITY_AHB_BASE + 0x100000) /* AHB-3 */
-#define PKUNITY_DMAC_BASE              (PKUNITY_AHB_BASE + 0x200000) /* AHB-4 */
-#include <mach/regs-dmac.h>
-#define PKUNITY_UMAL_BASE              (PKUNITY_AHB_BASE + 0x300000) /* AHB-5 */
-#include <mach/regs-umal.h>
-#define PKUNITY_USB_BASE               (PKUNITY_AHB_BASE + 0x400000) /* AHB-6 */
-#define PKUNITY_SATA_BASE              (PKUNITY_AHB_BASE + 0x500000) /* AHB-7 */
-#define PKUNITY_SMC_BASE               (PKUNITY_AHB_BASE + 0x600000) /* AHB-8 */
-/* AHB-9 is for APB bridge */
-#define PKUNITY_MME_BASE               (PKUNITY_AHB_BASE + 0x700000) /* AHB-10 */
-#define PKUNITY_UNIGFX_BASE            (PKUNITY_AHB_BASE + 0x800000) /* AHB-11 */
-#include <mach/regs-unigfx.h>
-#define PKUNITY_NAND_BASE              (PKUNITY_AHB_BASE + 0x900000) /* AHB-12 */
-#include <mach/regs-nand.h>
-#define PKUNITY_H264D_BASE             (PKUNITY_AHB_BASE + 0xA00000) /* AHB-13 */
-#define PKUNITY_H264E_BASE             (PKUNITY_AHB_BASE + 0xB00000) /* AHB-14 */
-
-/*
- * PKUNITY Peripheral Bus Addresses (APB): 0xEE000000 - 0xEFFFFFFF (32MB)
- */
-#define PKUNITY_APB_BASE               io_p2v(0xEE000000)
-
-#define PKUNITY_UART0_BASE             (PKUNITY_APB_BASE + 0x000000) /* APB-0 */
-#define PKUNITY_UART1_BASE             (PKUNITY_APB_BASE + 0x100000) /* APB-1 */
-#include <mach/regs-uart.h>
-#define PKUNITY_I2C_BASE               (PKUNITY_APB_BASE + 0x200000) /* APB-2 */
-#include <mach/regs-i2c.h>
-#define PKUNITY_SPI_BASE               (PKUNITY_APB_BASE + 0x300000) /* APB-3 */
-#include <mach/regs-spi.h>
-#define PKUNITY_AC97_BASE              (PKUNITY_APB_BASE + 0x400000) /* APB-4 */
-#include <mach/regs-ac97.h>
-#define PKUNITY_GPIO_BASE              (PKUNITY_APB_BASE + 0x500000) /* APB-5 */
-#include <mach/regs-gpio.h>
-#define PKUNITY_INTC_BASE              (PKUNITY_APB_BASE + 0x600000) /* APB-6 */
-#include <mach/regs-intc.h>
-#define PKUNITY_RTC_BASE               (PKUNITY_APB_BASE + 0x700000) /* APB-7 */
-#include <mach/regs-rtc.h>
-#define PKUNITY_OST_BASE               (PKUNITY_APB_BASE + 0x800000) /* APB-8 */
-#include <mach/regs-ost.h>
-#define PKUNITY_RESETC_BASE            (PKUNITY_APB_BASE + 0x900000) /* APB-9 */
-#include <mach/regs-resetc.h>
-#define PKUNITY_PM_BASE                        (PKUNITY_APB_BASE + 0xA00000) /* APB-10 */
-#include <mach/regs-pm.h>
-#define PKUNITY_PS2_BASE               (PKUNITY_APB_BASE + 0xB00000) /* APB-11 */
-#include <mach/regs-ps2.h>
-#define PKUNITY_SDC_BASE               (PKUNITY_APB_BASE + 0xC00000) /* APB-12 */
-#include <mach/regs-sdc.h>
-
diff --git a/arch/unicore32/include/mach/bitfield.h b/arch/unicore32/include/mach/bitfield.h
deleted file mode 100644 (file)
index 766b7f0..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/bitfield.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __MACH_PUV3_BITFIELD_H__
-#define __MACH_PUV3_BITFIELD_H__
-
-#ifndef __ASSEMBLY__
-#define UData(Data)    ((unsigned long) (Data))
-#else
-#define UData(Data)    (Data)
-#endif
-
-#define FIELD(val, vmask, vshift)      (((val) & ((UData(1) << (vmask)) - 1)) << (vshift))
-#define FMASK(vmask, vshift)           (((UData(1) << (vmask)) - 1) << (vshift))
-
-#endif /* __MACH_PUV3_BITFIELD_H__ */
diff --git a/arch/unicore32/include/mach/dma.h b/arch/unicore32/include/mach/dma.h
deleted file mode 100644 (file)
index 271001c..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/dma.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __MACH_PUV3_DMA_H__
-#define __MACH_PUV3_DMA_H__
-
-/*
- * The PKUnity has six internal DMA channels.
- */
-#define MAX_DMA_CHANNELS       6
-
-typedef enum {
-       DMA_PRIO_HIGH = 0,
-       DMA_PRIO_MEDIUM = 1,
-       DMA_PRIO_LOW = 2
-} puv3_dma_prio;
-
-/*
- * DMA registration
- */
-
-extern int puv3_request_dma(char *name,
-                        puv3_dma_prio prio,
-                        void (*irq_handler)(int, void *),
-                        void (*err_handler)(int, void *),
-                        void *data);
-
-extern void puv3_free_dma(int dma_ch);
-
-static inline void puv3_stop_dma(int ch)
-{
-       writel(readl(DMAC_CONFIG(ch)) & ~DMAC_CONFIG_EN, DMAC_CONFIG(ch));
-}
-
-static inline void puv3_resume_dma(int ch)
-{
-       writel(readl(DMAC_CONFIG(ch)) | DMAC_CONFIG_EN, DMAC_CONFIG(ch));
-}
-
-#endif /* __MACH_PUV3_DMA_H__ */
diff --git a/arch/unicore32/include/mach/hardware.h b/arch/unicore32/include/mach/hardware.h
deleted file mode 100644 (file)
index 2d7571c..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/hardware.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * This file contains the hardware definitions for PKUnity architecture
- */
-
-#ifndef __MACH_PUV3_HARDWARE_H__
-#define __MACH_PUV3_HARDWARE_H__
-
-#include <mach/PKUnity.h>
-
-#ifndef __ASSEMBLY__
-#define io_p2v(x)      (void __iomem *)((x) - PKUNITY_MMIO_BASE)
-#define io_v2p(x)      (phys_addr_t)((x) + PKUNITY_MMIO_BASE)
-#else
-#define io_p2v(x)      ((x) - PKUNITY_MMIO_BASE)
-#define io_v2p(x)      ((x) + PKUNITY_MMIO_BASE)
-#endif
-
-#define PCIBIOS_MIN_IO                 0x4000 /* should be lower than 64KB */
-#define PCIBIOS_MIN_MEM                        io_v2p(PKUNITY_PCIMEM_BASE)
-
-#define pcibios_assign_all_busses()    1
-
-#endif  /* __MACH_PUV3_HARDWARE_H__ */
diff --git a/arch/unicore32/include/mach/map.h b/arch/unicore32/include/mach/map.h
deleted file mode 100644 (file)
index 7a83eee..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/map.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  Page table mapping constructs and function prototypes
- */
-#define MT_DEVICE              0
-#define MT_DEVICE_CACHED       2
-#define MT_KUSER               7
-#define MT_HIGH_VECTORS                8
-#define MT_MEMORY              9
-#define MT_ROM                 10
-
diff --git a/arch/unicore32/include/mach/memory.h b/arch/unicore32/include/mach/memory.h
deleted file mode 100644 (file)
index b4e6035..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/memory.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __MACH_PUV3_MEMORY_H__
-#define __MACH_PUV3_MEMORY_H__
-
-#include <mach/hardware.h>
-
-/* Physical DRAM offset. */
-#define PHYS_OFFSET    UL(0x00000000)
-/* The base address of exception vectors. */
-#define VECTORS_BASE   UL(0xffff0000)
-/* The base address of kuser area. */
-#define KUSER_BASE     UL(0x80000000)
-
-#ifdef __ASSEMBLY__
-/* The byte offset of the kernel image in RAM from the start of RAM. */
-#define KERNEL_IMAGE_START     0x00408000
-#endif
-
-#if !defined(__ASSEMBLY__) && defined(CONFIG_PCI)
-
-void puv3_pci_adjust_zones(unsigned long *max_zone_pfn);
-
-#define arch_adjust_zones(max_zone_pfn) \
-       puv3_pci_adjust_zones(max_zone_pfn)
-
-#endif
-
-/*
- * PCI controller in PKUnity-3 masks highest 5-bit for upstream channel,
- * so we must limit the DMA allocation within 128M physical memory for
- * supporting PCI devices.
- */
-#define PCI_DMA_THRESHOLD      (PHYS_OFFSET + SZ_128M - 1)
-
-#define is_pcibus_device(dev)  (dev &&                 \
-                               (strncmp(dev->bus->name, "pci", 3) == 0))
-
-#define __virt_to_pcibus(x)     (__virt_to_phys((x) + PKUNITY_PCIAHB_BASE))
-#define __pcibus_to_virt(x)     (__phys_to_virt(x) - PKUNITY_PCIAHB_BASE)
-
-/* kuser area */
-#define KUSER_VECPAGE_BASE     (KUSER_BASE + UL(0x3fff0000))
-/* kuser_vecpage (0xbfff0000) is ro, and vectors page (0xffff0000) is rw */
-#define kuser_vecpage_to_vectors(x)    ((x) - (KUSER_VECPAGE_BASE)     \
-                                       + (VECTORS_BASE))
-
-#endif
diff --git a/arch/unicore32/include/mach/ocd.h b/arch/unicore32/include/mach/ocd.h
deleted file mode 100644 (file)
index 2a81492..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/ocd.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __MACH_PUV3_OCD_H__
-#define __MACH_PUV3_OCD_H__
-
-#if defined(CONFIG_DEBUG_OCD)
-static inline void ocd_putc(unsigned int c)
-{
-       int status, i = 0x2000000;
-
-       do {
-               if (--i < 0)
-                       return;
-
-               asm volatile ("movc %0, p1.c0, #0" : "=r" (status));
-       } while (status & 2);
-
-       asm("movc p1.c1, %0, #1" : : "r" (c));
-}
-
-#define putc(ch)       ocd_putc(ch)
-#else
-#define putc(ch)
-#endif
-
-#endif
diff --git a/arch/unicore32/include/mach/pm.h b/arch/unicore32/include/mach/pm.h
deleted file mode 100644 (file)
index cb40b84..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/pm.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __PUV3_PM_H__
-#define __PUV3_PM_H__
-
-#include <linux/suspend.h>
-
-struct puv3_cpu_pm_fns {
-       int     save_count;
-       void    (*save)(unsigned long *);
-       void    (*restore)(unsigned long *);
-       int     (*valid)(suspend_state_t state);
-       void    (*enter)(suspend_state_t state);
-       int     (*prepare)(void);
-       void    (*finish)(void);
-};
-
-extern struct puv3_cpu_pm_fns *puv3_cpu_pm_fns;
-
-/* sleep.S */
-extern void puv3_cpu_suspend(unsigned int);
-
-extern void puv3_cpu_resume(void);
-
-extern int puv3_pm_enter(suspend_state_t state);
-
-/* Defined in hibernate_asm.S */
-extern int restore_image(pgd_t *resume_pg_dir, struct pbe *restore_pblist);
-
-extern struct pbe *restore_pblist;
-#endif
diff --git a/arch/unicore32/include/mach/regs-ac97.h b/arch/unicore32/include/mach/regs-ac97.h
deleted file mode 100644 (file)
index 85c6018..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity AC97 Registers
- */
-
-#define PKUNITY_AC97_CONR              (PKUNITY_AC97_BASE + 0x0000)
-#define PKUNITY_AC97_OCR               (PKUNITY_AC97_BASE + 0x0004)
-#define PKUNITY_AC97_ICR               (PKUNITY_AC97_BASE + 0x0008)
-#define PKUNITY_AC97_CRAC              (PKUNITY_AC97_BASE + 0x000C)
-#define PKUNITY_AC97_INTR              (PKUNITY_AC97_BASE + 0x0010)
-#define PKUNITY_AC97_INTRSTAT          (PKUNITY_AC97_BASE + 0x0014)
-#define PKUNITY_AC97_INTRCLEAR         (PKUNITY_AC97_BASE + 0x0018)
-#define PKUNITY_AC97_ENABLE            (PKUNITY_AC97_BASE + 0x001C)
-#define PKUNITY_AC97_OUT_FIFO          (PKUNITY_AC97_BASE + 0x0020)
-#define PKUNITY_AC97_IN_FIFO           (PKUNITY_AC97_BASE + 0x0030)
-
-#define AC97_CODEC_REG(v)               FIELD((v), 7, 16)
-#define AC97_CODEC_VAL(v)               FIELD((v), 16, 0)
-#define AC97_CODEC_WRITECOMPLETE        FIELD(1, 1, 2)
-
-/*
- * VAR PLAY SAMPLE RATE
- */
-#define AC97_CMD_VPSAMPLE              (FIELD(3, 2, 16) | FIELD(3, 2, 0))
-
-/*
- * FIX CAPTURE SAMPLE RATE
- */
-#define AC97_CMD_FCSAMPLE              FIELD(7, 3, 0)
-
-#define AC97_CMD_RESET                 FIELD(1, 1, 0)
-#define AC97_CMD_ENABLE                        FIELD(1, 1, 0)
-#define AC97_CMD_DISABLE               FIELD(0, 1, 0)
diff --git a/arch/unicore32/include/mach/regs-dmac.h b/arch/unicore32/include/mach/regs-dmac.h
deleted file mode 100644 (file)
index bbdc52d..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity Direct Memory Access Controller (DMAC)
- */
-
-/*
- * Interrupt Status Reg DMAC_ISR.
- */
-#define DMAC_ISR               (PKUNITY_DMAC_BASE + 0x0020)
-/*
- * Interrupt Transfer Complete Status Reg DMAC_ITCSR.
- */
-#define DMAC_ITCSR             (PKUNITY_DMAC_BASE + 0x0050)
-/*
- * Interrupt Transfer Complete Clear Reg DMAC_ITCCR.
- */
-#define DMAC_ITCCR             (PKUNITY_DMAC_BASE + 0x0060)
-/*
- * Interrupt Error Status Reg DMAC_IESR.
- */
-#define DMAC_IESR              (PKUNITY_DMAC_BASE + 0x0080)
-/*
- * Interrupt Error Clear Reg DMAC_IECR.
- */
-#define DMAC_IECR              (PKUNITY_DMAC_BASE + 0x0090)
-/*
- * Enable Channels Reg DMAC_ENCH.
- */
-#define DMAC_ENCH              (PKUNITY_DMAC_BASE + 0x00B0)
-
-/*
- * DMA control reg. Space [byte]
- */
-#define DMASp                   0x00000100
-
-/*
- * Source Addr DMAC_SRCADDR(ch).
- */
-#define DMAC_SRCADDR(ch)       (PKUNITY_DMAC_BASE + (ch)*DMASp + 0x00)
-/*
- * Destination Addr DMAC_DESTADDR(ch).
- */
-#define DMAC_DESTADDR(ch)      (PKUNITY_DMAC_BASE + (ch)*DMASp + 0x04)
-/*
- * Control Reg DMAC_CONTROL(ch).
- */
-#define DMAC_CONTROL(ch)       (PKUNITY_DMAC_BASE + (ch)*DMASp + 0x0C)
-/*
- * Configuration Reg DMAC_CONFIG(ch).
- */
-#define DMAC_CONFIG(ch)                (PKUNITY_DMAC_BASE + (ch)*DMASp + 0x10)
-
-#define DMAC_IR_MASK            FMASK(6, 0)
-/*
- * select channel (ch)
- */
-#define DMAC_CHANNEL(ch)       FIELD(1, 1, (ch))
-
-#define DMAC_CONTROL_SIZE_BYTE(v)       (FIELD((v), 12, 14) | \
-                                       FIELD(0, 3, 9) | FIELD(0, 3, 6))
-#define DMAC_CONTROL_SIZE_HWORD(v)      (FIELD((v) >> 1, 12, 14) | \
-                                       FIELD(1, 3, 9) | FIELD(1, 3, 6))
-#define DMAC_CONTROL_SIZE_WORD(v)       (FIELD((v) >> 2, 12, 14) | \
-                                       FIELD(2, 3, 9) | FIELD(2, 3, 6))
-#define DMAC_CONTROL_DI                 FIELD(1, 1, 13)
-#define DMAC_CONTROL_SI                 FIELD(1, 1, 12)
-#define DMAC_CONTROL_BURST_1BYTE        (FIELD(0, 3, 3) | FIELD(0, 3, 0))
-#define DMAC_CONTROL_BURST_4BYTE        (FIELD(3, 3, 3) | FIELD(3, 3, 0))
-#define DMAC_CONTROL_BURST_8BYTE        (FIELD(5, 3, 3) | FIELD(5, 3, 0))
-#define DMAC_CONTROL_BURST_16BYTE       (FIELD(7, 3, 3) | FIELD(7, 3, 0))
-
-#define        DMAC_CONFIG_UART0_WR    (FIELD(2, 4, 11) | FIELD(1, 2, 1))
-#define        DMAC_CONFIG_UART0_RD    (FIELD(2, 4, 7)  | FIELD(2, 2, 1))
-#define        DMAC_CONFIG_UART1_WR    (FIELD(3, 4, 11) | FIELD(1, 2, 1))
-#define        DMAC_CONFIG_UART1RD     (FIELD(3, 4, 7)  | FIELD(2, 2, 1))
-#define        DMAC_CONFIG_AC97WR      (FIELD(4, 4, 11) | FIELD(1, 2, 1))
-#define        DMAC_CONFIG_AC97RD      (FIELD(4, 4, 7)  | FIELD(2, 2, 1))
-#define        DMAC_CONFIG_MMCWR       (FIELD(7, 4, 11) | FIELD(1, 2, 1))
-#define        DMAC_CONFIG_MMCRD       (FIELD(7, 4, 7)  | FIELD(2, 2, 1))
-#define DMAC_CONFIG_MASKITC     FIELD(1, 1, 4)
-#define DMAC_CONFIG_MASKIE      FIELD(1, 1, 3)
-#define DMAC_CONFIG_EN          FIELD(1, 1, 0)
diff --git a/arch/unicore32/include/mach/regs-gpio.h b/arch/unicore32/include/mach/regs-gpio.h
deleted file mode 100644 (file)
index 5fc701e..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity General-Purpose Input/Output (GPIO) Registers
- */
-
-/*
- * Voltage Status Reg GPIO_GPLR.
- */
-#define GPIO_GPLR      (PKUNITY_GPIO_BASE + 0x0000)
-/*
- * Pin Direction Reg GPIO_GPDR.
- */
-#define GPIO_GPDR      (PKUNITY_GPIO_BASE + 0x0004)
-/*
- * Output Pin Set Reg GPIO_GPSR.
- */
-#define GPIO_GPSR      (PKUNITY_GPIO_BASE + 0x0008)
-/*
- * Output Pin Clear Reg GPIO_GPCR.
- */
-#define GPIO_GPCR      (PKUNITY_GPIO_BASE + 0x000C)
-/*
- * Raise Edge Detect Reg GPIO_GRER.
- */
-#define GPIO_GRER      (PKUNITY_GPIO_BASE + 0x0010)
-/*
- * Fall Edge Detect Reg GPIO_GFER.
- */
-#define GPIO_GFER      (PKUNITY_GPIO_BASE + 0x0014)
-/*
- * Edge Status Reg GPIO_GEDR.
- */
-#define GPIO_GEDR      (PKUNITY_GPIO_BASE + 0x0018)
-/*
- * Special Voltage Detect Reg GPIO_GPIR.
- */
-#define GPIO_GPIR      (PKUNITY_GPIO_BASE + 0x0020)
-
-#define GPIO_MIN       (0)
-#define GPIO_MAX       (27)
-
-#define GPIO_GPIO(Nb)  (0x00000001 << (Nb))    /* GPIO [0..27] */
-#define GPIO_GPIO0     GPIO_GPIO(0)    /* GPIO  [0] */
-#define GPIO_GPIO1     GPIO_GPIO(1)    /* GPIO  [1] */
-#define GPIO_GPIO2     GPIO_GPIO(2)    /* GPIO  [2] */
-#define GPIO_GPIO3     GPIO_GPIO(3)    /* GPIO  [3] */
-#define GPIO_GPIO4     GPIO_GPIO(4)    /* GPIO  [4] */
-#define GPIO_GPIO5     GPIO_GPIO(5)    /* GPIO  [5] */
-#define GPIO_GPIO6     GPIO_GPIO(6)    /* GPIO  [6] */
-#define GPIO_GPIO7     GPIO_GPIO(7)    /* GPIO  [7] */
-#define GPIO_GPIO8     GPIO_GPIO(8)    /* GPIO  [8] */
-#define GPIO_GPIO9     GPIO_GPIO(9)    /* GPIO  [9] */
-#define GPIO_GPIO10    GPIO_GPIO(10)   /* GPIO [10] */
-#define GPIO_GPIO11    GPIO_GPIO(11)   /* GPIO [11] */
-#define GPIO_GPIO12    GPIO_GPIO(12)   /* GPIO [12] */
-#define GPIO_GPIO13    GPIO_GPIO(13)   /* GPIO [13] */
-#define GPIO_GPIO14    GPIO_GPIO(14)   /* GPIO [14] */
-#define GPIO_GPIO15    GPIO_GPIO(15)   /* GPIO [15] */
-#define GPIO_GPIO16    GPIO_GPIO(16)   /* GPIO [16] */
-#define GPIO_GPIO17    GPIO_GPIO(17)   /* GPIO [17] */
-#define GPIO_GPIO18    GPIO_GPIO(18)   /* GPIO [18] */
-#define GPIO_GPIO19    GPIO_GPIO(19)   /* GPIO [19] */
-#define GPIO_GPIO20    GPIO_GPIO(20)   /* GPIO [20] */
-#define GPIO_GPIO21    GPIO_GPIO(21)   /* GPIO [21] */
-#define GPIO_GPIO22    GPIO_GPIO(22)   /* GPIO [22] */
-#define GPIO_GPIO23    GPIO_GPIO(23)   /* GPIO [23] */
-#define GPIO_GPIO24    GPIO_GPIO(24)   /* GPIO [24] */
-#define GPIO_GPIO25    GPIO_GPIO(25)   /* GPIO [25] */
-#define GPIO_GPIO26    GPIO_GPIO(26)   /* GPIO [26] */
-#define GPIO_GPIO27    GPIO_GPIO(27)   /* GPIO [27] */
-
diff --git a/arch/unicore32/include/mach/regs-i2c.h b/arch/unicore32/include/mach/regs-i2c.h
deleted file mode 100644 (file)
index b41aa7c..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity Inter-integrated Circuit (I2C) Registers
- */
-
-/*
- * Control Reg I2C_CON.
- */
-#define I2C_CON                (PKUNITY_I2C_BASE + 0x0000)
-/*
- * Target Address Reg I2C_TAR.
- */
-#define I2C_TAR                (PKUNITY_I2C_BASE + 0x0004)
-/*
- * Data buffer and command Reg I2C_DATACMD.
- */
-#define I2C_DATACMD    (PKUNITY_I2C_BASE + 0x0010)
-/*
- * Enable Reg I2C_ENABLE.
- */
-#define I2C_ENABLE     (PKUNITY_I2C_BASE + 0x006C)
-/*
- * Status Reg I2C_STATUS.
- */
-#define I2C_STATUS     (PKUNITY_I2C_BASE + 0x0070)
-/*
- * Tx FIFO Length Reg I2C_TXFLR.
- */
-#define I2C_TXFLR      (PKUNITY_I2C_BASE + 0x0074)
-/*
- * Rx FIFO Length Reg I2C_RXFLR.
- */
-#define I2C_RXFLR      (PKUNITY_I2C_BASE + 0x0078)
-/*
- * Enable Status Reg I2C_ENSTATUS.
- */
-#define I2C_ENSTATUS   (PKUNITY_I2C_BASE + 0x009C)
-
-#define I2C_CON_MASTER          FIELD(1, 1, 0)
-#define I2C_CON_SPEED_STD       FIELD(1, 2, 1)
-#define I2C_CON_SPEED_FAST      FIELD(2, 2, 1)
-#define I2C_CON_RESTART         FIELD(1, 1, 5)
-#define I2C_CON_SLAVEDISABLE    FIELD(1, 1, 6)
-
-#define I2C_DATACMD_READ        FIELD(1, 1, 8)
-#define I2C_DATACMD_WRITE       FIELD(0, 1, 8)
-#define I2C_DATACMD_DAT_MASK    FMASK(8, 0)
-#define I2C_DATACMD_DAT(v)      FIELD((v), 8, 0)
-
-#define I2C_ENABLE_ENABLE       FIELD(1, 1, 0)
-#define I2C_ENABLE_DISABLE      FIELD(0, 1, 0)
-
-#define I2C_STATUS_RFF          FIELD(1, 1, 4)
-#define I2C_STATUS_RFNE         FIELD(1, 1, 3)
-#define I2C_STATUS_TFE          FIELD(1, 1, 2)
-#define I2C_STATUS_TFNF         FIELD(1, 1, 1)
-#define I2C_STATUS_ACTIVITY     FIELD(1, 1, 0)
-
-#define I2C_ENSTATUS_ENABLE    FIELD(1, 1, 0)
-
-#define I2C_TAR_THERMAL        0x4f
-#define I2C_TAR_SPD    0x50
-#define I2C_TAR_PWIC    0x55
-#define I2C_TAR_EEPROM 0x57
diff --git a/arch/unicore32/include/mach/regs-intc.h b/arch/unicore32/include/mach/regs-intc.h
deleted file mode 100644 (file)
index 4eb1b5b..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUNITY Interrupt Controller (INTC) Registers
- */
-/*
- * INTC Level Reg INTC_ICLR.
- */
-#define INTC_ICLR      (PKUNITY_INTC_BASE + 0x0000)
-/*
- * INTC Mask Reg INTC_ICMR.
- */
-#define INTC_ICMR      (PKUNITY_INTC_BASE + 0x0004)
-/*
- * INTC Pending Reg INTC_ICPR.
- */
-#define INTC_ICPR      (PKUNITY_INTC_BASE + 0x0008)
-/*
- * INTC IRQ Pending Reg INTC_ICIP.
- */
-#define INTC_ICIP      (PKUNITY_INTC_BASE + 0x000C)
-/*
- * INTC REAL Pending Reg INTC_ICFP.
- */
-#define INTC_ICFP      (PKUNITY_INTC_BASE + 0x0010)
-/*
- * INTC Control Reg INTC_ICCR.
- */
-#define INTC_ICCR      (PKUNITY_INTC_BASE + 0x0014)
-
diff --git a/arch/unicore32/include/mach/regs-nand.h b/arch/unicore32/include/mach/regs-nand.h
deleted file mode 100644 (file)
index 7f29939..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity NAND Controller Registers
- */
-/*
- * ID Reg. 0 NAND_IDR0
- */
-#define NAND_IDR0      (PKUNITY_NAND_BASE + 0x0000)
-/*
- * ID Reg. 1 NAND_IDR1
- */
-#define NAND_IDR1      (PKUNITY_NAND_BASE + 0x0004)
-/*
- * ID Reg. 2 NAND_IDR2
- */
-#define NAND_IDR2      (PKUNITY_NAND_BASE + 0x0008)
-/*
- * ID Reg. 3 NAND_IDR3
- */
-#define NAND_IDR3      (PKUNITY_NAND_BASE + 0x000C)
-/*
- * Page Address Reg 0 NAND_PAR0
- */
-#define NAND_PAR0      (PKUNITY_NAND_BASE + 0x0010)
-/*
- * Page Address Reg 1 NAND_PAR1
- */
-#define NAND_PAR1      (PKUNITY_NAND_BASE + 0x0014)
-/*
- * Page Address Reg 2 NAND_PAR2
- */
-#define NAND_PAR2      (PKUNITY_NAND_BASE + 0x0018)
-/*
- * ECC Enable Reg NAND_ECCEN
- */
-#define NAND_ECCEN     (PKUNITY_NAND_BASE + 0x001C)
-/*
- * Buffer Reg NAND_BUF
- */
-#define NAND_BUF       (PKUNITY_NAND_BASE + 0x0020)
-/*
- * ECC Status Reg NAND_ECCSR
- */
-#define NAND_ECCSR     (PKUNITY_NAND_BASE + 0x0024)
-/*
- * Command Reg NAND_CMD
- */
-#define NAND_CMD       (PKUNITY_NAND_BASE + 0x0028)
-/*
- * DMA Configure Reg NAND_DMACR
- */
-#define NAND_DMACR     (PKUNITY_NAND_BASE + 0x002C)
-/*
- * Interrupt Reg NAND_IR
- */
-#define NAND_IR                (PKUNITY_NAND_BASE + 0x0030)
-/*
- * Interrupt Mask Reg NAND_IMR
- */
-#define NAND_IMR       (PKUNITY_NAND_BASE + 0x0034)
-/*
- * Chip Enable Reg NAND_CHIPEN
- */
-#define NAND_CHIPEN    (PKUNITY_NAND_BASE + 0x0038)
-/*
- * Address Reg NAND_ADDR
- */
-#define NAND_ADDR      (PKUNITY_NAND_BASE + 0x003C)
-
-/*
- * Command bits NAND_CMD_CMD_MASK
- */
-#define NAND_CMD_CMD_MASK              FMASK(4, 4)
-#define NAND_CMD_CMD_READPAGE          FIELD(0x0, 4, 4)
-#define NAND_CMD_CMD_ERASEBLOCK                FIELD(0x6, 4, 4)
-#define NAND_CMD_CMD_READSTATUS                FIELD(0x7, 4, 4)
-#define NAND_CMD_CMD_WRITEPAGE         FIELD(0x8, 4, 4)
-#define NAND_CMD_CMD_READID            FIELD(0x9, 4, 4)
-#define NAND_CMD_CMD_RESET             FIELD(0xf, 4, 4)
-
diff --git a/arch/unicore32/include/mach/regs-ost.h b/arch/unicore32/include/mach/regs-ost.h
deleted file mode 100644 (file)
index 6c63e7b..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity Operating System Timer (OST) Registers
- */
-/*
- * Match Reg 0 OST_OSMR0
- */
-#define OST_OSMR0      (PKUNITY_OST_BASE + 0x0000)
-/*
- * Match Reg 1 OST_OSMR1
- */
-#define OST_OSMR1      (PKUNITY_OST_BASE + 0x0004)
-/*
- * Match Reg 2 OST_OSMR2
- */
-#define OST_OSMR2      (PKUNITY_OST_BASE + 0x0008)
-/*
- * Match Reg 3 OST_OSMR3
- */
-#define OST_OSMR3      (PKUNITY_OST_BASE + 0x000C)
-/*
- * Counter Reg OST_OSCR
- */
-#define OST_OSCR       (PKUNITY_OST_BASE + 0x0010)
-/*
- * Status Reg OST_OSSR
- */
-#define OST_OSSR       (PKUNITY_OST_BASE + 0x0014)
-/*
- * Watchdog Enable Reg OST_OWER
- */
-#define OST_OWER       (PKUNITY_OST_BASE + 0x0018)
-/*
- * Interrupt Enable Reg OST_OIER
- */
-#define OST_OIER       (PKUNITY_OST_BASE + 0x001C)
-
-/*
- * PWM Registers: IO base address: PKUNITY_OST_BASE + 0x80
- *      PWCR: Pulse Width Control Reg
- *      DCCR: Duty Cycle Control Reg
- *      PCR: Period Control Reg
- */
-#define OST_PWM_PWCR   (0x00)
-#define OST_PWM_DCCR   (0x04)
-#define OST_PWM_PCR    (0x08)
-
-/*
- * Match detected 0 OST_OSSR_M0
- */
-#define OST_OSSR_M0            FIELD(1, 1, 0)
-/*
- * Match detected 1 OST_OSSR_M1
- */
-#define OST_OSSR_M1            FIELD(1, 1, 1)
-/*
- * Match detected 2 OST_OSSR_M2
- */
-#define OST_OSSR_M2            FIELD(1, 1, 2)
-/*
- * Match detected 3 OST_OSSR_M3
- */
-#define OST_OSSR_M3            FIELD(1, 1, 3)
-
-/*
- * Interrupt enable 0 OST_OIER_E0
- */
-#define OST_OIER_E0            FIELD(1, 1, 0)
-/*
- * Interrupt enable 1 OST_OIER_E1
- */
-#define OST_OIER_E1            FIELD(1, 1, 1)
-/*
- * Interrupt enable 2 OST_OIER_E2
- */
-#define OST_OIER_E2            FIELD(1, 1, 2)
-/*
- * Interrupt enable 3 OST_OIER_E3
- */
-#define OST_OIER_E3            FIELD(1, 1, 3)
-
-/*
- * Watchdog Match Enable OST_OWER_WME
- */
-#define OST_OWER_WME           FIELD(1, 1, 0)
-
-/*
- * PWM Full Duty Cycle OST_PWMDCCR_FDCYCLE
- */
-#define OST_PWMDCCR_FDCYCLE    FIELD(1, 1, 10)
-
diff --git a/arch/unicore32/include/mach/regs-pci.h b/arch/unicore32/include/mach/regs-pci.h
deleted file mode 100644 (file)
index 25bb307..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity AHB-PCI Bridge Registers
- */
-
-/*
- * AHB/PCI fixed physical address for pci address configuration
- */
-/*
- * PCICFG Bridge Base Reg.
- */
-#define PCICFG_BRIBASE          (PKUNITY_PCICFG_BASE + 0x0000)
-/*
- * PCICFG Address Reg.
- */
-#define PCICFG_ADDR             (PKUNITY_PCICFG_BASE + 0x0004)
-/*
- * PCICFG Data Reg.
- */
-#define PCICFG_DATA             (PKUNITY_PCICFG_BASE + 0x0008)
-
-/*
- * PCI Bridge configuration space
- */
-#define PCIBRI_ID              (PKUNITY_PCIBRI_BASE + 0x0000)
-#define PCIBRI_CMD             (PKUNITY_PCIBRI_BASE + 0x0004)
-#define PCIBRI_CLASS           (PKUNITY_PCIBRI_BASE + 0x0008)
-#define PCIBRI_LTR             (PKUNITY_PCIBRI_BASE + 0x000C)
-#define PCIBRI_BAR0            (PKUNITY_PCIBRI_BASE + 0x0010)
-#define PCIBRI_BAR1            (PKUNITY_PCIBRI_BASE + 0x0014)
-#define PCIBRI_BAR2            (PKUNITY_PCIBRI_BASE + 0x0018)
-#define PCIBRI_BAR3            (PKUNITY_PCIBRI_BASE + 0x001C)
-#define PCIBRI_BAR4            (PKUNITY_PCIBRI_BASE + 0x0020)
-#define PCIBRI_BAR5            (PKUNITY_PCIBRI_BASE + 0x0024)
-
-#define PCIBRI_PCICTL0         (PKUNITY_PCIBRI_BASE + 0x0100)
-#define PCIBRI_PCIBAR0         (PKUNITY_PCIBRI_BASE + 0x0104)
-#define PCIBRI_PCIAMR0         (PKUNITY_PCIBRI_BASE + 0x0108)
-#define PCIBRI_PCITAR0         (PKUNITY_PCIBRI_BASE + 0x010C)
-#define PCIBRI_PCICTL1         (PKUNITY_PCIBRI_BASE + 0x0110)
-#define PCIBRI_PCIBAR1         (PKUNITY_PCIBRI_BASE + 0x0114)
-#define PCIBRI_PCIAMR1         (PKUNITY_PCIBRI_BASE + 0x0118)
-#define PCIBRI_PCITAR1         (PKUNITY_PCIBRI_BASE + 0x011C)
-#define PCIBRI_PCICTL2         (PKUNITY_PCIBRI_BASE + 0x0120)
-#define PCIBRI_PCIBAR2         (PKUNITY_PCIBRI_BASE + 0x0124)
-#define PCIBRI_PCIAMR2         (PKUNITY_PCIBRI_BASE + 0x0128)
-#define PCIBRI_PCITAR2         (PKUNITY_PCIBRI_BASE + 0x012C)
-#define PCIBRI_PCICTL3         (PKUNITY_PCIBRI_BASE + 0x0130)
-#define PCIBRI_PCIBAR3         (PKUNITY_PCIBRI_BASE + 0x0134)
-#define PCIBRI_PCIAMR3         (PKUNITY_PCIBRI_BASE + 0x0138)
-#define PCIBRI_PCITAR3         (PKUNITY_PCIBRI_BASE + 0x013C)
-#define PCIBRI_PCICTL4         (PKUNITY_PCIBRI_BASE + 0x0140)
-#define PCIBRI_PCIBAR4         (PKUNITY_PCIBRI_BASE + 0x0144)
-#define PCIBRI_PCIAMR4         (PKUNITY_PCIBRI_BASE + 0x0148)
-#define PCIBRI_PCITAR4         (PKUNITY_PCIBRI_BASE + 0x014C)
-#define PCIBRI_PCICTL5         (PKUNITY_PCIBRI_BASE + 0x0150)
-#define PCIBRI_PCIBAR5         (PKUNITY_PCIBRI_BASE + 0x0154)
-#define PCIBRI_PCIAMR5         (PKUNITY_PCIBRI_BASE + 0x0158)
-#define PCIBRI_PCITAR5         (PKUNITY_PCIBRI_BASE + 0x015C)
-
-#define PCIBRI_AHBCTL0         (PKUNITY_PCIBRI_BASE + 0x0180)
-#define PCIBRI_AHBBAR0         (PKUNITY_PCIBRI_BASE + 0x0184)
-#define PCIBRI_AHBAMR0         (PKUNITY_PCIBRI_BASE + 0x0188)
-#define PCIBRI_AHBTAR0         (PKUNITY_PCIBRI_BASE + 0x018C)
-#define PCIBRI_AHBCTL1         (PKUNITY_PCIBRI_BASE + 0x0190)
-#define PCIBRI_AHBBAR1         (PKUNITY_PCIBRI_BASE + 0x0194)
-#define PCIBRI_AHBAMR1         (PKUNITY_PCIBRI_BASE + 0x0198)
-#define PCIBRI_AHBTAR1         (PKUNITY_PCIBRI_BASE + 0x019C)
-#define PCIBRI_AHBCTL2         (PKUNITY_PCIBRI_BASE + 0x01A0)
-#define PCIBRI_AHBBAR2         (PKUNITY_PCIBRI_BASE + 0x01A4)
-#define PCIBRI_AHBAMR2         (PKUNITY_PCIBRI_BASE + 0x01A8)
-#define PCIBRI_AHBTAR2         (PKUNITY_PCIBRI_BASE + 0x01AC)
-#define PCIBRI_AHBCTL3         (PKUNITY_PCIBRI_BASE + 0x01B0)
-#define PCIBRI_AHBBAR3         (PKUNITY_PCIBRI_BASE + 0x01B4)
-#define PCIBRI_AHBAMR3         (PKUNITY_PCIBRI_BASE + 0x01B8)
-#define PCIBRI_AHBTAR3         (PKUNITY_PCIBRI_BASE + 0x01BC)
-#define PCIBRI_AHBCTL4         (PKUNITY_PCIBRI_BASE + 0x01C0)
-#define PCIBRI_AHBBAR4         (PKUNITY_PCIBRI_BASE + 0x01C4)
-#define PCIBRI_AHBAMR4         (PKUNITY_PCIBRI_BASE + 0x01C8)
-#define PCIBRI_AHBTAR4         (PKUNITY_PCIBRI_BASE + 0x01CC)
-#define PCIBRI_AHBCTL5         (PKUNITY_PCIBRI_BASE + 0x01D0)
-#define PCIBRI_AHBBAR5         (PKUNITY_PCIBRI_BASE + 0x01D4)
-#define PCIBRI_AHBAMR5         (PKUNITY_PCIBRI_BASE + 0x01D8)
-#define PCIBRI_AHBTAR5         (PKUNITY_PCIBRI_BASE + 0x01DC)
-
-#define PCIBRI_CTLx_AT          FIELD(1, 1, 2)
-#define PCIBRI_CTLx_PREF        FIELD(1, 1, 1)
-#define PCIBRI_CTLx_MRL         FIELD(1, 1, 0)
-
-#define PCIBRI_BARx_ADDR        FIELD(0xFFFFFFFC, 30, 2)
-#define PCIBRI_BARx_IO          FIELD(1, 1, 0)
-#define PCIBRI_BARx_MEM         FIELD(0, 1, 0)
-
-#define PCIBRI_CMD_IO           FIELD(1, 1, 0)
-#define PCIBRI_CMD_MEM          FIELD(1, 1, 1)
diff --git a/arch/unicore32/include/mach/regs-pm.h b/arch/unicore32/include/mach/regs-pm.h
deleted file mode 100644 (file)
index 777b1ac..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUNITY Power Manager (PM) Registers
- */
-/*
- * PM Control Reg PM_PMCR
- */
-#define PM_PMCR                 (PKUNITY_PM_BASE + 0x0000)
-/*
- * PM General Conf. Reg PM_PGCR
- */
-#define PM_PGCR                 (PKUNITY_PM_BASE + 0x0004)
-/*
- * PM PLL Conf. Reg PM_PPCR
- */
-#define PM_PPCR                 (PKUNITY_PM_BASE + 0x0008)
-/*
- * PM Wakeup Enable Reg PM_PWER
- */
-#define PM_PWER                 (PKUNITY_PM_BASE + 0x000C)
-/*
- * PM GPIO Sleep Status Reg PM_PGSR
- */
-#define PM_PGSR                 (PKUNITY_PM_BASE + 0x0010)
-/*
- * PM Clock Gate Reg PM_PCGR
- */
-#define PM_PCGR                 (PKUNITY_PM_BASE + 0x0014)
-/*
- * PM SYS PLL Conf. Reg PM_PLLSYSCFG
- */
-#define PM_PLLSYSCFG            (PKUNITY_PM_BASE + 0x0018)
-/*
- * PM DDR PLL Conf. Reg PM_PLLDDRCFG
- */
-#define PM_PLLDDRCFG            (PKUNITY_PM_BASE + 0x001C)
-/*
- * PM VGA PLL Conf. Reg PM_PLLVGACFG
- */
-#define PM_PLLVGACFG            (PKUNITY_PM_BASE + 0x0020)
-/*
- * PM Div Conf. Reg PM_DIVCFG
- */
-#define PM_DIVCFG               (PKUNITY_PM_BASE + 0x0024)
-/*
- * PM SYS PLL Status Reg PM_PLLSYSSTATUS
- */
-#define PM_PLLSYSSTATUS         (PKUNITY_PM_BASE + 0x0028)
-/*
- * PM DDR PLL Status Reg PM_PLLDDRSTATUS
- */
-#define PM_PLLDDRSTATUS         (PKUNITY_PM_BASE + 0x002C)
-/*
- * PM VGA PLL Status Reg PM_PLLVGASTATUS
- */
-#define PM_PLLVGASTATUS         (PKUNITY_PM_BASE + 0x0030)
-/*
- * PM Div Status Reg PM_DIVSTATUS
- */
-#define PM_DIVSTATUS            (PKUNITY_PM_BASE + 0x0034)
-/*
- * PM Software Reset Reg PM_SWRESET
- */
-#define PM_SWRESET              (PKUNITY_PM_BASE + 0x0038)
-/*
- * PM DDR2 PAD Start Reg PM_DDR2START
- */
-#define PM_DDR2START            (PKUNITY_PM_BASE + 0x003C)
-/*
- * PM DDR2 PAD Status Reg PM_DDR2CAL0
- */
-#define PM_DDR2CAL0             (PKUNITY_PM_BASE + 0x0040)
-/*
- * PM PLL DFC Done Reg PM_PLLDFCDONE
- */
-#define PM_PLLDFCDONE           (PKUNITY_PM_BASE + 0x0044)
-
-#define PM_PMCR_SFB             FIELD(1, 1, 0)
-#define PM_PMCR_IFB             FIELD(1, 1, 1)
-#define PM_PMCR_CFBSYS          FIELD(1, 1, 2)
-#define PM_PMCR_CFBDDR          FIELD(1, 1, 3)
-#define PM_PMCR_CFBVGA          FIELD(1, 1, 4)
-#define PM_PMCR_CFBDIVBCLK      FIELD(1, 1, 5)
-
-/*
- * GPIO 8~27 wake-up enable PM_PWER_GPIOHIGH
- */
-#define PM_PWER_GPIOHIGH        FIELD(1, 1, 8)
-/*
- * RTC alarm wake-up enable PM_PWER_RTC
- */
-#define PM_PWER_RTC             FIELD(1, 1, 31)
-
-#define PM_PCGR_BCLK64DDR      FIELD(1, 1, 0)
-#define PM_PCGR_BCLK64VGA      FIELD(1, 1, 1)
-#define PM_PCGR_BCLKDDR                FIELD(1, 1, 2)
-#define PM_PCGR_BCLKPCI                FIELD(1, 1, 4)
-#define PM_PCGR_BCLKDMAC       FIELD(1, 1, 5)
-#define PM_PCGR_BCLKUMAL       FIELD(1, 1, 6)
-#define PM_PCGR_BCLKUSB                FIELD(1, 1, 7)
-#define PM_PCGR_BCLKMME                FIELD(1, 1, 10)
-#define PM_PCGR_BCLKNAND       FIELD(1, 1, 11)
-#define PM_PCGR_BCLKH264E      FIELD(1, 1, 12)
-#define PM_PCGR_BCLKVGA                FIELD(1, 1, 13)
-#define PM_PCGR_BCLKH264D      FIELD(1, 1, 14)
-#define PM_PCGR_VECLK          FIELD(1, 1, 15)
-#define PM_PCGR_HECLK          FIELD(1, 1, 16)
-#define PM_PCGR_HDCLK          FIELD(1, 1, 17)
-#define PM_PCGR_NANDCLK                FIELD(1, 1, 18)
-#define PM_PCGR_GECLK          FIELD(1, 1, 19)
-#define PM_PCGR_VGACLK          FIELD(1, 1, 20)
-#define PM_PCGR_PCICLK         FIELD(1, 1, 21)
-#define PM_PCGR_SATACLK                FIELD(1, 1, 25)
-
-/*
- * [23:20]PM_DIVCFG_VGACLK(v)
- */
-#define PM_DIVCFG_VGACLK_MASK   FMASK(4, 20)
-#define PM_DIVCFG_VGACLK(v)    FIELD((v), 4, 20)
-
-#define PM_SWRESET_USB          FIELD(1, 1, 6)
-#define PM_SWRESET_VGADIV       FIELD(1, 1, 26)
-#define PM_SWRESET_GEDIV        FIELD(1, 1, 27)
-
-#define PM_PLLDFCDONE_SYSDFC    FIELD(1, 1, 0)
-#define PM_PLLDFCDONE_DDRDFC    FIELD(1, 1, 1)
-#define PM_PLLDFCDONE_VGADFC    FIELD(1, 1, 2)
diff --git a/arch/unicore32/include/mach/regs-ps2.h b/arch/unicore32/include/mach/regs-ps2.h
deleted file mode 100644 (file)
index d539d74..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity PS2 Controller Registers
- */
-/*
- * the same as I8042_DATA_REG PS2_DATA
- */
-#define PS2_DATA       (PKUNITY_PS2_BASE + 0x0060)
-/*
- * the same as I8042_COMMAND_REG PS2_COMMAND
- */
-#define PS2_COMMAND    (PKUNITY_PS2_BASE + 0x0064)
-/*
- * the same as I8042_STATUS_REG PS2_STATUS
- */
-#define PS2_STATUS     (PKUNITY_PS2_BASE + 0x0064)
-/*
- * counter reg PS2_CNT
- */
-#define PS2_CNT                (PKUNITY_PS2_BASE + 0x0068)
-
diff --git a/arch/unicore32/include/mach/regs-resetc.h b/arch/unicore32/include/mach/regs-resetc.h
deleted file mode 100644 (file)
index 5f2b9d7..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity Reset Controller (RC) Registers
- */
-/*
- * Software Reset Register
- */
-#define RESETC_SWRR    (PKUNITY_RESETC_BASE + 0x0000)
-/*
- * Reset Status Register
- */
-#define RESETC_RSSR    (PKUNITY_RESETC_BASE + 0x0004)
-
-/*
- * Software Reset Bit
- */
-#define RESETC_SWRR_SRB                FIELD(1, 1, 0)
-
-/*
- * Hardware Reset
- */
-#define RESETC_RSSR_HWR                FIELD(1, 1, 0)
-/*
- * Software Reset
- */
-#define RESETC_RSSR_SWR                FIELD(1, 1, 1)
-/*
- * Watchdog Reset
- */
-#define RESETC_RSSR_WDR                FIELD(1, 1, 2)
-/*
- * Sleep Mode Reset
- */
-#define RESETC_RSSR_SMR                FIELD(1, 1, 3)
-
diff --git a/arch/unicore32/include/mach/regs-rtc.h b/arch/unicore32/include/mach/regs-rtc.h
deleted file mode 100644 (file)
index f2f7f47..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity Real-Time Clock (RTC) control registers
- */
-/*
- * RTC Alarm Reg RTC_RTAR
- */
-#define RTC_RTAR       (PKUNITY_RTC_BASE + 0x0000)
-/*
- * RTC Count Reg RTC_RCNR
- */
-#define RTC_RCNR       (PKUNITY_RTC_BASE + 0x0004)
-/*
- * RTC Trim Reg RTC_RTTR
- */
-#define RTC_RTTR       (PKUNITY_RTC_BASE + 0x0008)
-/*
- * RTC Status Reg RTC_RTSR
- */
-#define RTC_RTSR       (PKUNITY_RTC_BASE + 0x0010)
-
-/*
- * Alarm detected RTC_RTSR_AL
- */
-#define RTC_RTSR_AL            FIELD(1, 1, 0)
-/*
- * 1 Hz clock detected RTC_RTSR_HZ
- */
-#define RTC_RTSR_HZ            FIELD(1, 1, 1)
-/*
- * Alarm interrupt Enable RTC_RTSR_ALE
- */
-#define RTC_RTSR_ALE           FIELD(1, 1, 2)
-/*
- * 1 Hz clock interrupt Enable RTC_RTSR_HZE
- */
-#define RTC_RTSR_HZE           FIELD(1, 1, 3)
-
diff --git a/arch/unicore32/include/mach/regs-sdc.h b/arch/unicore32/include/mach/regs-sdc.h
deleted file mode 100644 (file)
index 658bfaf..0000000
+++ /dev/null
@@ -1,157 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity Multi-Media Card and Secure Digital Card (MMC/SD) Registers
- */
-/*
- * Clock Control Reg SDC_CCR
- */
-#define SDC_CCR                (PKUNITY_SDC_BASE + 0x0000)
-/*
- * Software Reset Reg SDC_SRR
- */
-#define SDC_SRR                (PKUNITY_SDC_BASE + 0x0004)
-/*
- * Argument Reg SDC_ARGUMENT
- */
-#define SDC_ARGUMENT   (PKUNITY_SDC_BASE + 0x0008)
-/*
- * Command Reg SDC_COMMAND
- */
-#define SDC_COMMAND    (PKUNITY_SDC_BASE + 0x000C)
-/*
- * Block Size Reg SDC_BLOCKSIZE
- */
-#define SDC_BLOCKSIZE  (PKUNITY_SDC_BASE + 0x0010)
-/*
- * Block Count Reg SDC_BLOCKCOUNT
- */
-#define SDC_BLOCKCOUNT (PKUNITY_SDC_BASE + 0x0014)
-/*
- * Transfer Mode Reg SDC_TMR
- */
-#define SDC_TMR                (PKUNITY_SDC_BASE + 0x0018)
-/*
- * Response Reg. 0 SDC_RES0
- */
-#define SDC_RES0       (PKUNITY_SDC_BASE + 0x001C)
-/*
- * Response Reg. 1 SDC_RES1
- */
-#define SDC_RES1       (PKUNITY_SDC_BASE + 0x0020)
-/*
- * Response Reg. 2 SDC_RES2
- */
-#define SDC_RES2       (PKUNITY_SDC_BASE + 0x0024)
-/*
- * Response Reg. 3 SDC_RES3
- */
-#define SDC_RES3       (PKUNITY_SDC_BASE + 0x0028)
-/*
- * Read Timeout Control Reg SDC_RTCR
- */
-#define SDC_RTCR       (PKUNITY_SDC_BASE + 0x002C)
-/*
- * Interrupt Status Reg SDC_ISR
- */
-#define SDC_ISR                (PKUNITY_SDC_BASE + 0x0030)
-/*
- * Interrupt Status Mask Reg SDC_ISMR
- */
-#define SDC_ISMR       (PKUNITY_SDC_BASE + 0x0034)
-/*
- * RX FIFO SDC_RXFIFO
- */
-#define SDC_RXFIFO     (PKUNITY_SDC_BASE + 0x0038)
-/*
- * TX FIFO SDC_TXFIFO
- */
-#define SDC_TXFIFO     (PKUNITY_SDC_BASE + 0x003C)
-
-/*
- * SD Clock Enable SDC_CCR_CLKEN
- */
-#define SDC_CCR_CLKEN                  FIELD(1, 1, 2)
-/*
- * [15:8] SDC_CCR_PDIV(v)
- */
-#define SDC_CCR_PDIV(v)                        FIELD((v), 8, 8)
-
-/*
- * Software reset enable SDC_SRR_ENABLE
- */
-#define SDC_SRR_ENABLE                 FIELD(0, 1, 0)
-/*
- * Software reset disable SDC_SRR_DISABLE
- */
-#define SDC_SRR_DISABLE                        FIELD(1, 1, 0)
-
-/*
- * Response type SDC_COMMAND_RESTYPE_MASK
- */
-#define SDC_COMMAND_RESTYPE_MASK       FMASK(2, 0)
-/*
- * No response SDC_COMMAND_RESTYPE_NONE
- */
-#define SDC_COMMAND_RESTYPE_NONE       FIELD(0, 2, 0)
-/*
- * 136-bit long response SDC_COMMAND_RESTYPE_LONG
- */
-#define SDC_COMMAND_RESTYPE_LONG       FIELD(1, 2, 0)
-/*
- * 48-bit short response SDC_COMMAND_RESTYPE_SHORT
- */
-#define SDC_COMMAND_RESTYPE_SHORT      FIELD(2, 2, 0)
-/*
- * 48-bit short and test if busy response SDC_COMMAND_RESTYPE_SHORTBUSY
- */
-#define SDC_COMMAND_RESTYPE_SHORTBUSY  FIELD(3, 2, 0)
-/*
- * data ready SDC_COMMAND_DATAREADY
- */
-#define SDC_COMMAND_DATAREADY          FIELD(1, 1, 2)
-#define SDC_COMMAND_CMDEN              FIELD(1, 1, 3)
-/*
- * [10:5] SDC_COMMAND_CMDINDEX(v)
- */
-#define SDC_COMMAND_CMDINDEX(v)                FIELD((v), 6, 5)
-
-/*
- * [10:0] SDC_BLOCKSIZE_BSMASK(v)
- */
-#define SDC_BLOCKSIZE_BSMASK(v)                FIELD((v), 11, 0)
-/*
- * [11:0] SDC_BLOCKCOUNT_BCMASK(v)
- */
-#define SDC_BLOCKCOUNT_BCMASK(v)       FIELD((v), 12, 0)
-
-/*
- * Data Width 1bit SDC_TMR_WTH_1BIT
- */
-#define SDC_TMR_WTH_1BIT               FIELD(0, 1, 0)
-/*
- * Data Width 4bit SDC_TMR_WTH_4BIT
- */
-#define SDC_TMR_WTH_4BIT               FIELD(1, 1, 0)
-/*
- * Read SDC_TMR_DIR_READ
- */
-#define SDC_TMR_DIR_READ               FIELD(0, 1, 1)
-/*
- * Write SDC_TMR_DIR_WRITE
- */
-#define SDC_TMR_DIR_WRITE              FIELD(1, 1, 1)
-
-#define SDC_IR_MASK                    FMASK(13, 0)
-#define SDC_IR_RESTIMEOUT              FIELD(1, 1, 0)
-#define SDC_IR_WRITECRC                        FIELD(1, 1, 1)
-#define SDC_IR_READCRC                 FIELD(1, 1, 2)
-#define SDC_IR_TXFIFOREAD              FIELD(1, 1, 3)
-#define SDC_IR_RXFIFOWRITE             FIELD(1, 1, 4)
-#define SDC_IR_READTIMEOUT             FIELD(1, 1, 5)
-#define SDC_IR_DATACOMPLETE            FIELD(1, 1, 6)
-#define SDC_IR_CMDCOMPLETE             FIELD(1, 1, 7)
-#define SDC_IR_RXFIFOFULL              FIELD(1, 1, 8)
-#define SDC_IR_RXFIFOEMPTY             FIELD(1, 1, 9)
-#define SDC_IR_TXFIFOFULL              FIELD(1, 1, 10)
-#define SDC_IR_TXFIFOEMPTY             FIELD(1, 1, 11)
-#define SDC_IR_ENDCMDWITHRES           FIELD(1, 1, 12)
diff --git a/arch/unicore32/include/mach/regs-spi.h b/arch/unicore32/include/mach/regs-spi.h
deleted file mode 100644 (file)
index 3460647..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity Serial Peripheral Interface (SPI) Registers
- */
-/*
- * Control reg. 0 SPI_CR0
- */
-#define SPI_CR0                (PKUNITY_SPI_BASE + 0x0000)
-/*
- * Control reg. 1 SPI_CR1
- */
-#define SPI_CR1                (PKUNITY_SPI_BASE + 0x0004)
-/*
- * Enable reg SPI_SSIENR
- */
-#define SPI_SSIENR     (PKUNITY_SPI_BASE + 0x0008)
-/*
- * Status reg SPI_SR
- */
-#define SPI_SR         (PKUNITY_SPI_BASE + 0x0028)
-/*
- * Interrupt Mask reg SPI_IMR
- */
-#define SPI_IMR                (PKUNITY_SPI_BASE + 0x002C)
-/*
- * Interrupt Status reg SPI_ISR
- */
-#define SPI_ISR                (PKUNITY_SPI_BASE + 0x0030)
-
-/*
- * Enable SPI Controller SPI_SSIENR_EN
- */
-#define SPI_SSIENR_EN          FIELD(1, 1, 0)
-
-/*
- * SPI Busy SPI_SR_BUSY
- */
-#define SPI_SR_BUSY            FIELD(1, 1, 0)
-/*
- * Transmit FIFO Not Full SPI_SR_TFNF
- */
-#define SPI_SR_TFNF            FIELD(1, 1, 1)
-/*
- * Transmit FIFO Empty SPI_SR_TFE
- */
-#define SPI_SR_TFE             FIELD(1, 1, 2)
-/*
- * Receive FIFO Not Empty SPI_SR_RFNE
- */
-#define SPI_SR_RFNE            FIELD(1, 1, 3)
-/*
- * Receive FIFO Full SPI_SR_RFF
- */
-#define SPI_SR_RFF             FIELD(1, 1, 4)
-
-/*
- * Trans. FIFO Empty Interrupt Status SPI_ISR_TXEIS
- */
-#define SPI_ISR_TXEIS          FIELD(1, 1, 0)
-/*
- * Trans. FIFO Overflow Interrupt Status SPI_ISR_TXOIS
- */
-#define SPI_ISR_TXOIS          FIELD(1, 1, 1)
-/*
- * Receiv. FIFO Underflow Interrupt Status SPI_ISR_RXUIS
- */
-#define SPI_ISR_RXUIS          FIELD(1, 1, 2)
-/*
- * Receiv. FIFO Overflow Interrupt Status SPI_ISR_RXOIS
- */
-#define SPI_ISR_RXOIS          FIELD(1, 1, 3)
-/*
- * Receiv. FIFO Full Interrupt Status SPI_ISR_RXFIS
- */
-#define SPI_ISR_RXFIS          FIELD(1, 1, 4)
-#define SPI_ISR_MSTIS          FIELD(1, 1, 5)
-
-/*
- * Trans. FIFO Empty Interrupt Mask SPI_IMR_TXEIM
- */
-#define SPI_IMR_TXEIM          FIELD(1, 1, 0)
-/*
- * Trans. FIFO Overflow Interrupt Mask SPI_IMR_TXOIM
- */
-#define SPI_IMR_TXOIM          FIELD(1, 1, 1)
-/*
- * Receiv. FIFO Underflow Interrupt Mask SPI_IMR_RXUIM
- */
-#define SPI_IMR_RXUIM          FIELD(1, 1, 2)
-/*
- * Receiv. FIFO Overflow Interrupt Mask SPI_IMR_RXOIM
- */
-#define SPI_IMR_RXOIM          FIELD(1, 1, 3)
-/*
- * Receiv. FIFO Full Interrupt Mask SPI_IMR_RXFIM
- */
-#define SPI_IMR_RXFIM          FIELD(1, 1, 4)
-#define SPI_IMR_MSTIM          FIELD(1, 1, 5)
-
diff --git a/arch/unicore32/include/mach/regs-uart.h b/arch/unicore32/include/mach/regs-uart.h
deleted file mode 100644 (file)
index 9fa6b19..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-/*
- * PKUnity Universal Asynchronous Receiver/Transmitter (UART) Registers
- */
diff --git a/arch/unicore32/include/mach/regs-umal.h b/arch/unicore32/include/mach/regs-umal.h
deleted file mode 100644 (file)
index 7023089..0000000
+++ /dev/null
@@ -1,230 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity Ultra Media Access Layer (UMAL) Ethernet MAC Registers
- */
-
-/* MAC module of UMAL */
-/* UMAL's MAC module includes G/MII interface, several additional PHY
- * interfaces, and MAC control sub-layer, which provides support for control
- * frames (e.g. PAUSE frames).
- */
-/*
- * TX/RX reset and control UMAL_CFG1
- */
-#define UMAL_CFG1              (PKUNITY_UMAL_BASE + 0x0000)
-/*
- * MAC interface mode control UMAL_CFG2
- */
-#define UMAL_CFG2              (PKUNITY_UMAL_BASE + 0x0004)
-/*
- * Inter Packet/Frame Gap UMAL_IPGIFG
- */
-#define UMAL_IPGIFG            (PKUNITY_UMAL_BASE + 0x0008)
-/*
- * Collision retry or backoff UMAL_HALFDUPLEX
- */
-#define UMAL_HALFDUPLEX                (PKUNITY_UMAL_BASE + 0x000c)
-/*
- * Maximum Frame Length UMAL_MAXFRAME
- */
-#define UMAL_MAXFRAME          (PKUNITY_UMAL_BASE + 0x0010)
-/*
- * Test Register UMAL_TESTREG
- */
-#define UMAL_TESTREG           (PKUNITY_UMAL_BASE + 0x001c)
-/*
- * MII Management Configure UMAL_MIICFG
- */
-#define UMAL_MIICFG            (PKUNITY_UMAL_BASE + 0x0020)
-/*
- * MII Management Command UMAL_MIICMD
- */
-#define UMAL_MIICMD            (PKUNITY_UMAL_BASE + 0x0024)
-/*
- * MII Management Address UMAL_MIIADDR
- */
-#define UMAL_MIIADDR           (PKUNITY_UMAL_BASE + 0x0028)
-/*
- * MII Management Control UMAL_MIICTRL
- */
-#define UMAL_MIICTRL           (PKUNITY_UMAL_BASE + 0x002c)
-/*
- * MII Management Status UMAL_MIISTATUS
- */
-#define UMAL_MIISTATUS         (PKUNITY_UMAL_BASE + 0x0030)
-/*
- * MII Management Indicator UMAL_MIIIDCT
- */
-#define UMAL_MIIIDCT           (PKUNITY_UMAL_BASE + 0x0034)
-/*
- * Interface Control UMAL_IFCTRL
- */
-#define UMAL_IFCTRL            (PKUNITY_UMAL_BASE + 0x0038)
-/*
- * Interface Status UMAL_IFSTATUS
- */
-#define UMAL_IFSTATUS          (PKUNITY_UMAL_BASE + 0x003c)
-/*
- * MAC address (high 4 bytes) UMAL_STADDR1
- */
-#define UMAL_STADDR1           (PKUNITY_UMAL_BASE + 0x0040)
-/*
- * MAC address (low 2 bytes) UMAL_STADDR2
- */
-#define UMAL_STADDR2           (PKUNITY_UMAL_BASE + 0x0044)
-
-/* FIFO MODULE OF UMAL */
-/* UMAL's FIFO module provides data queuing for increased system level
- * throughput
- */
-#define UMAL_FIFOCFG0          (PKUNITY_UMAL_BASE + 0x0048)
-#define UMAL_FIFOCFG1          (PKUNITY_UMAL_BASE + 0x004c)
-#define UMAL_FIFOCFG2          (PKUNITY_UMAL_BASE + 0x0050)
-#define UMAL_FIFOCFG3          (PKUNITY_UMAL_BASE + 0x0054)
-#define UMAL_FIFOCFG4          (PKUNITY_UMAL_BASE + 0x0058)
-#define UMAL_FIFOCFG5          (PKUNITY_UMAL_BASE + 0x005c)
-#define UMAL_FIFORAM0          (PKUNITY_UMAL_BASE + 0x0060)
-#define UMAL_FIFORAM1          (PKUNITY_UMAL_BASE + 0x0064)
-#define UMAL_FIFORAM2          (PKUNITY_UMAL_BASE + 0x0068)
-#define UMAL_FIFORAM3          (PKUNITY_UMAL_BASE + 0x006c)
-#define UMAL_FIFORAM4          (PKUNITY_UMAL_BASE + 0x0070)
-#define UMAL_FIFORAM5          (PKUNITY_UMAL_BASE + 0x0074)
-#define UMAL_FIFORAM6          (PKUNITY_UMAL_BASE + 0x0078)
-#define UMAL_FIFORAM7          (PKUNITY_UMAL_BASE + 0x007c)
-
-/* MAHBE MODULE OF UMAL */
-/* UMAL's MAHBE module interfaces to the host system through 32-bit AHB Master
- * and Slave ports. Registers within the M-AHBE provide Control and Status
- * information concerning these transfers.
- */
-/*
- * Transmit Control UMAL_DMATxCtrl
- */
-#define UMAL_DMATxCtrl         (PKUNITY_UMAL_BASE + 0x0180)
-/*
- * Pointer to TX Descriptor UMAL_DMATxDescriptor
- */
-#define UMAL_DMATxDescriptor   (PKUNITY_UMAL_BASE + 0x0184)
-/*
- * Status of Tx Packet Transfers UMAL_DMATxStatus
- */
-#define UMAL_DMATxStatus       (PKUNITY_UMAL_BASE + 0x0188)
-/*
- * Receive Control UMAL_DMARxCtrl
- */
-#define UMAL_DMARxCtrl         (PKUNITY_UMAL_BASE + 0x018c)
-/*
- * Pointer to Rx Descriptor UMAL_DMARxDescriptor
- */
-#define UMAL_DMARxDescriptor   (PKUNITY_UMAL_BASE + 0x0190)
-/*
- * Status of Rx Packet Transfers UMAL_DMARxStatus
- */
-#define UMAL_DMARxStatus       (PKUNITY_UMAL_BASE + 0x0194)
-/*
- * Interrupt Mask UMAL_DMAIntrMask
- */
-#define UMAL_DMAIntrMask       (PKUNITY_UMAL_BASE + 0x0198)
-/*
- * Interrupts, read only UMAL_DMAInterrupt
- */
-#define UMAL_DMAInterrupt      (PKUNITY_UMAL_BASE + 0x019c)
-
-/*
- * Commands for UMAL_CFG1 register
- */
-#define UMAL_CFG1_TXENABLE     FIELD(1, 1, 0)
-#define UMAL_CFG1_RXENABLE     FIELD(1, 1, 2)
-#define UMAL_CFG1_TXFLOWCTL    FIELD(1, 1, 4)
-#define UMAL_CFG1_RXFLOWCTL    FIELD(1, 1, 5)
-#define UMAL_CFG1_CONFLPBK     FIELD(1, 1, 8)
-#define UMAL_CFG1_RESET                FIELD(1, 1, 31)
-#define UMAL_CFG1_CONFFLCTL    (MAC_TX_FLOW_CTL | MAC_RX_FLOW_CTL)
-
-/*
- * Commands for UMAL_CFG2 register
- */
-#define UMAL_CFG2_FULLDUPLEX   FIELD(1, 1, 0)
-#define UMAL_CFG2_CRCENABLE    FIELD(1, 1, 1)
-#define UMAL_CFG2_PADCRC       FIELD(1, 1, 2)
-#define UMAL_CFG2_LENGTHCHECK  FIELD(1, 1, 4)
-#define UMAL_CFG2_MODEMASK     FMASK(2, 8)
-#define UMAL_CFG2_NIBBLEMODE   FIELD(1, 2, 8)
-#define UMAL_CFG2_BYTEMODE     FIELD(2, 2, 8)
-#define UMAL_CFG2_PREAMBLENMASK        FMASK(4, 12)
-#define UMAL_CFG2_DEFPREAMBLEN FIELD(7, 4, 12)
-#define UMAL_CFG2_FD100                (UMAL_CFG2_DEFPREAMBLEN | UMAL_CFG2_NIBBLEMODE \
-                               | UMAL_CFG2_LENGTHCHECK | UMAL_CFG2_PADCRC \
-                               | UMAL_CFG2_CRCENABLE | UMAL_CFG2_FULLDUPLEX)
-#define UMAL_CFG2_FD1000       (UMAL_CFG2_DEFPREAMBLEN | UMAL_CFG2_BYTEMODE \
-                               | UMAL_CFG2_LENGTHCHECK | UMAL_CFG2_PADCRC \
-                               | UMAL_CFG2_CRCENABLE | UMAL_CFG2_FULLDUPLEX)
-#define UMAL_CFG2_HD100                (UMAL_CFG2_DEFPREAMBLEN | UMAL_CFG2_NIBBLEMODE \
-                               | UMAL_CFG2_LENGTHCHECK | UMAL_CFG2_PADCRC \
-                               | UMAL_CFG2_CRCENABLE)
-
-/*
- * Command for UMAL_IFCTRL register
- */
-#define UMAL_IFCTRL_RESET      FIELD(1, 1, 31)
-
-/*
- * Command for UMAL_MIICFG register
- */
-#define UMAL_MIICFG_RESET      FIELD(1, 1, 31)
-
-/*
- * Command for UMAL_MIICMD register
- */
-#define UMAL_MIICMD_READ       FIELD(1, 1, 0)
-
-/*
- * Command for UMAL_MIIIDCT register
- */
-#define UMAL_MIIIDCT_BUSY      FIELD(1, 1, 0)
-#define UMAL_MIIIDCT_NOTVALID  FIELD(1, 1, 2)
-
-/*
- * Commands for DMATxCtrl registers
- */
-#define UMAL_DMA_Enable                FIELD(1, 1, 0)
-
-/*
- * Commands for DMARxCtrl registers
- */
-#define UMAL_DMAIntrMask_ENABLEHALFWORD        FIELD(1, 1, 16)
-
-/*
- * Command for DMARxStatus
- */
-#define CLR_RX_BUS_ERR         FIELD(1, 1, 3)
-#define CLR_RX_OVERFLOW                FIELD(1, 1, 2)
-#define CLR_RX_PKT             FIELD(1, 1, 0)
-
-/*
- * Command for DMATxStatus
- */
-#define CLR_TX_BUS_ERR         FIELD(1, 1, 3)
-#define CLR_TX_UNDERRUN                FIELD(1, 1, 1)
-#define CLR_TX_PKT             FIELD(1, 1, 0)
-
-/*
- * Commands for DMAIntrMask and DMAInterrupt register
- */
-#define INT_RX_MASK            FIELD(0xd, 4, 4)
-#define INT_TX_MASK            FIELD(0xb, 4, 0)
-
-#define INT_RX_BUS_ERR         FIELD(1, 1, 7)
-#define INT_RX_OVERFLOW                FIELD(1, 1, 6)
-#define INT_RX_PKT             FIELD(1, 1, 4)
-#define INT_TX_BUS_ERR         FIELD(1, 1, 3)
-#define INT_TX_UNDERRUN                FIELD(1, 1, 1)
-#define INT_TX_PKT             FIELD(1, 1, 0)
-
-/*
- * MACROS of UMAL's descriptors
- */
-#define UMAL_DESC_PACKETSIZE_EMPTY     FIELD(1, 1, 31)
-#define UMAL_DESC_PACKETSIZE_NONEMPTY  FIELD(0, 1, 31)
-#define UMAL_DESC_PACKETSIZE_SIZEMASK  FMASK(12, 0)
-
diff --git a/arch/unicore32/include/mach/regs-unigfx.h b/arch/unicore32/include/mach/regs-unigfx.h
deleted file mode 100644 (file)
index 553d115..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PKUnity UNIGFX Registers
- */
-
-#define UDE_BASE      (PKUNITY_UNIGFX_BASE + 0x1400)
-#define UGE_BASE      (PKUNITY_UNIGFX_BASE + 0x0000)
-
-/*
- * command reg for UNIGFX DE
- */
-/*
- * control reg UDE_CFG
- */
-#define UDE_CFG       (UDE_BASE + 0x0000)
-/*
- * framebuffer start address reg UDE_FSA
- */
-#define UDE_FSA       (UDE_BASE + 0x0004)
-/*
- * line size reg UDE_LS
- */
-#define UDE_LS        (UDE_BASE + 0x0008)
-/*
- * pitch size reg UDE_PS
- */
-#define UDE_PS        (UDE_BASE + 0x000C)
-/*
- * horizontal active time reg UDE_HAT
- */
-#define UDE_HAT       (UDE_BASE + 0x0010)
-/*
- * horizontal blank time reg UDE_HBT
- */
-#define UDE_HBT       (UDE_BASE + 0x0014)
-/*
- * horizontal sync time reg UDE_HST
- */
-#define UDE_HST       (UDE_BASE + 0x0018)
-/*
- * vertical active time reg UDE_VAT
- */
-#define UDE_VAT       (UDE_BASE + 0x001C)
-/*
- * vertical blank time reg UDE_VBT
- */
-#define UDE_VBT       (UDE_BASE + 0x0020)
-/*
- * vertical sync time reg UDE_VST
- */
-#define UDE_VST       (UDE_BASE + 0x0024)
-/*
- * cursor position UDE_CXY
- */
-#define UDE_CXY       (UDE_BASE + 0x0028)
-/*
- * cursor front color UDE_CC0
- */
-#define UDE_CC0       (UDE_BASE + 0x002C)
-/*
- * cursor background color UDE_CC1
- */
-#define UDE_CC1       (UDE_BASE + 0x0030)
-/*
- * video position UDE_VXY
- */
-#define UDE_VXY       (UDE_BASE + 0x0034)
-/*
- * video start address reg UDE_VSA
- */
-#define UDE_VSA       (UDE_BASE + 0x0040)
-/*
- * video size reg UDE_VS
- */
-#define UDE_VS        (UDE_BASE + 0x004C)
-
-/*
- * command reg for UNIGFX GE
- */
-/*
- * src xy reg UGE_SRCXY
- */
-#define UGE_SRCXY     (UGE_BASE + 0x0000)
-/*
- * dst xy reg UGE_DSTXY
- */
-#define UGE_DSTXY     (UGE_BASE + 0x0004)
-/*
- * pitch reg UGE_PITCH
- */
-#define UGE_PITCH     (UGE_BASE + 0x0008)
-/*
- * src start reg UGE_SRCSTART
- */
-#define UGE_SRCSTART  (UGE_BASE + 0x000C)
-/*
- * dst start reg UGE_DSTSTART
- */
-#define UGE_DSTSTART  (UGE_BASE + 0x0010)
-/*
- * width height reg UGE_WIDHEIGHT
- */
-#define UGE_WIDHEIGHT (UGE_BASE + 0x0014)
-/*
- * rop alpha reg UGE_ROPALPHA
- */
-#define UGE_ROPALPHA  (UGE_BASE + 0x0018)
-/*
- * front color UGE_FCOLOR
- */
-#define UGE_FCOLOR    (UGE_BASE + 0x001C)
-/*
- * background color UGE_BCOLOR
- */
-#define UGE_BCOLOR    (UGE_BASE + 0x0020)
-/*
- * src color key for high value UGE_SCH
- */
-#define UGE_SCH       (UGE_BASE + 0x0024)
-/*
- * dst color key for high value UGE_DCH
- */
-#define UGE_DCH       (UGE_BASE + 0x0028)
-/*
- * src color key for low value UGE_SCL
- */
-#define UGE_SCL       (UGE_BASE + 0x002C)
-/*
- * dst color key for low value UGE_DCL
- */
-#define UGE_DCL       (UGE_BASE + 0x0030)
-/*
- * clip 0 reg UGE_CLIP0
- */
-#define UGE_CLIP0     (UGE_BASE + 0x0034)
-/*
- * clip 1 reg UGE_CLIP1
- */
-#define UGE_CLIP1     (UGE_BASE + 0x0038)
-/*
- * command reg UGE_COMMAND
- */
-#define UGE_COMMAND   (UGE_BASE + 0x003C)
-/*
- * pattern 0 UGE_P0
- */
-#define UGE_P0        (UGE_BASE + 0x0040)
-#define UGE_P1        (UGE_BASE + 0x0044)
-#define UGE_P2        (UGE_BASE + 0x0048)
-#define UGE_P3        (UGE_BASE + 0x004C)
-#define UGE_P4        (UGE_BASE + 0x0050)
-#define UGE_P5        (UGE_BASE + 0x0054)
-#define UGE_P6        (UGE_BASE + 0x0058)
-#define UGE_P7        (UGE_BASE + 0x005C)
-#define UGE_P8        (UGE_BASE + 0x0060)
-#define UGE_P9        (UGE_BASE + 0x0064)
-#define UGE_P10       (UGE_BASE + 0x0068)
-#define UGE_P11       (UGE_BASE + 0x006C)
-#define UGE_P12       (UGE_BASE + 0x0070)
-#define UGE_P13       (UGE_BASE + 0x0074)
-#define UGE_P14       (UGE_BASE + 0x0078)
-#define UGE_P15       (UGE_BASE + 0x007C)
-#define UGE_P16       (UGE_BASE + 0x0080)
-#define UGE_P17       (UGE_BASE + 0x0084)
-#define UGE_P18       (UGE_BASE + 0x0088)
-#define UGE_P19       (UGE_BASE + 0x008C)
-#define UGE_P20       (UGE_BASE + 0x0090)
-#define UGE_P21       (UGE_BASE + 0x0094)
-#define UGE_P22       (UGE_BASE + 0x0098)
-#define UGE_P23       (UGE_BASE + 0x009C)
-#define UGE_P24       (UGE_BASE + 0x00A0)
-#define UGE_P25       (UGE_BASE + 0x00A4)
-#define UGE_P26       (UGE_BASE + 0x00A8)
-#define UGE_P27       (UGE_BASE + 0x00AC)
-#define UGE_P28       (UGE_BASE + 0x00B0)
-#define UGE_P29       (UGE_BASE + 0x00B4)
-#define UGE_P30       (UGE_BASE + 0x00B8)
-#define UGE_P31       (UGE_BASE + 0x00BC)
-
-#define UDE_CFG_DST_MASK       FMASK(2, 8)
-#define UDE_CFG_DST8            FIELD(0x0, 2, 8)
-#define UDE_CFG_DST16           FIELD(0x1, 2, 8)
-#define UDE_CFG_DST24           FIELD(0x2, 2, 8)
-#define UDE_CFG_DST32           FIELD(0x3, 2, 8)
-
-/*
- * GDEN enable UDE_CFG_GDEN_ENABLE
- */
-#define UDE_CFG_GDEN_ENABLE     FIELD(1, 1, 3)
-/*
- * VDEN enable UDE_CFG_VDEN_ENABLE
- */
-#define UDE_CFG_VDEN_ENABLE     FIELD(1, 1, 4)
-/*
- * CDEN enable UDE_CFG_CDEN_ENABLE
- */
-#define UDE_CFG_CDEN_ENABLE     FIELD(1, 1, 5)
-/*
- * TIMEUP enable UDE_CFG_TIMEUP_ENABLE
- */
-#define UDE_CFG_TIMEUP_ENABLE   FIELD(1, 1, 6)
diff --git a/arch/unicore32/include/mach/uncompress.h b/arch/unicore32/include/mach/uncompress.h
deleted file mode 100644 (file)
index 0c1a56a..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/include/mach/uncompress.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#ifndef __MACH_PUV3_UNCOMPRESS_H__
-#define __MACH_PUV3_UNCOMPRESS_H__
-
-#include <mach/hardware.h>
-#include <mach/ocd.h>
-
-extern char input_data[];
-extern char input_data_end[];
-
-static void arch_decomp_puts(const char *ptr)
-{
-       char c;
-
-       while ((c = *ptr++) != '\0') {
-               if (c == '\n')
-                       putc('\r');
-               putc(c);
-       }
-}
-#define ARCH_HAVE_DECOMP_PUTS
-
-#endif /* __MACH_PUV3_UNCOMPRESS_H__ */
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
deleted file mode 100644 (file)
index e784701..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += ucontext.h
diff --git a/arch/unicore32/include/uapi/asm/byteorder.h b/arch/unicore32/include/uapi/asm/byteorder.h
deleted file mode 100644 (file)
index 864fe48..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * linux/arch/unicore32/include/asm/byteorder.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * UniCore ONLY supports Little Endian mode; the data bus is connected such
- * that byte accesses appear as:
- *  0 = d0...d7, 1 = d8...d15, 2 = d16...d23, 3 = d24...d31
- * and word accesses (data or instruction) appear as:
- *  d0...d31
- */
-#ifndef __UNICORE_BYTEORDER_H__
-#define __UNICORE_BYTEORDER_H__
-
-#include <linux/byteorder/little_endian.h>
-
-#endif
-
diff --git a/arch/unicore32/include/uapi/asm/ptrace.h b/arch/unicore32/include/uapi/asm/ptrace.h
deleted file mode 100644 (file)
index 2820de8..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * linux/arch/unicore32/include/asm/ptrace.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef _UAPI__UNICORE_PTRACE_H__
-#define _UAPI__UNICORE_PTRACE_H__
-
-#define PTRACE_GET_THREAD_AREA 22
-
-/*
- * PSR bits
- */
-#define USER_MODE      0x00000010
-#define REAL_MODE      0x00000011
-#define INTR_MODE      0x00000012
-#define PRIV_MODE      0x00000013
-#define ABRT_MODE      0x00000017
-#define EXTN_MODE      0x0000001b
-#define SUSR_MODE      0x0000001f
-#define MODE_MASK      0x0000001f
-#define PSR_R_BIT      0x00000040
-#define PSR_I_BIT      0x00000080
-#define PSR_V_BIT      0x10000000
-#define PSR_C_BIT      0x20000000
-#define PSR_Z_BIT      0x40000000
-#define PSR_S_BIT      0x80000000
-
-/*
- * Groups of PSR bits
- */
-#define PSR_f          0xff000000      /* Flags                */
-#define PSR_c          0x000000ff      /* Control              */
-
-#ifndef __ASSEMBLY__
-
-/*
- * This struct defines the way the registers are stored on the
- * stack during a system call.  Note that sizeof(struct pt_regs)
- * has to be a multiple of 8.
- */
-struct pt_regs {
-       unsigned long uregs[34];
-};
-
-#define UCreg_asr              uregs[32]
-#define UCreg_pc               uregs[31]
-#define UCreg_lr               uregs[30]
-#define UCreg_sp               uregs[29]
-#define UCreg_ip               uregs[28]
-#define UCreg_fp               uregs[27]
-#define UCreg_26               uregs[26]
-#define UCreg_25               uregs[25]
-#define UCreg_24               uregs[24]
-#define UCreg_23               uregs[23]
-#define UCreg_22               uregs[22]
-#define UCreg_21               uregs[21]
-#define UCreg_20               uregs[20]
-#define UCreg_19               uregs[19]
-#define UCreg_18               uregs[18]
-#define UCreg_17               uregs[17]
-#define UCreg_16               uregs[16]
-#define UCreg_15               uregs[15]
-#define UCreg_14               uregs[14]
-#define UCreg_13               uregs[13]
-#define UCreg_12               uregs[12]
-#define UCreg_11               uregs[11]
-#define UCreg_10               uregs[10]
-#define UCreg_09               uregs[9]
-#define UCreg_08               uregs[8]
-#define UCreg_07               uregs[7]
-#define UCreg_06               uregs[6]
-#define UCreg_05               uregs[5]
-#define UCreg_04               uregs[4]
-#define UCreg_03               uregs[3]
-#define UCreg_02               uregs[2]
-#define UCreg_01               uregs[1]
-#define UCreg_00               uregs[0]
-#define UCreg_ORIG_00          uregs[33]
-
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _UAPI__UNICORE_PTRACE_H__ */
diff --git a/arch/unicore32/include/uapi/asm/sigcontext.h b/arch/unicore32/include/uapi/asm/sigcontext.h
deleted file mode 100644 (file)
index 79e56f2..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * linux/arch/unicore32/include/asm/sigcontext.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __UNICORE_SIGCONTEXT_H__
-#define __UNICORE_SIGCONTEXT_H__
-
-#include <asm/ptrace.h>
-/*
- * Signal context structure - contains all info to do with the state
- * before the signal handler was invoked.  Note: only add new entries
- * to the end of the structure.
- */
-struct sigcontext {
-       unsigned long trap_no;
-       unsigned long error_code;
-       unsigned long oldmask;
-       unsigned long fault_address;
-       struct pt_regs regs;
-};
-
-#endif
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
deleted file mode 100644 (file)
index 54a7378..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * linux/arch/unicore32/include/asm/unistd.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#define __ARCH_WANT_RENAMEAT
-#define __ARCH_WANT_SET_GET_RLIMIT
-#define __ARCH_WANT_STAT64
-#define __ARCH_WANT_TIME32_SYSCALLS
-
-/* Use the standard ABI for syscalls. */
-#include <asm-generic/unistd.h>
-#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/unicore32/kernel/Makefile b/arch/unicore32/kernel/Makefile
deleted file mode 100644 (file)
index 2f79aa5..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the linux kernel.
-#
-
-# Object file lists.
-obj-y                          := dma.o elf.o entry.o process.o ptrace.o
-obj-y                          += setup.o signal.o sys.o stacktrace.o traps.o
-
-obj-$(CONFIG_MODULES)          += ksyms.o module.o
-obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
-
-obj-$(CONFIG_UNICORE_FPU_F64)  += fpu-ucf64.o
-
-# obj-y for architecture PKUnity v3
-obj-$(CONFIG_ARCH_PUV3)                += clock.o irq.o time.o
-
-obj-$(CONFIG_PUV3_GPIO)                += gpio.o
-obj-$(CONFIG_PUV3_PM)          += pm.o sleep.o
-obj-$(CONFIG_HIBERNATION)      += hibernate.o hibernate_asm.o
-
-obj-$(CONFIG_PCI)              += pci.o
-
-# obj-y for specific machines
-obj-$(CONFIG_ARCH_PUV3)                += puv3-core.o
-obj-$(CONFIG_PUV3_NB0916)      += puv3-nb0916.o
-
-head-y                         := head.o
-obj-$(CONFIG_DEBUG_LL)         += debug.o
-
-extra-y                                := $(head-y) vmlinux.lds
diff --git a/arch/unicore32/kernel/asm-offsets.c b/arch/unicore32/kernel/asm-offsets.c
deleted file mode 100644 (file)
index f7d6722..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/asm-offsets.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-#include <linux/kbuild.h>
-#include <linux/suspend.h>
-#include <linux/thread_info.h>
-#include <asm/memory.h>
-#include <asm/suspend.h>
-
-/*
- * GCC 3.0, 3.1: general bad code generation.
- * GCC 3.2.0: incorrect function argument offset calculation.
- * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c
- *     (http://gcc.gnu.org/PR8896) and incorrect structure
- *             initialisation in fs/jffs2/erase.c
- */
-#if (__GNUC__ < 4)
-#error Your compiler should upgrade to uc4
-#error Known good compilers: 4.2.2
-#endif
-
-int main(void)
-{
-       DEFINE(TSK_ACTIVE_MM,   offsetof(struct task_struct, active_mm));
-       BLANK();
-       DEFINE(TI_FLAGS,        offsetof(struct thread_info, flags));
-       DEFINE(TI_PREEMPT,      offsetof(struct thread_info, preempt_count));
-       DEFINE(TI_ADDR_LIMIT,   offsetof(struct thread_info, addr_limit));
-       DEFINE(TI_TASK,         offsetof(struct thread_info, task));
-       DEFINE(TI_CPU,          offsetof(struct thread_info, cpu));
-       DEFINE(TI_CPU_SAVE,     offsetof(struct thread_info, cpu_context));
-       DEFINE(TI_USED_CP,      offsetof(struct thread_info, used_cp));
-#ifdef CONFIG_UNICORE_FPU_F64
-       DEFINE(TI_FPSTATE,      offsetof(struct thread_info, fpstate));
-#endif
-       BLANK();
-       DEFINE(S_R0,            offsetof(struct pt_regs, UCreg_00));
-       DEFINE(S_R1,            offsetof(struct pt_regs, UCreg_01));
-       DEFINE(S_R2,            offsetof(struct pt_regs, UCreg_02));
-       DEFINE(S_R3,            offsetof(struct pt_regs, UCreg_03));
-       DEFINE(S_R4,            offsetof(struct pt_regs, UCreg_04));
-       DEFINE(S_R5,            offsetof(struct pt_regs, UCreg_05));
-       DEFINE(S_R6,            offsetof(struct pt_regs, UCreg_06));
-       DEFINE(S_R7,            offsetof(struct pt_regs, UCreg_07));
-       DEFINE(S_R8,            offsetof(struct pt_regs, UCreg_08));
-       DEFINE(S_R9,            offsetof(struct pt_regs, UCreg_09));
-       DEFINE(S_R10,           offsetof(struct pt_regs, UCreg_10));
-       DEFINE(S_R11,           offsetof(struct pt_regs, UCreg_11));
-       DEFINE(S_R12,           offsetof(struct pt_regs, UCreg_12));
-       DEFINE(S_R13,           offsetof(struct pt_regs, UCreg_13));
-       DEFINE(S_R14,           offsetof(struct pt_regs, UCreg_14));
-       DEFINE(S_R15,           offsetof(struct pt_regs, UCreg_15));
-       DEFINE(S_R16,           offsetof(struct pt_regs, UCreg_16));
-       DEFINE(S_R17,           offsetof(struct pt_regs, UCreg_17));
-       DEFINE(S_R18,           offsetof(struct pt_regs, UCreg_18));
-       DEFINE(S_R19,           offsetof(struct pt_regs, UCreg_19));
-       DEFINE(S_R20,           offsetof(struct pt_regs, UCreg_20));
-       DEFINE(S_R21,           offsetof(struct pt_regs, UCreg_21));
-       DEFINE(S_R22,           offsetof(struct pt_regs, UCreg_22));
-       DEFINE(S_R23,           offsetof(struct pt_regs, UCreg_23));
-       DEFINE(S_R24,           offsetof(struct pt_regs, UCreg_24));
-       DEFINE(S_R25,           offsetof(struct pt_regs, UCreg_25));
-       DEFINE(S_R26,           offsetof(struct pt_regs, UCreg_26));
-       DEFINE(S_FP,            offsetof(struct pt_regs, UCreg_fp));
-       DEFINE(S_IP,            offsetof(struct pt_regs, UCreg_ip));
-       DEFINE(S_SP,            offsetof(struct pt_regs, UCreg_sp));
-       DEFINE(S_LR,            offsetof(struct pt_regs, UCreg_lr));
-       DEFINE(S_PC,            offsetof(struct pt_regs, UCreg_pc));
-       DEFINE(S_PSR,           offsetof(struct pt_regs, UCreg_asr));
-       DEFINE(S_OLD_R0,        offsetof(struct pt_regs, UCreg_ORIG_00));
-       DEFINE(S_FRAME_SIZE,    sizeof(struct pt_regs));
-       BLANK();
-       DEFINE(VMA_VM_MM,       offsetof(struct vm_area_struct, vm_mm));
-       DEFINE(VMA_VM_FLAGS,    offsetof(struct vm_area_struct, vm_flags));
-       BLANK();
-       DEFINE(VM_EXEC,         VM_EXEC);
-       BLANK();
-       DEFINE(PAGE_SZ,         PAGE_SIZE);
-       BLANK();
-       DEFINE(SYS_ERROR0,      0x9f0000);
-       BLANK();
-       DEFINE(PBE_ADDRESS,             offsetof(struct pbe, address));
-       DEFINE(PBE_ORIN_ADDRESS,        offsetof(struct pbe, orig_address));
-       DEFINE(PBE_NEXT,                offsetof(struct pbe, next));
-       DEFINE(SWSUSP_CPU,              offsetof(struct swsusp_arch_regs, \
-                                                       cpu_context));
-#ifdef CONFIG_UNICORE_FPU_F64
-       DEFINE(SWSUSP_FPSTATE,          offsetof(struct swsusp_arch_regs, \
-                                                       fpstate));
-#endif
-       BLANK();
-       DEFINE(DMA_BIDIRECTIONAL,       DMA_BIDIRECTIONAL);
-       DEFINE(DMA_TO_DEVICE,           DMA_TO_DEVICE);
-       DEFINE(DMA_FROM_DEVICE,         DMA_FROM_DEVICE);
-       return 0;
-}
diff --git a/arch/unicore32/kernel/clock.c b/arch/unicore32/kernel/clock.c
deleted file mode 100644 (file)
index 41df6be..0000000
+++ /dev/null
@@ -1,387 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/clock.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/string.h>
-#include <linux/clk.h>
-#include <linux/mutex.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-
-#include <mach/hardware.h>
-
-/*
- * Very simple clock implementation
- */
-struct clk {
-       struct list_head        node;
-       unsigned long           rate;
-       const char              *name;
-};
-
-static struct clk clk_ost_clk = {
-       .name           = "OST_CLK",
-       .rate           = CLOCK_TICK_RATE,
-};
-
-static struct clk clk_mclk_clk = {
-       .name           = "MAIN_CLK",
-};
-
-static struct clk clk_bclk32_clk = {
-       .name           = "BUS32_CLK",
-};
-
-static struct clk clk_ddr_clk = {
-       .name           = "DDR_CLK",
-};
-
-static struct clk clk_vga_clk = {
-       .name           = "VGA_CLK",
-};
-
-static LIST_HEAD(clocks);
-static DEFINE_MUTEX(clocks_mutex);
-
-struct clk *clk_get(struct device *dev, const char *id)
-{
-       struct clk *p, *clk = ERR_PTR(-ENOENT);
-
-       mutex_lock(&clocks_mutex);
-       list_for_each_entry(p, &clocks, node) {
-               if (strcmp(id, p->name) == 0) {
-                       clk = p;
-                       break;
-               }
-       }
-       mutex_unlock(&clocks_mutex);
-
-       return clk;
-}
-EXPORT_SYMBOL(clk_get);
-
-void clk_put(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_put);
-
-int clk_enable(struct clk *clk)
-{
-       return 0;
-}
-EXPORT_SYMBOL(clk_enable);
-
-void clk_disable(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_disable);
-
-unsigned long clk_get_rate(struct clk *clk)
-{
-       return clk->rate;
-}
-EXPORT_SYMBOL(clk_get_rate);
-
-struct {
-       unsigned long rate;
-       unsigned long cfg;
-       unsigned long div;
-} vga_clk_table[] = {
-       {.rate =  25175000, .cfg = 0x00002001, .div = 0x9},
-       {.rate =  31500000, .cfg = 0x00002001, .div = 0x7},
-       {.rate =  40000000, .cfg = 0x00003801, .div = 0x9},
-       {.rate =  49500000, .cfg = 0x00003801, .div = 0x7},
-       {.rate =  65000000, .cfg = 0x00002c01, .div = 0x4},
-       {.rate =  78750000, .cfg = 0x00002400, .div = 0x7},
-       {.rate = 108000000, .cfg = 0x00002c01, .div = 0x2},
-       {.rate = 106500000, .cfg = 0x00003c01, .div = 0x3},
-       {.rate =  50650000, .cfg = 0x00106400, .div = 0x9},
-       {.rate =  61500000, .cfg = 0x00106400, .div = 0xa},
-       {.rate =  85500000, .cfg = 0x00002800, .div = 0x6},
-};
-
-struct {
-       unsigned long mrate;
-       unsigned long prate;
-} mclk_clk_table[] = {
-       {.mrate = 500000000, .prate = 0x00109801},
-       {.mrate = 525000000, .prate = 0x00104C00},
-       {.mrate = 550000000, .prate = 0x00105000},
-       {.mrate = 575000000, .prate = 0x00105400},
-       {.mrate = 600000000, .prate = 0x00105800},
-       {.mrate = 625000000, .prate = 0x00105C00},
-       {.mrate = 650000000, .prate = 0x00106000},
-       {.mrate = 675000000, .prate = 0x00106400},
-       {.mrate = 700000000, .prate = 0x00106800},
-       {.mrate = 725000000, .prate = 0x00106C00},
-       {.mrate = 750000000, .prate = 0x00107000},
-       {.mrate = 775000000, .prate = 0x00107400},
-       {.mrate = 800000000, .prate = 0x00107800},
-};
-
-int clk_set_rate(struct clk *clk, unsigned long rate)
-{
-       if (clk == &clk_vga_clk) {
-               unsigned long pll_vgacfg, pll_vgadiv;
-               int ret, i;
-
-               /* lookup vga_clk_table */
-               ret = -EINVAL;
-               for (i = 0; i < ARRAY_SIZE(vga_clk_table); i++) {
-                       if (rate == vga_clk_table[i].rate) {
-                               pll_vgacfg = vga_clk_table[i].cfg;
-                               pll_vgadiv = vga_clk_table[i].div;
-                               ret = 0;
-                               break;
-                       }
-               }
-
-               if (ret)
-                       return ret;
-
-               if (readl(PM_PLLVGACFG) == pll_vgacfg)
-                       return 0;
-
-               /* set pll vga cfg reg. */
-               writel(pll_vgacfg, PM_PLLVGACFG);
-
-               writel(PM_PMCR_CFBVGA, PM_PMCR);
-               while ((readl(PM_PLLDFCDONE) & PM_PLLDFCDONE_VGADFC)
-                               != PM_PLLDFCDONE_VGADFC)
-                       udelay(100); /* about 1ms */
-
-               /* set div cfg reg. */
-               writel(readl(PM_PCGR) | PM_PCGR_VGACLK, PM_PCGR);
-
-               writel((readl(PM_DIVCFG) & ~PM_DIVCFG_VGACLK_MASK)
-                               | PM_DIVCFG_VGACLK(pll_vgadiv), PM_DIVCFG);
-
-               writel(readl(PM_SWRESET) | PM_SWRESET_VGADIV, PM_SWRESET);
-               while ((readl(PM_SWRESET) & PM_SWRESET_VGADIV)
-                               == PM_SWRESET_VGADIV)
-                       udelay(100); /* 65536 bclk32, about 320us */
-
-               writel(readl(PM_PCGR) & ~PM_PCGR_VGACLK, PM_PCGR);
-       }
-#ifdef CONFIG_CPU_FREQ
-       if (clk == &clk_mclk_clk) {
-               u32 pll_rate, divstatus = readl(PM_DIVSTATUS);
-               int ret, i;
-
-               /* lookup mclk_clk_table */
-               ret = -EINVAL;
-               for (i = 0; i < ARRAY_SIZE(mclk_clk_table); i++) {
-                       if (rate == mclk_clk_table[i].mrate) {
-                               pll_rate = mclk_clk_table[i].prate;
-                               clk_mclk_clk.rate = mclk_clk_table[i].mrate;
-                               ret = 0;
-                               break;
-                       }
-               }
-
-               if (ret)
-                       return ret;
-
-               if (clk_mclk_clk.rate)
-                       clk_bclk32_clk.rate = clk_mclk_clk.rate
-                               / (((divstatus & 0x0000f000) >> 12) + 1);
-
-               /* set pll sys cfg reg. */
-               writel(pll_rate, PM_PLLSYSCFG);
-
-               writel(PM_PMCR_CFBSYS, PM_PMCR);
-               while ((readl(PM_PLLDFCDONE) & PM_PLLDFCDONE_SYSDFC)
-                               != PM_PLLDFCDONE_SYSDFC)
-                       udelay(100);
-                       /* about 1ms */
-       }
-#endif
-       return 0;
-}
-EXPORT_SYMBOL(clk_set_rate);
-
-int clk_register(struct clk *clk)
-{
-       mutex_lock(&clocks_mutex);
-       list_add(&clk->node, &clocks);
-       mutex_unlock(&clocks_mutex);
-       printk(KERN_DEFAULT "PKUnity PM: %s %lu.%02luM\n", clk->name,
-               (clk->rate)/1000000, (clk->rate)/10000 % 100);
-       return 0;
-}
-EXPORT_SYMBOL(clk_register);
-
-void clk_unregister(struct clk *clk)
-{
-       mutex_lock(&clocks_mutex);
-       list_del(&clk->node);
-       mutex_unlock(&clocks_mutex);
-}
-EXPORT_SYMBOL(clk_unregister);
-
-struct {
-       unsigned long prate;
-       unsigned long rate;
-} pllrate_table[] = {
-       {.prate = 0x00002001, .rate = 250000000},
-       {.prate = 0x00104801, .rate = 250000000},
-       {.prate = 0x00104C01, .rate = 262500000},
-       {.prate = 0x00002401, .rate = 275000000},
-       {.prate = 0x00105001, .rate = 275000000},
-       {.prate = 0x00105401, .rate = 287500000},
-       {.prate = 0x00002801, .rate = 300000000},
-       {.prate = 0x00105801, .rate = 300000000},
-       {.prate = 0x00105C01, .rate = 312500000},
-       {.prate = 0x00002C01, .rate = 325000000},
-       {.prate = 0x00106001, .rate = 325000000},
-       {.prate = 0x00106401, .rate = 337500000},
-       {.prate = 0x00003001, .rate = 350000000},
-       {.prate = 0x00106801, .rate = 350000000},
-       {.prate = 0x00106C01, .rate = 362500000},
-       {.prate = 0x00003401, .rate = 375000000},
-       {.prate = 0x00107001, .rate = 375000000},
-       {.prate = 0x00107401, .rate = 387500000},
-       {.prate = 0x00003801, .rate = 400000000},
-       {.prate = 0x00107801, .rate = 400000000},
-       {.prate = 0x00107C01, .rate = 412500000},
-       {.prate = 0x00003C01, .rate = 425000000},
-       {.prate = 0x00108001, .rate = 425000000},
-       {.prate = 0x00108401, .rate = 437500000},
-       {.prate = 0x00004001, .rate = 450000000},
-       {.prate = 0x00108801, .rate = 450000000},
-       {.prate = 0x00108C01, .rate = 462500000},
-       {.prate = 0x00004401, .rate = 475000000},
-       {.prate = 0x00109001, .rate = 475000000},
-       {.prate = 0x00109401, .rate = 487500000},
-       {.prate = 0x00004801, .rate = 500000000},
-       {.prate = 0x00109801, .rate = 500000000},
-       {.prate = 0x00104C00, .rate = 525000000},
-       {.prate = 0x00002400, .rate = 550000000},
-       {.prate = 0x00105000, .rate = 550000000},
-       {.prate = 0x00105400, .rate = 575000000},
-       {.prate = 0x00002800, .rate = 600000000},
-       {.prate = 0x00105800, .rate = 600000000},
-       {.prate = 0x00105C00, .rate = 625000000},
-       {.prate = 0x00002C00, .rate = 650000000},
-       {.prate = 0x00106000, .rate = 650000000},
-       {.prate = 0x00106400, .rate = 675000000},
-       {.prate = 0x00003000, .rate = 700000000},
-       {.prate = 0x00106800, .rate = 700000000},
-       {.prate = 0x00106C00, .rate = 725000000},
-       {.prate = 0x00003400, .rate = 750000000},
-       {.prate = 0x00107000, .rate = 750000000},
-       {.prate = 0x00107400, .rate = 775000000},
-       {.prate = 0x00003800, .rate = 800000000},
-       {.prate = 0x00107800, .rate = 800000000},
-       {.prate = 0x00107C00, .rate = 825000000},
-       {.prate = 0x00003C00, .rate = 850000000},
-       {.prate = 0x00108000, .rate = 850000000},
-       {.prate = 0x00108400, .rate = 875000000},
-       {.prate = 0x00004000, .rate = 900000000},
-       {.prate = 0x00108800, .rate = 900000000},
-       {.prate = 0x00108C00, .rate = 925000000},
-       {.prate = 0x00004400, .rate = 950000000},
-       {.prate = 0x00109000, .rate = 950000000},
-       {.prate = 0x00109400, .rate = 975000000},
-       {.prate = 0x00004800, .rate = 1000000000},
-       {.prate = 0x00109800, .rate = 1000000000},
-};
-
-struct {
-       unsigned long prate;
-       unsigned long drate;
-} pddr_table[] = {
-       {.prate = 0x00100800, .drate = 44236800},
-       {.prate = 0x00100C00, .drate = 66355200},
-       {.prate = 0x00101000, .drate = 88473600},
-       {.prate = 0x00101400, .drate = 110592000},
-       {.prate = 0x00101800, .drate = 132710400},
-       {.prate = 0x00101C01, .drate = 154828800},
-       {.prate = 0x00102001, .drate = 176947200},
-       {.prate = 0x00102401, .drate = 199065600},
-       {.prate = 0x00102801, .drate = 221184000},
-       {.prate = 0x00102C01, .drate = 243302400},
-       {.prate = 0x00103001, .drate = 265420800},
-       {.prate = 0x00103401, .drate = 287539200},
-       {.prate = 0x00103801, .drate = 309657600},
-       {.prate = 0x00103C01, .drate = 331776000},
-       {.prate = 0x00104001, .drate = 353894400},
-};
-
-static int __init clk_init(void)
-{
-#ifdef CONFIG_PUV3_PM
-       u32 pllrate, divstatus = readl(PM_DIVSTATUS);
-       u32 pcgr_val = readl(PM_PCGR);
-       int i;
-
-       pcgr_val |= PM_PCGR_BCLKMME | PM_PCGR_BCLKH264E | PM_PCGR_BCLKH264D
-                       | PM_PCGR_HECLK | PM_PCGR_HDCLK;
-       writel(pcgr_val, PM_PCGR);
-
-       pllrate = readl(PM_PLLSYSSTATUS);
-
-       /* lookup pmclk_table */
-       clk_mclk_clk.rate = 0;
-       for (i = 0; i < ARRAY_SIZE(pllrate_table); i++) {
-               if (pllrate == pllrate_table[i].prate) {
-                       clk_mclk_clk.rate = pllrate_table[i].rate;
-                       break;
-               }
-       }
-
-       if (clk_mclk_clk.rate)
-               clk_bclk32_clk.rate = clk_mclk_clk.rate /
-                       (((divstatus & 0x0000f000) >> 12) + 1);
-
-       pllrate = readl(PM_PLLDDRSTATUS);
-
-       /* lookup pddr_table */
-       clk_ddr_clk.rate = 0;
-       for (i = 0; i < ARRAY_SIZE(pddr_table); i++) {
-               if (pllrate == pddr_table[i].prate) {
-                       clk_ddr_clk.rate = pddr_table[i].drate;
-                       break;
-               }
-       }
-
-       pllrate = readl(PM_PLLVGASTATUS);
-
-       /* lookup pvga_table */
-       clk_vga_clk.rate = 0;
-       for (i = 0; i < ARRAY_SIZE(pllrate_table); i++) {
-               if (pllrate == pllrate_table[i].prate) {
-                       clk_vga_clk.rate = pllrate_table[i].rate;
-                       break;
-               }
-       }
-
-       if (clk_vga_clk.rate)
-               clk_vga_clk.rate = clk_vga_clk.rate /
-                       (((divstatus & 0x00f00000) >> 20) + 1);
-
-       clk_register(&clk_vga_clk);
-#endif
-#ifdef CONFIG_ARCH_FPGA
-       clk_ddr_clk.rate = 33000000;
-       clk_mclk_clk.rate = 33000000;
-       clk_bclk32_clk.rate = 33000000;
-#endif
-       clk_register(&clk_ddr_clk);
-       clk_register(&clk_mclk_clk);
-       clk_register(&clk_bclk32_clk);
-       clk_register(&clk_ost_clk);
-       return 0;
-}
-core_initcall(clk_init);
diff --git a/arch/unicore32/kernel/debug-macro.S b/arch/unicore32/kernel/debug-macro.S
deleted file mode 100644 (file)
index 7e2da0d..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/kernel/debug-macro.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * Debugging macro include header
- */
-#include <generated/asm-offsets.h>
-#include <mach/hardware.h>
-
-               .macro  put_word_ocd, rd, rx=r16
-1001:          movc            \rx, p1.c0, #0
-               cand.a  \rx, #2
-               bne     1001b
-               movc            p1.c1, \rd, #1
-               .endm
-
-#ifdef CONFIG_DEBUG_OCD
-               /* debug using UniCore On-Chip-Debugger */
-               .macro  addruart, rx
-               .endm
-
-               .macro  senduart, rd, rx
-               put_word_ocd    \rd, \rx
-               .endm
-
-               .macro  busyuart, rd, rx
-               .endm
-
-               .macro  waituart, rd, rx
-               .endm
-#else
-#define UART_CLK_DEFAULT        3686400 * 20
-       /* Uartclk = MCLK/ 2, The MCLK on my board is 3686400 * 40  */
-#define BAUD_RATE_DEFAULT      115200
-       /* The baud rate of the serial port */
-
-#define UART_DIVISOR_DEFAULT   (UART_CLK_DEFAULT \
-                               / (16 * BAUD_RATE_DEFAULT) - 1)
-
-               .macro  addruart,rx
-               mrc     p0, #0, \rx, c1, c0
-               tst     \rx, #1                 @ MMU enabled?
-               moveq   \rx, #0xee000000        @ physical base address
-               movne   \rx, #0x6e000000        @ virtual address
-
-               @ We probe for the active serial port here
-               @ However, now we assume UART0 is active:       epip4d
-               @ We assume r1 and r2 can be clobbered.
-
-               movl    r2, #UART_DIVISOR_DEFAULT
-               mov     r1, #0x80
-               str     r1, [\rx, #UART_LCR_OFFSET]
-               and     r1, r2, #0xff00
-               mov     r1, r1, lsr #8
-               str     r1, [\rx, #UART_DLH_OFFSET]
-               and     r1, r2, #0xff
-               str     r1, [\rx, #UART_DLL_OFFSET]
-               mov     r1, #0x7
-               str     r1, [\rx, #UART_FCR_OFFSET]
-               mov     r1, #0x3
-               str     r1, [\rx, #UART_LCR_OFFSET]
-               mov     r1, #0x0
-               str     r1, [\rx, #UART_IER_OFFSET]
-               .endm
-
-               .macro  senduart,rd,rx
-               str     \rd, [\rx, #UART_THR_OFFSET]
-               .endm
-
-               .macro  waituart,rd,rx
-1001:          ldr     \rd, [\rx, #UART_LSR_OFFSET]
-               tst     \rd, #UART_LSR_THRE
-               beq     1001b
-               .endm
-
-               .macro  busyuart,rd,rx
-1001:          ldr     \rd, [\rx, #UART_LSR_OFFSET]
-               tst     \rd, #UART_LSR_TEMT
-               bne     1001b
-               .endm
-#endif
-
diff --git a/arch/unicore32/kernel/debug.S b/arch/unicore32/kernel/debug.S
deleted file mode 100644 (file)
index 13bc8c8..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/kernel/debug.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  32-bit debugging code
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-               .text
-
-/*
- * Some debugging routines (useful if you've got MM problems and
- * printk isn't working).  For DEBUGGING ONLY!!!  Do not leave
- * references to these in a production kernel!
- */
-#include "debug-macro.S"
-
-/*
- * Useful debugging routines
- */
-ENTRY(printhex8)
-               mov     r1, #8
-               b       printhex
-ENDPROC(printhex8)
-
-ENTRY(printhex4)
-               mov     r1, #4
-               b       printhex
-ENDPROC(printhex4)
-
-ENTRY(printhex2)
-               mov     r1, #2
-printhex:      adr     r2, hexbuf
-               add     r3, r2, r1
-               mov     r1, #0
-               stb     r1, [r3]
-1:             and     r1, r0, #15
-               mov     r0, r0 >> #4
-               csub.a  r1, #10
-               beg     2f
-               add     r1, r1, #'0' - 'a' + 10
-2:             add     r1, r1, #'a' - 10
-               stb.w   r1, [r3+], #-1
-               cxor.a  r3, r2
-               bne     1b
-               mov     r0, r2
-               b       printascii
-ENDPROC(printhex2)
-
-               .ltorg
-
-ENTRY(printascii)
-               addruart r3
-               b       2f
-1:             waituart r2, r3
-               senduart r1, r3
-               busyuart r2, r3
-               cxor.a  r1, #'\n'
-               cmoveq  r1, #'\r'
-               beq     1b
-2:             cxor.a  r0, #0
-               beq     3f
-               ldb.w   r1, [r0]+, #1
-               cxor.a  r1, #0
-               bne     1b
-3:             mov     pc, lr
-ENDPROC(printascii)
-
-ENTRY(printch)
-               addruart r3
-               mov     r1, r0
-               mov     r0, #0
-               b       1b
-ENDPROC(printch)
-
-hexbuf:                .space 16
-
diff --git a/arch/unicore32/kernel/dma.c b/arch/unicore32/kernel/dma.c
deleted file mode 100644 (file)
index 7a0e2d4..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/dma.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-#include <linux/io.h>
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <mach/dma.h>
-
-struct dma_channel {
-       char *name;
-       puv3_dma_prio prio;
-       void (*irq_handler)(int, void *);
-       void (*err_handler)(int, void *);
-       void *data;
-};
-
-static struct dma_channel dma_channels[MAX_DMA_CHANNELS];
-
-int puv3_request_dma(char *name, puv3_dma_prio prio,
-                        void (*irq_handler)(int, void *),
-                        void (*err_handler)(int, void *),
-                        void *data)
-{
-       unsigned long flags;
-       int i, found = 0;
-
-       /* basic sanity checks */
-       if (!name)
-               return -EINVAL;
-
-       local_irq_save(flags);
-
-       do {
-               /* try grabbing a DMA channel with the requested priority */
-               for (i = 0; i < MAX_DMA_CHANNELS; i++) {
-                       if ((dma_channels[i].prio == prio) &&
-                           !dma_channels[i].name) {
-                               found = 1;
-                               break;
-                       }
-               }
-               /* if requested prio group is full, try a hier priority */
-       } while (!found && prio--);
-
-       if (found) {
-               dma_channels[i].name = name;
-               dma_channels[i].irq_handler = irq_handler;
-               dma_channels[i].err_handler = err_handler;
-               dma_channels[i].data = data;
-       } else {
-               printk(KERN_WARNING "No more available DMA channels for %s\n",
-                               name);
-               i = -ENODEV;
-       }
-
-       local_irq_restore(flags);
-       return i;
-}
-EXPORT_SYMBOL(puv3_request_dma);
-
-void puv3_free_dma(int dma_ch)
-{
-       unsigned long flags;
-
-       if (!dma_channels[dma_ch].name) {
-               printk(KERN_CRIT
-                       "%s: trying to free channel %d which is already freed\n",
-                       __func__, dma_ch);
-               return;
-       }
-
-       local_irq_save(flags);
-       dma_channels[dma_ch].name = NULL;
-       dma_channels[dma_ch].err_handler = NULL;
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(puv3_free_dma);
-
-static irqreturn_t dma_irq_handler(int irq, void *dev_id)
-{
-       int i, dint;
-
-       dint = readl(DMAC_ITCSR);
-       for (i = 0; i < MAX_DMA_CHANNELS; i++) {
-               if (dint & DMAC_CHANNEL(i)) {
-                       struct dma_channel *channel = &dma_channels[i];
-
-                       /* Clear TC interrupt of channel i */
-                       writel(DMAC_CHANNEL(i), DMAC_ITCCR);
-                       writel(0, DMAC_ITCCR);
-
-                       if (channel->name && channel->irq_handler) {
-                               channel->irq_handler(i, channel->data);
-                       } else {
-                               /*
-                                * IRQ for an unregistered DMA channel:
-                                * let's clear the interrupts and disable it.
-                                */
-                               printk(KERN_WARNING "spurious IRQ for"
-                                               " DMA channel %d\n", i);
-                       }
-               }
-       }
-       return IRQ_HANDLED;
-}
-
-static irqreturn_t dma_err_handler(int irq, void *dev_id)
-{
-       int i, dint;
-
-       dint = readl(DMAC_IESR);
-       for (i = 0; i < MAX_DMA_CHANNELS; i++) {
-               if (dint & DMAC_CHANNEL(i)) {
-                       struct dma_channel *channel = &dma_channels[i];
-
-                       /* Clear Err interrupt of channel i */
-                       writel(DMAC_CHANNEL(i), DMAC_IECR);
-                       writel(0, DMAC_IECR);
-
-                       if (channel->name && channel->err_handler) {
-                               channel->err_handler(i, channel->data);
-                       } else {
-                               /*
-                                * IRQ for an unregistered DMA channel:
-                                * let's clear the interrupts and disable it.
-                                */
-                               printk(KERN_WARNING "spurious IRQ for"
-                                               " DMA channel %d\n", i);
-                       }
-               }
-       }
-       return IRQ_HANDLED;
-}
-
-int __init puv3_init_dma(void)
-{
-       int i, ret;
-
-       /* dma channel priorities on v8 processors:
-        * ch 0 - 1  <--> (0) DMA_PRIO_HIGH
-        * ch 2 - 3  <--> (1) DMA_PRIO_MEDIUM
-        * ch 4 - 5  <--> (2) DMA_PRIO_LOW
-        */
-       for (i = 0; i < MAX_DMA_CHANNELS; i++) {
-               puv3_stop_dma(i);
-               dma_channels[i].name = NULL;
-               dma_channels[i].prio = min((i & 0x7) >> 1, DMA_PRIO_LOW);
-       }
-
-       ret = request_irq(IRQ_DMA, dma_irq_handler, 0, "DMA", NULL);
-       if (ret) {
-               printk(KERN_CRIT "Can't register IRQ for DMA\n");
-               return ret;
-       }
-
-       ret = request_irq(IRQ_DMAERR, dma_err_handler, 0, "DMAERR", NULL);
-       if (ret) {
-               printk(KERN_CRIT "Can't register IRQ for DMAERR\n");
-               free_irq(IRQ_DMA, "DMA");
-               return ret;
-       }
-
-       return 0;
-}
-
-postcore_initcall(puv3_init_dma);
diff --git a/arch/unicore32/kernel/early_printk.c b/arch/unicore32/kernel/early_printk.c
deleted file mode 100644 (file)
index c00b671..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/early_printk.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/console.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <mach/ocd.h>
-
-/* On-Chip-Debugger functions */
-
-static void early_ocd_write(struct console *con, const char *s, unsigned n)
-{
-       while (*s && n-- > 0) {
-               if (*s == '\n')
-                       ocd_putc((int)'\r');
-               ocd_putc((int)*s);
-               s++;
-       }
-}
-
-static struct console early_ocd_console = {
-       .name =         "earlyocd",
-       .write =        early_ocd_write,
-       .flags =        CON_PRINTBUFFER,
-       .index =        -1,
-};
-
-static int __init setup_early_printk(char *buf)
-{
-       if (!buf || early_console)
-               return 0;
-
-       early_console = &early_ocd_console;
-       if (strstr(buf, "keep"))
-               early_console->flags &= ~CON_BOOT;
-       else
-               early_console->flags |= CON_BOOT;
-       register_console(early_console);
-       return 0;
-}
-early_param("earlyprintk", setup_early_printk);
diff --git a/arch/unicore32/kernel/elf.c b/arch/unicore32/kernel/elf.c
deleted file mode 100644 (file)
index 22adc65..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/elf.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/personality.h>
-#include <linux/binfmts.h>
-#include <linux/elf.h>
-
-int elf_check_arch(const struct elf32_hdr *x)
-{
-       /* Make sure it's an UniCore executable */
-       if (x->e_machine != EM_UNICORE)
-               return 0;
-
-       /* Make sure the entry address is reasonable */
-       if (x->e_entry & 3)
-               return 0;
-
-       return 1;
-}
-EXPORT_SYMBOL(elf_check_arch);
-
-void elf_set_personality(const struct elf32_hdr *x)
-{
-       unsigned int personality = PER_LINUX;
-
-       set_personality(personality);
-}
-EXPORT_SYMBOL(elf_set_personality);
diff --git a/arch/unicore32/kernel/entry.S b/arch/unicore32/kernel/entry.S
deleted file mode 100644 (file)
index b35dc83..0000000
+++ /dev/null
@@ -1,802 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/kernel/entry.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  Low-level vector interface routines
- */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-#include <asm/thread_info.h>
-#include <asm/memory.h>
-#include <asm/unistd.h>
-#include <generated/asm-offsets.h>
-#include "debug-macro.S"
-
-@
-@ Most of the stack format comes from struct pt_regs, but with
-@ the addition of 8 bytes for storing syscall args 5 and 6.
-@
-#define S_OFF          8
-
-/*
- * The SWI code relies on the fact that R0 is at the bottom of the stack
- * (due to slow/fast restore user regs).
- */
-#if S_R0 != 0
-#error "Please fix"
-#endif
-
-       .macro  zero_fp
-#ifdef CONFIG_FRAME_POINTER
-       mov     fp, #0
-#endif
-       .endm
-
-       .macro  alignment_trap, rtemp
-#ifdef CONFIG_ALIGNMENT_TRAP
-       ldw     \rtemp, .LCcralign
-       ldw     \rtemp, [\rtemp]
-       movc    p0.c1, \rtemp, #0
-#endif
-       .endm
-
-       .macro  load_user_sp_lr, rd, rtemp, offset = 0
-       mov     \rtemp, asr
-       xor     \rtemp, \rtemp, #(PRIV_MODE ^ SUSR_MODE)
-       mov.a   asr, \rtemp                     @ switch to the SUSR mode
-
-       ldw     sp, [\rd+], #\offset            @ load sp_user
-       ldw     lr, [\rd+], #\offset + 4        @ load lr_user
-
-       xor     \rtemp, \rtemp, #(PRIV_MODE ^ SUSR_MODE)
-       mov.a   asr, \rtemp                     @ switch back to the PRIV mode
-       .endm
-
-       .macro  priv_exit, rpsr
-       mov.a   bsr, \rpsr
-       ldm.w   (r0 - r15), [sp]+
-       ldm.b   (r16 - pc), [sp]+               @ load r0 - pc, asr
-       .endm
-
-       .macro  restore_user_regs, fast = 0, offset = 0
-       ldw     r1, [sp+], #\offset + S_PSR     @ get calling asr
-       ldw     lr, [sp+], #\offset + S_PC      @ get pc
-       mov.a   bsr, r1                         @ save in bsr_priv
-       .if     \fast
-       add     sp, sp, #\offset + S_R1         @ r0 is syscall return value
-       ldm.w   (r1 - r15), [sp]+               @ get calling r1 - r15
-       ldur    (r16 - lr), [sp]+               @ get calling r16 - lr
-       .else
-       ldm.w   (r0 - r15), [sp]+               @ get calling r0 - r15
-       ldur    (r16 - lr), [sp]+               @ get calling r16 - lr
-       .endif
-       nop
-       add     sp, sp, #S_FRAME_SIZE - S_R16
-       mov.a   pc, lr                          @ return
-                                               @ and move bsr_priv into asr
-       .endm
-
-       .macro  get_thread_info, rd
-       mov     \rd, sp >> #13
-       mov     \rd, \rd << #13
-       .endm
-
-       .macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-       ldw     \base, =(PKUNITY_INTC_BASE)
-       ldw     \irqstat, [\base+], #0xC        @ INTC_ICIP
-       ldw     \tmp,     [\base+], #0x4        @ INTC_ICMR
-       and.a   \irqstat, \irqstat, \tmp
-       beq     1001f
-       cntlz   \irqnr, \irqstat
-       rsub    \irqnr, \irqnr, #31
-1001:  /* EQ will be set if no irqs pending */
-       .endm
-
-#ifdef CONFIG_DEBUG_LL
-       .macro  printreg, reg, temp
-               adr     \temp, 901f
-               stm     (r0-r3), [\temp]+
-               stw     lr, [\temp+], #0x10
-               mov     r0, \reg
-               b.l     printhex8
-               mov     r0, #':'
-               b.l     printch
-               mov     r0, pc
-               b.l     printhex8
-               adr     r0, 902f
-               b.l     printascii
-               adr     \temp, 901f
-               ldm     (r0-r3), [\temp]+
-               ldw     lr, [\temp+], #0x10
-               b       903f
-901:   .word   0, 0, 0, 0, 0   @ r0-r3, lr
-902:   .asciz  ": epip4d\n"
-       .align
-903:
-       .endm
-#endif
-
-/*
- * These are the registers used in the syscall handler, and allow us to
- * have in theory up to 7 arguments to a function - r0 to r6.
- *
- * Note that tbl == why is intentional.
- *
- * We must set at least "tsk" and "why" when calling ret_with_reschedule.
- */
-scno   .req    r21             @ syscall number
-tbl    .req    r22             @ syscall table pointer
-why    .req    r22             @ Linux syscall (!= 0)
-tsk    .req    r23             @ current thread_info
-
-/*
- * Interrupt handling.  Preserves r17, r18, r19
- */
-       .macro  intr_handler
-1:     get_irqnr_and_base r0, r6, r5, lr
-       beq     2f
-       mov     r1, sp
-       @
-       @ routine called with r0 = irq number, r1 = struct pt_regs *
-       @
-       adr     lr, 1b
-       b       asm_do_IRQ
-2:
-       .endm
-
-/*
- * PRIV mode handlers
- */
-       .macro  priv_entry
-       sub     sp, sp, #(S_FRAME_SIZE - 4)
-       stm     (r1 - r15), [sp]+
-       add     r5, sp, #S_R15
-       stm     (r16 - r28), [r5]+
-
-       ldm     (r1 - r3), [r0]+
-       add     r5, sp, #S_SP - 4       @ here for interlock avoidance
-       mov     r4, #-1                 @  ""  ""      ""       ""
-       add     r0, sp, #(S_FRAME_SIZE - 4)
-       stw.w   r1, [sp+], #-4          @ save the "real" r0 copied
-                                       @ from the exception stack
-
-       mov     r1, lr
-
-       @
-       @ We are now ready to fill in the remaining blanks on the stack:
-       @
-       @  r0 - sp_priv
-       @  r1 - lr_priv
-       @  r2 - lr_<exception>, already fixed up for correct return/restart
-       @  r3 - bsr_<exception>
-       @  r4 - orig_r0 (see pt_regs definition in ptrace.h)
-       @
-       stm     (r0 - r4), [r5]+
-       .endm
-
-/*
- * User mode handlers
- *
- */
-       .macro  user_entry
-       sub     sp, sp, #S_FRAME_SIZE
-       stm     (r1 - r15), [sp+]
-       add     r4, sp, #S_R16
-       stm     (r16 - r28), [r4]+
-
-       ldm     (r1 - r3), [r0]+
-       add     r0, sp, #S_PC           @ here for interlock avoidance
-       mov     r4, #-1                 @  ""  ""     ""        ""
-
-       stw     r1, [sp]                @ save the "real" r0 copied
-                                       @ from the exception stack
-
-       @
-       @ We are now ready to fill in the remaining blanks on the stack:
-       @
-       @  r2 - lr_<exception>, already fixed up for correct return/restart
-       @  r3 - bsr_<exception>
-       @  r4 - orig_r0 (see pt_regs definition in ptrace.h)
-       @
-       @ Also, separately save sp_user and lr_user
-       @
-       stm     (r2 - r4), [r0]+
-       stur    (sp, lr), [r0-]
-
-       @
-       @ Enable the alignment trap while in kernel mode
-       @
-       alignment_trap r0
-
-       @
-       @ Clear FP to mark the first stack frame
-       @
-       zero_fp
-       .endm
-
-       .text
-
-@
-@ __invalid - generic code for failed exception
-@                      (re-entrant version of handlers)
-@
-__invalid:
-       sub     sp, sp, #S_FRAME_SIZE
-       stm     (r1 - r15), [sp+]
-       add     r1, sp, #S_R16
-       stm     (r16 - r28, sp, lr), [r1]+
-
-       zero_fp
-
-       ldm     (r4 - r6), [r0]+
-       add     r0, sp, #S_PC           @ here for interlock avoidance
-       mov     r7, #-1                 @  ""   ""    ""        ""
-       stw     r4, [sp]                @ save preserved r0
-       stm     (r5 - r7), [r0]+        @ lr_<exception>,
-                                       @ asr_<exception>, "old_r0"
-
-       mov     r0, sp
-       mov     r1, asr
-       b       bad_mode
-ENDPROC(__invalid)
-
-       .align  5
-__dabt_priv:
-       priv_entry
-
-       @
-       @ get ready to re-enable interrupts if appropriate
-       @
-       mov     r17, asr
-       cand.a  r3, #PSR_I_BIT
-       bne     1f
-       andn    r17, r17, #PSR_I_BIT
-1:
-
-       @
-       @ Call the processor-specific abort handler:
-       @
-       @  r2 - aborted context pc
-       @  r3 - aborted context asr
-       @
-       @ The abort handler must return the aborted address in r0, and
-       @ the fault status register in r1.
-       @
-       movc    r1, p0.c3, #0           @ get FSR
-       movc    r0, p0.c4, #0           @ get FAR
-
-       @
-       @ set desired INTR state, then call main handler
-       @
-       mov.a   asr, r17
-       mov     r2, sp
-       b.l     do_DataAbort
-
-       @
-       @ INTRs off again before pulling preserved data off the stack
-       @
-       disable_irq r0
-
-       @
-       @ restore BSR and restart the instruction
-       @
-       ldw     r2, [sp+], #S_PSR
-       priv_exit r2                            @ return from exception
-ENDPROC(__dabt_priv)
-
-       .align  5
-__intr_priv:
-       priv_entry
-
-       intr_handler
-
-       mov     r0, #0                          @ epip4d
-       movc    p0.c5, r0, #14
-       nop; nop; nop; nop; nop; nop; nop; nop
-
-       ldw     r4, [sp+], #S_PSR               @ irqs are already disabled
-
-       priv_exit r4                            @ return from exception
-ENDPROC(__intr_priv)
-
-       .ltorg
-
-       .align  5
-__extn_priv:
-       priv_entry
-
-       mov     r0, sp                          @ struct pt_regs *regs
-       mov     r1, asr
-       b       bad_mode                        @ not supported
-ENDPROC(__extn_priv)
-
-       .align  5
-__pabt_priv:
-       priv_entry
-
-       @
-       @ re-enable interrupts if appropriate
-       @
-       mov     r17, asr
-       cand.a  r3, #PSR_I_BIT
-       bne     1f
-       andn    r17, r17, #PSR_I_BIT
-1:
-
-       @
-       @ set args, then call main handler
-       @
-       @  r0 - address of faulting instruction
-       @  r1 - pointer to registers on stack
-       @
-       mov     r0, r2                  @ pass address of aborted instruction
-       mov     r1, #5
-       mov.a   asr, r17
-       mov     r2, sp                  @ regs
-       b.l     do_PrefetchAbort        @ call abort handler
-
-       @
-       @ INTRs off again before pulling preserved data off the stack
-       @
-       disable_irq r0
-
-       @
-       @ restore BSR and restart the instruction
-       @
-       ldw     r2, [sp+], #S_PSR
-       priv_exit r2                    @ return from exception
-ENDPROC(__pabt_priv)
-
-       .align  5
-.LCcralign:
-       .word   cr_alignment
-
-       .align  5
-__dabt_user:
-       user_entry
-
-#ifdef CONFIG_UNICORE_FPU_F64
-       cff     ip, s31
-       cand.a  ip, #0x08000000         @ FPU execption traps?
-       beq     209f
-
-       ldw     ip, [sp+], #S_PC
-       add     ip, ip, #4
-       stw     ip, [sp+], #S_PC
-       @
-       @ fall through to the emulation code, which returns using r19 if
-       @ it has emulated the instruction, or the more conventional lr
-       @ if we are to treat this as a real extended instruction
-       @
-       @  r0 - instruction
-       @
-1:     ldw.u   r0, [r2]
-       adr     r19, ret_from_exception
-       adr     lr, 209f
-       @
-       @ fallthrough to call do_uc_f64
-       @
-/*
- * Check whether the instruction is a co-processor instruction.
- * If yes, we need to call the relevant co-processor handler.
- *
- * Note that we don't do a full check here for the co-processor
- * instructions; all instructions with bit 27 set are well
- * defined.  The only instructions that should fault are the
- * co-processor instructions.
- *
- * Emulators may wish to make use of the following registers:
- *  r0  = instruction opcode.
- *  r2  = PC
- *  r19 = normal "successful" return address
- *  r20 = this threads thread_info structure.
- *  lr  = unrecognised instruction return address
- */
-       get_thread_info r20                     @ get current thread
-       and     r8, r0, #0x00003c00             @ mask out CP number
-       mov     r7, #1
-       stb     r7, [r20+], #TI_USED_CP + 2     @ set appropriate used_cp[]
-
-       @ F64 hardware support entry point.
-       @  r0  = faulted instruction
-       @  r19 = return address
-       @  r20 = fp_state
-       enable_irq r4
-       add     r20, r20, #TI_FPSTATE   @ r20 = workspace
-       cff     r1, s31                 @ get fpu FPSCR
-       andn    r2, r1, #0x08000000
-       ctf     r2, s31                 @ clear 27 bit
-       mov     r2, sp                  @ nothing stacked - regdump is at TOS
-       mov     lr, r19                 @ setup for a return to the user code
-
-       @ Now call the C code to package up the bounce to the support code
-       @   r0 holds the trigger instruction
-       @   r1 holds the FPSCR value
-       @   r2 pointer to register dump
-       b       ucf64_exchandler
-209:
-#endif
-       @
-       @ Call the processor-specific abort handler:
-       @
-       @  r2 - aborted context pc
-       @  r3 - aborted context asr
-       @
-       @ The abort handler must return the aborted address in r0, and
-       @ the fault status register in r1.
-       @
-       movc    r1, p0.c3, #0           @ get FSR
-       movc    r0, p0.c4, #0           @ get FAR
-
-       @
-       @ INTRs on, then call the main handler
-       @
-       enable_irq r2
-       mov     r2, sp
-       adr     lr, ret_from_exception
-       b       do_DataAbort
-ENDPROC(__dabt_user)
-
-       .align  5
-__intr_user:
-       user_entry
-
-       get_thread_info tsk
-
-       intr_handler
-
-       mov     why, #0
-       b       ret_to_user
-ENDPROC(__intr_user)
-
-       .ltorg
-
-       .align  5
-__extn_user:
-       user_entry
-
-       mov     r0, sp
-       mov     r1, asr
-       b       bad_mode
-ENDPROC(__extn_user)
-
-       .align  5
-__pabt_user:
-       user_entry
-
-       mov     r0, r2                  @ pass address of aborted instruction.
-       mov     r1, #5
-       enable_irq r1                   @ Enable interrupts
-       mov     r2, sp                  @ regs
-       b.l     do_PrefetchAbort        @ call abort handler
-       /* fall through */
-/*
- * This is the return code to user mode for abort handlers
- */
-ENTRY(ret_from_exception)
-       get_thread_info tsk
-       mov     why, #0
-       b       ret_to_user
-ENDPROC(__pabt_user)
-ENDPROC(ret_from_exception)
-
-/*
- * Register switch for UniCore V2 processors
- * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
- * previous and next are guaranteed not to be the same.
- */
-ENTRY(__switch_to)
-       add     ip, r1, #TI_CPU_SAVE
-       stm.w   (r4 - r15), [ip]+
-       stm.w   (r16 - r27, sp, lr), [ip]+
-
-#ifdef CONFIG_UNICORE_FPU_F64
-       add     ip, r1, #TI_FPSTATE
-       sfm.w   (f0  - f7 ), [ip]+
-       sfm.w   (f8  - f15), [ip]+
-       sfm.w   (f16 - f23), [ip]+
-       sfm.w   (f24 - f31), [ip]+
-       cff     r4, s31
-       stw     r4, [ip]
-
-       add     ip, r2, #TI_FPSTATE
-       lfm.w   (f0  - f7 ), [ip]+
-       lfm.w   (f8  - f15), [ip]+
-       lfm.w   (f16 - f23), [ip]+
-       lfm.w   (f24 - f31), [ip]+
-       ldw     r4, [ip]
-       ctf     r4, s31
-#endif
-       add     ip, r2, #TI_CPU_SAVE
-       ldm.w   (r4 - r15), [ip]+
-       ldm     (r16 - r27, sp, pc), [ip]+      @ Load all regs saved previously
-ENDPROC(__switch_to)
-
-       .align  5
-/*
- * This is the fast syscall return path.  We do as little as
- * possible here, and this includes saving r0 back into the PRIV
- * stack.
- */
-ret_fast_syscall:
-       disable_irq r1                          @ disable interrupts
-       ldw     r1, [tsk+], #TI_FLAGS
-       cand.a  r1, #_TIF_WORK_MASK
-       bne     fast_work_pending
-
-       @ fast_restore_user_regs
-       restore_user_regs fast = 1, offset = S_OFF
-
-/*
- * Ok, we need to do extra processing, enter the slow path.
- */
-fast_work_pending:
-       stw.w   r0, [sp+], #S_R0+S_OFF          @ returned r0
-work_pending:
-       cand.a  r1, #_TIF_NEED_RESCHED
-       bne     work_resched
-       mov     r0, sp                          @ 'regs'
-       mov     r2, why                         @ 'syscall'
-       cand.a  r1, #_TIF_SIGPENDING            @ delivering a signal?
-       cmovne  why, #0                         @ prevent further restarts
-       b.l     do_notify_resume
-       b       ret_slow_syscall                @ Check work again
-
-work_resched:
-       b.l     schedule
-/*
- * "slow" syscall return path.  "why" tells us if this was a real syscall.
- */
-ENTRY(ret_to_user)
-ret_slow_syscall:
-       disable_irq r1                          @ disable interrupts
-       get_thread_info tsk                     @ epip4d, one path error?!
-       ldw     r1, [tsk+], #TI_FLAGS
-       cand.a  r1, #_TIF_WORK_MASK
-       bne     work_pending
-no_work_pending:
-       @ slow_restore_user_regs
-       restore_user_regs fast = 0, offset = 0
-ENDPROC(ret_to_user)
-
-/*
- * This is how we return from a fork.
- */
-ENTRY(ret_from_fork)
-       b.l     schedule_tail
-       b       ret_slow_syscall
-ENDPROC(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
-       b.l     schedule_tail
-       mov     r0, r5
-       adr     lr, ret_slow_syscall
-       mov     pc, r4
-ENDPROC(ret_from_kernel_thread)
-
-/*=============================================================================
- * SWI handler
- *-----------------------------------------------------------------------------
- */
-       .align  5
-ENTRY(vector_swi)
-       sub     sp, sp, #S_FRAME_SIZE
-       stm     (r0 - r15), [sp]+               @ Calling r0 - r15
-       add     r8, sp, #S_R16
-       stm     (r16 - r28), [r8]+              @ Calling r16 - r28
-       add     r8, sp, #S_PC
-       stur    (sp, lr), [r8-]                 @ Calling sp, lr
-       mov     r8, bsr                         @ called from non-REAL mode
-       stw     lr, [sp+], #S_PC                @ Save calling PC
-       stw     r8, [sp+], #S_PSR               @ Save ASR
-       stw     r0, [sp+], #S_OLD_R0            @ Save OLD_R0
-       zero_fp
-
-       /*
-        * Get the system call number.
-        */
-       sub     ip, lr, #4
-       ldw.u   scno, [ip]                      @ get SWI instruction
-
-#ifdef CONFIG_ALIGNMENT_TRAP
-       ldw     ip, __cr_alignment
-       ldw     ip, [ip]
-       movc    p0.c1, ip, #0                   @ update control register
-#endif
-       enable_irq ip
-
-       get_thread_info tsk
-       ldw     tbl, =sys_call_table            @ load syscall table pointer
-
-       andn    scno, scno, #0xff000000         @ mask off SWI op-code
-       andn    scno, scno, #0x00ff0000         @ mask off SWI op-code
-
-       stm.w   (r4, r5), [sp-]                 @ push fifth and sixth args
-       ldw     ip, [tsk+], #TI_FLAGS           @ check for syscall tracing
-       cand.a  ip, #_TIF_SYSCALL_TRACE         @ are we tracing syscalls?
-       bne     __sys_trace
-
-       csub.a  scno, #__NR_syscalls            @ check upper syscall limit
-       adr     lr, ret_fast_syscall            @ return address
-       bea     1f
-       ldw     pc, [tbl+], scno << #2          @ call sys_* routine
-1:
-       add     r1, sp, #S_OFF
-2:     mov     why, #0                         @ no longer a real syscall
-       b       sys_ni_syscall                  @ not private func
-
-       /*
-        * This is the really slow path.  We're going to be doing
-        * context switches, and waiting for our parent to respond.
-        */
-__sys_trace:
-       mov     r2, scno
-       add     r1, sp, #S_OFF
-       mov     r0, #0                          @ trace entry [IP = 0]
-       b.l     syscall_trace
-
-       adr     lr, __sys_trace_return          @ return address
-       mov     scno, r0                        @ syscall number (possibly new)
-       add     r1, sp, #S_R0 + S_OFF           @ pointer to regs
-       csub.a  scno, #__NR_syscalls            @ check upper syscall limit
-       bea     2b
-       ldm     (r0 - r3), [r1]+                @ have to reload r0 - r3
-       ldw     pc, [tbl+], scno << #2          @ call sys_* routine
-
-__sys_trace_return:
-       stw.w   r0, [sp+], #S_R0 + S_OFF        @ save returned r0
-       mov     r2, scno
-       mov     r1, sp
-       mov     r0, #1                          @ trace exit [IP = 1]
-       b.l     syscall_trace
-       b       ret_slow_syscall
-
-       .align  5
-#ifdef CONFIG_ALIGNMENT_TRAP
-       .type   __cr_alignment, #object
-__cr_alignment:
-       .word   cr_alignment
-#endif
-       .ltorg
-
-ENTRY(sys_rt_sigreturn)
-               add     r0, sp, #S_OFF
-               mov     why, #0         @ prevent syscall restart handling
-               b       __sys_rt_sigreturn
-ENDPROC(sys_rt_sigreturn)
-
-       __INIT
-
-/*
- * Vector stubs.
- *
- * This code is copied to 0xffff0200 so we can use branches in the
- * vectors, rather than ldr's.  Note that this code must not
- * exceed 0x300 bytes.
- *
- * Common stub entry macro:
- *   Enter in INTR mode, bsr = PRIV/USER ASR, lr = PRIV/USER PC
- *
- * SP points to a minimal amount of processor-private memory, the address
- * of which is copied into r0 for the mode specific abort handler.
- */
-       .macro  vector_stub, name, mode
-       .align  5
-
-vector_\name:
-       @
-       @ Save r0, lr_<exception> (parent PC) and bsr_<exception>
-       @ (parent ASR)
-       @
-       stw     r0, [sp]
-       stw     lr, [sp+], #4           @ save r0, lr
-       mov     lr, bsr
-       stw     lr, [sp+], #8           @ save bsr
-
-       @
-       @ Prepare for PRIV mode.  INTRs remain disabled.
-       @
-       mov     r0, asr
-       xor     r0, r0, #(\mode ^ PRIV_MODE)
-       mov.a   bsr, r0
-
-       @
-       @ the branch table must immediately follow this code
-       @
-       and     lr, lr, #0x03
-       add     lr, lr, #1
-       mov     r0, sp
-       ldw     lr, [pc+], lr << #2
-       mov.a   pc, lr                  @ branch to handler in PRIV mode
-ENDPROC(vector_\name)
-       .align  2
-       @ handler addresses follow this label
-       .endm
-
-       .globl  __stubs_start
-__stubs_start:
-/*
- * Interrupt dispatcher
- */
-       vector_stub     intr, INTR_MODE
-
-       .long   __intr_user                     @  0  (USER)
-       .long   __invalid                       @  1
-       .long   __invalid                       @  2
-       .long   __intr_priv                     @  3  (PRIV)
-
-/*
- * Data abort dispatcher
- * Enter in ABT mode, bsr = USER ASR, lr = USER PC
- */
-       vector_stub     dabt, ABRT_MODE
-
-       .long   __dabt_user                     @  0  (USER)
-       .long   __invalid                       @  1
-       .long   __invalid                       @  2  (INTR)
-       .long   __dabt_priv                     @  3  (PRIV)
-
-/*
- * Prefetch abort dispatcher
- * Enter in ABT mode, bsr = USER ASR, lr = USER PC
- */
-       vector_stub     pabt, ABRT_MODE
-
-       .long   __pabt_user                     @  0 (USER)
-       .long   __invalid                       @  1
-       .long   __invalid                       @  2 (INTR)
-       .long   __pabt_priv                     @  3 (PRIV)
-
-/*
- * Undef instr entry dispatcher
- * Enter in EXTN mode, bsr = PRIV/USER ASR, lr = PRIV/USER PC
- */
-       vector_stub     extn, EXTN_MODE
-
-       .long   __extn_user                     @  0 (USER)
-       .long   __invalid                       @  1
-       .long   __invalid                       @  2 (INTR)
-       .long   __extn_priv                     @  3 (PRIV)
-
-/*
- * We group all the following data together to optimise
- * for CPUs with separate I & D caches.
- */
-       .align  5
-
-.LCvswi:
-       .word   vector_swi
-
-       .globl  __stubs_end
-__stubs_end:
-
-       .equ    stubs_offset, __vectors_start + 0x200 - __stubs_start
-
-       .globl  __vectors_start
-__vectors_start:
-       jepriv  SYS_ERROR0
-       b       vector_extn + stubs_offset
-       ldw     pc, .LCvswi + stubs_offset
-       b       vector_pabt + stubs_offset
-       b       vector_dabt + stubs_offset
-       jepriv  SYS_ERROR0
-       b       vector_intr + stubs_offset
-       jepriv  SYS_ERROR0
-
-       .globl  __vectors_end
-__vectors_end:
-
-       .data
-
-       .globl  cr_alignment
-       .globl  cr_no_alignment
-cr_alignment:
-       .space  4
-cr_no_alignment:
-       .space  4
diff --git a/arch/unicore32/kernel/fpu-ucf64.c b/arch/unicore32/kernel/fpu-ucf64.c
deleted file mode 100644 (file)
index 85f0af2..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/fpu-ucf64.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/sched/signal.h>
-#include <linux/init.h>
-
-#include <asm/fpu-ucf64.h>
-
-/*
- * A special flag to tell the normalisation code not to normalise.
- */
-#define F64_NAN_FLAG   0x100
-
-/*
- * A bit pattern used to indicate the initial (unset) value of the
- * exception mask, in case nothing handles an instruction.  This
- * doesn't include the NAN flag, which get masked out before
- * we check for an error.
- */
-#define F64_EXCEPTION_ERROR    ((u32)-1 & ~F64_NAN_FLAG)
-
-/*
- * Since we aren't building with -mfpu=f64, we need to code
- * these instructions using their MRC/MCR equivalents.
- */
-#define f64reg(_f64_) #_f64_
-
-#define cff(_f64_) ({                  \
-       u32 __v;                        \
-       asm("cff %0, " f64reg(_f64_) "@ fmrx    %0, " #_f64_    \
-           : "=r" (__v) : : "cc");     \
-       __v;                            \
-       })
-
-#define ctf(_f64_, _var_)              \
-       asm("ctf %0, " f64reg(_f64_) "@ fmxr    " #_f64_ ", %0" \
-          : : "r" (_var_) : "cc")
-
-/*
- * Raise a SIGFPE for the current process.
- * sicode describes the signal being raised.
- */
-void ucf64_raise_sigfpe(struct pt_regs *regs)
-{
-       /*
-        * This is the same as NWFPE, because it's not clear what
-        * this is used for
-        */
-       current->thread.error_code = 0;
-       current->thread.trap_no = 6;
-
-       send_sig_fault(SIGFPE, FPE_FLTUNK,
-                      (void __user *)(instruction_pointer(regs) - 4),
-                      current);
-}
-
-/*
- * Handle exceptions of UniCore-F64.
- */
-void ucf64_exchandler(u32 inst, u32 fpexc, struct pt_regs *regs)
-{
-       u32 tmp = fpexc;
-       u32 exc = F64_EXCEPTION_ERROR & fpexc;
-
-       pr_debug("UniCore-F64: instruction %08x fpscr %08x\n",
-                       inst, fpexc);
-
-       if (exc & FPSCR_CMPINSTR_BIT) {
-               if (exc & FPSCR_CON)
-                       tmp |= FPSCR_CON;
-               else
-                       tmp &= ~(FPSCR_CON);
-               exc &= ~(FPSCR_CMPINSTR_BIT | FPSCR_CON);
-       } else {
-               pr_debug("UniCore-F64 Error: unhandled exceptions\n");
-               pr_debug("UniCore-F64 FPSCR 0x%08x INST 0x%08x\n",
-                               cff(FPSCR), inst);
-
-               ucf64_raise_sigfpe(regs);
-               return;
-       }
-
-       /*
-        * Update the FPSCR with the additional exception flags.
-        * Comparison instructions always return at least one of
-        * these flags set.
-        */
-       tmp &= ~(FPSCR_TRAP | FPSCR_IOS | FPSCR_OFS | FPSCR_UFS |
-                       FPSCR_IXS | FPSCR_HIS | FPSCR_IOC | FPSCR_OFC |
-                       FPSCR_UFC | FPSCR_IXC | FPSCR_HIC);
-
-       tmp |= exc;
-       ctf(FPSCR, tmp);
-}
-
-/*
- * F64 support code initialisation.
- */
-static int __init ucf64_init(void)
-{
-       ctf(FPSCR, 0x0);     /* FPSCR_UFE | FPSCR_NDE perhaps better */
-
-       printk(KERN_INFO "Enable UniCore-F64 support.\n");
-
-       return 0;
-}
-
-late_initcall(ucf64_init);
diff --git a/arch/unicore32/kernel/gpio.c b/arch/unicore32/kernel/gpio.c
deleted file mode 100644 (file)
index 36d395b..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/gpio.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-/* in FPGA, no GPIO support */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/gpio/driver.h>
-/* FIXME: needed for gpio_set_value() - convert to use descriptors or hogs */
-#include <linux/gpio.h>
-#include <mach/hardware.h>
-
-#ifdef CONFIG_LEDS
-#include <linux/leds.h>
-#include <linux/platform_device.h>
-
-static const struct gpio_led puv3_gpio_leds[] = {
-       { .name = "cpuhealth", .gpio = GPO_CPU_HEALTH, .active_low = 0,
-               .default_trigger = "heartbeat", },
-       { .name = "hdd_led", .gpio = GPO_HDD_LED, .active_low = 1,
-               .default_trigger = "disk-activity", },
-};
-
-static const struct gpio_led_platform_data puv3_gpio_led_data = {
-       .num_leds =     ARRAY_SIZE(puv3_gpio_leds),
-       .leds =         (void *) puv3_gpio_leds,
-};
-
-static struct platform_device puv3_gpio_gpio_leds = {
-       .name =         "leds-gpio",
-       .id =           -1,
-       .dev = {
-               .platform_data = (void *) &puv3_gpio_led_data,
-       }
-};
-
-static int __init puv3_gpio_leds_init(void)
-{
-       platform_device_register(&puv3_gpio_gpio_leds);
-       return 0;
-}
-
-device_initcall(puv3_gpio_leds_init);
-#endif
-
-static int puv3_gpio_get(struct gpio_chip *chip, unsigned offset)
-{
-       return !!(readl(GPIO_GPLR) & GPIO_GPIO(offset));
-}
-
-static void puv3_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
-{
-       if (value)
-               writel(GPIO_GPIO(offset), GPIO_GPSR);
-       else
-               writel(GPIO_GPIO(offset), GPIO_GPCR);
-}
-
-static int puv3_direction_input(struct gpio_chip *chip, unsigned offset)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       writel(readl(GPIO_GPDR) & ~GPIO_GPIO(offset), GPIO_GPDR);
-       local_irq_restore(flags);
-       return 0;
-}
-
-static int puv3_direction_output(struct gpio_chip *chip, unsigned offset,
-               int value)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       puv3_gpio_set(chip, offset, value);
-       writel(readl(GPIO_GPDR) | GPIO_GPIO(offset), GPIO_GPDR);
-       local_irq_restore(flags);
-       return 0;
-}
-
-static struct gpio_chip puv3_gpio_chip = {
-       .label                  = "gpio",
-       .direction_input        = puv3_direction_input,
-       .direction_output       = puv3_direction_output,
-       .set                    = puv3_gpio_set,
-       .get                    = puv3_gpio_get,
-       .base                   = 0,
-       .ngpio                  = GPIO_MAX + 1,
-};
-
-void __init puv3_init_gpio(void)
-{
-       writel(GPIO_DIR, GPIO_GPDR);
-#if    defined(CONFIG_PUV3_NB0916) || defined(CONFIG_PUV3_SMW0919)     \
-       || defined(CONFIG_PUV3_DB0913)
-       gpio_set_value(GPO_WIFI_EN, 1);
-       gpio_set_value(GPO_HDD_LED, 1);
-       gpio_set_value(GPO_VGA_EN, 1);
-       gpio_set_value(GPO_LCD_EN, 1);
-       gpio_set_value(GPO_CAM_PWR_EN, 0);
-       gpio_set_value(GPO_LCD_VCC_EN, 1);
-       gpio_set_value(GPO_SOFT_OFF, 1);
-       gpio_set_value(GPO_BT_EN, 1);
-       gpio_set_value(GPO_FAN_ON, 0);
-       gpio_set_value(GPO_SPKR, 0);
-       gpio_set_value(GPO_CPU_HEALTH, 1);
-       gpio_set_value(GPO_LAN_SEL, 1);
-/*
- * DO NOT modify the GPO_SET_V1 and GPO_SET_V2 in kernel
- *     gpio_set_value(GPO_SET_V1, 1);
- *     gpio_set_value(GPO_SET_V2, 1);
- */
-#endif
-       gpiochip_add_data(&puv3_gpio_chip, NULL);
-}
diff --git a/arch/unicore32/kernel/head.S b/arch/unicore32/kernel/head.S
deleted file mode 100644 (file)
index 9bbb866..0000000
+++ /dev/null
@@ -1,249 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/kernel/head.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-
-#include <asm/assembler.h>
-#include <asm/ptrace.h>
-#include <generated/asm-offsets.h>
-#include <asm/memory.h>
-#include <asm/thread_info.h>
-#include <asm/hwdef-copro.h>
-#include <asm/pgtable-hwdef.h>
-
-#if (PHYS_OFFSET & 0x003fffff)
-#error "PHYS_OFFSET must be at an even 4MiB boundary!"
-#endif
-
-#define KERNEL_RAM_VADDR       (PAGE_OFFSET + KERNEL_IMAGE_START)
-#define KERNEL_RAM_PADDR       (PHYS_OFFSET + KERNEL_IMAGE_START)
-
-#define KERNEL_PGD_PADDR       (KERNEL_RAM_PADDR - 0x1000)
-#define KERNEL_PGD_VADDR       (KERNEL_RAM_VADDR - 0x1000)
-
-#define KERNEL_START           KERNEL_RAM_VADDR
-#define KERNEL_END             _end
-
-/*
- * swapper_pg_dir is the virtual address of the initial page table.
- * We place the page tables 4K below KERNEL_RAM_VADDR.  Therefore, we must
- * make sure that KERNEL_RAM_VADDR is correctly set.  Currently, we expect
- * the least significant 16 bits to be 0x8000, but we could probably
- * relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x1000.
- */
-#if (KERNEL_RAM_VADDR & 0xffff) != 0x8000
-#error KERNEL_RAM_VADDR must start at 0xXXXX8000
-#endif
-
-       .globl  swapper_pg_dir
-       .equ    swapper_pg_dir, KERNEL_RAM_VADDR - 0x1000
-
-/*
- * Kernel startup entry point.
- * ---------------------------
- *
- * This is normally called from the decompressor code.  The requirements
- * are: MMU = off, D-cache = off, I-cache = dont care
- *
- * This code is mostly position independent, so if you link the kernel at
- * 0xc0008000, you call this at __pa(0xc0008000).
- */
-       __HEAD
-ENTRY(stext)
-       @ set asr
-       mov     r0, #PRIV_MODE                  @ ensure priv mode
-       or      r0, #PSR_R_BIT | PSR_I_BIT      @ disable irqs
-       mov.a   asr, r0
-
-       @ process identify
-       movc    r0, p0.c0, #0                   @ cpuid
-       movl    r1, 0xff00ffff                  @ mask
-       movl    r2, 0x4d000863                  @ value
-       and     r0, r1, r0
-       cxor.a  r0, r2
-       bne     __error_p                       @ invalid processor id
-
-       /*
-        * Clear the 4K level 1 swapper page table
-        */
-       movl    r0, #KERNEL_PGD_PADDR           @ page table address
-       mov     r1, #0
-       add     r2, r0, #0x1000
-101:   stw.w   r1, [r0]+, #4
-       stw.w   r1, [r0]+, #4
-       stw.w   r1, [r0]+, #4
-       stw.w   r1, [r0]+, #4
-       cxor.a  r0, r2
-       bne     101b
-
-       movl    r4, #KERNEL_PGD_PADDR           @ page table address
-       mov     r7, #PMD_TYPE_SECT | PMD_PRESENT        @ page size: section
-       or      r7, r7, #PMD_SECT_CACHEABLE             @ cacheable
-       or      r7, r7, #PMD_SECT_READ | PMD_SECT_WRITE | PMD_SECT_EXEC
-
-       /*
-        * Create identity mapping for first 4MB of kernel to
-        * cater for the MMU enable.  This identity mapping
-        * will be removed by paging_init().  We use our current program
-        * counter to determine corresponding section base address.
-        */
-       mov     r6, pc
-       mov     r6, r6 >> #22                   @ start of kernel section
-       or      r1, r7, r6 << #22               @ flags + kernel base
-       stw     r1, [r4+], r6 << #2             @ identity mapping
-
-       /*
-        * Now setup the pagetables for our kernel direct
-        * mapped region.
-        */
-       add     r0, r4,  #(KERNEL_START & 0xff000000) >> 20
-       stw.w   r1, [r0+], #(KERNEL_START & 0x00c00000) >> 20
-       movl    r6, #(KERNEL_END - 1)
-       add     r0, r0, #4
-       add     r6, r4, r6 >> #20
-102:   csub.a  r0, r6
-       add     r1, r1, #1 << 22
-       bua     103f
-       stw.w   r1, [r0]+, #4
-       b       102b
-103:
-       /*
-        * Then map first 4MB of ram in case it contains our boot params.
-        */
-       add     r0, r4, #PAGE_OFFSET >> 20
-       or      r6, r7, #(PHYS_OFFSET & 0xffc00000)
-       stw     r6, [r0]
-
-       ldw     r15, __switch_data              @ address to jump to after
-
-       /*
-        * Initialise TLB, Caches, and MMU state ready to switch the MMU
-        * on.
-        */
-       mov     r0, #0
-       movc    p0.c5, r0, #28                  @ cache invalidate all
-       nop8
-       movc    p0.c6, r0, #6                   @ TLB invalidate all
-       nop8
-
-       /*
-        * ..V. .... ..TB IDAM
-        * ..1. .... ..01 1111
-        */
-       movl    r0, #0x201f                     @ control register setting
-
-       /*
-        * Setup common bits before finally enabling the MMU.  Essentially
-        * this is just loading the page table pointer and domain access
-        * registers.
-        */
-       #ifndef CONFIG_ALIGNMENT_TRAP
-               andn    r0, r0, #CR_A
-       #endif
-       #ifdef CONFIG_CPU_DCACHE_DISABLE
-               andn    r0, r0, #CR_D
-       #endif
-       #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-               andn    r0, r0, #CR_B
-       #endif
-       #ifdef CONFIG_CPU_ICACHE_DISABLE
-               andn    r0, r0, #CR_I
-       #endif
-
-       movc    p0.c2, r4, #0                   @ set pgd
-       b       __turn_mmu_on
-ENDPROC(stext)
-
-/*
- * Enable the MMU.  This completely changes the structure of the visible
- * memory space.  You will not be able to trace execution through this.
- *
- *  r0  = cp#0 control register
- *  r15 = *virtual* address to jump to upon completion
- */
-       .align  5
-__turn_mmu_on:
-       mov     r0, r0
-       movc    p0.c1, r0, #0                   @ write control reg
-       nop                                     @ fetch inst by phys addr
-       mov     pc, r15
-       nop8                                    @ fetch inst by phys addr
-ENDPROC(__turn_mmu_on)
-
-/*
- * Setup the initial page tables.  We only setup the barest
- * amount which are required to get the kernel running, which
- * generally means mapping in the kernel code.
- *
- * r9  = cpuid
- * r10 = procinfo
- *
- * Returns:
- *  r0, r3, r6, r7 corrupted
- *  r4 = physical page table address
- */
-       .ltorg
-
-       .align  2
-       .type   __switch_data, %object
-__switch_data:
-       .long   __mmap_switched
-       .long   __bss_start                     @ r6
-       .long   _end                            @ r7
-       .long   cr_alignment                    @ r8
-       .long   init_thread_union + THREAD_START_SP @ sp
-
-/*
- * The following fragment of code is executed with the MMU on in MMU mode,
- * and uses absolute addresses; this is not position independent.
- *
- *  r0  = cp#0 control register
- */
-__mmap_switched:
-       adr     r3, __switch_data + 4
-
-       ldm.w   (r6, r7, r8), [r3]+
-       ldw     sp, [r3]
-
-       mov     fp, #0                          @ Clear BSS (and zero fp)
-203:   csub.a  r6, r7
-       bea     204f
-       stw.w   fp, [r6]+,#4
-       b       203b
-204:
-       andn    r1, r0, #CR_A                   @ Clear 'A' bit
-       stm     (r0, r1), [r8]+                 @ Save control register values
-       b       start_kernel
-ENDPROC(__mmap_switched)
-
-/*
- * Exception handling.  Something went wrong and we can't proceed.  We
- * ought to tell the user, but since we don't have any guarantee that
- * we're even running on the right architecture, we do virtually nothing.
- *
- * If CONFIG_DEBUG_LL is set we try to print out something about the error
- * and hope for the best (useful if bootloader fails to pass a proper
- * machine ID for example).
- */
-__error_p:
-#ifdef CONFIG_DEBUG_LL
-       adr     r0, str_p1
-       b.l     printascii
-       mov     r0, r9
-       b.l     printhex8
-       adr     r0, str_p2
-       b.l     printascii
-901:   nop8
-       b       901b
-str_p1:        .asciz  "\nError: unrecognized processor variant (0x"
-str_p2:        .asciz  ").\n"
-       .align
-#endif
-ENDPROC(__error_p)
-
diff --git a/arch/unicore32/kernel/hibernate.c b/arch/unicore32/kernel/hibernate.c
deleted file mode 100644 (file)
index 4cdf3c8..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  linux/arch/unicore32/kernel/hibernate.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/gfp.h>
-#include <linux/suspend.h>
-#include <linux/memblock.h>
-#include <linux/pgtable.h>
-
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/sections.h>
-#include <asm/suspend.h>
-
-#include "mach/pm.h"
-
-/* Pointer to the temporary resume page tables */
-pgd_t *resume_pg_dir;
-
-struct swsusp_arch_regs swsusp_arch_regs_cpu0;
-
-/*
- * Create a middle page table on a resume-safe page and put a pointer to it in
- * the given global directory entry.  This only returns the gd entry
- * in non-PAE compilation mode, since the middle layer is folded.
- */
-static pmd_t *resume_one_md_table_init(pgd_t *pgd)
-{
-       pud_t *pud;
-       p4d_t *p4d;
-       pmd_t *pmd_table;
-
-       p4d = p4d_offset(pgd, 0);
-       pud = pud_offset(p4d, 0);
-       pmd_table = pmd_offset(pud, 0);
-
-       return pmd_table;
-}
-
-/*
- * Create a page table on a resume-safe page and place a pointer to it in
- * a middle page directory entry.
- */
-static pte_t *resume_one_page_table_init(pmd_t *pmd)
-{
-       if (pmd_none(*pmd)) {
-               pte_t *page_table = (pte_t *)get_safe_page(GFP_ATOMIC);
-               if (!page_table)
-                       return NULL;
-
-               set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_KERNEL_TABLE));
-
-               BUG_ON(page_table != pte_offset_kernel(pmd, 0));
-
-               return page_table;
-       }
-
-       return pte_offset_kernel(pmd, 0);
-}
-
-/*
- * This maps the physical memory to kernel virtual address space, a total
- * of max_low_pfn pages, by creating page tables starting from address
- * PAGE_OFFSET.  The page tables are allocated out of resume-safe pages.
- */
-static int resume_physical_mapping_init(pgd_t *pgd_base)
-{
-       unsigned long pfn;
-       pgd_t *pgd;
-       pmd_t *pmd;
-       pte_t *pte;
-       int pgd_idx, pmd_idx;
-
-       pgd_idx = pgd_index(PAGE_OFFSET);
-       pgd = pgd_base + pgd_idx;
-       pfn = 0;
-
-       for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
-               pmd = resume_one_md_table_init(pgd);
-               if (!pmd)
-                       return -ENOMEM;
-
-               if (pfn >= max_low_pfn)
-                       continue;
-
-               for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
-                       pte_t *max_pte;
-
-                       if (pfn >= max_low_pfn)
-                               break;
-
-                       /* Map with normal page tables.
-                        * NOTE: We can mark everything as executable here
-                        */
-                       pte = resume_one_page_table_init(pmd);
-                       if (!pte)
-                               return -ENOMEM;
-
-                       max_pte = pte + PTRS_PER_PTE;
-                       for (; pte < max_pte; pte++, pfn++) {
-                               if (pfn >= max_low_pfn)
-                                       break;
-
-                               set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-                       }
-               }
-       }
-
-       return 0;
-}
-
-static inline void resume_init_first_level_page_table(pgd_t *pg_dir)
-{
-}
-
-int swsusp_arch_resume(void)
-{
-       int error;
-
-       resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
-       if (!resume_pg_dir)
-               return -ENOMEM;
-
-       resume_init_first_level_page_table(resume_pg_dir);
-       error = resume_physical_mapping_init(resume_pg_dir);
-       if (error)
-               return error;
-
-       /* We have got enough memory and from now on we cannot recover */
-       restore_image(resume_pg_dir, restore_pblist);
-       return 0;
-}
-
-/*
- *     pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
-       unsigned long begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
-       unsigned long end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
-
-       return (pfn >= begin_pfn) && (pfn < end_pfn);
-}
-
-void save_processor_state(void)
-{
-}
-
-void restore_processor_state(void)
-{
-       local_flush_tlb_all();
-}
diff --git a/arch/unicore32/kernel/hibernate_asm.S b/arch/unicore32/kernel/hibernate_asm.S
deleted file mode 100644 (file)
index a589bc1..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/kernel/hibernate_asm.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/sys.h>
-#include <linux/errno.h>
-#include <linux/linkage.h>
-#include <linux/pgtable.h>
-#include <generated/asm-offsets.h>
-#include <asm/page.h>
-#include <asm/assembler.h>
-
-@ restore_image(pgd_t *resume_pg_dir, struct pbe *restore_pblist)
-@ r0: resume_pg_dir
-@ r1: restore_pblist
-@ copy restore_pblist pages
-@ restore registers from swsusp_arch_regs_cpu0
-@
-ENTRY(restore_image)
-       sub     r0, r0, #PAGE_OFFSET
-       mov     r5, #0
-       movc    p0.c6, r5, #6   @invalidate ITLB & DTLB
-       movc    p0.c2, r0, #0
-       nop
-       nop
-       nop
-       nop
-       nop
-       nop
-       nop
-
-       .p2align 4,,7
-101:
-       csub.a  r1, #0
-       beq     109f
-
-       ldw     r6, [r1+], #PBE_ADDRESS
-       ldw     r7, [r1+], #PBE_ORIN_ADDRESS
-
-       movl    ip, #128
-102:   ldm.w   (r8 - r15), [r6]+
-       stm.w   (r8 - r15), [r7]+
-       sub.a   ip, ip, #1
-       bne     102b
-
-       ldw     r1, [r1+], #PBE_NEXT
-       b       101b
-
-       .p2align 4,,7
-109:
-       /* go back to the original page tables */
-       ldw     r0, =swapper_pg_dir
-       sub     r0, r0, #PAGE_OFFSET
-       mov     r5, #0
-       movc    p0.c6, r5, #6
-       movc    p0.c2, r0, #0
-       nop
-       nop
-       nop
-       nop
-       nop
-       nop
-       nop
-
-#ifdef CONFIG_UNICORE_FPU_F64
-       ldw     ip, 1f
-       add     ip, ip, #SWSUSP_FPSTATE
-       lfm.w   (f0  - f7 ), [ip]+
-       lfm.w   (f8  - f15), [ip]+
-       lfm.w   (f16 - f23), [ip]+
-       lfm.w   (f24 - f31), [ip]+
-       ldw     r4, [ip]
-       ctf     r4, s31
-#endif
-       mov     r0, #0x0
-       ldw     ip, 1f
-       add     ip, ip, #SWSUSP_CPU
-       ldm.w   (r4 - r15), [ip]+
-       ldm     (r16 - r27, sp, pc), [ip]+      @ Load all regs saved previously
-
-       .align  2
-1:     .long   swsusp_arch_regs_cpu0
-
-
-@ swsusp_arch_suspend()
-@ - prepare pc for resume, return from function without swsusp_save on resume
-@ - save registers in swsusp_arch_regs_cpu0
-@ - call swsusp_save write suspend image
-
-ENTRY(swsusp_arch_suspend)
-       ldw     ip, 1f
-       add     ip, ip, #SWSUSP_CPU
-       stm.w   (r4 - r15), [ip]+
-       stm.w   (r16 - r27, sp, lr), [ip]+
-
-#ifdef CONFIG_UNICORE_FPU_F64
-       ldw     ip, 1f
-       add     ip, ip, #SWSUSP_FPSTATE
-       sfm.w   (f0  - f7 ), [ip]+
-       sfm.w   (f8  - f15), [ip]+
-       sfm.w   (f16 - f23), [ip]+
-       sfm.w   (f24 - f31), [ip]+
-       cff     r4, s31
-       stw     r4, [ip]
-#endif
-       b       swsusp_save                     @ no return
-
-1:     .long   swsusp_arch_regs_cpu0
diff --git a/arch/unicore32/kernel/irq.c b/arch/unicore32/kernel/irq.c
deleted file mode 100644 (file)
index c014ae3..0000000
+++ /dev/null
@@ -1,371 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/irq.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/random.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/seq_file.h>
-#include <linux/errno.h>
-#include <linux/list.h>
-#include <linux/kallsyms.h>
-#include <linux/proc_fs.h>
-#include <linux/syscore_ops.h>
-
-#include <mach/hardware.h>
-
-#include "setup.h"
-
-/*
- * PKUnity GPIO edge detection for IRQs:
- * IRQs are generated on Falling-Edge, Rising-Edge, or both.
- * Use this instead of directly setting GRER/GFER.
- */
-static int GPIO_IRQ_rising_edge;
-static int GPIO_IRQ_falling_edge;
-static int GPIO_IRQ_mask = 0;
-
-#define GPIO_MASK(irq)         (1 << (irq - IRQ_GPIO0))
-
-static int puv3_gpio_type(struct irq_data *d, unsigned int type)
-{
-       unsigned int mask;
-
-       if (d->irq < IRQ_GPIOHIGH)
-               mask = 1 << d->irq;
-       else
-               mask = GPIO_MASK(d->irq);
-
-       if (type == IRQ_TYPE_PROBE) {
-               if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask)
-                       return 0;
-               type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING;
-       }
-
-       if (type & IRQ_TYPE_EDGE_RISING)
-               GPIO_IRQ_rising_edge |= mask;
-       else
-               GPIO_IRQ_rising_edge &= ~mask;
-       if (type & IRQ_TYPE_EDGE_FALLING)
-               GPIO_IRQ_falling_edge |= mask;
-       else
-               GPIO_IRQ_falling_edge &= ~mask;
-
-       writel(GPIO_IRQ_rising_edge & GPIO_IRQ_mask, GPIO_GRER);
-       writel(GPIO_IRQ_falling_edge & GPIO_IRQ_mask, GPIO_GFER);
-
-       return 0;
-}
-
-/*
- * GPIO IRQs must be acknowledged.  This is for IRQs from 0 to 7.
- */
-static void puv3_low_gpio_ack(struct irq_data *d)
-{
-       writel((1 << d->irq), GPIO_GEDR);
-}
-
-static void puv3_low_gpio_mask(struct irq_data *d)
-{
-       writel(readl(INTC_ICMR) & ~(1 << d->irq), INTC_ICMR);
-}
-
-static void puv3_low_gpio_unmask(struct irq_data *d)
-{
-       writel(readl(INTC_ICMR) | (1 << d->irq), INTC_ICMR);
-}
-
-static int puv3_low_gpio_wake(struct irq_data *d, unsigned int on)
-{
-       if (on)
-               writel(readl(PM_PWER) | (1 << d->irq), PM_PWER);
-       else
-               writel(readl(PM_PWER) & ~(1 << d->irq), PM_PWER);
-       return 0;
-}
-
-static struct irq_chip puv3_low_gpio_chip = {
-       .name           = "GPIO-low",
-       .irq_ack        = puv3_low_gpio_ack,
-       .irq_mask       = puv3_low_gpio_mask,
-       .irq_unmask     = puv3_low_gpio_unmask,
-       .irq_set_type   = puv3_gpio_type,
-       .irq_set_wake   = puv3_low_gpio_wake,
-};
-
-/*
- * IRQ8 (GPIO0 through 27) handler.  We enter here with the
- * irq_controller_lock held, and IRQs disabled.  Decode the IRQ
- * and call the handler.
- */
-static void puv3_gpio_handler(struct irq_desc *desc)
-{
-       unsigned int mask, irq;
-
-       mask = readl(GPIO_GEDR);
-       do {
-               /*
-                * clear down all currently active IRQ sources.
-                * We will be processing them all.
-                */
-               writel(mask, GPIO_GEDR);
-
-               irq = IRQ_GPIO0;
-               do {
-                       if (mask & 1)
-                               generic_handle_irq(irq);
-                       mask >>= 1;
-                       irq++;
-               } while (mask);
-               mask = readl(GPIO_GEDR);
-       } while (mask);
-}
-
-/*
- * GPIO0-27 edge IRQs need to be handled specially.
- * In addition, the IRQs are all collected up into one bit in the
- * interrupt controller registers.
- */
-static void puv3_high_gpio_ack(struct irq_data *d)
-{
-       unsigned int mask = GPIO_MASK(d->irq);
-
-       writel(mask, GPIO_GEDR);
-}
-
-static void puv3_high_gpio_mask(struct irq_data *d)
-{
-       unsigned int mask = GPIO_MASK(d->irq);
-
-       GPIO_IRQ_mask &= ~mask;
-
-       writel(readl(GPIO_GRER) & ~mask, GPIO_GRER);
-       writel(readl(GPIO_GFER) & ~mask, GPIO_GFER);
-}
-
-static void puv3_high_gpio_unmask(struct irq_data *d)
-{
-       unsigned int mask = GPIO_MASK(d->irq);
-
-       GPIO_IRQ_mask |= mask;
-
-       writel(GPIO_IRQ_rising_edge & GPIO_IRQ_mask, GPIO_GRER);
-       writel(GPIO_IRQ_falling_edge & GPIO_IRQ_mask, GPIO_GFER);
-}
-
-static int puv3_high_gpio_wake(struct irq_data *d, unsigned int on)
-{
-       if (on)
-               writel(readl(PM_PWER) | PM_PWER_GPIOHIGH, PM_PWER);
-       else
-               writel(readl(PM_PWER) & ~PM_PWER_GPIOHIGH, PM_PWER);
-       return 0;
-}
-
-static struct irq_chip puv3_high_gpio_chip = {
-       .name           = "GPIO-high",
-       .irq_ack        = puv3_high_gpio_ack,
-       .irq_mask       = puv3_high_gpio_mask,
-       .irq_unmask     = puv3_high_gpio_unmask,
-       .irq_set_type   = puv3_gpio_type,
-       .irq_set_wake   = puv3_high_gpio_wake,
-};
-
-/*
- * We don't need to ACK IRQs on the PKUnity unless they're GPIOs
- * this is for internal IRQs i.e. from 8 to 31.
- */
-static void puv3_mask_irq(struct irq_data *d)
-{
-       writel(readl(INTC_ICMR) & ~(1 << d->irq), INTC_ICMR);
-}
-
-static void puv3_unmask_irq(struct irq_data *d)
-{
-       writel(readl(INTC_ICMR) | (1 << d->irq), INTC_ICMR);
-}
-
-/*
- * Apart form GPIOs, only the RTC alarm can be a wakeup event.
- */
-static int puv3_set_wake(struct irq_data *d, unsigned int on)
-{
-       if (d->irq == IRQ_RTCAlarm) {
-               if (on)
-                       writel(readl(PM_PWER) | PM_PWER_RTC, PM_PWER);
-               else
-                       writel(readl(PM_PWER) & ~PM_PWER_RTC, PM_PWER);
-               return 0;
-       }
-       return -EINVAL;
-}
-
-static struct irq_chip puv3_normal_chip = {
-       .name           = "PKUnity-v3",
-       .irq_ack        = puv3_mask_irq,
-       .irq_mask       = puv3_mask_irq,
-       .irq_unmask     = puv3_unmask_irq,
-       .irq_set_wake   = puv3_set_wake,
-};
-
-static struct resource irq_resource = {
-       .name   = "irqs",
-       .start  = io_v2p(PKUNITY_INTC_BASE),
-       .end    = io_v2p(PKUNITY_INTC_BASE) + 0xFFFFF,
-};
-
-static struct puv3_irq_state {
-       unsigned int    saved;
-       unsigned int    icmr;
-       unsigned int    iclr;
-       unsigned int    iccr;
-} puv3_irq_state;
-
-static int puv3_irq_suspend(void)
-{
-       struct puv3_irq_state *st = &puv3_irq_state;
-
-       st->saved = 1;
-       st->icmr = readl(INTC_ICMR);
-       st->iclr = readl(INTC_ICLR);
-       st->iccr = readl(INTC_ICCR);
-
-       /*
-        * Disable all GPIO-based interrupts.
-        */
-       writel(readl(INTC_ICMR) & ~(0x1ff), INTC_ICMR);
-
-       /*
-        * Set the appropriate edges for wakeup.
-        */
-       writel(readl(PM_PWER) & GPIO_IRQ_rising_edge, GPIO_GRER);
-       writel(readl(PM_PWER) & GPIO_IRQ_falling_edge, GPIO_GFER);
-
-       /*
-        * Clear any pending GPIO interrupts.
-        */
-       writel(readl(GPIO_GEDR), GPIO_GEDR);
-
-       return 0;
-}
-
-static void puv3_irq_resume(void)
-{
-       struct puv3_irq_state *st = &puv3_irq_state;
-
-       if (st->saved) {
-               writel(st->iccr, INTC_ICCR);
-               writel(st->iclr, INTC_ICLR);
-
-               writel(GPIO_IRQ_rising_edge & GPIO_IRQ_mask, GPIO_GRER);
-               writel(GPIO_IRQ_falling_edge & GPIO_IRQ_mask, GPIO_GFER);
-
-               writel(st->icmr, INTC_ICMR);
-       }
-}
-
-static struct syscore_ops puv3_irq_syscore_ops = {
-       .suspend        = puv3_irq_suspend,
-       .resume         = puv3_irq_resume,
-};
-
-static int __init puv3_irq_init_syscore(void)
-{
-       register_syscore_ops(&puv3_irq_syscore_ops);
-       return 0;
-}
-
-device_initcall(puv3_irq_init_syscore);
-
-void __init init_IRQ(void)
-{
-       unsigned int irq;
-
-       request_resource(&iomem_resource, &irq_resource);
-
-       /* disable all IRQs */
-       writel(0, INTC_ICMR);
-
-       /* all IRQs are IRQ, not REAL */
-       writel(0, INTC_ICLR);
-
-       /* clear all GPIO edge detects */
-       writel(FMASK(8, 0) & ~FIELD(1, 1, GPI_SOFF_REQ), GPIO_GPIR);
-       writel(0, GPIO_GFER);
-       writel(0, GPIO_GRER);
-       writel(0x0FFFFFFF, GPIO_GEDR);
-
-       writel(1, INTC_ICCR);
-
-       for (irq = 0; irq < IRQ_GPIOHIGH; irq++) {
-               irq_set_chip(irq, &puv3_low_gpio_chip);
-               irq_set_handler(irq, handle_edge_irq);
-               irq_modify_status(irq,
-                       IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN,
-                       0);
-       }
-
-       for (irq = IRQ_GPIOHIGH + 1; irq < IRQ_GPIO0; irq++) {
-               irq_set_chip(irq, &puv3_normal_chip);
-               irq_set_handler(irq, handle_level_irq);
-               irq_modify_status(irq,
-                       IRQ_NOREQUEST | IRQ_NOAUTOEN,
-                       IRQ_NOPROBE);
-       }
-
-       for (irq = IRQ_GPIO0; irq <= IRQ_GPIO27; irq++) {
-               irq_set_chip(irq, &puv3_high_gpio_chip);
-               irq_set_handler(irq, handle_edge_irq);
-               irq_modify_status(irq,
-                       IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN,
-                       0);
-       }
-
-       /*
-        * Install handler for GPIO 0-27 edge detect interrupts
-        */
-       irq_set_chip(IRQ_GPIOHIGH, &puv3_normal_chip);
-       irq_set_chained_handler(IRQ_GPIOHIGH, puv3_gpio_handler);
-
-#ifdef CONFIG_PUV3_GPIO
-       puv3_init_gpio();
-#endif
-}
-
-/*
- * do_IRQ handles all hardware IRQ's.  Decoded IRQs should not
- * come via this function.  Instead, they should provide their
- * own 'handler'
- */
-asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
-{
-       struct pt_regs *old_regs = set_irq_regs(regs);
-
-       irq_enter();
-
-       /*
-        * Some hardware gives randomly wrong interrupts.  Rather
-        * than crashing, do something sensible.
-        */
-       if (unlikely(irq >= nr_irqs)) {
-               if (printk_ratelimit())
-                       printk(KERN_WARNING "Bad IRQ%u\n", irq);
-               ack_bad_irq(irq);
-       } else {
-               generic_handle_irq(irq);
-       }
-
-       irq_exit();
-       set_irq_regs(old_regs);
-}
-
diff --git a/arch/unicore32/kernel/ksyms.c b/arch/unicore32/kernel/ksyms.c
deleted file mode 100644 (file)
index 7314450..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/ksyms.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/in6.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-#include <linux/io.h>
-
-#include <asm/checksum.h>
-
-#include "ksyms.h"
-
-EXPORT_SYMBOL(find_first_bit);
-EXPORT_SYMBOL(find_first_zero_bit);
-EXPORT_SYMBOL(find_next_zero_bit);
-EXPORT_SYMBOL(find_next_bit);
-
-       /* platform dependent support */
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(__const_udelay);
-
-       /* string / mem functions */
-EXPORT_SYMBOL(strchr);
-EXPORT_SYMBOL(strrchr);
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(memchr);
-
-       /* user mem (segment) */
-EXPORT_SYMBOL(__strnlen_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-
-EXPORT_SYMBOL(copy_page);
-
-EXPORT_SYMBOL(raw_copy_from_user);
-EXPORT_SYMBOL(raw_copy_to_user);
-EXPORT_SYMBOL(__clear_user);
-
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__divsi3);
-EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(__modsi3);
-EXPORT_SYMBOL(__ucmpdi2);
-EXPORT_SYMBOL(__udivsi3);
-EXPORT_SYMBOL(__umodsi3);
-
diff --git a/arch/unicore32/kernel/ksyms.h b/arch/unicore32/kernel/ksyms.h
deleted file mode 100644 (file)
index 5d2d5ba..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * libgcc functions - functions that are used internally by the
- * compiler...  (prototypes are not correct though, but that
- * doesn't really matter since they're not versioned).
- */
-extern void __ashldi3(void);
-extern void __ashrdi3(void);
-extern void __divsi3(void);
-extern void __lshrdi3(void);
-extern void __modsi3(void);
-extern void __ucmpdi2(void);
-extern void __udivsi3(void);
-extern void __umodsi3(void);
diff --git a/arch/unicore32/kernel/module.c b/arch/unicore32/kernel/module.c
deleted file mode 100644 (file)
index 67c89ef..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/module.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/moduleloader.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-
-#include <asm/sections.h>
-
-void *module_alloc(unsigned long size)
-{
-       return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-                               GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-                               __builtin_return_address(0));
-}
-
-int
-apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
-              unsigned int relindex, struct module *module)
-{
-       Elf32_Shdr *symsec = sechdrs + symindex;
-       Elf32_Shdr *relsec = sechdrs + relindex;
-       Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
-       Elf32_Rel *rel = (void *)relsec->sh_addr;
-       unsigned int i;
-
-       for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) {
-               unsigned long loc;
-               Elf32_Sym *sym;
-               s32 offset;
-
-               offset = ELF32_R_SYM(rel->r_info);
-               if (offset < 0 || offset >
-                               (symsec->sh_size / sizeof(Elf32_Sym))) {
-                       printk(KERN_ERR "%s: bad relocation, "
-                                       "section %d reloc %d\n",
-                                       module->name, relindex, i);
-                       return -ENOEXEC;
-               }
-
-               sym = ((Elf32_Sym *)symsec->sh_addr) + offset;
-
-               if (rel->r_offset < 0 || rel->r_offset >
-                               dstsec->sh_size - sizeof(u32)) {
-                       printk(KERN_ERR "%s: out of bounds relocation, "
-                               "section %d reloc %d offset %d size %d\n",
-                               module->name, relindex, i, rel->r_offset,
-                               dstsec->sh_size);
-                       return -ENOEXEC;
-               }
-
-               loc = dstsec->sh_addr + rel->r_offset;
-
-               switch (ELF32_R_TYPE(rel->r_info)) {
-               case R_UNICORE_NONE:
-                       /* ignore */
-                       break;
-
-               case R_UNICORE_ABS32:
-                       *(u32 *)loc += sym->st_value;
-                       break;
-
-               case R_UNICORE_PC24:
-               case R_UNICORE_CALL:
-               case R_UNICORE_JUMP24:
-                       offset = (*(u32 *)loc & 0x00ffffff) << 2;
-                       if (offset & 0x02000000)
-                               offset -= 0x04000000;
-
-                       offset += sym->st_value - loc;
-                       if (offset & 3 ||
-                           offset <= (s32)0xfe000000 ||
-                           offset >= (s32)0x02000000) {
-                               printk(KERN_ERR
-                                      "%s: relocation out of range, section "
-                                      "%d reloc %d sym '%s'\n", module->name,
-                                      relindex, i, strtab + sym->st_name);
-                               return -ENOEXEC;
-                       }
-
-                       offset >>= 2;
-
-                       *(u32 *)loc &= 0xff000000;
-                       *(u32 *)loc |= offset & 0x00ffffff;
-                       break;
-
-               default:
-                       printk(KERN_ERR "%s: unknown relocation: %u\n",
-                              module->name, ELF32_R_TYPE(rel->r_info));
-                       return -ENOEXEC;
-               }
-       }
-       return 0;
-}
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
deleted file mode 100644 (file)
index 0d098aa..0000000
+++ /dev/null
@@ -1,371 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/pci.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  PCI bios-type initialisation for PCI machines
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/io.h>
-
-static int debug_pci;
-
-#define CONFIG_CMD(bus, devfn, where)  \
-       (0x80000000 | (bus->number << 16) | (devfn << 8) | (where & ~3))
-
-static int
-puv3_read_config(struct pci_bus *bus, unsigned int devfn, int where,
-                       int size, u32 *value)
-{
-       writel(CONFIG_CMD(bus, devfn, where), PCICFG_ADDR);
-       switch (size) {
-       case 1:
-               *value = (readl(PCICFG_DATA) >> ((where & 3) * 8)) & 0xFF;
-               break;
-       case 2:
-               *value = (readl(PCICFG_DATA) >> ((where & 2) * 8)) & 0xFFFF;
-               break;
-       case 4:
-               *value = readl(PCICFG_DATA);
-               break;
-       }
-       return PCIBIOS_SUCCESSFUL;
-}
-
-static int
-puv3_write_config(struct pci_bus *bus, unsigned int devfn, int where,
-                       int size, u32 value)
-{
-       writel(CONFIG_CMD(bus, devfn, where), PCICFG_ADDR);
-       switch (size) {
-       case 1:
-               writel((readl(PCICFG_DATA) & ~FMASK(8, (where&3)*8))
-                       | FIELD(value, 8, (where&3)*8), PCICFG_DATA);
-               break;
-       case 2:
-               writel((readl(PCICFG_DATA) & ~FMASK(16, (where&2)*8))
-                       | FIELD(value, 16, (where&2)*8), PCICFG_DATA);
-               break;
-       case 4:
-               writel(value, PCICFG_DATA);
-               break;
-       }
-       return PCIBIOS_SUCCESSFUL;
-}
-
-struct pci_ops pci_puv3_ops = {
-       .read  = puv3_read_config,
-       .write = puv3_write_config,
-};
-
-void pci_puv3_preinit(void)
-{
-       printk(KERN_DEBUG "PCI: PKUnity PCI Controller Initializing ...\n");
-       /* config PCI bridge base */
-       writel(io_v2p(PKUNITY_PCIBRI_BASE), PCICFG_BRIBASE);
-
-       writel(0, PCIBRI_AHBCTL0);
-       writel(io_v2p(PKUNITY_PCIBRI_BASE) | PCIBRI_BARx_MEM, PCIBRI_AHBBAR0);
-       writel(0xFFFF0000, PCIBRI_AHBAMR0);
-       writel(0, PCIBRI_AHBTAR0);
-
-       writel(PCIBRI_CTLx_AT, PCIBRI_AHBCTL1);
-       writel(io_v2p(PKUNITY_PCILIO_BASE) | PCIBRI_BARx_IO, PCIBRI_AHBBAR1);
-       writel(0xFFFF0000, PCIBRI_AHBAMR1);
-       writel(0x00000000, PCIBRI_AHBTAR1);
-
-       writel(PCIBRI_CTLx_PREF, PCIBRI_AHBCTL2);
-       writel(io_v2p(PKUNITY_PCIMEM_BASE) | PCIBRI_BARx_MEM, PCIBRI_AHBBAR2);
-       writel(0xF8000000, PCIBRI_AHBAMR2);
-       writel(0, PCIBRI_AHBTAR2);
-
-       writel(io_v2p(PKUNITY_PCIAHB_BASE) | PCIBRI_BARx_MEM, PCIBRI_BAR1);
-
-       writel(PCIBRI_CTLx_AT | PCIBRI_CTLx_PREF, PCIBRI_PCICTL0);
-       writel(io_v2p(PKUNITY_PCIAHB_BASE) | PCIBRI_BARx_MEM, PCIBRI_PCIBAR0);
-       writel(0xF8000000, PCIBRI_PCIAMR0);
-       writel(PKUNITY_SDRAM_BASE, PCIBRI_PCITAR0);
-
-       writel(readl(PCIBRI_CMD) | PCIBRI_CMD_IO | PCIBRI_CMD_MEM, PCIBRI_CMD);
-}
-
-static int pci_puv3_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
-       if (dev->bus->number == 0) {
-#ifdef CONFIG_ARCH_FPGA /* 4 pci slots */
-               if      (dev->devfn == 0x00)
-                       return IRQ_PCIINTA;
-               else if (dev->devfn == 0x08)
-                       return IRQ_PCIINTB;
-               else if (dev->devfn == 0x10)
-                       return IRQ_PCIINTC;
-               else if (dev->devfn == 0x18)
-                       return IRQ_PCIINTD;
-#endif
-#ifdef CONFIG_PUV3_DB0913 /* 3 pci slots */
-               if      (dev->devfn == 0x30)
-                       return IRQ_PCIINTB;
-               else if (dev->devfn == 0x60)
-                       return IRQ_PCIINTC;
-               else if (dev->devfn == 0x58)
-                       return IRQ_PCIINTD;
-#endif
-#if    defined(CONFIG_PUV3_NB0916) || defined(CONFIG_PUV3_SMW0919)
-               /* only support 2 pci devices */
-               if      (dev->devfn == 0x00)
-                       return IRQ_PCIINTC; /* sata */
-#endif
-       }
-       return -1;
-}
-
-/*
- * Only first 128MB of memory can be accessed via PCI.
- * We use GFP_DMA to allocate safe buffers to do map/unmap.
- * This is really ugly and we need a better way of specifying
- * DMA-capable regions of memory.
- */
-void __init puv3_pci_adjust_zones(unsigned long max_zone_pfn)
-{
-       unsigned int sz = SZ_128M >> PAGE_SHIFT;
-
-       max_zone_pfn[ZONE_DMA] = sz;
-}
-
-/*
- * If the bus contains any of these devices, then we must not turn on
- * parity checking of any kind.
- */
-static inline int pdev_bad_for_parity(struct pci_dev *dev)
-{
-       return 0;
-}
-
-/*
- * pcibios_fixup_bus - Called after each bus is probed,
- * but before its children are examined.
- */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-       struct pci_dev *dev;
-       u16 features = PCI_COMMAND_SERR
-               | PCI_COMMAND_PARITY
-               | PCI_COMMAND_FAST_BACK;
-
-       bus->resource[0] = &ioport_resource;
-       bus->resource[1] = &iomem_resource;
-
-       /*
-        * Walk the devices on this bus, working out what we can
-        * and can't support.
-        */
-       list_for_each_entry(dev, &bus->devices, bus_list) {
-               u16 status;
-
-               pci_read_config_word(dev, PCI_STATUS, &status);
-
-               /*
-                * If any device on this bus does not support fast back
-                * to back transfers, then the bus as a whole is not able
-                * to support them.  Having fast back to back transfers
-                * on saves us one PCI cycle per transaction.
-                */
-               if (!(status & PCI_STATUS_FAST_BACK))
-                       features &= ~PCI_COMMAND_FAST_BACK;
-
-               if (pdev_bad_for_parity(dev))
-                       features &= ~(PCI_COMMAND_SERR
-                                       | PCI_COMMAND_PARITY);
-
-               switch (dev->class >> 8) {
-               case PCI_CLASS_BRIDGE_PCI:
-                       pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &status);
-                       status |= PCI_BRIDGE_CTL_PARITY
-                               | PCI_BRIDGE_CTL_MASTER_ABORT;
-                       status &= ~(PCI_BRIDGE_CTL_BUS_RESET
-                               | PCI_BRIDGE_CTL_FAST_BACK);
-                       pci_write_config_word(dev, PCI_BRIDGE_CONTROL, status);
-                       break;
-
-               case PCI_CLASS_BRIDGE_CARDBUS:
-                       pci_read_config_word(dev, PCI_CB_BRIDGE_CONTROL,
-                                       &status);
-                       status |= PCI_CB_BRIDGE_CTL_PARITY
-                               | PCI_CB_BRIDGE_CTL_MASTER_ABORT;
-                       pci_write_config_word(dev, PCI_CB_BRIDGE_CONTROL,
-                                       status);
-                       break;
-               }
-       }
-
-       /*
-        * Now walk the devices again, this time setting them up.
-        */
-       list_for_each_entry(dev, &bus->devices, bus_list) {
-               u16 cmd;
-
-               pci_read_config_word(dev, PCI_COMMAND, &cmd);
-               cmd |= features;
-               pci_write_config_word(dev, PCI_COMMAND, cmd);
-
-               pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
-                                     L1_CACHE_BYTES >> 2);
-       }
-
-       /*
-        * Propagate the flags to the PCI bridge.
-        */
-       if (bus->self && bus->self->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-               if (features & PCI_COMMAND_FAST_BACK)
-                       bus->bridge_ctl |= PCI_BRIDGE_CTL_FAST_BACK;
-               if (features & PCI_COMMAND_PARITY)
-                       bus->bridge_ctl |= PCI_BRIDGE_CTL_PARITY;
-       }
-
-       /*
-        * Report what we did for this bus
-        */
-       printk(KERN_INFO "PCI: bus%d: Fast back to back transfers %sabled\n",
-               bus->number, (features & PCI_COMMAND_FAST_BACK) ? "en" : "dis");
-}
-EXPORT_SYMBOL(pcibios_fixup_bus);
-
-static struct resource busn_resource = {
-       .name   = "PCI busn",
-       .start  = 0,
-       .end    = 255,
-       .flags  = IORESOURCE_BUS,
-};
-
-static int __init pci_common_init(void)
-{
-       struct pci_bus *puv3_bus;
-       struct pci_host_bridge *bridge;
-       int ret;
-
-       bridge = pci_alloc_host_bridge(0);
-       if (!bridge)
-               return -ENOMEM;
-
-       pci_puv3_preinit();
-
-       pci_add_resource(&bridge->windows, &ioport_resource);
-       pci_add_resource(&bridge->windows, &iomem_resource);
-       pci_add_resource(&bridge->windows, &busn_resource);
-       bridge->sysdata = NULL;
-       bridge->busnr = 0;
-       bridge->ops = &pci_puv3_ops;
-       bridge->swizzle_irq = pci_common_swizzle;
-       bridge->map_irq = pci_puv3_map_irq;
-
-       /* Scan our single hose.  */
-       ret = pci_scan_root_bus_bridge(bridge);
-       if (ret) {
-               pci_free_host_bridge(bridge);
-               return;
-       }
-
-       puv3_bus = bridge->bus;
-
-       if (!puv3_bus)
-               panic("PCI: unable to scan bus!");
-
-       pci_bus_size_bridges(puv3_bus);
-       pci_bus_assign_resources(puv3_bus);
-       pci_bus_add_devices(puv3_bus);
-       return 0;
-}
-subsys_initcall(pci_common_init);
-
-char * __init pcibios_setup(char *str)
-{
-       if (!strcmp(str, "debug")) {
-               debug_pci = 1;
-               return NULL;
-       }
-       return str;
-}
-
-void pcibios_set_master(struct pci_dev *dev)
-{
-       /* No special bus mastering setup handling */
-}
-
-/*
- * From arch/i386/kernel/pci-i386.c:
- *
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might be mirrored at 0x0100-0x03ff..
- */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
-                               resource_size_t size, resource_size_t align)
-{
-       resource_size_t start = res->start;
-
-       if (res->flags & IORESOURCE_IO && start & 0x300)
-               start = (start + 0x3ff) & ~0x3ff;
-
-       start = (start + align - 1) & ~(align - 1);
-
-       return start;
-}
-
-/**
- * pcibios_enable_device - Enable I/O and memory.
- * @dev: PCI device to be enabled
- */
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-       u16 cmd, old_cmd;
-       int idx;
-       struct resource *r;
-
-       pci_read_config_word(dev, PCI_COMMAND, &cmd);
-       old_cmd = cmd;
-       for (idx = 0; idx < 6; idx++) {
-               /* Only set up the requested stuff */
-               if (!(mask & (1 << idx)))
-                       continue;
-
-               r = dev->resource + idx;
-               if (!r->start && r->end) {
-                       printk(KERN_ERR "PCI: Device %s not available because"
-                              " of resource collisions\n", pci_name(dev));
-                       return -EINVAL;
-               }
-               if (r->flags & IORESOURCE_IO)
-                       cmd |= PCI_COMMAND_IO;
-               if (r->flags & IORESOURCE_MEM)
-                       cmd |= PCI_COMMAND_MEMORY;
-       }
-
-       /*
-        * Bridges (eg, cardbus bridges) need to be fully enabled
-        */
-       if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
-               cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
-
-       if (cmd != old_cmd) {
-               printk("PCI: enabling device %s (%04x -> %04x)\n",
-                      pci_name(dev), old_cmd, cmd);
-               pci_write_config_word(dev, PCI_COMMAND, cmd);
-       }
-       return 0;
-}
diff --git a/arch/unicore32/kernel/pm.c b/arch/unicore32/kernel/pm.c
deleted file mode 100644 (file)
index 94b7f9d..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/pm.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/suspend.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-
-#include <mach/hardware.h>
-#include <mach/pm.h>
-
-#include "setup.h"
-
-struct puv3_cpu_pm_fns *puv3_cpu_pm_fns;
-static unsigned long *sleep_save;
-
-int puv3_pm_enter(suspend_state_t state)
-{
-       unsigned long sleep_save_checksum = 0, checksum = 0;
-       int i;
-
-       /* skip registers saving for standby */
-       if (state != PM_SUSPEND_STANDBY) {
-               puv3_cpu_pm_fns->save(sleep_save);
-               /* before sleeping, calculate and save a checksum */
-               for (i = 0; i < puv3_cpu_pm_fns->save_count - 1; i++)
-                       sleep_save_checksum += sleep_save[i];
-       }
-
-       /* *** go zzz *** */
-       puv3_cpu_pm_fns->enter(state);
-       cpu_init();
-#ifdef CONFIG_INPUT_KEYBOARD
-       puv3_ps2_init();
-#endif
-#ifdef CONFIG_PCI
-       pci_puv3_preinit();
-#endif
-       if (state != PM_SUSPEND_STANDBY) {
-               /* after sleeping, validate the checksum */
-               for (i = 0; i < puv3_cpu_pm_fns->save_count - 1; i++)
-                       checksum += sleep_save[i];
-
-               /* if invalid, display message and wait for a hardware reset */
-               if (checksum != sleep_save_checksum) {
-                       while (1)
-                               puv3_cpu_pm_fns->enter(state);
-               }
-               puv3_cpu_pm_fns->restore(sleep_save);
-       }
-
-       pr_debug("*** made it back from resume\n");
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(puv3_pm_enter);
-
-unsigned long sleep_phys_sp(void *sp)
-{
-       return virt_to_phys(sp);
-}
-
-static int puv3_pm_valid(suspend_state_t state)
-{
-       if (puv3_cpu_pm_fns)
-               return puv3_cpu_pm_fns->valid(state);
-
-       return -EINVAL;
-}
-
-static int puv3_pm_prepare(void)
-{
-       int ret = 0;
-
-       if (puv3_cpu_pm_fns && puv3_cpu_pm_fns->prepare)
-               ret = puv3_cpu_pm_fns->prepare();
-
-       return ret;
-}
-
-static void puv3_pm_finish(void)
-{
-       if (puv3_cpu_pm_fns && puv3_cpu_pm_fns->finish)
-               puv3_cpu_pm_fns->finish();
-}
-
-static struct platform_suspend_ops puv3_pm_ops = {
-       .valid          = puv3_pm_valid,
-       .enter          = puv3_pm_enter,
-       .prepare        = puv3_pm_prepare,
-       .finish         = puv3_pm_finish,
-};
-
-static int __init puv3_pm_init(void)
-{
-       if (!puv3_cpu_pm_fns) {
-               printk(KERN_ERR "no valid puv3_cpu_pm_fns defined\n");
-               return -EINVAL;
-       }
-
-       sleep_save = kmalloc_array(puv3_cpu_pm_fns->save_count,
-                                  sizeof(unsigned long),
-                                  GFP_KERNEL);
-       if (!sleep_save) {
-               printk(KERN_ERR "failed to alloc memory for pm save\n");
-               return -ENOMEM;
-       }
-
-       suspend_set_ops(&puv3_pm_ops);
-       return 0;
-}
-
-device_initcall(puv3_pm_init);
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
deleted file mode 100644 (file)
index b4fd3a6..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/process.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <stdarg.h>
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-#include <linux/sched/task.h>
-#include <linux/sched/task_stack.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/cpu.h>
-#include <linux/elfcore.h>
-#include <linux/pm.h>
-#include <linux/tick.h>
-#include <linux/utsname.h>
-#include <linux/uaccess.h>
-#include <linux/random.h>
-#include <linux/gpio.h>
-#include <linux/stacktrace.h>
-
-#include <asm/cacheflush.h>
-#include <asm/processor.h>
-#include <asm/stacktrace.h>
-
-#include "setup.h"
-
-static const char * const processor_modes[] = {
-       "UK00", "UK01", "UK02", "UK03", "UK04", "UK05", "UK06", "UK07",
-       "UK08", "UK09", "UK0A", "UK0B", "UK0C", "UK0D", "UK0E", "UK0F",
-       "USER", "REAL", "INTR", "PRIV", "UK14", "UK15", "UK16", "ABRT",
-       "UK18", "UK19", "UK1A", "EXTN", "UK1C", "UK1D", "UK1E", "SUSR"
-};
-
-void arch_cpu_idle(void)
-{
-       cpu_do_idle();
-       local_irq_enable();
-}
-
-void machine_halt(void)
-{
-       gpio_set_value(GPO_SOFT_OFF, 0);
-}
-
-/*
- * Function pointers to optional machine specific functions
- */
-void (*pm_power_off)(void) = NULL;
-EXPORT_SYMBOL(pm_power_off);
-
-void machine_power_off(void)
-{
-       if (pm_power_off)
-               pm_power_off();
-       machine_halt();
-}
-
-void machine_restart(char *cmd)
-{
-       /* Disable interrupts first */
-       local_irq_disable();
-
-       /*
-        * Tell the mm system that we are going to reboot -
-        * we may need it to insert some 1:1 mappings so that
-        * soft boot works.
-        */
-       setup_mm_for_reboot();
-
-       /* Clean and invalidate caches */
-       flush_cache_all();
-
-       /* Turn off caching */
-       cpu_proc_fin();
-
-       /* Push out any further dirty data, and ensure cache is empty */
-       flush_cache_all();
-
-       /*
-        * Now handle reboot code.
-        */
-       if (reboot_mode == REBOOT_SOFT) {
-               /* Jump into ROM at address 0xffff0000 */
-               cpu_reset(VECTORS_BASE);
-       } else {
-               writel(0x00002001, PM_PLLSYSCFG); /* cpu clk = 250M */
-               writel(0x00100800, PM_PLLDDRCFG); /* ddr clk =  44M */
-               writel(0x00002001, PM_PLLVGACFG); /* vga clk = 250M */
-
-               /* Use on-chip reset capability */
-               /* following instructions must be in one icache line */
-               __asm__ __volatile__(
-                       "       .align 5\n\t"
-                       "       stw     %1, [%0]\n\t"
-                       "201:   ldw     r0, [%0]\n\t"
-                       "       cmpsub.a        r0, #0\n\t"
-                       "       bne     201b\n\t"
-                       "       stw     %3, [%2]\n\t"
-                       "       nop; nop; nop\n\t"
-                       /* prefetch 3 instructions at most */
-                       :
-                       : "r" (PM_PMCR),
-                         "r" (PM_PMCR_CFBSYS | PM_PMCR_CFBDDR
-                               | PM_PMCR_CFBVGA),
-                         "r" (RESETC_SWRR),
-                         "r" (RESETC_SWRR_SRB)
-                       : "r0", "memory");
-       }
-
-       /*
-        * Whoops - the architecture was unable to reboot.
-        * Tell the user!
-        */
-       mdelay(1000);
-       printk(KERN_EMERG "Reboot failed -- System halted\n");
-       do { } while (1);
-}
-
-void __show_regs(struct pt_regs *regs)
-{
-       unsigned long flags;
-       char buf[64];
-
-       show_regs_print_info(KERN_DEFAULT);
-       printk("PC is at %pS\n", (void *)instruction_pointer(regs));
-       printk("LR is at %pS\n", (void *)regs->UCreg_lr);
-       printk(KERN_DEFAULT "pc : [<%08lx>]    lr : [<%08lx>]    psr: %08lx\n"
-              "sp : %08lx  ip : %08lx  fp : %08lx\n",
-               regs->UCreg_pc, regs->UCreg_lr, regs->UCreg_asr,
-               regs->UCreg_sp, regs->UCreg_ip, regs->UCreg_fp);
-       printk(KERN_DEFAULT "r26: %08lx  r25: %08lx  r24: %08lx\n",
-               regs->UCreg_26, regs->UCreg_25,
-               regs->UCreg_24);
-       printk(KERN_DEFAULT "r23: %08lx  r22: %08lx  r21: %08lx  r20: %08lx\n",
-               regs->UCreg_23, regs->UCreg_22,
-               regs->UCreg_21, regs->UCreg_20);
-       printk(KERN_DEFAULT "r19: %08lx  r18: %08lx  r17: %08lx  r16: %08lx\n",
-               regs->UCreg_19, regs->UCreg_18,
-               regs->UCreg_17, regs->UCreg_16);
-       printk(KERN_DEFAULT "r15: %08lx  r14: %08lx  r13: %08lx  r12: %08lx\n",
-               regs->UCreg_15, regs->UCreg_14,
-               regs->UCreg_13, regs->UCreg_12);
-       printk(KERN_DEFAULT "r11: %08lx  r10: %08lx  r9 : %08lx  r8 : %08lx\n",
-               regs->UCreg_11, regs->UCreg_10,
-               regs->UCreg_09, regs->UCreg_08);
-       printk(KERN_DEFAULT "r7 : %08lx  r6 : %08lx  r5 : %08lx  r4 : %08lx\n",
-               regs->UCreg_07, regs->UCreg_06,
-               regs->UCreg_05, regs->UCreg_04);
-       printk(KERN_DEFAULT "r3 : %08lx  r2 : %08lx  r1 : %08lx  r0 : %08lx\n",
-               regs->UCreg_03, regs->UCreg_02,
-               regs->UCreg_01, regs->UCreg_00);
-
-       flags = regs->UCreg_asr;
-       buf[0] = flags & PSR_S_BIT ? 'S' : 's';
-       buf[1] = flags & PSR_Z_BIT ? 'Z' : 'z';
-       buf[2] = flags & PSR_C_BIT ? 'C' : 'c';
-       buf[3] = flags & PSR_V_BIT ? 'V' : 'v';
-       buf[4] = '\0';
-
-       printk(KERN_DEFAULT "Flags: %s  INTR o%s  REAL o%s  Mode %s  Segment %s\n",
-               buf, interrupts_enabled(regs) ? "n" : "ff",
-               fast_interrupts_enabled(regs) ? "n" : "ff",
-               processor_modes[processor_mode(regs)],
-               uaccess_kernel() ? "kernel" : "user");
-       {
-               unsigned int ctrl;
-
-               buf[0] = '\0';
-               {
-                       unsigned int transbase;
-                       asm("movc %0, p0.c2, #0\n"
-                           : "=r" (transbase));
-                       snprintf(buf, sizeof(buf), "  Table: %08x", transbase);
-               }
-               asm("movc %0, p0.c1, #0\n" : "=r" (ctrl));
-
-               printk(KERN_DEFAULT "Control: %08x%s\n", ctrl, buf);
-       }
-}
-
-void show_regs(struct pt_regs *regs)
-{
-       printk(KERN_DEFAULT "\n");
-       printk(KERN_DEFAULT "Pid: %d, comm: %20s\n",
-                       task_pid_nr(current), current->comm);
-       __show_regs(regs);
-       __backtrace();
-}
-
-void flush_thread(void)
-{
-       struct thread_info *thread = current_thread_info();
-       struct task_struct *tsk = current;
-
-       memset(thread->used_cp, 0, sizeof(thread->used_cp));
-       memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
-#ifdef CONFIG_UNICORE_FPU_F64
-       memset(&thread->fpstate, 0, sizeof(struct fp_state));
-#endif
-}
-
-void release_thread(struct task_struct *dead_task)
-{
-}
-
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
-
-int
-copy_thread(unsigned long clone_flags, unsigned long stack_start,
-           unsigned long stk_sz, struct task_struct *p)
-{
-       struct thread_info *thread = task_thread_info(p);
-       struct pt_regs *childregs = task_pt_regs(p);
-
-       memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
-       thread->cpu_context.sp = (unsigned long)childregs;
-       if (unlikely(p->flags & PF_KTHREAD)) {
-               thread->cpu_context.pc = (unsigned long)ret_from_kernel_thread;
-               thread->cpu_context.r4 = stack_start;
-               thread->cpu_context.r5 = stk_sz;
-               memset(childregs, 0, sizeof(struct pt_regs));
-       } else {
-               thread->cpu_context.pc = (unsigned long)ret_from_fork;
-               *childregs = *current_pt_regs();
-               childregs->UCreg_00 = 0;
-               if (stack_start)
-                       childregs->UCreg_sp = stack_start;
-
-               if (clone_flags & CLONE_SETTLS)
-                       childregs->UCreg_16 = childregs->UCreg_03;
-       }
-       return 0;
-}
-
-/*
- * Fill in the task's elfregs structure for a core dump.
- */
-int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs)
-{
-       elf_core_copy_regs(elfregs, task_pt_regs(t));
-       return 1;
-}
-
-/*
- * fill in the fpe structure for a core dump...
- */
-int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fp)
-{
-       struct thread_info *thread = current_thread_info();
-       int used_math = thread->used_cp[1] | thread->used_cp[2];
-
-#ifdef CONFIG_UNICORE_FPU_F64
-       if (used_math)
-               memcpy(fp, &thread->fpstate, sizeof(*fp));
-#endif
-       return used_math != 0;
-}
-EXPORT_SYMBOL(dump_fpu);
-
-unsigned long get_wchan(struct task_struct *p)
-{
-       struct stackframe frame;
-       int count = 0;
-       if (!p || p == current || p->state == TASK_RUNNING)
-               return 0;
-
-       frame.fp = thread_saved_fp(p);
-       frame.sp = thread_saved_sp(p);
-       frame.lr = 0;                   /* recovered from the stack */
-       frame.pc = thread_saved_pc(p);
-       do {
-               int ret = unwind_frame(&frame);
-               if (ret < 0)
-                       return 0;
-               if (!in_sched_functions(frame.pc))
-                       return frame.pc;
-       } while ((count++) < 16);
-       return 0;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       return randomize_page(mm->brk, 0x02000000);
-}
-
-/*
- * The vectors page is always readable from user space for the
- * atomic helpers and the signal restart code.  Let's declare a mapping
- * for it so it is visible through ptrace and /proc/<pid>/mem.
- */
-
-int vectors_user_mapping(void)
-{
-       struct mm_struct *mm = current->mm;
-       return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
-                                      VM_READ | VM_EXEC |
-                                      VM_MAYREAD | VM_MAYEXEC |
-                                      VM_DONTEXPAND | VM_DONTDUMP,
-                                      NULL);
-}
-
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
-       return (vma->vm_start == 0xffff0000) ? "[vectors]" : NULL;
-}
diff --git a/arch/unicore32/kernel/ptrace.c b/arch/unicore32/kernel/ptrace.c
deleted file mode 100644 (file)
index 0f21656..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/ptrace.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * By Ross Biro 1/23/92
- */
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/signal.h>
-#include <linux/uaccess.h>
-#include <linux/sched/task_stack.h>
-
-/*
- * this routine will get a word off of the processes privileged stack.
- * the offset is how far from the base addr as stored in the THREAD.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline long get_user_reg(struct task_struct *task, int offset)
-{
-       return task_pt_regs(task)->uregs[offset];
-}
-
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the THREAD.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline int
-put_user_reg(struct task_struct *task, int offset, long data)
-{
-       struct pt_regs newregs, *regs = task_pt_regs(task);
-       int ret = -EINVAL;
-
-       newregs = *regs;
-       newregs.uregs[offset] = data;
-
-       if (valid_user_regs(&newregs)) {
-               regs->uregs[offset] = data;
-               ret = 0;
-       }
-
-       return ret;
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- */
-void ptrace_disable(struct task_struct *child)
-{
-}
-
-/*
- * We actually access the pt_regs stored on the kernel stack.
- */
-static int ptrace_read_user(struct task_struct *tsk, unsigned long off,
-                           unsigned long __user *ret)
-{
-       unsigned long tmp;
-
-       tmp = 0;
-       if (off < sizeof(struct pt_regs))
-               tmp = get_user_reg(tsk, off >> 2);
-
-       return put_user(tmp, ret);
-}
-
-/*
- * We actually access the pt_regs stored on the kernel stack.
- */
-static int ptrace_write_user(struct task_struct *tsk, unsigned long off,
-                            unsigned long val)
-{
-       if (off >= sizeof(struct pt_regs))
-               return 0;
-
-       return put_user_reg(tsk, off >> 2, val);
-}
-
-long arch_ptrace(struct task_struct *child, long request,
-                unsigned long addr, unsigned long data)
-{
-       int ret;
-       unsigned long __user *datap = (unsigned long __user *) data;
-
-       switch (request) {
-       case PTRACE_PEEKUSR:
-               ret = ptrace_read_user(child, addr, datap);
-               break;
-
-       case PTRACE_POKEUSR:
-               ret = ptrace_write_user(child, addr, data);
-               break;
-
-       case PTRACE_GET_THREAD_AREA:
-               ret = put_user(task_pt_regs(child)->UCreg_16,
-                              datap);
-               break;
-
-       default:
-               ret = ptrace_request(child, request, addr, data);
-               break;
-       }
-
-       return ret;
-}
-
-asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno)
-{
-       unsigned long ip;
-
-       if (!test_thread_flag(TIF_SYSCALL_TRACE))
-               return scno;
-       if (!(current->ptrace & PT_PTRACED))
-               return scno;
-
-       /*
-        * Save IP.  IP is used to denote syscall entry/exit:
-        *  IP = 0 -> entry, = 1 -> exit
-        */
-       ip = regs->UCreg_ip;
-       regs->UCreg_ip = why;
-
-       current_thread_info()->syscall = scno;
-
-       /* the 0x80 provides a way for the tracing parent to distinguish
-          between a syscall stop and SIGTRAP delivery */
-       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-                                ? 0x80 : 0));
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
-       regs->UCreg_ip = ip;
-
-       return current_thread_info()->syscall;
-}
diff --git a/arch/unicore32/kernel/puv3-core.c b/arch/unicore32/kernel/puv3-core.c
deleted file mode 100644 (file)
index 78f12e6..0000000
+++ /dev/null
@@ -1,276 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  linux/arch/unicore32/kernel/puv3-core.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/amba/bus.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/cnt32_to_63.h>
-#include <linux/usb/musb.h>
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <mach/pm.h>
-
-/*
- * This is the PKUnity sched_clock implementation.  This has
- * a resolution of 271ns, and a maximum value of 32025597s (370 days).
- *
- * The return value is guaranteed to be monotonic in that range as
- * long as there is always less than 582 seconds between successive
- * calls to this function.
- *
- *  ( * 1E9 / CLOCK_TICK_RATE ) -> about 2235/32
- */
-unsigned long long sched_clock(void)
-{
-       unsigned long long v = cnt32_to_63(readl(OST_OSCR));
-
-       /* original conservative method, but overflow frequently
-        * v *= NSEC_PER_SEC >> 12;
-        * do_div(v, CLOCK_TICK_RATE >> 12);
-        */
-       v = ((v & 0x7fffffffffffffffULL) * 2235) >> 5;
-
-       return v;
-}
-
-static struct resource puv3_usb_resources[] = {
-       /* order is significant! */
-       {
-               .start          = io_v2p(PKUNITY_USB_BASE),
-               .end            = io_v2p(PKUNITY_USB_BASE) + 0x3ff,
-               .flags          = IORESOURCE_MEM,
-       }, {
-               .start          = IRQ_USB,
-               .flags          = IORESOURCE_IRQ,
-       }, {
-               .start          = IRQ_USB,
-               .flags          = IORESOURCE_IRQ,
-       },
-};
-
-static struct musb_hdrc_config puv3_usb_config[] = {
-       {
-               .num_eps = 16,
-               .multipoint = 1,
-#ifdef CONFIG_USB_INVENTRA_DMA
-               .dma = 1,
-               .dma_channels = 8,
-#endif
-       },
-};
-
-static struct musb_hdrc_platform_data puv3_usb_plat = {
-       .mode           = MUSB_HOST,
-       .min_power      = 100,
-       .clock          = 0,
-       .config         = puv3_usb_config,
-};
-
-static struct resource puv3_mmc_resources[] = {
-       [0] = {
-               .start  = io_v2p(PKUNITY_SDC_BASE),
-               .end    = io_v2p(PKUNITY_SDC_BASE) + 0xfff,
-               .flags  = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start  = IRQ_SDC,
-               .end    = IRQ_SDC,
-               .flags  = IORESOURCE_IRQ,
-       },
-};
-
-static struct resource puv3_unigfx_resources[] = {
-       [0] = {
-               .start  = io_v2p(PKUNITY_UNIGFX_BASE),
-               .end    = io_v2p(PKUNITY_UNIGFX_BASE) + 0xfff,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-static struct resource puv3_rtc_resources[] = {
-       [0] = {
-               .start = io_v2p(PKUNITY_RTC_BASE),
-               .end   = io_v2p(PKUNITY_RTC_BASE) + 0xff,
-               .flags = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start = IRQ_RTCAlarm,
-               .end   = IRQ_RTCAlarm,
-               .flags = IORESOURCE_IRQ,
-       },
-       [2] = {
-               .start = IRQ_RTC,
-               .end   = IRQ_RTC,
-               .flags = IORESOURCE_IRQ
-       }
-};
-
-static struct resource puv3_pwm_resources[] = {
-       [0] = {
-               .start  = io_v2p(PKUNITY_OST_BASE) + 0x80,
-               .end    = io_v2p(PKUNITY_OST_BASE) + 0xff,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-static struct resource puv3_uart0_resources[] = {
-       [0] = {
-               .start = io_v2p(PKUNITY_UART0_BASE),
-               .end   = io_v2p(PKUNITY_UART0_BASE) + 0xff,
-               .flags = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start = IRQ_UART0,
-               .end   = IRQ_UART0,
-               .flags = IORESOURCE_IRQ
-       }
-};
-
-static struct resource puv3_uart1_resources[] = {
-       [0] = {
-               .start = io_v2p(PKUNITY_UART1_BASE),
-               .end   = io_v2p(PKUNITY_UART1_BASE) + 0xff,
-               .flags = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start = IRQ_UART1,
-               .end   = IRQ_UART1,
-               .flags = IORESOURCE_IRQ
-       }
-};
-
-static struct resource puv3_umal_resources[] = {
-       [0] = {
-               .start = io_v2p(PKUNITY_UMAL_BASE),
-               .end   = io_v2p(PKUNITY_UMAL_BASE) + 0x1fff,
-               .flags = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start = IRQ_UMAL,
-               .end   = IRQ_UMAL,
-               .flags = IORESOURCE_IRQ
-       }
-};
-
-#ifdef CONFIG_PUV3_PM
-
-#define SAVE(x)                sleep_save[SLEEP_SAVE_##x] = x
-#define RESTORE(x)     x = sleep_save[SLEEP_SAVE_##x]
-
-/*
- * List of global PXA peripheral registers to preserve.
- * More ones like CP and general purpose register values are preserved
- * with the stack pointer in sleep.S.
- */
-enum {
-       SLEEP_SAVE_PM_PLLDDRCFG,
-       SLEEP_SAVE_COUNT
-};
-
-
-static void puv3_cpu_pm_save(unsigned long *sleep_save)
-{
-/*     SAVE(PM_PLLDDRCFG); */
-}
-
-static void puv3_cpu_pm_restore(unsigned long *sleep_save)
-{
-/*     RESTORE(PM_PLLDDRCFG); */
-}
-
-static int puv3_cpu_pm_prepare(void)
-{
-       /* set resume return address */
-       writel(virt_to_phys(puv3_cpu_resume), PM_DIVCFG);
-       return 0;
-}
-
-static void puv3_cpu_pm_enter(suspend_state_t state)
-{
-       /* Clear reset status */
-       writel(RESETC_RSSR_HWR | RESETC_RSSR_WDR
-                       | RESETC_RSSR_SMR | RESETC_RSSR_SWR, RESETC_RSSR);
-
-       switch (state) {
-/*     case PM_SUSPEND_ON:
-               puv3_cpu_idle();
-               break; */
-       case PM_SUSPEND_MEM:
-               puv3_cpu_pm_prepare();
-               puv3_cpu_suspend(PM_PMCR_SFB);
-               break;
-       }
-}
-
-static int puv3_cpu_pm_valid(suspend_state_t state)
-{
-       return state == PM_SUSPEND_MEM;
-}
-
-static void puv3_cpu_pm_finish(void)
-{
-       /* ensure not to come back here if it wasn't intended */
-       /* PSPR = 0; */
-}
-
-static struct puv3_cpu_pm_fns puv3_cpu_pm_fnss = {
-       .save_count     = SLEEP_SAVE_COUNT,
-       .valid          = puv3_cpu_pm_valid,
-       .save           = puv3_cpu_pm_save,
-       .restore        = puv3_cpu_pm_restore,
-       .enter          = puv3_cpu_pm_enter,
-       .prepare        = puv3_cpu_pm_prepare,
-       .finish         = puv3_cpu_pm_finish,
-};
-
-static void __init puv3_init_pm(void)
-{
-       puv3_cpu_pm_fns = &puv3_cpu_pm_fnss;
-}
-#else
-static inline void puv3_init_pm(void) {}
-#endif
-
-void puv3_ps2_init(void)
-{
-       struct clk *bclk32;
-
-       bclk32 = clk_get(NULL, "BUS32_CLK");
-       writel(clk_get_rate(bclk32) / 200000, PS2_CNT); /* should > 5us */
-}
-
-void __init puv3_core_init(void)
-{
-       puv3_init_pm();
-       puv3_ps2_init();
-
-       platform_device_register_simple("PKUnity-v3-RTC", -1,
-                       puv3_rtc_resources, ARRAY_SIZE(puv3_rtc_resources));
-       platform_device_register_simple("PKUnity-v3-UMAL", -1,
-                       puv3_umal_resources, ARRAY_SIZE(puv3_umal_resources));
-       platform_device_register_simple("PKUnity-v3-MMC", -1,
-                       puv3_mmc_resources, ARRAY_SIZE(puv3_mmc_resources));
-       platform_device_register_simple("PKUnity-v3-UNIGFX", -1,
-                       puv3_unigfx_resources, ARRAY_SIZE(puv3_unigfx_resources));
-       platform_device_register_simple("PKUnity-v3-PWM", -1,
-                       puv3_pwm_resources, ARRAY_SIZE(puv3_pwm_resources));
-       platform_device_register_simple("PKUnity-v3-UART", 0,
-                       puv3_uart0_resources, ARRAY_SIZE(puv3_uart0_resources));
-       platform_device_register_simple("PKUnity-v3-UART", 1,
-                       puv3_uart1_resources, ARRAY_SIZE(puv3_uart1_resources));
-       platform_device_register_simple("PKUnity-v3-AC97", -1, NULL, 0);
-       platform_device_register_resndata(NULL, "musb_hdrc", -1,
-                       puv3_usb_resources, ARRAY_SIZE(puv3_usb_resources),
-                       &puv3_usb_plat, sizeof(puv3_usb_plat));
-}
-
diff --git a/arch/unicore32/kernel/puv3-nb0916.c b/arch/unicore32/kernel/puv3-nb0916.c
deleted file mode 100644 (file)
index e251f50..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/puv3-nb0916.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/mtd/physmap.h>
-#include <linux/io.h>
-#include <linux/reboot.h>
-#include <linux/interrupt.h>
-#include <linux/i2c.h>
-#include <linux/pwm.h>
-#include <linux/pwm_backlight.h>
-#include <linux/gpio.h>
-#include <linux/gpio_keys.h>
-#include <linux/input.h>
-
-#include <mach/hardware.h>
-
-static struct physmap_flash_data physmap_flash_data = {
-       .width          = 1,
-};
-
-static struct resource physmap_flash_resource = {
-       .start          = 0xFFF80000,
-       .end            = 0xFFFFFFFF,
-       .flags          = IORESOURCE_MEM,
-};
-
-static struct resource puv3_i2c_resources[] = {
-       [0] = {
-               .start = io_v2p(PKUNITY_I2C_BASE),
-               .end   = io_v2p(PKUNITY_I2C_BASE) + 0xff,
-               .flags = IORESOURCE_MEM,
-       },
-       [1] = {
-               .start = IRQ_I2C,
-               .end   = IRQ_I2C,
-               .flags = IORESOURCE_IRQ,
-       }
-};
-
-static struct pwm_lookup nb0916_pwm_lookup[] = {
-       PWM_LOOKUP("PKUnity-v3-PWM", 0, "pwm-backlight", NULL, 70 * 1024,
-                  PWM_POLARITY_NORMAL),
-};
-
-static struct platform_pwm_backlight_data nb0916_backlight_data = {
-       .max_brightness = 100,
-       .dft_brightness = 100,
-};
-
-static struct gpio_keys_button nb0916_gpio_keys[] = {
-       {
-               .type   = EV_KEY,
-               .code   = KEY_POWER,
-               .gpio   = GPI_SOFF_REQ,
-               .desc   = "Power Button",
-               .wakeup = 1,
-               .active_low = 1,
-       },
-       {
-               .type   = EV_KEY,
-               .code   = BTN_TOUCH,
-               .gpio   = GPI_BTN_TOUCH,
-               .desc   = "Touchpad Button",
-               .wakeup = 1,
-               .active_low = 1,
-       },
-};
-
-static struct gpio_keys_platform_data nb0916_gpio_button_data = {
-       .buttons        = nb0916_gpio_keys,
-       .nbuttons       = ARRAY_SIZE(nb0916_gpio_keys),
-};
-
-static irqreturn_t nb0916_lcdcaseoff_handler(int irq, void *dev_id)
-{
-       if (gpio_get_value(GPI_LCD_CASE_OFF))
-               gpio_set_value(GPO_LCD_EN, 1);
-       else
-               gpio_set_value(GPO_LCD_EN, 0);
-
-       return IRQ_HANDLED;
-}
-
-static irqreturn_t nb0916_overheat_handler(int irq, void *dev_id)
-{
-       machine_halt();
-       /* SYSTEM HALT, NO RETURN */
-       return IRQ_HANDLED;
-}
-
-static struct i2c_board_info __initdata puv3_i2c_devices[] = {
-       {       I2C_BOARD_INFO("lm75",          I2C_TAR_THERMAL),       },
-       {       I2C_BOARD_INFO("bq27200",       I2C_TAR_PWIC),          },
-       {       I2C_BOARD_INFO("24c02",         I2C_TAR_EEPROM),        },
-};
-
-int __init mach_nb0916_init(void)
-{
-       i2c_register_board_info(0, puv3_i2c_devices,
-                       ARRAY_SIZE(puv3_i2c_devices));
-
-       platform_device_register_simple("PKUnity-v3-I2C", -1,
-                       puv3_i2c_resources, ARRAY_SIZE(puv3_i2c_resources));
-
-       pwm_add_table(nb0916_pwm_lookup, ARRAY_SIZE(nb0916_pwm_lookup));
-
-       platform_device_register_data(NULL, "pwm-backlight", -1,
-                       &nb0916_backlight_data, sizeof(nb0916_backlight_data));
-
-       platform_device_register_data(NULL, "gpio-keys", -1,
-                       &nb0916_gpio_button_data, sizeof(nb0916_gpio_button_data));
-
-       platform_device_register_resndata(NULL, "physmap-flash", -1,
-                       &physmap_flash_resource, 1,
-                       &physmap_flash_data, sizeof(physmap_flash_data));
-
-       if (request_irq(gpio_to_irq(GPI_LCD_CASE_OFF),
-               &nb0916_lcdcaseoff_handler,
-               IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-               "NB0916 lcd case off", NULL) < 0) {
-
-               printk(KERN_DEBUG "LCD-Case-OFF IRQ %d not available\n",
-                       gpio_to_irq(GPI_LCD_CASE_OFF));
-       }
-
-       if (request_irq(gpio_to_irq(GPI_OTP_INT), &nb0916_overheat_handler,
-               IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-               "NB0916 overheating protection", NULL) < 0) {
-
-               printk(KERN_DEBUG "Overheating Protection IRQ %d not available\n",
-                       gpio_to_irq(GPI_OTP_INT));
-       }
-
-       return 0;
-}
-
-subsys_initcall_sync(mach_nb0916_init);
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
deleted file mode 100644 (file)
index 0c4242a..0000000
+++ /dev/null
@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/setup.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/stddef.h>
-#include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/utsname.h>
-#include <linux/initrd.h>
-#include <linux/console.h>
-#include <linux/memblock.h>
-#include <linux/seq_file.h>
-#include <linux/screen_info.h>
-#include <linux/init.h>
-#include <linux/root_dev.h>
-#include <linux/cpu.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-#include <linux/fs.h>
-#include <linux/proc_fs.h>
-#include <linux/elf.h>
-#include <linux/io.h>
-
-#include <asm/cputype.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/traps.h>
-#include <asm/memblock.h>
-
-#include "setup.h"
-
-#ifndef MEM_SIZE
-#define MEM_SIZE       (16*1024*1024)
-#endif
-
-struct stack {
-       u32 irq[3];
-       u32 abt[3];
-       u32 und[3];
-} ____cacheline_aligned;
-
-static struct stack stacks[NR_CPUS];
-
-#ifdef CONFIG_VGA_CONSOLE
-struct screen_info screen_info;
-#endif
-
-char elf_platform[ELF_PLATFORM_SIZE];
-EXPORT_SYMBOL(elf_platform);
-
-static char __initdata cmd_line[COMMAND_LINE_SIZE];
-
-static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
-
-/*
- * Standard memory resources
- */
-static struct resource mem_res[] = {
-       {
-               .name = "Kernel code",
-               .start = 0,
-               .end = 0,
-               .flags = IORESOURCE_SYSTEM_RAM
-       },
-       {
-               .name = "Kernel data",
-               .start = 0,
-               .end = 0,
-               .flags = IORESOURCE_SYSTEM_RAM
-       }
-};
-
-#define kernel_code mem_res[0]
-#define kernel_data mem_res[1]
-
-/*
- * These functions re-use the assembly code in head.S, which
- * already provide the required functionality.
- */
-static void __init setup_processor(void)
-{
-       printk(KERN_DEFAULT "CPU: UniCore-II [%08x] revision %d, cr=%08lx\n",
-              uc32_cpuid, (int)(uc32_cpuid >> 16) & 15, cr_alignment);
-
-       sprintf(init_utsname()->machine, "puv3");
-       sprintf(elf_platform, "ucv2");
-}
-
-/*
- * cpu_init - initialise one CPU.
- *
- * cpu_init sets up the per-CPU stacks.
- */
-void cpu_init(void)
-{
-       unsigned int cpu = smp_processor_id();
-       struct stack *stk = &stacks[cpu];
-
-       /*
-        * setup stacks for re-entrant exception handlers
-        */
-       __asm__ (
-       "mov.a  asr, %1\n\t"
-       "add    sp, %0, %2\n\t"
-       "mov.a  asr, %3\n\t"
-       "add    sp, %0, %4\n\t"
-       "mov.a  asr, %5\n\t"
-       "add    sp, %0, %6\n\t"
-       "mov.a  asr, %7"
-           :
-           : "r" (stk),
-             "r" (PSR_R_BIT | PSR_I_BIT | INTR_MODE),
-             "I" (offsetof(struct stack, irq[0])),
-             "r" (PSR_R_BIT | PSR_I_BIT | ABRT_MODE),
-             "I" (offsetof(struct stack, abt[0])),
-             "r" (PSR_R_BIT | PSR_I_BIT | EXTN_MODE),
-             "I" (offsetof(struct stack, und[0])),
-             "r" (PSR_R_BIT | PSR_I_BIT | PRIV_MODE)
-       : "r30", "cc");
-}
-
-static int __init uc32_add_memory(unsigned long start, unsigned long size)
-{
-       struct membank *bank = &meminfo.bank[meminfo.nr_banks];
-
-       if (meminfo.nr_banks >= NR_BANKS) {
-               printk(KERN_CRIT "NR_BANKS too low, "
-                       "ignoring memory at %#lx\n", start);
-               return -EINVAL;
-       }
-
-       /*
-        * Ensure that start/size are aligned to a page boundary.
-        * Size is appropriately rounded down, start is rounded up.
-        */
-       size -= start & ~PAGE_MASK;
-
-       bank->start = PAGE_ALIGN(start);
-       bank->size  = size & PAGE_MASK;
-
-       /*
-        * Check whether this memory region has non-zero size or
-        * invalid node number.
-        */
-       if (bank->size == 0)
-               return -EINVAL;
-
-       meminfo.nr_banks++;
-       return 0;
-}
-
-/*
- * Pick out the memory size.  We look for mem=size@start,
- * where start and size are "size[KkMm]"
- */
-static int __init early_mem(char *p)
-{
-       static int usermem __initdata = 1;
-       unsigned long size, start;
-       char *endp;
-
-       /*
-        * If the user specifies memory size, we
-        * blow away any automatically generated
-        * size.
-        */
-       if (usermem) {
-               usermem = 0;
-               meminfo.nr_banks = 0;
-       }
-
-       start = PHYS_OFFSET;
-       size  = memparse(p, &endp);
-       if (*endp == '@')
-               start = memparse(endp + 1, NULL);
-
-       uc32_add_memory(start, size);
-
-       return 0;
-}
-early_param("mem", early_mem);
-
-static void __init
-request_standard_resources(struct meminfo *mi)
-{
-       struct resource *res;
-       int i;
-
-       kernel_code.start   = virt_to_phys(_stext);
-       kernel_code.end     = virt_to_phys(_etext - 1);
-       kernel_data.start   = virt_to_phys(_sdata);
-       kernel_data.end     = virt_to_phys(_end - 1);
-
-       for (i = 0; i < mi->nr_banks; i++) {
-               if (mi->bank[i].size == 0)
-                       continue;
-
-               res = memblock_alloc_low(sizeof(*res), SMP_CACHE_BYTES);
-               if (!res)
-                       panic("%s: Failed to allocate %zu bytes align=%x\n",
-                             __func__, sizeof(*res), SMP_CACHE_BYTES);
-
-               res->name  = "System RAM";
-               res->start = mi->bank[i].start;
-               res->end   = mi->bank[i].start + mi->bank[i].size - 1;
-               res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-
-               request_resource(&iomem_resource, res);
-
-               if (kernel_code.start >= res->start &&
-                   kernel_code.end <= res->end)
-                       request_resource(res, &kernel_code);
-               if (kernel_data.start >= res->start &&
-                   kernel_data.end <= res->end)
-                       request_resource(res, &kernel_data);
-       }
-}
-
-static void (*init_machine)(void) __initdata;
-
-static int __init customize_machine(void)
-{
-       /* customizes platform devices, or adds new ones */
-       if (init_machine)
-               init_machine();
-       return 0;
-}
-arch_initcall(customize_machine);
-
-void __init setup_arch(char **cmdline_p)
-{
-       char *from = default_command_line;
-
-       setup_processor();
-
-       init_mm.start_code = (unsigned long) _stext;
-       init_mm.end_code   = (unsigned long) _etext;
-       init_mm.end_data   = (unsigned long) _edata;
-       init_mm.brk        = (unsigned long) _end;
-
-       /* parse_early_param needs a boot_command_line */
-       strlcpy(boot_command_line, from, COMMAND_LINE_SIZE);
-
-       /* populate cmd_line too for later use, preserving boot_command_line */
-       strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
-       *cmdline_p = cmd_line;
-
-       parse_early_param();
-
-       uc32_memblock_init(&meminfo);
-
-       paging_init();
-       request_standard_resources(&meminfo);
-
-       cpu_init();
-
-       /*
-        * Set up various architecture-specific pointers
-        */
-       init_machine = puv3_core_init;
-
-#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
-       conswitchp = &vga_con;
-#endif
-#endif
-       early_trap_init();
-}
-
-static struct cpu cpuinfo_unicore;
-
-static int __init topology_init(void)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               register_cpu(&cpuinfo_unicore, i);
-
-       return 0;
-}
-subsys_initcall(topology_init);
-
-#ifdef CONFIG_HAVE_PROC_CPU
-static int __init proc_cpu_init(void)
-{
-       struct proc_dir_entry *res;
-
-       res = proc_mkdir("cpu", NULL);
-       if (!res)
-               return -ENOMEM;
-       return 0;
-}
-fs_initcall(proc_cpu_init);
-#endif
-
-static int c_show(struct seq_file *m, void *v)
-{
-       seq_printf(m, "Processor\t: UniCore-II rev %d (%s)\n",
-                  (int)(uc32_cpuid >> 16) & 15, elf_platform);
-
-       seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
-                  loops_per_jiffy / (500000/HZ),
-                  (loops_per_jiffy / (5000/HZ)) % 100);
-
-       /* dump out the processor features */
-       seq_puts(m, "Features\t: CMOV UC-F64");
-
-       seq_printf(m, "\nCPU implementer\t: 0x%02x\n", uc32_cpuid >> 24);
-       seq_printf(m, "CPU architecture: 2\n");
-       seq_printf(m, "CPU revision\t: %d\n", (uc32_cpuid >> 16) & 15);
-
-       seq_printf(m, "Cache type\t: write-back\n"
-                       "Cache clean\t: cp0 c5 ops\n"
-                       "Cache lockdown\t: not support\n"
-                       "Cache format\t: Harvard\n");
-
-       seq_puts(m, "\n");
-
-       seq_printf(m, "Hardware\t: PKUnity v3\n");
-
-       return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-       return *pos < 1 ? (void *)1 : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-       ++*pos;
-       return NULL;
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-       .start  = c_start,
-       .next   = c_next,
-       .stop   = c_stop,
-       .show   = c_show
-};
diff --git a/arch/unicore32/kernel/setup.h b/arch/unicore32/kernel/setup.h
deleted file mode 100644 (file)
index 9673523..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/kernel/setup.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#ifndef __UNICORE_KERNEL_SETUP_H__
-#define __UNICORE_KERNEL_SETUP_H__
-
-#include <asm/hwdef-copro.h>
-
-extern void paging_init(void);
-extern void puv3_core_init(void);
-extern void cpu_init(void);
-
-extern void puv3_ps2_init(void);
-extern void pci_puv3_preinit(void);
-extern void __init puv3_init_gpio(void);
-
-extern void setup_mm_for_reboot(void);
-
-extern char __stubs_start[], __stubs_end[];
-extern char __vectors_start[], __vectors_end[];
-
-extern void kernel_thread_helper(void);
-
-extern void __init early_signal_init(void);
-
-extern asmlinkage void __backtrace(void);
-extern asmlinkage void c_backtrace(unsigned long fp, const char *loglvl);
-
-extern void __show_regs(struct pt_regs *);
-
-#endif
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
deleted file mode 100644 (file)
index 3946182..0000000
+++ /dev/null
@@ -1,424 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/signal.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/personality.h>
-#include <linux/uaccess.h>
-#include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/unistd.h>
-
-#include <asm/cacheflush.h>
-#include <asm/ucontext.h>
-
-/*
- * For UniCore syscalls, we encode the syscall number into the instruction.
- */
-#define SWI_SYS_SIGRETURN      (0xff000000) /* error number for new abi */
-#define SWI_SYS_RT_SIGRETURN   (0xff000000 | (__NR_rt_sigreturn))
-#define SWI_SYS_RESTART                (0xff000000 | (__NR_restart_syscall))
-
-#define KERN_SIGRETURN_CODE    (KUSER_VECPAGE_BASE + 0x00000500)
-#define KERN_RESTART_CODE      (KERN_SIGRETURN_CODE + sizeof(sigreturn_codes))
-
-const unsigned long sigreturn_codes[3] = {
-       SWI_SYS_SIGRETURN, SWI_SYS_RT_SIGRETURN,
-};
-
-const unsigned long syscall_restart_code[2] = {
-       SWI_SYS_RESTART,        /* swi  __NR_restart_syscall */
-       0x69efc004,             /* ldr  pc, [sp], #4 */
-};
-
-/*
- * Do a signal return; undo the signal stack.  These are aligned to 64-bit.
- */
-struct sigframe {
-       struct ucontext uc;
-       unsigned long retcode[2];
-};
-
-struct rt_sigframe {
-       struct siginfo info;
-       struct sigframe sig;
-};
-
-static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf)
-{
-       sigset_t set;
-       int err;
-
-       err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
-       if (err == 0)
-               set_current_blocked(&set);
-
-       err |= __get_user(regs->UCreg_00, &sf->uc.uc_mcontext.regs.UCreg_00);
-       err |= __get_user(regs->UCreg_01, &sf->uc.uc_mcontext.regs.UCreg_01);
-       err |= __get_user(regs->UCreg_02, &sf->uc.uc_mcontext.regs.UCreg_02);
-       err |= __get_user(regs->UCreg_03, &sf->uc.uc_mcontext.regs.UCreg_03);
-       err |= __get_user(regs->UCreg_04, &sf->uc.uc_mcontext.regs.UCreg_04);
-       err |= __get_user(regs->UCreg_05, &sf->uc.uc_mcontext.regs.UCreg_05);
-       err |= __get_user(regs->UCreg_06, &sf->uc.uc_mcontext.regs.UCreg_06);
-       err |= __get_user(regs->UCreg_07, &sf->uc.uc_mcontext.regs.UCreg_07);
-       err |= __get_user(regs->UCreg_08, &sf->uc.uc_mcontext.regs.UCreg_08);
-       err |= __get_user(regs->UCreg_09, &sf->uc.uc_mcontext.regs.UCreg_09);
-       err |= __get_user(regs->UCreg_10, &sf->uc.uc_mcontext.regs.UCreg_10);
-       err |= __get_user(regs->UCreg_11, &sf->uc.uc_mcontext.regs.UCreg_11);
-       err |= __get_user(regs->UCreg_12, &sf->uc.uc_mcontext.regs.UCreg_12);
-       err |= __get_user(regs->UCreg_13, &sf->uc.uc_mcontext.regs.UCreg_13);
-       err |= __get_user(regs->UCreg_14, &sf->uc.uc_mcontext.regs.UCreg_14);
-       err |= __get_user(regs->UCreg_15, &sf->uc.uc_mcontext.regs.UCreg_15);
-       err |= __get_user(regs->UCreg_16, &sf->uc.uc_mcontext.regs.UCreg_16);
-       err |= __get_user(regs->UCreg_17, &sf->uc.uc_mcontext.regs.UCreg_17);
-       err |= __get_user(regs->UCreg_18, &sf->uc.uc_mcontext.regs.UCreg_18);
-       err |= __get_user(regs->UCreg_19, &sf->uc.uc_mcontext.regs.UCreg_19);
-       err |= __get_user(regs->UCreg_20, &sf->uc.uc_mcontext.regs.UCreg_20);
-       err |= __get_user(regs->UCreg_21, &sf->uc.uc_mcontext.regs.UCreg_21);
-       err |= __get_user(regs->UCreg_22, &sf->uc.uc_mcontext.regs.UCreg_22);
-       err |= __get_user(regs->UCreg_23, &sf->uc.uc_mcontext.regs.UCreg_23);
-       err |= __get_user(regs->UCreg_24, &sf->uc.uc_mcontext.regs.UCreg_24);
-       err |= __get_user(regs->UCreg_25, &sf->uc.uc_mcontext.regs.UCreg_25);
-       err |= __get_user(regs->UCreg_26, &sf->uc.uc_mcontext.regs.UCreg_26);
-       err |= __get_user(regs->UCreg_fp, &sf->uc.uc_mcontext.regs.UCreg_fp);
-       err |= __get_user(regs->UCreg_ip, &sf->uc.uc_mcontext.regs.UCreg_ip);
-       err |= __get_user(regs->UCreg_sp, &sf->uc.uc_mcontext.regs.UCreg_sp);
-       err |= __get_user(regs->UCreg_lr, &sf->uc.uc_mcontext.regs.UCreg_lr);
-       err |= __get_user(regs->UCreg_pc, &sf->uc.uc_mcontext.regs.UCreg_pc);
-       err |= __get_user(regs->UCreg_asr, &sf->uc.uc_mcontext.regs.UCreg_asr);
-
-       err |= !valid_user_regs(regs);
-
-       return err;
-}
-
-asmlinkage int __sys_rt_sigreturn(struct pt_regs *regs)
-{
-       struct rt_sigframe __user *frame;
-
-       /* Always make any pending restarted system calls return -EINTR */
-       current->restart_block.fn = do_no_restart_syscall;
-
-       /*
-        * Since we stacked the signal on a 64-bit boundary,
-        * then 'sp' should be word aligned here.  If it's
-        * not, then the user is trying to mess with us.
-        */
-       if (regs->UCreg_sp & 7)
-               goto badframe;
-
-       frame = (struct rt_sigframe __user *)regs->UCreg_sp;
-
-       if (!access_ok(frame, sizeof(*frame)))
-               goto badframe;
-
-       if (restore_sigframe(regs, &frame->sig))
-               goto badframe;
-
-       if (restore_altstack(&frame->sig.uc.uc_stack))
-               goto badframe;
-
-       return regs->UCreg_00;
-
-badframe:
-       force_sig(SIGSEGV);
-       return 0;
-}
-
-static int setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs,
-               sigset_t *set)
-{
-       int err = 0;
-
-       err |= __put_user(regs->UCreg_00, &sf->uc.uc_mcontext.regs.UCreg_00);
-       err |= __put_user(regs->UCreg_01, &sf->uc.uc_mcontext.regs.UCreg_01);
-       err |= __put_user(regs->UCreg_02, &sf->uc.uc_mcontext.regs.UCreg_02);
-       err |= __put_user(regs->UCreg_03, &sf->uc.uc_mcontext.regs.UCreg_03);
-       err |= __put_user(regs->UCreg_04, &sf->uc.uc_mcontext.regs.UCreg_04);
-       err |= __put_user(regs->UCreg_05, &sf->uc.uc_mcontext.regs.UCreg_05);
-       err |= __put_user(regs->UCreg_06, &sf->uc.uc_mcontext.regs.UCreg_06);
-       err |= __put_user(regs->UCreg_07, &sf->uc.uc_mcontext.regs.UCreg_07);
-       err |= __put_user(regs->UCreg_08, &sf->uc.uc_mcontext.regs.UCreg_08);
-       err |= __put_user(regs->UCreg_09, &sf->uc.uc_mcontext.regs.UCreg_09);
-       err |= __put_user(regs->UCreg_10, &sf->uc.uc_mcontext.regs.UCreg_10);
-       err |= __put_user(regs->UCreg_11, &sf->uc.uc_mcontext.regs.UCreg_11);
-       err |= __put_user(regs->UCreg_12, &sf->uc.uc_mcontext.regs.UCreg_12);
-       err |= __put_user(regs->UCreg_13, &sf->uc.uc_mcontext.regs.UCreg_13);
-       err |= __put_user(regs->UCreg_14, &sf->uc.uc_mcontext.regs.UCreg_14);
-       err |= __put_user(regs->UCreg_15, &sf->uc.uc_mcontext.regs.UCreg_15);
-       err |= __put_user(regs->UCreg_16, &sf->uc.uc_mcontext.regs.UCreg_16);
-       err |= __put_user(regs->UCreg_17, &sf->uc.uc_mcontext.regs.UCreg_17);
-       err |= __put_user(regs->UCreg_18, &sf->uc.uc_mcontext.regs.UCreg_18);
-       err |= __put_user(regs->UCreg_19, &sf->uc.uc_mcontext.regs.UCreg_19);
-       err |= __put_user(regs->UCreg_20, &sf->uc.uc_mcontext.regs.UCreg_20);
-       err |= __put_user(regs->UCreg_21, &sf->uc.uc_mcontext.regs.UCreg_21);
-       err |= __put_user(regs->UCreg_22, &sf->uc.uc_mcontext.regs.UCreg_22);
-       err |= __put_user(regs->UCreg_23, &sf->uc.uc_mcontext.regs.UCreg_23);
-       err |= __put_user(regs->UCreg_24, &sf->uc.uc_mcontext.regs.UCreg_24);
-       err |= __put_user(regs->UCreg_25, &sf->uc.uc_mcontext.regs.UCreg_25);
-       err |= __put_user(regs->UCreg_26, &sf->uc.uc_mcontext.regs.UCreg_26);
-       err |= __put_user(regs->UCreg_fp, &sf->uc.uc_mcontext.regs.UCreg_fp);
-       err |= __put_user(regs->UCreg_ip, &sf->uc.uc_mcontext.regs.UCreg_ip);
-       err |= __put_user(regs->UCreg_sp, &sf->uc.uc_mcontext.regs.UCreg_sp);
-       err |= __put_user(regs->UCreg_lr, &sf->uc.uc_mcontext.regs.UCreg_lr);
-       err |= __put_user(regs->UCreg_pc, &sf->uc.uc_mcontext.regs.UCreg_pc);
-       err |= __put_user(regs->UCreg_asr, &sf->uc.uc_mcontext.regs.UCreg_asr);
-
-       err |= __put_user(current->thread.trap_no,
-                       &sf->uc.uc_mcontext.trap_no);
-       err |= __put_user(current->thread.error_code,
-                       &sf->uc.uc_mcontext.error_code);
-       err |= __put_user(current->thread.address,
-                       &sf->uc.uc_mcontext.fault_address);
-       err |= __put_user(set->sig[0], &sf->uc.uc_mcontext.oldmask);
-
-       err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
-
-       return err;
-}
-
-static inline void __user *get_sigframe(struct k_sigaction *ka,
-               struct pt_regs *regs, int framesize)
-{
-       unsigned long sp = regs->UCreg_sp;
-       void __user *frame;
-
-       /*
-        * This is the X/Open sanctioned signal stack switching.
-        */
-       if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp))
-               sp = current->sas_ss_sp + current->sas_ss_size;
-
-       /*
-        * ATPCS B01 mandates 8-byte alignment
-        */
-       frame = (void __user *)((sp - framesize) & ~7);
-
-       /*
-        * Check that we can actually write to the signal frame.
-        */
-       if (!access_ok(frame, framesize))
-               frame = NULL;
-
-       return frame;
-}
-
-static int setup_return(struct pt_regs *regs, struct k_sigaction *ka,
-            unsigned long __user *rc, void __user *frame, int usig)
-{
-       unsigned long handler = (unsigned long)ka->sa.sa_handler;
-       unsigned long retcode;
-       unsigned long asr = regs->UCreg_asr & ~PSR_f;
-
-       unsigned int idx = 0;
-
-       if (ka->sa.sa_flags & SA_SIGINFO)
-               idx += 1;
-
-       if (__put_user(sigreturn_codes[idx],   rc) ||
-           __put_user(sigreturn_codes[idx+1], rc+1))
-               return 1;
-
-       retcode = KERN_SIGRETURN_CODE + (idx << 2);
-
-       regs->UCreg_00 = usig;
-       regs->UCreg_sp = (unsigned long)frame;
-       regs->UCreg_lr = retcode;
-       regs->UCreg_pc = handler;
-       regs->UCreg_asr = asr;
-
-       return 0;
-}
-
-static int setup_frame(struct ksignal *ksig, sigset_t *set,
-                      struct pt_regs *regs)
-{
-       struct sigframe __user *frame = get_sigframe(&ksig->ka, regs, sizeof(*frame));
-       int err = 0;
-
-       if (!frame)
-               return 1;
-
-       /*
-        * Set uc.uc_flags to a value which sc.trap_no would never have.
-        */
-       err |= __put_user(0x5ac3c35a, &frame->uc.uc_flags);
-
-       err |= setup_sigframe(frame, regs, set);
-       if (err == 0)
-               err |= setup_return(regs, &ksig->ka, frame->retcode, frame,
-                                   ksig->sig);
-
-       return err;
-}
-
-static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
-                         struct pt_regs *regs)
-{
-       struct rt_sigframe __user *frame =
-                       get_sigframe(&ksig->ka, regs, sizeof(*frame));
-       int err = 0;
-
-       if (!frame)
-               return 1;
-
-       err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-
-       err |= __put_user(0, &frame->sig.uc.uc_flags);
-       err |= __put_user(NULL, &frame->sig.uc.uc_link);
-       err |= __save_altstack(&frame->sig.uc.uc_stack, regs->UCreg_sp);
-       err |= setup_sigframe(&frame->sig, regs, set);
-       if (err == 0)
-               err |= setup_return(regs, &ksig->ka, frame->sig.retcode, frame,
-                                   ksig->sig);
-
-       if (err == 0) {
-               /*
-                * For realtime signals we must also set the second and third
-                * arguments for the signal handler.
-                */
-               regs->UCreg_01 = (unsigned long)&frame->info;
-               regs->UCreg_02 = (unsigned long)&frame->sig.uc;
-       }
-
-       return err;
-}
-
-static inline void setup_syscall_restart(struct pt_regs *regs)
-{
-       regs->UCreg_00 = regs->UCreg_ORIG_00;
-       regs->UCreg_pc -= 4;
-}
-
-/*
- * OK, we're invoking a handler
- */
-static void handle_signal(struct ksignal *ksig, struct pt_regs *regs,
-                         int syscall)
-{
-       struct thread_info *thread = current_thread_info();
-       sigset_t *oldset = sigmask_to_save();
-       int usig = ksig->sig;
-       int ret;
-
-       /*
-        * If we were from a system call, check for system call restarting...
-        */
-       if (syscall) {
-               switch (regs->UCreg_00) {
-               case -ERESTART_RESTARTBLOCK:
-               case -ERESTARTNOHAND:
-                       regs->UCreg_00 = -EINTR;
-                       break;
-               case -ERESTARTSYS:
-                       if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
-                               regs->UCreg_00 = -EINTR;
-                               break;
-                       }
-                       /* fallthrough */
-               case -ERESTARTNOINTR:
-                       setup_syscall_restart(regs);
-               }
-       }
-
-       /*
-        * Set up the stack frame
-        */
-       if (ksig->ka.sa.sa_flags & SA_SIGINFO)
-               ret = setup_rt_frame(ksig, oldset, regs);
-       else
-               ret = setup_frame(ksig, oldset, regs);
-
-       /*
-        * Check that the resulting registers are actually sane.
-        */
-       ret |= !valid_user_regs(regs);
-
-       signal_setup_done(ret, ksig, 0);
-}
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- *
- * Note that we go through the signals twice: once to check the signals that
- * the kernel can handle, and then we build all the user-level signal handling
- * stack-frames in one go after that.
- */
-static void do_signal(struct pt_regs *regs, int syscall)
-{
-       struct ksignal ksig;
-
-       /*
-        * We want the common case to go fast, which
-        * is why we may in certain cases get here from
-        * kernel mode. Just return without doing anything
-        * if so.
-        */
-       if (!user_mode(regs))
-               return;
-
-       if (get_signal(&ksig)) {
-               handle_signal(&ksig, regs, syscall);
-               return;
-       }
-
-       /*
-        * No signal to deliver to the process - restart the syscall.
-        */
-       if (syscall) {
-               if (regs->UCreg_00 == -ERESTART_RESTARTBLOCK) {
-                               u32 __user *usp;
-
-                               regs->UCreg_sp -= 4;
-                               usp = (u32 __user *)regs->UCreg_sp;
-
-                               if (put_user(regs->UCreg_pc, usp) == 0) {
-                                       regs->UCreg_pc = KERN_RESTART_CODE;
-                               } else {
-                                       regs->UCreg_sp += 4;
-                                       force_sigsegv(0);
-                               }
-               }
-               if (regs->UCreg_00 == -ERESTARTNOHAND ||
-                   regs->UCreg_00 == -ERESTARTSYS ||
-                   regs->UCreg_00 == -ERESTARTNOINTR) {
-                       setup_syscall_restart(regs);
-               }
-       }
-       /* If there's no signal to deliver, we just put the saved
-        * sigmask back.
-        */
-       restore_saved_sigmask();
-}
-
-asmlinkage void do_notify_resume(struct pt_regs *regs,
-               unsigned int thread_flags, int syscall)
-{
-       if (thread_flags & _TIF_SIGPENDING)
-               do_signal(regs, syscall);
-
-       if (thread_flags & _TIF_NOTIFY_RESUME) {
-               clear_thread_flag(TIF_NOTIFY_RESUME);
-               tracehook_notify_resume(regs);
-       }
-}
-
-/*
- * Copy signal return handlers into the vector page, and
- * set sigreturn to be a pointer to these.
- */
-void __init early_signal_init(void)
-{
-       memcpy((void *)kuser_vecpage_to_vectors(KERN_SIGRETURN_CODE),
-                       sigreturn_codes, sizeof(sigreturn_codes));
-       memcpy((void *)kuser_vecpage_to_vectors(KERN_RESTART_CODE),
-                       syscall_restart_code, sizeof(syscall_restart_code));
-       /* Need not to flush icache, since early_trap_init will do it last. */
-}
diff --git a/arch/unicore32/kernel/sleep.S b/arch/unicore32/kernel/sleep.S
deleted file mode 100644 (file)
index 23151ab..0000000
+++ /dev/null
@@ -1,199 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/kernel/sleep.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <mach/hardware.h>
-
-               .text
-
-pkunity_cpu_save_cp:
-
-       @ get coprocessor registers
-
-       movc    r3, p0.c7, #0                   @ PID
-       movc    r4, p0.c2, #0                   @ translation table base addr
-       movc    r5, p0.c1, #0                   @ control reg
-
-
-       @ store them plus current virtual stack ptr on stack
-       mov     r6, sp
-       stm.w   (r3 - r6), [sp-]
-
-       mov     pc, lr
-
-pkunity_cpu_save_sp:
-       @ preserve phys address of stack
-       mov     r0, sp
-       stw.w   lr, [sp+], #-4
-       b.l     sleep_phys_sp
-       ldw     r1, =sleep_save_sp
-       stw     r0, [r1]
-       ldw.w   pc, [sp]+, #4
-
-/*
- * puv3_cpu_suspend()
- *
- * Forces CPU into sleep state.
- *
- * r0 = value for PWRMODE M field for desired sleep state
- */
-
-ENTRY(puv3_cpu_suspend)
-       stm.w   (r16 - r27, lr), [sp-]          @ save registers on stack
-       stm.w   (r4 - r15), [sp-]               @ save registers on stack
-
-#ifdef CONFIG_UNICORE_FPU_F64
-       sfm.w   (f0  - f7 ), [sp-]
-       sfm.w   (f8  - f15), [sp-]
-       sfm.w   (f16 - f23), [sp-]
-       sfm.w   (f24 - f31), [sp-]
-       cff     r4, s31
-       stm.w   (r4), [sp-]
-#endif
-       b.l     pkunity_cpu_save_cp
-
-       b.l     pkunity_cpu_save_sp
-
-       @ clean data cache
-       mov     r1, #0
-       movc    p0.c5, r1, #14
-       nop
-       nop
-       nop
-       nop
-
-
-
-       @ DDR2 BaseAddr
-       ldw     r0, =(PKUNITY_DDR2CTRL_BASE)
-
-       @ PM BaseAddr
-       ldw     r1, =(PKUNITY_PM_BASE)
-
-       @ set PLL_SYS_CFG reg, 275
-       movl    r6, #0x00002401
-       stw     r6, [r1+], #0x18
-       @ set PLL_DDR_CFG reg, 66MHz
-       movl    r6, #0x00100c00
-       stw     r6, [r1+], #0x1c
-
-       @ set wake up source
-       movl    r8, #0x800001ff         @ epip4d
-       stw     r8, [r1+], #0xc
-
-       @ set PGSR
-       movl    r5, #0x40000
-       stw     r5, [r1+], #0x10
-
-       @ prepare DDR2 refresh settings
-       ldw     r5, [r0+], #0x24
-       or      r5, r5, #0x00000001
-
-       @ prepare PMCR for PLL changing
-       movl    r6, #0xc
-
-       @ prepare for closing PLL
-       movl    r7, #0x1
-
-       @ prepare sleep mode
-       mov     r8, #0x1
-
-@      movl    r0, 0x11111111
-@      put_word_ocd r0
-       b       pkunity_cpu_do_suspend
-
-       .ltorg
-       .align  5
-pkunity_cpu_do_suspend:
-       b       101f
-       @ put DDR2 into self-refresh
-100:   stw     r5, [r0+], #0x24
-       @ change PLL
-       stw     r6, [r1]
-       b       1f
-
-       .ltorg
-       .align  5
-101:   b       102f
-       @ wait for PLL changing complete
-1:     ldw     r6, [r1+], #0x44
-       csub.a  r6, #0x1
-       bne     1b
-       b       2f
-
-       .ltorg
-       .align  5
-102:   b       100b
-       @ close PLL
-2:     stw     r7, [r1+], #0x4
-       @ enter sleep mode
-       stw     r8, [r1]
-3:     b       3b
-
-
-
-
-/*
- * puv3_cpu_resume()
- *
- * entry point from bootloader into kernel during resume
- *
- * Note: Yes, part of the following code is located into the .data section.
- *       This is to allow sleep_save_sp to be accessed with a relative load
- *       while we can't rely on any MMU translation.  We could have put
- *       sleep_save_sp in the .text section as well, but some setups might
- *       insist on it to be truly read-only.
- */
-
-       .data
-       .align 5
-ENTRY(puv3_cpu_resume)
-@      movl    r0, 0x20202020
-@      put_word_ocd r0
-
-       ldw     r0, sleep_save_sp               @ stack phys addr
-       ldw     r2, =resume_after_mmu           @ its absolute virtual address
-       ldm     (r3 - r6), [r0]+                @ CP regs + virt stack ptr
-       mov     sp, r6                          @ CP regs + virt stack ptr
-
-       mov     r1, #0
-       movc    p0.c6, r1, #6                   @ invalidate I & D TLBs
-       movc    p0.c5, r1, #28                  @ invalidate I & D caches, BTB
-
-       movc    p0.c7, r3, #0                   @ PID
-       movc    p0.c2, r4, #0                   @ translation table base addr
-       movc    p0.c1, r5, #0                   @ control reg, turn on mmu
-       nop
-       jump    r2
-       nop
-       nop
-       nop
-       nop
-       nop
-
-sleep_save_sp:
-       .word   0                               @ preserve stack phys ptr here
-
-       .text
-resume_after_mmu:
-@      movl    r0, 0x30303030
-@      put_word_ocd r0
-
-#ifdef CONFIG_UNICORE_FPU_F64
-       lfm.w   (f0  - f7 ), [sp]+
-       lfm.w   (f8  - f15), [sp]+
-       lfm.w   (f16 - f23), [sp]+
-       lfm.w   (f24 - f31), [sp]+
-       ldm.w   (r4), [sp]+
-       ctf     r4, s31
-#endif
-       ldm.w   (r4 - r15), [sp]+               @ restore registers from stack
-       ldm.w   (r16 - r27, pc), [sp]+          @ return to caller
diff --git a/arch/unicore32/kernel/stacktrace.c b/arch/unicore32/kernel/stacktrace.c
deleted file mode 100644 (file)
index c9d8650..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/stacktrace.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-#include <linux/stacktrace.h>
-
-#include <asm/stacktrace.h>
-
-#if defined(CONFIG_FRAME_POINTER)
-/*
- * Unwind the current stack frame and store the new register values in the
- * structure passed as argument. Unwinding is equivalent to a function return,
- * hence the new PC value rather than LR should be used for backtrace.
- *
- * With framepointer enabled, a simple function prologue looks like this:
- *     mov     ip, sp
- *     stmdb   sp!, {fp, ip, lr, pc}
- *     sub     fp, ip, #4
- *
- * A simple function epilogue looks like this:
- *     ldm     sp, {fp, sp, pc}
- *
- * Note that with framepointer enabled, even the leaf functions have the same
- * prologue and epilogue, therefore we can ignore the LR value in this case.
- */
-int notrace unwind_frame(struct stackframe *frame)
-{
-       unsigned long high, low;
-       unsigned long fp = frame->fp;
-
-       /* only go to a higher address on the stack */
-       low = frame->sp;
-       high = ALIGN(low, THREAD_SIZE);
-
-       /* check current frame pointer is within bounds */
-       if (fp < (low + 12) || fp + 4 >= high)
-               return -EINVAL;
-
-       /* restore the registers from the stack frame */
-       frame->fp = *(unsigned long *)(fp - 12);
-       frame->sp = *(unsigned long *)(fp - 8);
-       frame->pc = *(unsigned long *)(fp - 4);
-
-       return 0;
-}
-#endif
-
-void notrace walk_stackframe(struct stackframe *frame,
-                    int (*fn)(struct stackframe *, void *), void *data)
-{
-       while (1) {
-               int ret;
-
-               if (fn(frame, data))
-                       break;
-               ret = unwind_frame(frame);
-               if (ret < 0)
-                       break;
-       }
-}
-EXPORT_SYMBOL(walk_stackframe);
-
-#ifdef CONFIG_STACKTRACE
-struct stack_trace_data {
-       struct stack_trace *trace;
-       unsigned int no_sched_functions;
-       unsigned int skip;
-};
-
-static int save_trace(struct stackframe *frame, void *d)
-{
-       struct stack_trace_data *data = d;
-       struct stack_trace *trace = data->trace;
-       unsigned long addr = frame->pc;
-
-       if (data->no_sched_functions && in_sched_functions(addr))
-               return 0;
-       if (data->skip) {
-               data->skip--;
-               return 0;
-       }
-
-       trace->entries[trace->nr_entries++] = addr;
-
-       return trace->nr_entries >= trace->max_entries;
-}
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
-       struct stack_trace_data data;
-       struct stackframe frame;
-
-       data.trace = trace;
-       data.skip = trace->skip;
-
-       if (tsk != current) {
-               data.no_sched_functions = 1;
-               frame.fp = thread_saved_fp(tsk);
-               frame.sp = thread_saved_sp(tsk);
-               frame.lr = 0;           /* recovered from the stack */
-               frame.pc = thread_saved_pc(tsk);
-       } else {
-               register unsigned long current_sp asm("sp");
-
-               data.no_sched_functions = 0;
-               frame.fp = (unsigned long)__builtin_frame_address(0);
-               frame.sp = current_sp;
-               frame.lr = (unsigned long)__builtin_return_address(0);
-               frame.pc = (unsigned long)save_stack_trace_tsk;
-       }
-
-       walk_stackframe(&frame, save_trace, &data);
-}
-
-void save_stack_trace(struct stack_trace *trace)
-{
-       save_stack_trace_tsk(current, trace);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-#endif
diff --git a/arch/unicore32/kernel/sys.c b/arch/unicore32/kernel/sys.c
deleted file mode 100644 (file)
index 256fb40..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/sys.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/ipc.h>
-#include <linux/uaccess.h>
-
-#include <asm/syscalls.h>
-#include <asm/cacheflush.h>
-
-/* Provide the actual syscall number to call mapping. */
-#undef __SYSCALL
-#define __SYSCALL(nr, call)    [nr] = (call),
-
-#define sys_mmap2 sys_mmap_pgoff
-/* Note that we don't include <linux/unistd.h> but <asm/unistd.h> */
-void *sys_call_table[__NR_syscalls] = {
-       [0 ... __NR_syscalls-1] = sys_ni_syscall,
-#include <asm/unistd.h>
-};
diff --git a/arch/unicore32/kernel/time.c b/arch/unicore32/kernel/time.c
deleted file mode 100644 (file)
index c3a37ed..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/time.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/timex.h>
-#include <linux/clockchips.h>
-
-#include <mach/hardware.h>
-
-#define MIN_OSCR_DELTA 2
-
-static irqreturn_t puv3_ost0_interrupt(int irq, void *dev_id)
-{
-       struct clock_event_device *c = dev_id;
-
-       /* Disarm the compare/match, signal the event. */
-       writel(readl(OST_OIER) & ~OST_OIER_E0, OST_OIER);
-       writel(readl(OST_OSSR) & ~OST_OSSR_M0, OST_OSSR);
-       c->event_handler(c);
-
-       return IRQ_HANDLED;
-}
-
-static int
-puv3_osmr0_set_next_event(unsigned long delta, struct clock_event_device *c)
-{
-       unsigned long next, oscr;
-
-       writel(readl(OST_OIER) | OST_OIER_E0, OST_OIER);
-       next = readl(OST_OSCR) + delta;
-       writel(next, OST_OSMR0);
-       oscr = readl(OST_OSCR);
-
-       return (signed)(next - oscr) <= MIN_OSCR_DELTA ? -ETIME : 0;
-}
-
-static int puv3_osmr0_shutdown(struct clock_event_device *evt)
-{
-       writel(readl(OST_OIER) & ~OST_OIER_E0, OST_OIER);
-       writel(readl(OST_OSSR) & ~OST_OSSR_M0, OST_OSSR);
-       return 0;
-}
-
-static struct clock_event_device ckevt_puv3_osmr0 = {
-       .name                   = "osmr0",
-       .features               = CLOCK_EVT_FEAT_ONESHOT,
-       .rating                 = 200,
-       .set_next_event         = puv3_osmr0_set_next_event,
-       .set_state_shutdown     = puv3_osmr0_shutdown,
-       .set_state_oneshot      = puv3_osmr0_shutdown,
-};
-
-static u64 puv3_read_oscr(struct clocksource *cs)
-{
-       return readl(OST_OSCR);
-}
-
-static struct clocksource cksrc_puv3_oscr = {
-       .name           = "oscr",
-       .rating         = 200,
-       .read           = puv3_read_oscr,
-       .mask           = CLOCKSOURCE_MASK(32),
-       .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-void __init time_init(void)
-{
-       writel(0, OST_OIER);            /* disable any timer interrupts */
-       writel(0, OST_OSSR);            /* clear status on all timers */
-
-       clockevents_calc_mult_shift(&ckevt_puv3_osmr0, CLOCK_TICK_RATE, 5);
-
-       ckevt_puv3_osmr0.max_delta_ns =
-               clockevent_delta2ns(0x7fffffff, &ckevt_puv3_osmr0);
-       ckevt_puv3_osmr0.max_delta_ticks = 0x7fffffff;
-       ckevt_puv3_osmr0.min_delta_ns =
-               clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_puv3_osmr0) + 1;
-       ckevt_puv3_osmr0.min_delta_ticks = MIN_OSCR_DELTA * 2;
-       ckevt_puv3_osmr0.cpumask = cpumask_of(0);
-
-       if (request_irq(IRQ_TIMER0, puv3_ost0_interrupt,
-                       IRQF_TIMER | IRQF_IRQPOLL, "ost0", &ckevt_puv3_osmr0))
-               pr_err("Failed to register ost0 interrupt\n");
-
-       clocksource_register_hz(&cksrc_puv3_oscr, CLOCK_TICK_RATE);
-       clockevents_register_device(&ckevt_puv3_osmr0);
-}
-
-#ifdef CONFIG_PM
-unsigned long osmr[4], oier;
-
-void puv3_timer_suspend(void)
-{
-       osmr[0] = readl(OST_OSMR0);
-       osmr[1] = readl(OST_OSMR1);
-       osmr[2] = readl(OST_OSMR2);
-       osmr[3] = readl(OST_OSMR3);
-       oier = readl(OST_OIER);
-}
-
-void puv3_timer_resume(void)
-{
-       writel(0, OST_OSSR);
-       writel(osmr[0], OST_OSMR0);
-       writel(osmr[1], OST_OSMR1);
-       writel(osmr[2], OST_OSMR2);
-       writel(osmr[3], OST_OSMR3);
-       writel(oier, OST_OIER);
-
-       /*
-        * OSMR0 is the system timer: make sure OSCR is sufficiently behind
-        */
-       writel(readl(OST_OSMR0) - LATCH, OST_OSCR);
-}
-#else
-void puv3_timer_suspend(void) { };
-void puv3_timer_resume(void) { };
-#endif
-
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
deleted file mode 100644 (file)
index a3ac01d..0000000
+++ /dev/null
@@ -1,322 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/traps.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  'traps.c' handles hardware exceptions after we have saved some state.
- *  Mostly a debugging aid, but will probably kill the offending process.
- */
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched/signal.h>
-#include <linux/sched/debug.h>
-#include <linux/sched/task_stack.h>
-#include <linux/spinlock.h>
-#include <linux/personality.h>
-#include <linux/kallsyms.h>
-#include <linux/kdebug.h>
-#include <linux/uaccess.h>
-#include <linux/delay.h>
-#include <linux/hardirq.h>
-#include <linux/init.h>
-#include <linux/atomic.h>
-#include <linux/unistd.h>
-
-#include <asm/cacheflush.h>
-#include <asm/traps.h>
-
-#include "setup.h"
-
-static void dump_mem(const char *, const char *, unsigned long, unsigned long);
-
-void dump_backtrace_entry(unsigned long where,
-               unsigned long from, unsigned long frame)
-{
-#ifdef CONFIG_KALLSYMS
-       printk(KERN_DEFAULT "[<%08lx>] (%pS) from [<%08lx>] (%pS)\n",
-                       where, (void *)where, from, (void *)from);
-#else
-       printk(KERN_DEFAULT "Function entered at [<%08lx>] from [<%08lx>]\n",
-                       where, from);
-#endif
-}
-
-/*
- * Stack pointers should always be within the kernels view of
- * physical memory.  If it is not there, then we can't dump
- * out any information relating to the stack.
- */
-static int verify_stack(unsigned long sp)
-{
-       if (sp < PAGE_OFFSET ||
-           (sp > (unsigned long)high_memory && high_memory != NULL))
-               return -EFAULT;
-
-       return 0;
-}
-
-/*
- * Dump out the contents of some memory nicely...
- */
-static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
-                    unsigned long top)
-{
-       unsigned long first;
-       mm_segment_t fs;
-       int i;
-
-       /*
-        * We need to switch to kernel mode so that we can use __get_user
-        * to safely read from kernel space.  Note that we now dump the
-        * code first, just in case the backtrace kills us.
-        */
-       fs = get_fs();
-       set_fs(KERNEL_DS);
-
-       printk(KERN_DEFAULT "%s%s(0x%08lx to 0x%08lx)\n",
-                       lvl, str, bottom, top);
-
-       for (first = bottom & ~31; first < top; first += 32) {
-               unsigned long p;
-               char str[sizeof(" 12345678") * 8 + 1];
-
-               memset(str, ' ', sizeof(str));
-               str[sizeof(str) - 1] = '\0';
-
-               for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
-                       if (p >= bottom && p < top) {
-                               unsigned long val;
-                               if (__get_user(val, (unsigned long *)p) == 0)
-                                       sprintf(str + i * 9, " %08lx", val);
-                               else
-                                       sprintf(str + i * 9, " ????????");
-                       }
-               }
-               printk(KERN_DEFAULT "%s%04lx:%s\n", lvl, first & 0xffff, str);
-       }
-
-       set_fs(fs);
-}
-
-static void dump_instr(const char *lvl, struct pt_regs *regs)
-{
-       unsigned long addr = instruction_pointer(regs);
-       const int width = 8;
-       mm_segment_t fs;
-       char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
-       int i;
-
-       /*
-        * We need to switch to kernel mode so that we can use __get_user
-        * to safely read from kernel space.  Note that we now dump the
-        * code first, just in case the backtrace kills us.
-        */
-       fs = get_fs();
-       set_fs(KERNEL_DS);
-
-       for (i = -4; i < 1; i++) {
-               unsigned int val, bad;
-
-               bad = __get_user(val, &((u32 *)addr)[i]);
-
-               if (!bad)
-                       p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ",
-                                       width, val);
-               else {
-                       p += sprintf(p, "bad PC value");
-                       break;
-               }
-       }
-       printk(KERN_DEFAULT "%sCode: %s\n", lvl, str);
-
-       set_fs(fs);
-}
-
-static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
-                          const char *loglvl)
-{
-       unsigned int fp;
-       int ok = 1;
-
-       printk("%sBacktrace: ", loglvl);
-
-       if (!tsk)
-               tsk = current;
-
-       if (regs)
-               fp = regs->UCreg_fp;
-       else if (tsk != current)
-               fp = thread_saved_fp(tsk);
-       else
-               asm("mov %0, fp" : "=r" (fp) : : "cc");
-
-       if (!fp) {
-               printk("%sno frame pointer", loglvl);
-               ok = 0;
-       } else if (verify_stack(fp)) {
-               printk("%sinvalid frame pointer 0x%08x", loglvl, fp);
-               ok = 0;
-       } else if (fp < (unsigned long)end_of_stack(tsk))
-               printk("%sframe pointer underflow", loglvl);
-       printk("%s\n", loglvl);
-
-       if (ok)
-               c_backtrace(fp, loglvl);
-}
-
-void show_stack(struct task_struct *tsk, unsigned long *sp,
-                      const char *loglvl)
-{
-       dump_backtrace(NULL, tsk, loglvl);
-       barrier();
-}
-
-static int __die(const char *str, int err, struct thread_info *thread,
-               struct pt_regs *regs)
-{
-       struct task_struct *tsk = thread->task;
-       static int die_counter;
-       int ret;
-
-       printk(KERN_EMERG "Internal error: %s: %x [#%d]\n",
-              str, err, ++die_counter);
-
-       /* trap and error numbers are mostly meaningless on UniCore */
-       ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, \
-                       SIGSEGV);
-       if (ret == NOTIFY_STOP)
-               return ret;
-
-       print_modules();
-       __show_regs(regs);
-       printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n",
-               TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
-
-       if (!user_mode(regs) || in_interrupt()) {
-               dump_mem(KERN_EMERG, "Stack: ", regs->UCreg_sp,
-                        THREAD_SIZE + (unsigned long)task_stack_page(tsk));
-               dump_backtrace(regs, tsk, KERN_EMERG);
-               dump_instr(KERN_EMERG, regs);
-       }
-
-       return ret;
-}
-
-DEFINE_SPINLOCK(die_lock);
-
-/*
- * This function is protected against re-entrancy.
- */
-void die(const char *str, struct pt_regs *regs, int err)
-{
-       struct thread_info *thread = current_thread_info();
-       int ret;
-
-       oops_enter();
-
-       spin_lock_irq(&die_lock);
-       console_verbose();
-       bust_spinlocks(1);
-       ret = __die(str, err, thread, regs);
-
-       bust_spinlocks(0);
-       add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
-       spin_unlock_irq(&die_lock);
-       oops_exit();
-
-       if (in_interrupt())
-               panic("Fatal exception in interrupt");
-       if (panic_on_oops)
-               panic("Fatal exception");
-       if (ret != NOTIFY_STOP)
-               do_exit(SIGSEGV);
-}
-
-void uc32_notify_die(const char *str, struct pt_regs *regs,
-               int sig, int code, void __user *addr,
-               unsigned long err, unsigned long trap)
-{
-       if (user_mode(regs)) {
-               current->thread.error_code = err;
-               current->thread.trap_no = trap;
-
-               force_sig_fault(sig, code, addr);
-       } else
-               die(str, regs, err);
-}
-
-/*
- * bad_mode handles the impossible case in the vectors.  If you see one of
- * these, then it's extremely serious, and could mean you have buggy hardware.
- * It never returns, and never tries to sync.  We hope that we can at least
- * dump out some state information...
- */
-asmlinkage void bad_mode(struct pt_regs *regs, unsigned int reason)
-{
-       console_verbose();
-
-       printk(KERN_CRIT "Bad mode detected with reason 0x%x\n", reason);
-
-       die("Oops - bad mode", regs, 0);
-       local_irq_disable();
-       panic("bad mode");
-}
-
-void __pte_error(const char *file, int line, unsigned long val)
-{
-       printk(KERN_DEFAULT "%s:%d: bad pte %08lx.\n", file, line, val);
-}
-
-void __pmd_error(const char *file, int line, unsigned long val)
-{
-       printk(KERN_DEFAULT "%s:%d: bad pmd %08lx.\n", file, line, val);
-}
-
-void __pgd_error(const char *file, int line, unsigned long val)
-{
-       printk(KERN_DEFAULT "%s:%d: bad pgd %08lx.\n", file, line, val);
-}
-
-asmlinkage void __div0(void)
-{
-       printk(KERN_DEFAULT "Division by zero in kernel.\n");
-       dump_stack();
-}
-EXPORT_SYMBOL(__div0);
-
-void abort(void)
-{
-       BUG();
-
-       /* if that doesn't kill us, halt */
-       panic("Oops failed to kill thread");
-}
-
-void __init trap_init(void)
-{
-       return;
-}
-
-void __init early_trap_init(void)
-{
-       unsigned long vectors = VECTORS_BASE;
-
-       /*
-        * Copy the vectors, stubs (in entry-unicore.S)
-        * into the vector page, mapped at 0xffff0000, and ensure these
-        * are visible to the instruction stream.
-        */
-       memcpy((void *)vectors,
-                       __vectors_start,
-                       __vectors_end - __vectors_start);
-       memcpy((void *)vectors + 0x200,
-                       __stubs_start,
-                       __stubs_end - __stubs_start);
-
-       early_signal_init();
-
-       flush_icache_range(vectors, vectors + PAGE_SIZE);
-}
diff --git a/arch/unicore32/kernel/vmlinux.lds.S b/arch/unicore32/kernel/vmlinux.lds.S
deleted file mode 100644 (file)
index 6fb320b..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/kernel/vmlinux.lds.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/thread_info.h>
-#include <asm/memory.h>
-#include <asm/page.h>
-#include <asm/cache.h>
-
-OUTPUT_ARCH(unicore32)
-ENTRY(stext)
-
-jiffies = jiffies_64;
-
-SECTIONS
-{
-       . = PAGE_OFFSET + KERNEL_IMAGE_START;
-
-       _text = .;
-       __init_begin = .;
-       HEAD_TEXT_SECTION
-       INIT_TEXT_SECTION(PAGE_SIZE)
-       INIT_DATA_SECTION(16)
-       PERCPU_SECTION(L1_CACHE_BYTES)
-       __init_end = .;
-
-       _stext = .;
-       .text : {               /* Real text segment */
-               TEXT_TEXT
-               SCHED_TEXT
-               CPUIDLE_TEXT
-               LOCK_TEXT
-
-               *(.fixup)
-               *(.gnu.warning)
-       }
-       _etext = .;
-
-       _sdata = .;
-       RO_DATA(PAGE_SIZE)
-       RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
-       _edata = .;
-
-       EXCEPTION_TABLE(L1_CACHE_BYTES)
-
-       BSS_SECTION(0, 0, 0)
-       _end = .;
-
-       STABS_DEBUG
-       DWARF_DEBUG
-
-       DISCARDS                /* Exit code and data */
-}
diff --git a/arch/unicore32/lib/Makefile b/arch/unicore32/lib/Makefile
deleted file mode 100644 (file)
index 5af0664..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# linux/arch/unicore32/lib/Makefile
-#
-# Copyright (C) 2001-2010 GUAN Xue-tao
-#
-
-lib-y  := backtrace.o delay.o findbit.o
-lib-y  += strncpy_from_user.o strnlen_user.o
-lib-y  += clear_user.o copy_page.o
-lib-y  += copy_from_user.o copy_to_user.o
-
-GNU_LIBC_A             = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libc.a)
-GNU_LIBC_A_OBJS                := memchr.o memcpy.o memmove.o memset.o
-GNU_LIBC_A_OBJS                += strchr.o strrchr.o
-GNU_LIBC_A_OBJS                += rawmemchr.o                  # needed by strrchr.o
-
-GNU_LIBGCC_A           = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libgcc.a)
-GNU_LIBGCC_A_OBJS      := _ashldi3.o _ashrdi3.o _lshrdi3.o
-GNU_LIBGCC_A_OBJS      += _divsi3.o _modsi3.o _ucmpdi2.o _umodsi3.o _udivsi3.o
-
-lib-y  += $(GNU_LIBC_A_OBJS) $(GNU_LIBGCC_A_OBJS)
-
-$(addprefix $(obj)/, $(GNU_LIBC_A_OBJS)):
-       $(Q)$(AR) p $(GNU_LIBC_A) $(notdir $@) > $@
-
-$(addprefix $(obj)/, $(GNU_LIBGCC_A_OBJS)):
-       $(Q)$(AR) p $(GNU_LIBGCC_A) $(notdir $@) > $@
diff --git a/arch/unicore32/lib/backtrace.S b/arch/unicore32/lib/backtrace.S
deleted file mode 100644 (file)
index 6221944..0000000
+++ /dev/null
@@ -1,168 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/backtrace.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-               .text
-
-@ fp is 0 or stack frame
-
-#define frame  v4
-#define sv_fp  v5
-#define sv_pc  v6
-#define offset v8
-#define loglvl v9
-
-ENTRY(__backtrace)
-               mov     r0, fp
-
-ENTRY(c_backtrace)
-
-#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
-               mov     pc, lr
-ENDPROC(__backtrace)
-ENDPROC(c_backtrace)
-#else
-               stm.w   (v4 - v10, lr), [sp-]   @ Save an extra register
-                                               @ so we have a location...
-               mov.a   frame, r0               @ if frame pointer is zero
-               beq     no_frame                @ we have no stack frames
-               mov     loglvl, r1
-
-1:             stm.w   (pc), [sp-]             @ calculate offset of PC stored
-               ldw.w   r0, [sp]+, #4           @ by stmfd for this CPU
-               adr     r1, 1b
-               sub     offset, r0, r1
-
-/*
- * Stack frame layout:
- *             optionally saved caller registers (r4 - r10)
- *             saved fp
- *             saved sp
- *             saved lr
- *    frame => saved pc
- *             optionally saved arguments (r0 - r3)
- * saved sp => <next word>
- *
- * Functions start with the following code sequence:
- *                  mov   ip, sp
- *                  stm.w (r0 - r3), [sp-] (optional)
- * corrected pc =>  stm.w sp, (..., fp, ip, lr, pc)
- */
-for_each_frame:
-
-1001:          ldw     sv_pc, [frame+], #0     @ get saved pc
-1002:          ldw     sv_fp, [frame+], #-12   @ get saved fp
-
-               sub     sv_pc, sv_pc, offset    @ Correct PC for prefetching
-
-1003:          ldw     r2, [sv_pc+], #-4       @ if stmfd sp, {args} exists,
-               ldw     r3, .Ldsi+4             @ adjust saved 'pc' back one
-               cxor.a  r3, r2 >> #14           @ instruction
-               beq     201f
-               sub     r0, sv_pc, #4           @ allow for mov
-               b       202f
-201:
-               sub     r0, sv_pc, #8           @ allow for mov + stmia
-202:
-               ldw     r1, [frame+], #-4       @ get saved lr
-               mov     r2, frame
-               b.l     dump_backtrace_entry
-
-               ldw     r1, [sv_pc+], #-4       @ if stmfd sp, {args} exists,
-               ldw     r3, .Ldsi+4
-               cxor.a  r3, r1 >> #14
-               bne     1004f
-               ldw     r0, [frame+], #-8       @ get sp
-               sub     r0, r0, #4              @ point at the last arg
-               b.l     .Ldumpstm               @ dump saved registers
-
-1004:          ldw     r1, [sv_pc+], #0        @ if stmfd {, fp, ip, lr, pc}
-               ldw     r3, .Ldsi               @ instruction exists,
-               cxor.a  r3, r1 >> #14
-               bne     201f
-               sub     r0, frame, #16
-               b.l     .Ldumpstm               @ dump saved registers
-201:
-               cxor.a  sv_fp, #0               @ zero saved fp means
-               beq     no_frame                @ no further frames
-
-               csub.a  sv_fp, frame            @ next frame must be
-               mov     frame, sv_fp            @ above the current frame
-               bua     for_each_frame
-
-1006:          adr     r0, .Lbad
-               mov     r1, loglvl
-               mov     r2, frame
-               b.l     printk
-no_frame:      ldm.w   (v4 - v10, pc), [sp]+
-ENDPROC(__backtrace)
-ENDPROC(c_backtrace)
-
-               .pushsection __ex_table,"a"
-               .align  3
-               .long   1001b, 1006b
-               .long   1002b, 1006b
-               .long   1003b, 1006b
-               .long   1004b, 1006b
-               .popsection
-
-#define instr v4
-#define reg   v5
-#define stack v6
-
-.Ldumpstm:     stm.w   (instr, reg, stack, v7, lr), [sp-]
-               mov     stack, r0
-               mov     instr, r1
-               mov     reg, #14
-               mov     v7, #0
-1:             mov     r3, #1
-               csub.a  reg, #8
-               bne     201f
-               sub     reg, reg, #3
-201:
-               cand.a  instr, r3 << reg
-               beq     2f
-               add     v7, v7, #1
-               cxor.a  v7, #6
-               cmoveq  v7, #1
-               bne     201f
-               adr     r0, .Lcr
-               mov     r1, loglvl
-               b.l     printk
-201:
-               ldw.w   r3, [stack]+, #-4
-               mov     r2, reg
-               csub.a  r2, #8
-               bsl     201f
-               sub     r2, r2, #3
-201:
-               cand.a  instr, #0x40            @ if H is 1, high 16 regs
-               beq     201f
-               add     r2, r2, #0x10           @ so r2 need add 16
-201:
-               adr     r0, .Lfp
-               mov     r1, loglvl
-               b.l     printk
-2:             sub.a   reg, reg, #1
-               bns     1b
-               cxor.a  v7, #0
-               beq     201f
-               adr     r0, .Lcr
-               mov     r1, loglvl
-               b.l     printk
-201:           ldm.w   (instr, reg, stack, v7, pc), [sp]+
-
-.Lfp:          .asciz  "%sr%d:%08x "
-.Lcr:          .asciz  "%s\n"
-.Lbad:         .asciz  "%sBacktrace aborted due to bad frame pointer <%p>\n"
-               .align
-.Ldsi:         .word   0x92eec000 >> 14        @ stm.w sp, (... fp, ip, lr, pc)
-               .word   0x92e10000 >> 14        @ stm.w sp, ()
-
-#endif
diff --git a/arch/unicore32/lib/clear_user.S b/arch/unicore32/lib/clear_user.S
deleted file mode 100644 (file)
index c6ca431..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/clear_user.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-               .text
-
-/* Prototype: int __clear_user(void *addr, size_t sz)
- * Purpose  : clear some user memory
- * Params   : addr - user memory address to clear
- *          : sz   - number of bytes to clear
- * Returns  : number of bytes NOT cleared
- */
-WEAK(__clear_user)
-               stm.w   (lr), [sp-]
-               stm.w   (r1), [sp-]
-               mov     r2, #0
-               csub.a  r1, #4
-               bsl     2f
-               and.a   ip, r0, #3
-               beq     1f
-               csub.a  ip, #2
-               strusr  r2, r0, 1
-               strusr  r2, r0, 1, el
-               strusr  r2, r0, 1, sl
-               rsub    ip, ip, #4
-               sub     r1, r1, ip              @  7  6  5  4  3  2  1
-1:             sub.a   r1, r1, #8              @ -1 -2 -3 -4 -5 -6 -7
-               strusr  r2, r0, 4, ns, rept=2
-               bns     1b
-               add.a   r1, r1, #4              @  3  2  1  0 -1 -2 -3
-               strusr  r2, r0, 4, ns
-2:             cand.a  r1, #2                  @ 1x 1x 0x 0x 1x 1x 0x
-               strusr  r2, r0, 1, ne, rept=2
-               cand.a  r1, #1                  @ x1 x0 x1 x0 x1 x0 x1
-               beq     3f
-USER(          stb.u   r2, [r0])
-3:             mov     r0, #0
-               ldm.w   (r1), [sp]+
-               ldm.w   (pc), [sp]+
-ENDPROC(__clear_user)
-
-               .pushsection .fixup,"ax"
-               .align  0
-9001:          ldm.w   (r0), [sp]+
-               ldm.w   (pc), [sp]+
-               .popsection
-
diff --git a/arch/unicore32/lib/copy_from_user.S b/arch/unicore32/lib/copy_from_user.S
deleted file mode 100644 (file)
index affb439..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/copy_from_user.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-/*
- * Prototype:
- *
- *     size_t raw_copy_from_user(void *to, const void *from, size_t n)
- *
- * Purpose:
- *
- *     copy a block to kernel memory from user memory
- *
- * Params:
- *
- *     to = kernel memory
- *     from = user memory
- *     n = number of bytes to copy
- *
- * Return value:
- *
- *     Number of bytes NOT copied.
- */
-
-       .macro ldr1w ptr reg abort
-       ldrusr  \reg, \ptr, 4, abort=\abort
-       .endm
-
-       .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
-100:   ldm.w   (\reg1, \reg2, \reg3, \reg4), [\ptr]+
-       .pushsection __ex_table, "a"
-       .align  3
-       .long 100b, \abort
-       .popsection
-       .endm
-
-       .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-100:   ldm.w (\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8), [\ptr]+
-       .pushsection __ex_table, "a"
-       .align  3
-       .long 100b, \abort
-       .popsection
-       .endm
-
-       .macro ldr1b ptr reg cond=al abort
-       ldrusr  \reg, \ptr, 1, \cond, abort=\abort
-       .endm
-
-       .macro str1w ptr reg abort
-       stw.w \reg, [\ptr]+, #4
-       .endm
-
-       .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-       stm.w (\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8), [\ptr]+
-       .endm
-
-       .macro str1b ptr reg cond=al abort
-       .ifnc   \cond, al
-       b\cond  201f
-       b       202f
-       .endif
-201:   stb.w \reg, [\ptr]+, #1
-202:
-       .endm
-
-       .macro enter
-       mov     r3, #0
-       stm.w   (r0, r2, r3), [sp-]
-       .endm
-
-       .macro exit
-       add     sp, sp, #8
-       ldm.w   (r0), [sp]+
-       mov     pc, lr
-       .endm
-
-       .text
-
-ENTRY(raw_copy_from_user)
-
-#include "copy_template.S"
-
-ENDPROC(raw_copy_from_user)
-
-       .pushsection .fixup,"ax"
-       .align 0
-       copy_abort_preamble
-       ldm.w   (r1, r2, r3), [sp]+
-       sub     r0, r0, r1
-       rsub    r0, r0, r2
-       copy_abort_end
-       .popsection
-
diff --git a/arch/unicore32/lib/copy_page.S b/arch/unicore32/lib/copy_page.S
deleted file mode 100644 (file)
index dc163f2..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/copy_page.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  ASM optimised string functions
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <generated/asm-offsets.h>
-#include <asm/cache.h>
-
-#define COPY_COUNT (PAGE_SZ/256)
-
-               .text
-               .align  5
-/*
- * UniCore optimised copy_page routine
- */
-ENTRY(copy_page)
-               stm.w   (r17 - r19, lr), [sp-]
-               mov     r17, r0
-               mov     r18, r1
-               mov     r19, #COPY_COUNT
-1:
-       .rept   4
-               ldm.w   (r0 - r15), [r18]+
-               stm.w   (r0 - r15), [r17]+
-       .endr
-               sub.a   r19, r19, #1
-               bne     1b
-               ldm.w   (r17 - r19, pc), [sp]+
-ENDPROC(copy_page)
diff --git a/arch/unicore32/lib/copy_template.S b/arch/unicore32/lib/copy_template.S
deleted file mode 100644 (file)
index 02a7aef..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/copy_template.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-/*
- * Theory of operation
- * -------------------
- *
- * This file provides the core code for a forward memory copy used in
- * the implementation of memcopy(), copy_to_user() and copy_from_user().
- *
- * The including file must define the following accessor macros
- * according to the need of the given function:
- *
- * ldr1w ptr reg abort
- *
- *     This loads one word from 'ptr', stores it in 'reg' and increments
- *     'ptr' to the next word. The 'abort' argument is used for fixup tables.
- *
- * ldr4w ptr reg1 reg2 reg3 reg4 abort
- * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
- *
- *     This loads four or eight words starting from 'ptr', stores them
- *     in provided registers and increments 'ptr' past those words.
- *     The'abort' argument is used for fixup tables.
- *
- * ldr1b ptr reg cond abort
- *
- *     Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
- *     It also must apply the condition code if provided, otherwise the
- *     "al" condition is assumed by default.
- *
- * str1w ptr reg abort
- * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
- * str1b ptr reg cond abort
- *
- *     Same as their ldr* counterparts, but data is stored to 'ptr' location
- *     rather than being loaded.
- *
- * enter
- *
- *     Preserve the provided registers on the stack plus any additional
- *     data as needed by the implementation including this code. Called
- *     upon code entry.
- *
- * exit
- *
- *     Restore registers with the values previously saved with the
- *     'preserv' macro. Called upon code termination.
- */
-
-
-               enter
-
-               sub.a   r2, r2, #4
-               bsl     8f
-               and.a   ip, r0, #3
-               bne     9f
-               and.a   ip, r1, #3
-               bne     10f
-
-1:             sub.a   r2, r2, #(28)
-               stm.w   (r5 - r8), [sp-]
-               bsl     5f
-
-3:
-4:             ldr8w   r1, r3, r4, r5, r6, r7, r8, r10, r11, abort=20f
-               sub.a   r2, r2, #32
-               str8w   r0, r3, r4, r5, r6, r7, r8, r10, r11, abort=20f
-               beg     3b
-
-5:             and.a   ip, r2, #28
-               rsub    ip, ip, #32
-               beq     7f
-               add     pc, pc, ip              @ C is always clear here
-               nop
-
-               ldr1w   r1, r3, abort=20f
-               ldr1w   r1, r4, abort=20f
-               ldr1w   r1, r5, abort=20f
-               ldr1w   r1, r6, abort=20f
-               ldr1w   r1, r7, abort=20f
-               ldr1w   r1, r8, abort=20f
-               ldr1w   r1, r11, abort=20f
-
-               add     pc, pc, ip
-               nop
-
-               str1w   r0, r3, abort=20f
-               str1w   r0, r4, abort=20f
-               str1w   r0, r5, abort=20f
-               str1w   r0, r6, abort=20f
-               str1w   r0, r7, abort=20f
-               str1w   r0, r8, abort=20f
-               str1w   r0, r11, abort=20f
-
-7:             ldm.w   (r5 - r8), [sp]+
-
-8:             mov.a   r2, r2 << #31
-               ldr1b   r1, r3, ne, abort=21f
-               ldr1b   r1, r4, ea, abort=21f
-               ldr1b   r1, r10, ea, abort=21f
-               str1b   r0, r3, ne, abort=21f
-               str1b   r0, r4, ea, abort=21f
-               str1b   r0, r10, ea, abort=21f
-
-               exit
-
-9:             rsub    ip, ip, #4
-               csub.a  ip, #2
-               ldr1b   r1, r3, sg, abort=21f
-               ldr1b   r1, r4, eg, abort=21f
-               ldr1b   r1, r11, abort=21f
-               str1b   r0, r3, sg, abort=21f
-               str1b   r0, r4, eg, abort=21f
-               sub.a   r2, r2, ip
-               str1b   r0, r11, abort=21f
-               bsl     8b
-               and.a   ip, r1, #3
-               beq     1b
-
-10:            andn    r1, r1, #3
-               csub.a  ip, #2
-               ldr1w   r1, r11, abort=21f
-               beq     17f
-               bsg     18f
-
-
-               .macro  forward_copy_shift a b
-
-               sub.a   r2, r2, #28
-               bsl     14f
-
-11:            stm.w   (r5 - r9), [sp-]
-
-12:
-               ldr4w   r1, r4, r5, r6, r7, abort=19f
-               mov     r3, r11 pull #\a
-               sub.a   r2, r2, #32
-               ldr4w   r1, r8, r9, r10, r11, abort=19f
-               or      r3, r3, r4 push #\b
-               mov     r4, r4 pull #\a
-               or      r4, r4, r5 push #\b
-               mov     r5, r5 pull #\a
-               or      r5, r5, r6 push #\b
-               mov     r6, r6 pull #\a
-               or      r6, r6, r7 push #\b
-               mov     r7, r7 pull #\a
-               or      r7, r7, r8 push #\b
-               mov     r8, r8 pull #\a
-               or      r8, r8, r9 push #\b
-               mov     r9, r9 pull #\a
-               or      r9, r9, r10 push #\b
-               mov     r10, r10 pull #\a
-               or      r10, r10, r11 push #\b
-               str8w   r0, r3, r4, r5, r6, r7, r8, r9, r10, , abort=19f
-               beg     12b
-
-               ldm.w   (r5 - r9), [sp]+
-
-14:            and.a   ip, r2, #28
-               beq     16f
-
-15:            mov     r3, r11 pull #\a
-               ldr1w   r1, r11, abort=21f
-               sub.a   ip, ip, #4
-               or      r3, r3, r11 push #\b
-               str1w   r0, r3, abort=21f
-               bsg     15b
-
-16:            sub     r1, r1, #(\b / 8)
-               b       8b
-
-               .endm
-
-
-               forward_copy_shift      a=8     b=24
-
-17:            forward_copy_shift      a=16    b=16
-
-18:            forward_copy_shift      a=24    b=8
-
-
-/*
- * Abort preamble and completion macros.
- * If a fixup handler is required then those macros must surround it.
- * It is assumed that the fixup code will handle the private part of
- * the exit macro.
- */
-
-       .macro  copy_abort_preamble
-19:    ldm.w   (r5 - r9), [sp]+
-       b       21f
-299:   .word   0                       @ store lr
-                                       @ to avoid function call in fixup
-20:    ldm.w   (r5 - r8), [sp]+
-21:
-       adr     r1, 299b
-       stw     lr, [r1]
-       .endm
-
-       .macro  copy_abort_end
-       adr     lr, 299b
-       ldw     pc, [lr]
-       .endm
-
diff --git a/arch/unicore32/lib/copy_to_user.S b/arch/unicore32/lib/copy_to_user.S
deleted file mode 100644 (file)
index c867f08..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/copy_to_user.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-/*
- * Prototype:
- *
- *     size_t raw_copy_to_user(void *to, const void *from, size_t n)
- *
- * Purpose:
- *
- *     copy a block to user memory from kernel memory
- *
- * Params:
- *
- *     to = user memory
- *     from = kernel memory
- *     n = number of bytes to copy
- *
- * Return value:
- *
- *     Number of bytes NOT copied.
- */
-
-       .macro ldr1w ptr reg abort
-       ldw.w \reg, [\ptr]+, #4
-       .endm
-
-       .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
-       ldm.w   (\reg1, \reg2, \reg3, \reg4), [\ptr]+
-       .endm
-
-       .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-       ldm.w (\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8), [\ptr]+
-       .endm
-
-       .macro ldr1b ptr reg cond=al abort
-       notcond \cond, .+8
-       ldb.w \reg, [\ptr]+, #1
-       .endm
-
-       .macro str1w ptr reg abort
-       strusr  \reg, \ptr, 4, abort=\abort
-       .endm
-
-       .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-100:   stm.w (\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8), [\ptr]+
-
-       .pushsection __ex_table, "a"
-       .long 100b, \abort
-       .popsection
-       .endm
-
-       .macro str1b ptr reg cond=al abort
-       strusr  \reg, \ptr, 1, \cond, abort=\abort
-       .endm
-
-       .macro enter
-       mov     r3, #0
-       stm.w   (r0, r2, r3), [sp-]
-       .endm
-
-       .macro exit
-       add     sp, sp, #8
-       ldm.w   (r0), [sp]+
-       mov     pc, lr
-       .endm
-
-       .text
-
-WEAK(raw_copy_to_user)
-
-#include "copy_template.S"
-
-ENDPROC(raw_copy_to_user)
-
-       .pushsection .fixup,"ax"
-       .align 0
-       copy_abort_preamble
-       ldm.w   (r1, r2, r3), [sp]+
-       sub     r0, r0, r1
-       rsub    r0, r0, r2
-       copy_abort_end
-       .popsection
-
diff --git a/arch/unicore32/lib/delay.S b/arch/unicore32/lib/delay.S
deleted file mode 100644 (file)
index 6a359dd..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/delay.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/param.h>
-               .text
-
-.LC0:          .word   loops_per_jiffy         @ address of loops_per_jiffy
-.LC1:          .word   (2199023*HZ)>>11        @ 2199023 ~= 2^41 / 1000000, so this is ~ 2^30 * HZ / 1000000
-
-/*
- * r0  <= 2000
- * lpj <= 0x01ffffff (max. 3355 bogomips)
- * HZ  <= 1000
- */
-
-ENTRY(__udelay)
-               ldw     r2, .LC1
-               mul     r0, r2, r0              @ scale r0 by the .LC1 factor, then fall through
-ENTRY(__const_udelay)                          @ 0 <= r0 <= 0x7fffff06
-               ldw     r2, .LC0
-               ldw     r2, [r2]                @ max = 0x01ffffff
-               mov     r0, r0 >> #14           @ max = 0x0001ffff
-               mov     r2, r2 >> #10           @ max = 0x00007fff
-               mul     r0, r2, r0              @ max = 2^32-1
-               mov.a   r0, r0 >> #6            @ shifts total 14 + 10 + 6 = 30, undoing the 2^30 scale
-               cmoveq  pc, lr                  @ result is zero: return without looping
-
-/*
- * loops = r0 * HZ * loops_per_jiffy / 1000000
- *
- * Oh, if only we had a cycle counter...
- */
-
-@ Delay routine
-ENTRY(__delay)
-               sub.a   r0, r0, #2              @ count down by two per pass
-               bua     __delay                 @ presumably loops while the count is still above zero - confirm the bua condition
-               mov     pc, lr
-ENDPROC(__udelay)
-ENDPROC(__const_udelay)
-ENDPROC(__delay)
diff --git a/arch/unicore32/lib/findbit.S b/arch/unicore32/lib/findbit.S
deleted file mode 100644 (file)
index 42f1282..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/findbit.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-                .text
-
-/*
- * Purpose  : Find a 'zero' bit
- * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
- * Notes    : The bitmap is scanned one byte at a time.  r0 = addr,
- *            r1 = maxbit, r2 = running bit index.  Returns maxbit (r1)
- *            when the scan ends without a hit.  Labels 1, 2 and 3 are
- *            also branch targets for find_next_zero_bit below.
- */
-ENTRY(find_first_zero_bit)
-               cxor.a  r1, #0                  @ maxbit == 0?
-               beq     3f                      @ yes - nothing to scan
-               mov     r2, #0                  @ start at bit 0
-1:             ldb     r3, [r0+], r2 >> #3     @ byte that holds bit r2
-               xor.a   r3, r3, #0xff           @ invert bits
-               bne     .L_found                @ any now set - found zero bit
-               add     r2, r2, #8              @ next bit pointer
-2:             csub.a  r2, r1                  @ any more?
-               bub     1b
-3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
-ENDPROC(find_first_zero_bit)
-
-/*
- * Purpose  : Find next 'zero' bit
- * Prototype: int find_next_zero_bit
- *             (void *addr, unsigned int maxbit, int offset)
- * Notes    : r2 = offset.  A byte-aligned offset joins the loop of
- *            find_first_zero_bit directly (labels 1b/2b/3b live there);
- *            otherwise the partial first byte is examined here.
- */
-ENTRY(find_next_zero_bit)
-               cxor.a  r1, #0                  @ maxbit == 0?
-               beq     3b                      @ yes - return maxbit
-               and.a   ip, r2, #7              @ ip = bit position inside the byte
-               beq     1b                      @ If new byte, goto old routine
-               ldb     r3, [r0+], r2 >> #3     @ byte that holds bit r2
-               xor     r3, r3, #0xff           @ now looking for a 1 bit
-               mov.a   r3, r3 >> ip            @ shift off unused bits
-               bne     .L_found
-               or      r2, r2, #7              @ if zero, then no bits here
-               add     r2, r2, #1              @ align bit pointer
-               b       2b                      @ loop for next bit
-ENDPROC(find_next_zero_bit)
-
-/*
- * Purpose  : Find a 'one' bit
- * Prototype: int find_first_bit
- *             (const unsigned long *addr, unsigned int maxbit);
- * Notes    : Same byte-wise scan as find_first_zero_bit, without the
- *            inversion.  Returns maxbit (r1) when no set bit is found.
- *            Labels 1, 2 and 3 are also branch targets for find_next_bit.
- */
-ENTRY(find_first_bit)
-               cxor.a  r1, #0                  @ maxbit == 0?
-               beq     3f                      @ yes - nothing to scan
-               mov     r2, #0                  @ start at bit 0
-1:             ldb     r3, [r0+], r2 >> #3     @ byte that holds bit r2
-               mov.a   r3, r3                  @ set flags on the byte value
-               bne     .L_found                @ any bit set - found a one bit
-               add     r2, r2, #8              @ next bit pointer
-2:             csub.a  r2, r1                  @ any more?
-               bub     1b
-3:             mov     r0, r1                  @ no set bits
-               mov     pc, lr
-ENDPROC(find_first_bit)
-
-/*
- * Purpose  : Find next 'one' bit
- * Prototype: int find_next_bit
- *             (void *addr, unsigned int maxbit, int offset)
- * Notes    : r2 = offset.  A byte-aligned offset joins the loop of
- *            find_first_bit directly (labels 1b/2b/3b live there).
- */
-ENTRY(find_next_bit)
-               cxor.a  r1, #0                  @ maxbit == 0?
-               beq     3b                      @ yes - return maxbit
-               and.a   ip, r2, #7              @ ip = bit position inside the byte
-               beq     1b                      @ If new byte, goto old routine
-               ldb     r3, [r0+], r2 >> #3     @ byte that holds bit r2
-               mov.a   r3, r3 >> ip            @ shift off unused bits
-               bne     .L_found
-               or      r2, r2, #7              @ if zero, then no bits here
-               add     r2, r2, #1              @ align bit pointer
-               b       2b                      @ loop for next bit
-ENDPROC(find_next_bit)
-
-/*
- * One or more bits in the LSB of r3 are assumed to be set.
- * On entry r2 is the bit index that bit 0 of r3 corresponds to; the
- * routine returns r2 plus the position of the lowest set bit of r3.
- * NOTE(review): r1 (maxbit) is overwritten here and the result is not
- * compared against maxbit - confirm callers tolerate a result >= maxbit.
- */
-.L_found:
-               rsub    r1, r3, #0              @ r1 = 0 - r3
-               and     r3, r3, r1              @ r3 & -r3 keeps only the lowest set bit
-               cntlz   r3, r3                  @ count leading zeros of that single bit
-               rsub    r3, r3, #31             @ 31 - clz = bit number
-               add     r0, r2, r3              @ result = bit pointer + bit number
-               mov     pc, lr
-
diff --git a/arch/unicore32/lib/strncpy_from_user.S b/arch/unicore32/lib/strncpy_from_user.S
deleted file mode 100644 (file)
index f227b82..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/strncpy_from_user.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
-       .text
-       .align  5
-
-/*
- * Copy a string from user space to kernel space.
- *  r0 = dst, r1 = src, r2 = byte length
- * returns the number of characters copied (strlen of copied string),
- *  -EFAULT on exception, or "len" if we fill the whole buffer
- *
- * ip keeps the original src so the count can be derived as r1 - ip.
- * The fixup code at 9001 is presumably reached through an exception
- * table entry emitted by the ldrusr macro (not visible here - confirm).
- */
-ENTRY(__strncpy_from_user)
-       mov     ip, r1          @ remember the start of the source
-1:     sub.a   r2, r2, #1      @ one less byte of room; flags feed the next two lines
-       ldrusr  r3, r1, 1, ns   @ user byte load, conditional on "ns"; presumably advances r1
-       bfs     2f              @ buffer exhausted - stop
-       stb.w   r3, [r0]+, #1   @ store the byte into dst
-       cxor.a  r3, #0          @ was it the terminating NUL?
-       bne     1b              @ no - keep copying
-       sub     r1, r1, #1      @ take NUL character out of count
-2:     sub     r0, r1, ip      @ return value = source bytes consumed
-       mov     pc, lr
-ENDPROC(__strncpy_from_user)
-
-       .pushsection .fixup,"ax"
-       .align  0
-9001:  mov     r3, #0
-       stb     r3, [r0+], #0   @ null terminate
-       mov     r0, #-EFAULT
-       mov     pc, lr
-       .popsection
-
diff --git a/arch/unicore32/lib/strnlen_user.S b/arch/unicore32/lib/strnlen_user.S
deleted file mode 100644 (file)
index c836b12..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/lib/strnlen_user.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
-       .text
-       .align  5
-
-/* Prototype: unsigned long __strnlen_user(const char *str, long n)
- * Purpose  : get length of a string in user memory
- * Params   : str - address of string in user memory
- *           n   - maximum number of bytes to examine
- * Returns  : length of string *including terminator*
- *           or zero on exception, or n + 1 if too long
- * Notes    : r2 keeps the start address; the result is r0 - r2.  The
- *           fixup at 9001 is presumably reached through an exception
- *           table entry emitted by the ldrusr macro (confirm).
- */
-ENTRY(__strnlen_user)
-       mov     r2, r0          @ remember the start of the string
-1:
-       ldrusr  r3, r0, 1       @ user byte load; presumably advances r0
-       cxor.a  r3, #0          @ terminator?
-       beq     2f              @ yes - done
-       sub.a   r1, r1, #1      @ one less byte allowed
-       bne     1b              @ budget left - next byte
-       add     r0, r0, #1      @ limit hit: bump so the result is one more than the bytes examined
-2:     sub     r0, r0, r2      @ length = current pointer - start
-       mov     pc, lr
-ENDPROC(__strnlen_user)
-
-       .pushsection .fixup,"ax"
-       .align  0
-9001:  mov     r0, #0          @ fault: report zero
-       mov     pc, lr
-       .popsection
diff --git a/arch/unicore32/mm/Kconfig b/arch/unicore32/mm/Kconfig
deleted file mode 100644 (file)
index 82759b6..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-comment "Processor Type"
-
-# Select CPU types depending on the architecture selected.  This selects
-# which CPUs we support in the kernel image, and the compiler instruction
-# optimiser behaviour.
-
-config CPU_UCV2
-       def_bool y
-
-comment "Processor Features"
-
-config CPU_ICACHE_DISABLE
-       bool "Disable I-Cache (I-bit)"
-       help
-         Say Y here to disable the processor instruction cache. Unless
-         you have a reason not to or are unsure, say N.
-
-config CPU_DCACHE_DISABLE
-       bool "Disable D-Cache (D-bit)"
-       help
-         Say Y here to disable the processor data cache. Unless
-         you have a reason not to or are unsure, say N.
-
-config CPU_DCACHE_WRITETHROUGH
-       bool "Force write through D-cache"
-       help
-         Say Y here to use the data cache in writethrough mode. Unless you
-         specifically require this or are unsure, say N.
-
-config CPU_DCACHE_LINE_DISABLE
-       bool "Disable D-cache line ops"
-       default y
-       help
-         Say Y here to disable the data cache line operations.
-
-config CPU_TLB_SINGLE_ENTRY_DISABLE
-       bool "Disable TLB single entry ops"
-       default y
-       help
-         Say Y here to disable the TLB single entry operations.
diff --git a/arch/unicore32/mm/Makefile b/arch/unicore32/mm/Makefile
deleted file mode 100644 (file)
index 8106260..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the linux unicore-specific parts of the memory manager.
-#
-
-obj-y                          := extable.o fault.o init.o pgd.o mmu.o
-obj-y                          += flush.o ioremap.o
-
-obj-$(CONFIG_MODULES)          += proc-syms.o
-
-obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
-
-obj-$(CONFIG_CPU_UCV2)         += cache-ucv2.o tlb-ucv2.o proc-ucv2.o
-
diff --git a/arch/unicore32/mm/alignment.c b/arch/unicore32/mm/alignment.c
deleted file mode 100644 (file)
index 2ea98f7..0000000
+++ /dev/null
@@ -1,524 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/mm/alignment.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-/*
- * TODO:
- *  FPU ldm/stm are not handled
- */
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/sched/debug.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/uaccess.h>
-#include <linux/pgtable.h>
-
-#include <asm/tlbflush.h>
-#include <asm/unaligned.h>
-
-#include "mm.h"
-
-/*
- * Instruction-word field extractors.  Every macro argument is wrapped in
- * parentheses so that passing an expression (e.g. "a | b") decodes the
- * whole value instead of binding to the operator inside the macro.
- */
-#define CODING_BITS(i) ((i) & 0xe0000120)
-
-#define LDST_P_BIT(i)  ((i) & (1 << 28))       /* Preindex             */
-#define LDST_U_BIT(i)  ((i) & (1 << 27))       /* Add offset           */
-#define LDST_W_BIT(i)  ((i) & (1 << 25))       /* Writeback            */
-#define LDST_L_BIT(i)  ((i) & (1 << 24))       /* Load                 */
-
-/* True when the P (bit 28) and U (bit 27) flags have the same value */
-#define LDST_P_EQ_U(i) ((((i) ^ ((i) >> 1)) & (1 << 27)) == 0)
-
-#define LDSTH_I_BIT(i) ((i) & (1 << 26))       /* half-word immed      */
-#define LDM_S_BIT(i)   ((i) & (1 << 26))       /* write ASR from BSR */
-#define LDM_H_BIT(i)   ((i) & (1 << 6))        /* select r0-r15 or r16-r31 */
-
-#define RN_BITS(i)     (((i) >> 19) & 31)      /* Rn                   */
-#define RD_BITS(i)     (((i) >> 14) & 31)      /* Rd                   */
-#define RM_BITS(i)     ((i) & 31)              /* Rm                   */
-
-/* 16-bit register list: instruction bits 18..9 become bits 15..6, bits 5..0 stay */
-#define REGMASK_BITS(i)        ((((i) & 0x7fe00) >> 3) | ((i) & 0x3f))
-#define OFFSET_BITS(i) ((i) & 0x03fff)
-
-#define SHIFT_BITS(i)  (((i) >> 9) & 0x1f)
-#define SHIFT_TYPE(i)  ((i) & 0xc0)
-#define SHIFT_LSL      0x00
-#define SHIFT_LSR      0x40
-#define SHIFT_ASR      0x80
-#define SHIFT_RORRRX   0xc0
-
-union offset_union {           /* one offset value, readable as unsigned or signed */
-       unsigned long un;       /* used for logical shifts and address arithmetic */
-       signed long sn;         /* used for the arithmetic (ASR) shift */
-};
-
-#define TYPE_ERROR     0       /* instruction form is not handled */
-#define TYPE_FAULT     1       /* a memory access faulted during emulation */
-#define TYPE_LDST      2       /* single load/store done; base writeback still pending */
-#define TYPE_DONE      3       /* emulation complete, nothing further to do */
-#define TYPE_SWAP  4           /* swap emulated */
-#define TYPE_COLS  5           /* Coprocessor load/store */
-
-#define get8_unaligned_check(val, addr, err)   /* load one byte at addr into val; addr is in/out (presumably post-incremented by the "[%2], #1" form); err is set to 1 if the load faults */ \
-       __asm__(                                        \
-       "1:     ldb.u   %1, [%2], #1\n"                 \
-       "2:\n"                                          \
-       "       .pushsection .fixup,\"ax\"\n"           \
-       "       .align  2\n"                            \
-       "3:     mov     %0, #1\n"                       \
-       "       b       2b\n"                           \
-       "       .popsection\n"                          \
-       "       .pushsection __ex_table,\"a\"\n"                \
-       "       .align  3\n"                            \
-       "       .long   1b, 3b\n"                       \
-       "       .popsection\n"                          \
-       : "=r" (err), "=&r" (val), "=r" (addr)          \
-       : "0" (err), "2" (addr))
-
-#define get8t_unaligned_check(val, addr, err)  /* "t" variant used by get32t_unaligned_check; the body is textually identical to get8_unaligned_check */ \
-       __asm__(                                        \
-       "1:     ldb.u   %1, [%2], #1\n"                 \
-       "2:\n"                                          \
-       "       .pushsection .fixup,\"ax\"\n"           \
-       "       .align  2\n"                            \
-       "3:     mov     %0, #1\n"                       \
-       "       b       2b\n"                           \
-       "       .popsection\n"                          \
-       "       .pushsection __ex_table,\"a\"\n"                \
-       "       .align  3\n"                            \
-       "       .long   1b, 3b\n"                       \
-       "       .popsection\n"                          \
-       : "=r" (err), "=&r" (val), "=r" (addr)          \
-       : "0" (err), "2" (addr))
-
-#define get16_unaligned_check(val, addr)       /* assemble a 16-bit value, low byte first, from two byte loads; needs a local "fault" label in the caller */ \
-       do {                                                    \
-               unsigned int err = 0, v, a = addr;              \
-               get8_unaligned_check(val, a, err);              \
-               get8_unaligned_check(v, a, err);                \
-               val |= v << 8;                                  \
-               if (err) /* checked once, after both loads */   \
-                       goto fault;                             \
-       } while (0)
-
-#define put16_unaligned_check(val, addr)       /* store the low 16 bits of val as two bytes, low byte first; jumps to the caller's "fault" label on error */ \
-       do {                                                    \
-               unsigned int err = 0, v = val, a = addr;        \
-               __asm__(                                        \
-               "1:     stb.u   %1, [%2], #1\n"                 \
-               "       mov     %1, %1 >> #8\n"                 \
-               "2:     stb.u   %1, [%2]\n"                     \
-               "3:\n"                                          \
-               "       .pushsection .fixup,\"ax\"\n"           \
-               "       .align  2\n"                            \
-               "4:     mov     %0, #1\n"                       \
-               "       b       3b\n"                           \
-               "       .popsection\n"                          \
-               "       .pushsection __ex_table,\"a\"\n"                \
-               "       .align  3\n"                            \
-               "       .long   1b, 4b\n"                       \
-               "       .long   2b, 4b\n"                       \
-               "       .popsection\n"                          \
-               : "=r" (err), "=&r" (v), "=&r" (a)              \
-               : "0" (err), "1" (v), "2" (a));                 \
-               if (err) /* either store faulted */             \
-                       goto fault;                             \
-       } while (0)
-
-#define __put32_unaligned_check(ins, val, addr)        /* store val as four bytes, low byte first, using store mnemonic "ins"; each store has its own exception-table entry */ \
-       do {                                                    \
-               unsigned int err = 0, v = val, a = addr;        \
-               __asm__(                                        \
-               "1:     "ins"   %1, [%2], #1\n"                 \
-               "       mov     %1, %1 >> #8\n"                 \
-               "2:     "ins"   %1, [%2], #1\n"                 \
-               "       mov     %1, %1 >> #8\n"                 \
-               "3:     "ins"   %1, [%2], #1\n"                 \
-               "       mov     %1, %1 >> #8\n"                 \
-               "4:     "ins"   %1, [%2]\n"                     \
-               "5:\n"                                          \
-               "       .pushsection .fixup,\"ax\"\n"           \
-               "       .align  2\n"                            \
-               "6:     mov     %0, #1\n"                       \
-               "       b       5b\n"                           \
-               "       .popsection\n"                          \
-               "       .pushsection __ex_table,\"a\"\n"                \
-               "       .align  3\n"                            \
-               "       .long   1b, 6b\n"                       \
-               "       .long   2b, 6b\n"                       \
-               "       .long   3b, 6b\n"                       \
-               "       .long   4b, 6b\n"                       \
-               "       .popsection\n"                          \
-               : "=r" (err), "=&r" (v), "=&r" (a)              \
-               : "0" (err), "1" (v), "2" (a));                 \
-               if (err) /* any of the four stores faulted */   \
-                       goto fault;                             \
-       } while (0)
-
-#define get32_unaligned_check(val, addr)       /* assemble a 32-bit value, low byte first, from four byte loads; needs a local "fault" label in the caller */ \
-       do {                                                    \
-               unsigned int err = 0, v, a = addr;              \
-               get8_unaligned_check(val, a, err);              \
-               get8_unaligned_check(v, a, err);                \
-               val |= v << 8;                                  \
-               get8_unaligned_check(v, a, err);                \
-               val |= v << 16;                                 \
-               get8_unaligned_check(v, a, err);                \
-               val |= v << 24;                                 \
-               if (err) /* checked once, after all loads */    \
-                       goto fault;                             \
-       } while (0)
-
-#define put32_unaligned_check(val, addr)       /* byte-wise 32-bit store using stb.u */ \
-       __put32_unaligned_check("stb.u", val, addr)
-
-#define get32t_unaligned_check(val, addr)      /* same assembly sequence as get32_unaligned_check, built on the get8t variant */ \
-       do {                                                    \
-               unsigned int err = 0, v, a = addr;              \
-               get8t_unaligned_check(val, a, err);             \
-               get8t_unaligned_check(v, a, err);               \
-               val |= v << 8;                                  \
-               get8t_unaligned_check(v, a, err);               \
-               val |= v << 16;                                 \
-               get8t_unaligned_check(v, a, err);               \
-               val |= v << 24;                                 \
-               if (err) /* checked once, after all loads */    \
-                       goto fault;                             \
-       } while (0)
-
-#define put32t_unaligned_check(val, addr)      /* expands to exactly the same store sequence as put32_unaligned_check */ \
-       __put32_unaligned_check("stb.u", val, addr)
-
-/*
- * Apply the base-register update of an emulated single load/store.
- *
- * The offset is subtracted when the U bit is clear.  A post-indexed
- * access (P bit clear) moves the address by that offset and always
- * writes it back to Rn; a pre-indexed access writes the address back
- * only when the W bit is set.
- */
-static void
-do_alignment_finish_ldst(unsigned long addr, unsigned long instr,
-                        struct pt_regs *regs, union offset_union offset)
-{
-       const int post_indexed = !LDST_P_BIT(instr);
-       const unsigned long delta =
-               LDST_U_BIT(instr) ? offset.un : -offset.un;
-
-       if (post_indexed)
-               addr += delta;
-
-       if (post_indexed || LDST_W_BIT(instr))
-               regs->uregs[RN_BITS(instr)] = addr;
-}
-
-/*
- * Emulate a misaligned half-word load/store, or a misaligned word swap.
- *
- * @addr:  faulting data address
- * @instr: the faulting instruction word
- * @regs:  register state, updated with the loaded value
- *
- * Returns TYPE_LDST for a half-word access (the caller then performs the
- * base writeback), TYPE_SWAP for a swap, or TYPE_FAULT when one of the
- * byte accesses faulted.
- */
-static int
-do_alignment_ldrhstrh(unsigned long addr, unsigned long instr,
-                     struct pt_regs *regs)
-{
-       unsigned int rd = RD_BITS(instr);
-       unsigned long old;
-
-       /* old value 0x40002120, can't judge swap instr correctly */
-       if ((instr & 0x4b003fe0) == 0x40000120)
-               goto swp;
-
-       if (LDST_L_BIT(instr)) {
-               unsigned long val;
-               get16_unaligned_check(val, addr);
-
-               /* signed half-word? */
-               if (instr & 0x80)
-                       val = (signed long)((signed short)val);
-
-               regs->uregs[rd] = val;
-       } else
-               put16_unaligned_check(regs->uregs[rd], addr);
-
-       return TYPE_LDST;
-
-swp:
-       /*
-        * Only word swaps are handled here; a byte swap should never
-        * raise this alignment exception.
-        *
-        * The old memory word goes into a temporary and Rd is written
-        * last.  Loading straight into Rd would store the freshly loaded
-        * value back when Rd == Rm, and would leave Rd clobbered if the
-        * store faults.
-        */
-       get32_unaligned_check(old, addr);
-       put32_unaligned_check(regs->uregs[RM_BITS(instr)], addr);
-       regs->uregs[rd] = old;
-       return TYPE_SWAP;
-
-fault:
-       return TYPE_FAULT;
-}
-
-/*
- * Emulate a misaligned word load or store byte by byte.
- *
- * A post-indexed access with the W bit set goes through the "t" flavour
- * of the access macros; every other form uses the plain ones.  Returns
- * TYPE_LDST on success so that the caller performs the base writeback,
- * or TYPE_FAULT when a byte access faulted.
- */
-static int
-do_alignment_ldrstr(unsigned long addr, unsigned long instr,
-                   struct pt_regs *regs)
-{
-       const unsigned int reg = RD_BITS(instr);
-       const int is_load = LDST_L_BIT(instr) != 0;
-
-       if (LDST_P_BIT(instr) || !LDST_W_BIT(instr)) {
-               /* ordinary form */
-               if (is_load)
-                       get32_unaligned_check(regs->uregs[reg], addr);
-               else
-                       put32_unaligned_check(regs->uregs[reg], addr);
-       } else {
-               /* post-indexed with W set */
-               if (is_load)
-                       get32t_unaligned_check(regs->uregs[reg], addr);
-               else
-                       put32t_unaligned_check(regs->uregs[reg], addr);
-       }
-
-       return TYPE_LDST;
-
-fault:
-       return TYPE_FAULT;
-}
-
-/*
- * LDM/STM alignment handler.
- *
- * There are 4 variants of this instruction:
- *
- * B = rn pointer before instruction, A = rn pointer after instruction
- *              ------ increasing address ----->
- *             |    | r0 | r1 | ... | rx |    |
- * PU = 01             B                    A
- * PU = 11        B                    A
- * PU = 00        A                    B
- * PU = 10             A                    B
- *
- * Returns TYPE_DONE once all registers are transferred (base writeback,
- * if requested, is done here), TYPE_FAULT when a byte access faulted,
- * or TYPE_ERROR for the unsupported S-bit form.
- */
-static int
-do_alignment_ldmstm(unsigned long addr, unsigned long instr,
-                   struct pt_regs *regs)
-{
-       unsigned int rd, rn, pc_correction, reg_correction, nr_regs, regbits;
-       unsigned long eaddr, newaddr;
-
-       if (LDM_S_BIT(instr))
-               goto bad;
-
-       pc_correction = 4;      /* processor implementation defined */
-
-       /* count the number of registers in the mask to be transferred */
-       nr_regs = hweight16(REGMASK_BITS(instr)) * 4;   /* held as a byte count from here on */
-
-       rn = RN_BITS(instr);
-       newaddr = eaddr = regs->uregs[rn];
-
-       if (!LDST_U_BIT(instr))
-               nr_regs = -nr_regs;     /* descending transfer: move the base downwards */
-       newaddr += nr_regs;
-       if (!LDST_U_BIT(instr))
-               eaddr = newaddr;        /* transfers always run upwards from the lowest address */
-
-       if (LDST_P_EQ_U(instr)) /* U = P */
-               eaddr += 4;
-
-       /*
-        * This is a "hint" - we already have eaddr worked out by the
-        * processor for us.
-        */
-       if (addr != eaddr) {
-               printk(KERN_ERR "LDMSTM: PC = %08lx, instr = %08lx, "
-                      "addr = %08lx, eaddr = %08lx\n",
-                      instruction_pointer(regs), instr, addr, eaddr);
-               show_regs(regs);
-       }
-
-       if (LDM_H_BIT(instr))
-               reg_correction = 0x10;  /* list refers to r16-r31 */
-       else
-               reg_correction = 0x00;  /* list refers to r0-r15 */
-
-       for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
-            regbits >>= 1, rd += 1)
-               if (regbits & 1) {
-                       if (LDST_L_BIT(instr))
-                               get32_unaligned_check(regs->
-                                       uregs[rd + reg_correction], eaddr);
-                       else
-                               put32_unaligned_check(regs->
-                                       uregs[rd + reg_correction], eaddr);
-                       eaddr += 4;
-               }
-
-       if (LDST_W_BIT(instr))
-               regs->uregs[rn] = newaddr;
-       return TYPE_DONE;
-
-fault:
-       regs->UCreg_pc -= pc_correction;        /* NOTE(review): do_alignment() also subtracts 4 on TYPE_FAULT - confirm this second rewind is intended */
-       return TYPE_FAULT;
-
-bad:
-       printk(KERN_ERR "Alignment trap: not handling ldm with s-bit set\n");
-       return TYPE_ERROR;
-}
-
-/*
- * Alignment-fault handler (installed by alignment_init() below).
- *
- * Fetches and decodes the faulting instruction, emulates it with byte
- * accesses through the matching do_alignment_*() helper and, for single
- * loads/stores, applies the base-register writeback.
- *
- * @addr:       faulting data address
- * @error_code: fault status, passed on to do_bad_area()
- * @regs:       register state at the time of the fault, updated in place
- *
- * Returns 0 when the fault has been dealt with (instruction emulated, or
- * a fault during emulation handed to do_bad_area()), and 1 when the
- * instruction is not one this code can emulate.
- */
-static int
-do_alignment(unsigned long addr, unsigned int error_code, struct pt_regs *regs)
-{
-       union offset_union offset;
-       unsigned long instr, instrptr;
-       int (*handler) (unsigned long addr, unsigned long instr,
-                       struct pt_regs *regs);
-       unsigned int type;
-
-       instrptr = instruction_pointer(regs);
-       if (instrptr >= PAGE_OFFSET)
-               instr = *(unsigned long *)instrptr;
-       else {
-               __asm__ __volatile__(
-                               "ldw.u  %0, [%1]\n"
-                               : "=&r"(instr)
-                               : "r"(instrptr));
-       }
-
-       /* step over the instruction; undone again if emulation faults */
-       regs->UCreg_pc += 4;
-
-       switch (CODING_BITS(instr)) {
-       case 0x40000120:        /* ldrh or strh */
-               if (LDSTH_I_BIT(instr))
-                       offset.un = (instr & 0x3e00) >> 4 | (instr & 31);
-               else
-                       offset.un = regs->uregs[RM_BITS(instr)];
-               handler = do_alignment_ldrhstrh;
-               break;
-
-       case 0x60000000:        /* ldr or str immediate */
-       case 0x60000100:        /* ldr or str immediate */
-       case 0x60000020:        /* ldr or str immediate */
-       case 0x60000120:        /* ldr or str immediate */
-               offset.un = OFFSET_BITS(instr);
-               handler = do_alignment_ldrstr;
-               break;
-
-       case 0x40000000:        /* ldr or str register */
-               offset.un = regs->uregs[RM_BITS(instr)];
-               {
-                       unsigned int shiftval = SHIFT_BITS(instr);
-
-                       switch (SHIFT_TYPE(instr)) {
-                       case SHIFT_LSL:
-                               offset.un <<= shiftval;
-                               break;
-
-                       case SHIFT_LSR:
-                               offset.un >>= shiftval;
-                               break;
-
-                       case SHIFT_ASR:
-                               offset.sn >>= shiftval;
-                               break;
-
-                       case SHIFT_RORRRX:
-                               if (shiftval == 0) {
-                                       /* rotate right through carry by one */
-                                       offset.un >>= 1;
-                                       if (regs->UCreg_asr & PSR_C_BIT)
-                                               offset.un |= 1 << 31;
-                               } else
-                                       offset.un = offset.un >> shiftval |
-                                           offset.un << (32 - shiftval);
-                               break;
-                       }
-               }
-               handler = do_alignment_ldrstr;
-               break;
-
-       case 0x80000000:        /* ldm or stm */
-       case 0x80000020:        /* ldm or stm */
-               handler = do_alignment_ldmstm;
-               break;
-
-       default:
-               goto bad;
-       }
-
-       type = handler(addr, instr, regs);
-
-       if (type == TYPE_ERROR || type == TYPE_FAULT)
-               goto bad_or_fault;
-
-       if (type == TYPE_LDST)
-               do_alignment_finish_ldst(addr, instr, regs, offset);
-
-       return 0;
-
-bad_or_fault:
-       if (type == TYPE_ERROR)
-               goto bad;
-#ifdef CONFIG_UNICORE_FPU_F64
-fault:         /* target of the access macros used in the co-processor path */
-#endif
-       regs->UCreg_pc -= 4;
-       /*
-        * We got a fault - fix it up, or die.
-        */
-       do_bad_area(addr, error_code, regs);
-       return 0;
-
-bad:
-       /*
-        * Oops, we didn't handle the instruction.
-        * However, we must handle fpu instr firstly.
-        */
-#ifdef CONFIG_UNICORE_FPU_F64
-       /* handle co.load/store */
-#define CODING_COLS                0xc0000000
-#define COLS_OFFSET_BITS(i)    (i & 0x1FF)
-#define COLS_L_BITS(i)         (i & (1<<24))
-#define COLS_FN_BITS(i)                ((i>>14) & 31)
-       if ((instr & 0xe0000000) == CODING_COLS) {
-               unsigned int fn = COLS_FN_BITS(instr);
-               unsigned long val = 0;
-               if (COLS_L_BITS(instr)) {
-                       /* load: memory -> val -> FPU register Fn */
-                       get32t_unaligned_check(val, addr);
-                       switch (fn) {
-#define ASM_MTF(n)     case n:                                         \
-                       __asm__ __volatile__("MTF %0, F" __stringify(n) \
-                               : : "r"(val));                          \
-                       break;
-                       ASM_MTF(0); ASM_MTF(1); ASM_MTF(2); ASM_MTF(3);
-                       ASM_MTF(4); ASM_MTF(5); ASM_MTF(6); ASM_MTF(7);
-                       ASM_MTF(8); ASM_MTF(9); ASM_MTF(10); ASM_MTF(11);
-                       ASM_MTF(12); ASM_MTF(13); ASM_MTF(14); ASM_MTF(15);
-                       ASM_MTF(16); ASM_MTF(17); ASM_MTF(18); ASM_MTF(19);
-                       ASM_MTF(20); ASM_MTF(21); ASM_MTF(22); ASM_MTF(23);
-                       ASM_MTF(24); ASM_MTF(25); ASM_MTF(26); ASM_MTF(27);
-                       ASM_MTF(28); ASM_MTF(29); ASM_MTF(30); ASM_MTF(31);
-#undef ASM_MTF
-                       }
-               } else {
-                       /*
-                        * store: FPU register Fn -> val -> memory.  val is
-                        * written by the instruction, so it must be an
-                        * output operand; as an input the compiler would
-                        * never see the value read from the FPU register.
-                        */
-                       switch (fn) {
-#define ASM_MFF(n)     case n:                                         \
-                       __asm__ __volatile__("MFF %0, F" __stringify(n) \
-                               : "=r"(val));                           \
-                       break;
-                       ASM_MFF(0); ASM_MFF(1); ASM_MFF(2); ASM_MFF(3);
-                       ASM_MFF(4); ASM_MFF(5); ASM_MFF(6); ASM_MFF(7);
-                       ASM_MFF(8); ASM_MFF(9); ASM_MFF(10); ASM_MFF(11);
-                       ASM_MFF(12); ASM_MFF(13); ASM_MFF(14); ASM_MFF(15);
-                       ASM_MFF(16); ASM_MFF(17); ASM_MFF(18); ASM_MFF(19);
-                       ASM_MFF(20); ASM_MFF(21); ASM_MFF(22); ASM_MFF(23);
-                       ASM_MFF(24); ASM_MFF(25); ASM_MFF(26); ASM_MFF(27);
-                       ASM_MFF(28); ASM_MFF(29); ASM_MFF(30); ASM_MFF(31);
-#undef ASM_MFF
-                       }
-                       put32t_unaligned_check(val, addr);
-               }
-               /* emulated: report "handled", like every other success path */
-               return 0;
-       }
-#endif
-       printk(KERN_ERR "Alignment trap: not handling instruction "
-              "%08lx at [<%08lx>]\n", instr, instrptr);
-       return 1;
-}
-
-/*
- * Install do_alignment() as the handler for fault code 1; faults it
- * reports as unhandled are described as "alignment exception" with
- * SIGBUS / BUS_ADRALN.  Runs as an fs_initcall.
- */
-static int __init alignment_init(void)
-{
-       static const char fault_name[] = "alignment exception";
-
-       hook_fault_code(1, do_alignment, SIGBUS, BUS_ADRALN, fault_name);
-       return 0;
-}
-
-fs_initcall(alignment_init);
diff --git a/arch/unicore32/mm/cache-ucv2.S b/arch/unicore32/mm/cache-ucv2.S
deleted file mode 100644 (file)
index 2108837..0000000
+++ /dev/null
@@ -1,209 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/mm/cache-ucv2.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- *  This is the "shell" of the UniCore-v2 processor support.
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/assembler.h>
-#include <asm/page.h>
-
-#include "proc-macros.S"
-
-/*
- *     __cpuc_flush_icache_all()
- *     __cpuc_flush_kern_all()
- *     __cpuc_flush_user_all()
- *
- *     Flush the entire cache.
- */
-ENTRY(__cpuc_flush_icache_all)
-       /*FALLTHROUGH*/
-ENTRY(__cpuc_flush_kern_all)
-       /*FALLTHROUGH*/
-ENTRY(__cpuc_flush_user_all)
-       mov     r0, #0
-       movc    p0.c5, r0, #14                  @ Dcache flush all
-       nop8
-
-       mov     r0, #0
-       movc    p0.c5, r0, #20                  @ Icache invalidate all
-       nop8
-
-       mov     pc, lr
-
-/*
- *     __cpuc_flush_user_range(start, end, flags)
- *
- *     Flush a range of TLB entries in the specified address space.
- *
- *     - start - start address (may not be aligned)
- *     - end   - end address (exclusive, may not be aligned)
- *     - flags - vm_area_struct flags describing address space
- */
-ENTRY(__cpuc_flush_user_range)
-       cxor.a  r2, #0
-       beq     __cpuc_dma_flush_range
-
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-       andn    r0, r0, #CACHE_LINESIZE - 1     @ Safety check
-       sub     r1, r1, r0
-       csub.a  r1, #MAX_AREA_SIZE
-       bsg     2f
-
-       andn    r1, r1, #CACHE_LINESIZE - 1
-       add     r1, r1, #CACHE_LINESIZE
-
-101:   dcacheline_flush        r0, r11, r12
-
-       add     r0, r0, #CACHE_LINESIZE
-       sub.a   r1, r1, #CACHE_LINESIZE
-       bns     101b
-       b       3f
-#endif
-2:     mov     ip, #0
-       movc    p0.c5, ip, #14                  @ Dcache flush all
-       nop8
-
-3:     mov     ip, #0
-       movc    p0.c5, ip, #20                  @ Icache invalidate all
-       nop8
-
-       mov     pc, lr
-
-/*
- *     __cpuc_coherent_kern_range(start,end)
- *     __cpuc_coherent_user_range(start,end)
- *
- *     Ensure that the I and D caches are coherent within specified
- *     region.  This is typically used when code has been written to
- *     a memory region, and will be executed.
- *
- *     - start   - virtual start address of region
- *     - end     - virtual end address of region
- */
-ENTRY(__cpuc_coherent_kern_range)
-       /* FALLTHROUGH */
-ENTRY(__cpuc_coherent_user_range)
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-       andn    r0, r0, #CACHE_LINESIZE - 1     @ Safety check
-       sub     r1, r1, r0
-       csub.a  r1, #MAX_AREA_SIZE
-       bsg     2f
-
-       andn    r1, r1, #CACHE_LINESIZE - 1
-       add     r1, r1, #CACHE_LINESIZE
-
-       @ r0 va2pa r10
-       mov     r9, #PAGE_SZ
-       sub     r9, r9, #1                      @ PAGE_MASK
-101:   va2pa   r0, r10, r11, r12, r13, 2f      @ r10 is PA
-       b       103f
-102:   cand.a  r0, r9
-       beq     101b
-
-103:   movc    p0.c5, r10, #11                 @ Dcache clean line of R10
-       nop8
-
-       add     r0, r0, #CACHE_LINESIZE
-       add     r10, r10, #CACHE_LINESIZE
-       sub.a   r1, r1, #CACHE_LINESIZE
-       bns     102b
-       b       3f
-#endif
-2:     mov     ip, #0
-       movc    p0.c5, ip, #10                  @ Dcache clean all
-       nop8
-
-3:     mov     ip, #0
-       movc    p0.c5, ip, #20                  @ Icache invalidate all
-       nop8
-
-       mov     pc, lr
-
-/*
- *     __cpuc_flush_kern_dcache_area(void *addr, size_t size)
- *
- *     - addr  - kernel address
- *     - size  - region size
- */
-ENTRY(__cpuc_flush_kern_dcache_area)
-       mov     ip, #0
-       movc    p0.c5, ip, #14                  @ Dcache flush all
-       nop8
-       mov     pc, lr
-
-/*
- *     __cpuc_dma_clean_range(start,end)
- *     - start   - virtual start address of region
- *     - end     - virtual end address of region
- */
-ENTRY(__cpuc_dma_clean_range)
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-       andn    r0, r0, #CACHE_LINESIZE - 1
-       sub     r1, r1, r0
-       andn    r1, r1, #CACHE_LINESIZE - 1
-       add     r1, r1, #CACHE_LINESIZE
-
-       csub.a  r1, #MAX_AREA_SIZE
-       bsg     2f
-
-       @ r0 va2pa r10
-       mov     r9, #PAGE_SZ
-       sub     r9, r9, #1                      @ PAGE_MASK
-101:   va2pa   r0, r10, r11, r12, r13, 2f      @ r10 is PA
-       b       1f
-102:   cand.a  r0, r9
-       beq     101b
-
-1:     movc    p0.c5, r10, #11                 @ Dcache clean line of R10
-       nop8
-       add     r0, r0, #CACHE_LINESIZE
-       add     r10, r10, #CACHE_LINESIZE
-       sub.a   r1, r1, #CACHE_LINESIZE
-       bns     102b
-       mov     pc, lr
-#endif
-2:     mov     ip, #0
-       movc    p0.c5, ip, #10                  @ Dcache clean all
-       nop8
-
-       mov     pc, lr
-
-/*
- *     __cpuc_dma_inv_range(start,end)
- *     __cpuc_dma_flush_range(start,end)
- *     - start   - virtual start address of region
- *     - end     - virtual end address of region
- */
-__cpuc_dma_inv_range:
-       /* FALLTHROUGH */
-ENTRY(__cpuc_dma_flush_range)
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-       andn    r0, r0, #CACHE_LINESIZE - 1
-       sub     r1, r1, r0
-       andn    r1, r1, #CACHE_LINESIZE - 1
-       add     r1, r1, #CACHE_LINESIZE
-
-       csub.a  r1, #MAX_AREA_SIZE
-       bsg     2f
-
-       @ r0 va2pa r10
-101:   dcacheline_flush        r0, r11, r12
-
-       add     r0, r0, #CACHE_LINESIZE
-       sub.a   r1, r1, #CACHE_LINESIZE
-       bns     101b
-       mov     pc, lr
-#endif
-2:     mov     ip, #0
-       movc    p0.c5, ip, #14                  @ Dcache flush all
-       nop8
-
-       mov     pc, lr
-
diff --git a/arch/unicore32/mm/extable.c b/arch/unicore32/mm/extable.c
deleted file mode 100644 (file)
index e53352b..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/mm/extable.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/extable.h>
-#include <linux/uaccess.h>
-
-int fixup_exception(struct pt_regs *regs)
-{
-       const struct exception_table_entry *fixup;
-
-       fixup = search_exception_tables(instruction_pointer(regs));
-       if (fixup)
-               regs->UCreg_pc = fixup->fixup;
-
-       return fixup != NULL;
-}
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
deleted file mode 100644 (file)
index 7654bdd..0000000
+++ /dev/null
@@ -1,481 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/mm/fault.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/extable.h>
-#include <linux/signal.h>
-#include <linux/mm.h>
-#include <linux/hardirq.h>
-#include <linux/init.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/page-flags.h>
-#include <linux/sched/signal.h>
-#include <linux/io.h>
-
-#include <asm/tlbflush.h>
-
-/*
- * Fault status register encodings.  We steal bit 31 for our own purposes.
- */
-#define FSR_LNX_PF             (1 << 31)
-
-static inline int fsr_fs(unsigned int fsr)
-{
-       /* xyabcde will be abcde+xy */
-       return (fsr & 31) + ((fsr & (3 << 5)) >> 5);
-}
-
-/*
- * This is useful to dump out the page tables associated with
- * 'addr' in mm 'mm'.
- */
-void show_pte(struct mm_struct *mm, unsigned long addr)
-{
-       pgd_t *pgd;
-
-       if (!mm)
-               mm = &init_mm;
-
-       printk(KERN_ALERT "pgd = %p\n", mm->pgd);
-       pgd = pgd_offset(mm, addr);
-       printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
-
-       do {
-               pmd_t *pmd;
-               pte_t *pte;
-
-               if (pgd_none(*pgd))
-                       break;
-
-               if (pgd_bad(*pgd)) {
-                       printk("(bad)");
-                       break;
-               }
-
-               pmd = pmd_offset((pud_t *) pgd, addr);
-               if (PTRS_PER_PMD != 1)
-                       printk(", *pmd=%08lx", pmd_val(*pmd));
-
-               if (pmd_none(*pmd))
-                       break;
-
-               if (pmd_bad(*pmd)) {
-                       printk("(bad)");
-                       break;
-               }
-
-               /* We must not map this if we have highmem enabled */
-               if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
-                       break;
-
-               pte = pte_offset_map(pmd, addr);
-               printk(", *pte=%08lx", pte_val(*pte));
-               pte_unmap(pte);
-       } while (0);
-
-       printk("\n");
-}
-
-/*
- * Oops.  The kernel tried to access some page that wasn't present.
- */
-static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
-               unsigned int fsr, struct pt_regs *regs)
-{
-       /*
-        * Are we prepared to handle this kernel fault?
-        */
-       if (fixup_exception(regs))
-               return;
-
-       /*
-        * No handler, we'll have to terminate things with extreme prejudice.
-        */
-       bust_spinlocks(1);
-       printk(KERN_ALERT
-              "Unable to handle kernel %s at virtual address %08lx\n",
-              (addr < PAGE_SIZE) ? "NULL pointer dereference" :
-              "paging request", addr);
-
-       show_pte(mm, addr);
-       die("Oops", regs, fsr);
-       bust_spinlocks(0);
-       do_exit(SIGKILL);
-}
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * User mode accesses just cause a SIGSEGV
- */
-static void __do_user_fault(unsigned long addr, unsigned int fsr,
-                           unsigned int sig, int code, struct pt_regs *regs)
-{
-       struct task_struct *tsk = current;
-
-       tsk->thread.address = addr;
-       tsk->thread.error_code = fsr;
-       tsk->thread.trap_no = 14;
-       force_sig_fault(sig, code, (void __user *)addr);
-}
-
-void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
-{
-       struct task_struct *tsk = current;
-       struct mm_struct *mm = tsk->active_mm;
-
-       /*
-        * If we are in kernel mode at this point, we
-        * have no context to handle this fault with.
-        */
-       if (user_mode(regs))
-               __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
-       else
-               __do_kernel_fault(mm, addr, fsr, regs);
-}
-
-#define VM_FAULT_BADMAP                0x010000
-#define VM_FAULT_BADACCESS     0x020000
-
-/*
- * Check that the permissions on the VMA allow for the fault which occurred.
- * If we encountered a write fault, we must have write permission, otherwise
- * we allow any permission.
- */
-static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
-{
-       unsigned int mask = VM_ACCESS_FLAGS;
-
-       if (!(fsr ^ 0x12))      /* write? */
-               mask = VM_WRITE;
-       if (fsr & FSR_LNX_PF)
-               mask = VM_EXEC;
-
-       return vma->vm_flags & mask ? false : true;
-}
-
-static vm_fault_t __do_pf(struct mm_struct *mm, unsigned long addr,
-               unsigned int fsr, unsigned int flags, struct task_struct *tsk)
-{
-       struct vm_area_struct *vma;
-       vm_fault_t fault;
-
-       vma = find_vma(mm, addr);
-       fault = VM_FAULT_BADMAP;
-       if (unlikely(!vma))
-               goto out;
-       if (unlikely(vma->vm_start > addr))
-               goto check_stack;
-
-       /*
-        * Ok, we have a good vm_area for this
-        * memory access, so we can handle it.
-        */
-good_area:
-       if (access_error(fsr, vma)) {
-               fault = VM_FAULT_BADACCESS;
-               goto out;
-       }
-
-       /*
-        * If for any reason at all we couldn't handle the fault, make
-        * sure we exit gracefully rather than endlessly redo the fault.
-        */
-       fault = handle_mm_fault(vma, addr & PAGE_MASK, flags);
-       return fault;
-
-check_stack:
-       if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
-               goto good_area;
-out:
-       return fault;
-}
-
-static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
-{
-       struct task_struct *tsk;
-       struct mm_struct *mm;
-       int sig, code;
-       vm_fault_t fault;
-       unsigned int flags = FAULT_FLAG_DEFAULT;
-
-       tsk = current;
-       mm = tsk->mm;
-
-       /*
-        * If we're in an interrupt or have no user
-        * context, we must not take the fault..
-        */
-       if (faulthandler_disabled() || !mm)
-               goto no_context;
-
-       if (user_mode(regs))
-               flags |= FAULT_FLAG_USER;
-       if (!(fsr ^ 0x12))
-               flags |= FAULT_FLAG_WRITE;
-
-       /*
-        * As per x86, we may deadlock here.  However, since the kernel only
-        * validly references user space from well defined areas of the code,
-        * we can bug out early if this is from code which shouldn't.
-        */
-       if (!mmap_read_trylock(mm)) {
-               if (!user_mode(regs)
-                   && !search_exception_tables(regs->UCreg_pc))
-                       goto no_context;
-retry:
-               mmap_read_lock(mm);
-       } else {
-               /*
-                * The above down_read_trylock() might have succeeded in
-                * which case, we'll have missed the might_sleep() from
-                * down_read()
-                */
-               might_sleep();
-#ifdef CONFIG_DEBUG_VM
-               if (!user_mode(regs) &&
-                   !search_exception_tables(regs->UCreg_pc))
-                       goto no_context;
-#endif
-       }
-
-       fault = __do_pf(mm, addr, fsr, flags, tsk);
-
-       /* If we need to retry but a fatal signal is pending, handle the
-        * signal first. We do not need to release the mmap_lock because
-        * it would already be released in __lock_page_or_retry in
-        * mm/filemap.c. */
-       if (fault_signal_pending(fault, regs))
-               return 0;
-
-       if (!(fault & VM_FAULT_ERROR) && (flags & FAULT_FLAG_ALLOW_RETRY)) {
-               if (fault & VM_FAULT_MAJOR)
-                       tsk->maj_flt++;
-               else
-                       tsk->min_flt++;
-               if (fault & VM_FAULT_RETRY) {
-                       flags |= FAULT_FLAG_TRIED;
-                       goto retry;
-               }
-       }
-
-       mmap_read_unlock(mm);
-
-       /*
-        * Handle the "normal" case first - VM_FAULT_MAJOR
-        */
-       if (likely(!(fault &
-              (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
-               return 0;
-
-       /*
-        * If we are in kernel mode at this point, we
-        * have no context to handle this fault with.
-        */
-       if (!user_mode(regs))
-               goto no_context;
-
-       if (fault & VM_FAULT_OOM) {
-               /*
-                * We ran out of memory, call the OOM killer, and return to
-                * userspace (which will retry the fault, or kill us if we
-                * got oom-killed)
-                */
-               pagefault_out_of_memory();
-               return 0;
-       }
-
-       if (fault & VM_FAULT_SIGBUS) {
-               /*
-                * We had some memory, but were unable to
-                * successfully fix up this page fault.
-                */
-               sig = SIGBUS;
-               code = BUS_ADRERR;
-       } else {
-               /*
-                * Something tried to access memory that
-                * isn't in our memory map..
-                */
-               sig = SIGSEGV;
-               code = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR;
-       }
-
-       __do_user_fault(addr, fsr, sig, code, regs);
-       return 0;
-
-no_context:
-       __do_kernel_fault(mm, addr, fsr, regs);
-       return 0;
-}
-
-/*
- * First Level Translation Fault Handler
- *
- * We enter here because the first level page table doesn't contain
- * a valid entry for the address.
- *
- * If the address is in kernel space (>= TASK_SIZE), then we are
- * probably faulting in the vmalloc() area.
- *
- * If the init_task's first level page tables contains the relevant
- * entry, we copy the it to this task.  If not, we send the process
- * a signal, fixup the exception, or oops the kernel.
- *
- * NOTE! We MUST NOT take any locks for this case. We may be in an
- * interrupt or a critical region, and should only copy the information
- * from the master page table, nothing more.
- */
-static int do_ifault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
-{
-       unsigned int index;
-       pgd_t *pgd, *pgd_k;
-       pmd_t *pmd, *pmd_k;
-
-       if (addr < TASK_SIZE)
-               return do_pf(addr, fsr, regs);
-
-       if (user_mode(regs))
-               goto bad_area;
-
-       index = pgd_index(addr);
-
-       pgd = cpu_get_pgd() + index;
-       pgd_k = init_mm.pgd + index;
-
-       if (pgd_none(*pgd_k))
-               goto bad_area;
-
-       pmd_k = pmd_offset((pud_t *) pgd_k, addr);
-       pmd = pmd_offset((pud_t *) pgd, addr);
-
-       if (pmd_none(*pmd_k))
-               goto bad_area;
-
-       set_pmd(pmd, *pmd_k);
-       flush_pmd_entry(pmd);
-       return 0;
-
-bad_area:
-       do_bad_area(addr, fsr, regs);
-       return 0;
-}
-
-/*
- * This abort handler always returns "fault".
- */
-static int do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
-{
-       return 1;
-}
-
-static int do_good(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
-{
-       unsigned int res1, res2;
-
-       printk("dabt exception but no error!\n");
-
-       __asm__ __volatile__(
-                       "mff %0,f0\n"
-                       "mff %1,f1\n"
-                       : "=r"(res1), "=r"(res2)
-                       :
-                       : "memory");
-
-       printk(KERN_EMERG "r0 :%08x  r1 :%08x\n", res1, res2);
-       panic("shut up\n");
-       return 0;
-}
-
-static struct fsr_info {
-       int (*fn) (unsigned long addr, unsigned int fsr, struct pt_regs *regs);
-       int sig;
-       int code;
-       const char *name;
-} fsr_info[] = {
-       /*
-        * The following are the standard Unicore-I and UniCore-II aborts.
-        */
-       { do_good,      SIGBUS,  0,             "no error"              },
-       { do_bad,       SIGBUS,  BUS_ADRALN,    "alignment exception"   },
-       { do_bad,       SIGBUS,  BUS_OBJERR,    "external exception"    },
-       { do_bad,       SIGBUS,  0,             "burst operation"       },
-       { do_bad,       SIGBUS,  0,             "unknown 00100"         },
-       { do_ifault,    SIGSEGV, SEGV_MAPERR,   "2nd level pt non-exist"},
-       { do_bad,       SIGBUS,  0,             "2nd lvl large pt non-exist" },
-       { do_bad,       SIGBUS,  0,             "invalid pte"           },
-       { do_pf,        SIGSEGV, SEGV_MAPERR,   "page miss"             },
-       { do_bad,       SIGBUS,  0,             "middle page miss"      },
-       { do_bad,       SIGBUS,  0,             "large page miss"       },
-       { do_pf,        SIGSEGV, SEGV_MAPERR,   "super page (section) miss" },
-       { do_bad,       SIGBUS,  0,             "unknown 01100"         },
-       { do_bad,       SIGBUS,  0,             "unknown 01101"         },
-       { do_bad,       SIGBUS,  0,             "unknown 01110"         },
-       { do_bad,       SIGBUS,  0,             "unknown 01111"         },
-       { do_bad,       SIGBUS,  0,             "addr: up 3G or IO"     },
-       { do_pf,        SIGSEGV, SEGV_ACCERR,   "read unreadable addr"  },
-       { do_pf,        SIGSEGV, SEGV_ACCERR,   "write unwriteable addr"},
-       { do_pf,        SIGSEGV, SEGV_ACCERR,   "exec unexecutable addr"},
-       { do_bad,       SIGBUS,  0,             "unknown 10100"         },
-       { do_bad,       SIGBUS,  0,             "unknown 10101"         },
-       { do_bad,       SIGBUS,  0,             "unknown 10110"         },
-       { do_bad,       SIGBUS,  0,             "unknown 10111"         },
-       { do_bad,       SIGBUS,  0,             "unknown 11000"         },
-       { do_bad,       SIGBUS,  0,             "unknown 11001"         },
-       { do_bad,       SIGBUS,  0,             "unknown 11010"         },
-       { do_bad,       SIGBUS,  0,             "unknown 11011"         },
-       { do_bad,       SIGBUS,  0,             "unknown 11100"         },
-       { do_bad,       SIGBUS,  0,             "unknown 11101"         },
-       { do_bad,       SIGBUS,  0,             "unknown 11110"         },
-       { do_bad,       SIGBUS,  0,             "unknown 11111"         }
-};
-
-void __init hook_fault_code(int nr,
-               int (*fn) (unsigned long, unsigned int, struct pt_regs *),
-               int sig, int code, const char *name)
-{
-       if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
-               BUG();
-
-       fsr_info[nr].fn   = fn;
-       fsr_info[nr].sig  = sig;
-       fsr_info[nr].code = code;
-       fsr_info[nr].name = name;
-}
-
-/*
- * Dispatch a data abort to the relevant handler.
- */
-asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr,
-                       struct pt_regs *regs)
-{
-       const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
-
-       if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
-               return;
-
-       printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
-              inf->name, fsr, addr);
-
-       uc32_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
-                       fsr, 0);
-}
-
-asmlinkage void do_PrefetchAbort(unsigned long addr,
-                       unsigned int ifsr, struct pt_regs *regs)
-{
-       const struct fsr_info *inf = fsr_info + fsr_fs(ifsr);
-
-       if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
-               return;
-
-       printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
-              inf->name, ifsr, addr);
-
-       uc32_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
-                       ifsr, 0);
-}
diff --git a/arch/unicore32/mm/flush.c b/arch/unicore32/mm/flush.c
deleted file mode 100644 (file)
index 65954f8..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/mm/flush.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-void flush_cache_mm(struct mm_struct *mm)
-{
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
-               unsigned long end)
-{
-       if (vma->vm_flags & VM_EXEC)
-               __flush_icache_all();
-}
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr,
-               unsigned long pfn)
-{
-}
-
-static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
-                        unsigned long uaddr, void *kaddr, unsigned long len)
-{
-       /* VIPT non-aliasing D-cache */
-       if (vma->vm_flags & VM_EXEC) {
-               unsigned long addr = (unsigned long)kaddr;
-
-               __cpuc_coherent_kern_range(addr, addr + len);
-       }
-}
-
-/*
- * Copy user data from/to a page which is mapped into a different
- * processes address space.  Really, we want to allow our "user
- * space" model to handle this.
- *
- * Note that this code needs to run on the current CPU.
- */
-void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
-                      unsigned long uaddr, void *dst, const void *src,
-                      unsigned long len)
-{
-       memcpy(dst, src, len);
-       flush_ptrace_access(vma, page, uaddr, dst, len);
-}
-
-void __flush_dcache_page(struct address_space *mapping, struct page *page)
-{
-       /*
-        * Writeback any data associated with the kernel mapping of this
-        * page.  This ensures that data in the physical page is mutually
-        * coherent with the kernels mapping.
-        */
-       __cpuc_flush_kern_dcache_area(page_address(page), PAGE_SIZE);
-}
-
-/*
- * Ensure cache coherency between kernel mapping and userspace mapping
- * of this page.
- */
-void flush_dcache_page(struct page *page)
-{
-       struct address_space *mapping;
-
-       /*
-        * The zero page is never written to, so never has any dirty
-        * cache lines, and therefore never needs to be flushed.
-        */
-       if (page == ZERO_PAGE(0))
-               return;
-
-       mapping = page_mapping_file(page);
-
-       if (mapping && !mapping_mapped(mapping))
-               clear_bit(PG_dcache_clean, &page->flags);
-       else {
-               __flush_dcache_page(mapping, page);
-               if (mapping)
-                       __flush_icache_all();
-               set_bit(PG_dcache_clean, &page->flags);
-       }
-}
-EXPORT_SYMBOL(flush_dcache_page);
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
deleted file mode 100644 (file)
index 52425d3..0000000
+++ /dev/null
@@ -1,261 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  linux/arch/unicore32/mm/init.c
- *
- *  Copyright (C) 2010 GUAN Xue-tao
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/swap.h>
-#include <linux/init.h>
-#include <linux/memblock.h>
-#include <linux/mman.h>
-#include <linux/nodemask.h>
-#include <linux/initrd.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/sort.h>
-#include <linux/dma-mapping.h>
-#include <linux/export.h>
-
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <linux/sizes.h>
-#include <asm/tlb.h>
-#include <asm/memblock.h>
-#include <mach/map.h>
-
-#include "mm.h"
-
-/*
- * This keeps memory configuration data used by a couple memory
- * initialization functions, as well as show_mem() for the skipping
- * of holes in the memory map.  It is populated by uc32_add_memory().
- */
-struct meminfo meminfo;
-
-static void __init find_limits(unsigned long *min, unsigned long *max_low,
-       unsigned long *max_high)
-{
-       struct meminfo *mi = &meminfo;
-       int i;
-
-       *min = -1UL;
-       *max_low = *max_high = 0;
-
-       for_each_bank(i, mi) {
-               struct membank *bank = &mi->bank[i];
-               unsigned long start, end;
-
-               start = bank_pfn_start(bank);
-               end = bank_pfn_end(bank);
-
-               if (*min > start)
-                       *min = start;
-               if (*max_high < end)
-                       *max_high = end;
-               if (bank->highmem)
-                       continue;
-               if (*max_low < end)
-                       *max_low = end;
-       }
-}
-
-static void __init uc32_bootmem_free(unsigned long max_low)
-{
-       unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
-
-       max_zone_pfn[ZONE_DMA] = max_low;
-       max_zone_pfn[ZONE_NORMAL] = max_low;
-
-       /*
-        * Adjust the sizes according to any special requirements for
-        * this machine type.
-        * This might lower ZONE_DMA limit.
-        */
-       arch_adjust_zones(max_zone_pfn);
-
-       free_area_init(max_zone_pfn);
-}
-
-int pfn_valid(unsigned long pfn)
-{
-       return memblock_is_memory(pfn << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(pfn_valid);
-
-static void uc32_memory_present(void)
-{
-}
-
-static int __init meminfo_cmp(const void *_a, const void *_b)
-{
-       const struct membank *a = _a, *b = _b;
-       long cmp = bank_pfn_start(a) - bank_pfn_start(b);
-       return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
-void __init uc32_memblock_init(struct meminfo *mi)
-{
-       int i;
-
-       sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),
-               meminfo_cmp, NULL);
-
-       for (i = 0; i < mi->nr_banks; i++)
-               memblock_add(mi->bank[i].start, mi->bank[i].size);
-
-       /* Register the kernel text, kernel data and initrd with memblock. */
-       memblock_reserve(__pa(_text), _end - _text);
-
-#ifdef CONFIG_BLK_DEV_INITRD
-       if (!phys_initrd_size) {
-               phys_initrd_start = 0x01000000;
-               phys_initrd_size = SZ_8M;
-       }
-
-       if (phys_initrd_size) {
-               memblock_reserve(phys_initrd_start, phys_initrd_size);
-
-               /* Now convert initrd to virtual addresses */
-               initrd_start = __phys_to_virt(phys_initrd_start);
-               initrd_end = initrd_start + phys_initrd_size;
-       }
-#endif
-
-       uc32_mm_memblock_reserve();
-
-       memblock_allow_resize();
-       memblock_dump_all();
-}
-
-void __init bootmem_init(void)
-{
-       unsigned long min, max_low, max_high;
-
-       max_low = max_high = 0;
-
-       find_limits(&min, &max_low, &max_high);
-
-       node_set_online(0);
-
-       /*
-        * Sparsemem tries to allocate bootmem in memory_present(),
-        * so must be done after the fixed reservations
-        */
-       uc32_memory_present();
-
-       /*
-        * sparse_init() needs the bootmem allocator up and running.
-        */
-       sparse_init();
-
-       /*
-        * Now free the memory - free_area_init needs
-        * the sparse mem_map arrays initialized by sparse_init()
-        * for memmap_init_zone(), otherwise all PFNs are invalid.
-        */
-       uc32_bootmem_free(max_low);
-
-       high_memory = __va((max_low << PAGE_SHIFT) - 1) + 1;
-
-       /*
-        * This doesn't seem to be used by the Linux memory manager any
-        * more, but is used by ll_rw_block.  If we can get rid of it, we
-        * also get rid of some of the stuff above as well.
-        *
-        * Note: max_low_pfn and max_pfn reflect the number of _pages_ in
-        * the system, not the maximum PFN.
-        */
-       max_low_pfn = max_low - PHYS_PFN_OFFSET;
-       max_pfn = max_high - PHYS_PFN_OFFSET;
-}
-
-static inline void
-free_memmap(unsigned long start_pfn, unsigned long end_pfn)
-{
-       struct page *start_pg, *end_pg;
-       unsigned long pg, pgend;
-
-       /*
-        * Convert start_pfn/end_pfn to a struct page pointer.
-        */
-       start_pg = pfn_to_page(start_pfn - 1) + 1;
-       end_pg = pfn_to_page(end_pfn);
-
-       /*
-        * Convert to physical addresses, and
-        * round start upwards and end downwards.
-        */
-       pg = PAGE_ALIGN(__pa(start_pg));
-       pgend = __pa(end_pg) & PAGE_MASK;
-
-       /*
-        * If there are free pages between these,
-        * free the section of the memmap array.
-        */
-       if (pg < pgend)
-               memblock_free(pg, pgend - pg);
-}
-
-/*
- * The mem_map array can get very big.  Free the unused area of the memory map.
- */
-static void __init free_unused_memmap(struct meminfo *mi)
-{
-       unsigned long bank_start, prev_bank_end = 0;
-       unsigned int i;
-
-       /*
-        * This relies on each bank being in address order.
-        * The banks are sorted previously in bootmem_init().
-        */
-       for_each_bank(i, mi) {
-               struct membank *bank = &mi->bank[i];
-
-               bank_start = bank_pfn_start(bank);
-
-               /*
-                * If we had a previous bank, and there is a space
-                * between the current bank and the previous, free it.
-                */
-               if (prev_bank_end && prev_bank_end < bank_start)
-                       free_memmap(prev_bank_end, bank_start);
-
-               /*
-                * Align up here since the VM subsystem insists that the
-                * memmap entries are valid from the bank end aligned to
-                * MAX_ORDER_NR_PAGES.
-                */
-               prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES);
-       }
-}
-
-/*
- * mem_init() marks the free areas in the mem_map and tells us how much
- * memory is free.  This is done after various parts of the system have
- * claimed their memory after the kernel image.
- */
-void __init mem_init(void)
-{
-       max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
-
-       free_unused_memmap(&meminfo);
-
-       /* this will put all unused low memory onto the freelists */
-       memblock_free_all();
-
-       mem_init_print_info(NULL);
-
-       BUILD_BUG_ON(TASK_SIZE                          > MODULES_VADDR);
-       BUG_ON(TASK_SIZE                                > MODULES_VADDR);
-
-       if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
-               /*
-                * On a machine this small we won't get
-                * anywhere without overcommit, so turn
-                * it on by default.
-                */
-               sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
-       }
-}
diff --git a/arch/unicore32/mm/ioremap.c b/arch/unicore32/mm/ioremap.c
deleted file mode 100644 (file)
index 46a64bd..0000000
+++ /dev/null
@@ -1,242 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/mm/ioremap.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * Re-map IO memory to kernel address space so that we can access it.
- *
- * This allows a driver to remap an arbitrary region of bus memory into
- * virtual space.  One should *only* use readl, writel, memcpy_toio and
- * so on with such remapped areas.
- *
- * Because UniCore only has a 32-bit address space we can't address the
- * whole of the (physical) PCI space at once.  PCI huge-mode addressing
- * allows us to circumvent this restriction by splitting PCI space into
- * two 2GB chunks and mapping only one at a time into processor memory.
- * We use MMU protection domains to trap any attempt to access the bank
- * that is not currently mapped.  (This isn't fully implemented yet.)
- */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <linux/io.h>
-
-#include <asm/cputype.h>
-#include <asm/cacheflush.h>
-#include <asm/mmu_context.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <linux/sizes.h>
-
-#include <mach/map.h>
-#include "mm.h"
-
-/*
- * Used by ioremap() and iounmap() code to mark (super)section-mapped
- * I/O regions in vm_struct->flags field.
- */
-#define VM_UNICORE_SECTION_MAPPING     0x80000000
-
-int ioremap_page(unsigned long virt, unsigned long phys,
-                const struct mem_type *mtype)
-{
-       return ioremap_page_range(virt, virt + PAGE_SIZE, phys,
-                                 __pgprot(mtype->prot_pte));
-}
-EXPORT_SYMBOL(ioremap_page);
-
-/*
- * Section support is unsafe on SMP - If you iounmap and ioremap a region,
- * the other CPUs will not see this change until their next context switch.
- * Meanwhile, (eg) if an interrupt comes in on one of those other CPUs
- * which requires the new ioremap'd region to be referenced, the CPU will
- * reference the _old_ region.
- *
- * Note that get_vm_area_caller() allocates a guard 4K page, so we need to
- * mask the size back to 4MB aligned or we will overflow in the loop below.
- */
-static void unmap_area_sections(unsigned long virt, unsigned long size)
-{
-       unsigned long addr = virt, end = virt + (size & ~(SZ_4M - 1));
-       pgd_t *pgd;
-
-       flush_cache_vunmap(addr, end);
-       pgd = pgd_offset_k(addr);
-       do {
-               pmd_t pmd, *pmdp = pmd_offset((pud_t *)pgd, addr);
-
-               pmd = *pmdp;
-               if (!pmd_none(pmd)) {
-                       /*
-                        * Clear the PMD from the page table, and
-                        * increment the kvm sequence so others
-                        * notice this change.
-                        *
-                        * Note: this is still racy on SMP machines.
-                        */
-                       pmd_clear(pmdp);
-
-                       /*
-                        * Free the page table, if there was one.
-                        */
-                       if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE)
-                               pte_free_kernel(&init_mm, pmd_page_vaddr(pmd));
-               }
-
-               addr += PGDIR_SIZE;
-               pgd++;
-       } while (addr < end);
-
-       flush_tlb_kernel_range(virt, end);
-}
-
-static int
-remap_area_sections(unsigned long virt, unsigned long pfn,
-                   size_t size, const struct mem_type *type)
-{
-       unsigned long addr = virt, end = virt + size;
-       pgd_t *pgd;
-
-       /*
-        * Remove and free any PTE-based mapping, and
-        * sync the current kernel mapping.
-        */
-       unmap_area_sections(virt, size);
-
-       pgd = pgd_offset_k(addr);
-       do {
-               pmd_t *pmd = pmd_offset((pud_t *)pgd, addr);
-
-               set_pmd(pmd, __pmd(__pfn_to_phys(pfn) | type->prot_sect));
-               pfn += SZ_4M >> PAGE_SHIFT;
-               flush_pmd_entry(pmd);
-
-               addr += PGDIR_SIZE;
-               pgd++;
-       } while (addr < end);
-
-       return 0;
-}
-
-void __iomem *__uc32_ioremap_pfn_caller(unsigned long pfn,
-       unsigned long offset, size_t size, unsigned int mtype, void *caller)
-{
-       const struct mem_type *type;
-       int err;
-       unsigned long addr;
-       struct vm_struct *area;
-
-       /*
-        * High mappings must be section aligned
-        */
-       if (pfn >= 0x100000 && (__pfn_to_phys(pfn) & ~SECTION_MASK))
-               return NULL;
-
-       /*
-        * Don't allow RAM to be mapped
-        */
-       if (pfn_valid(pfn)) {
-               WARN(1, "BUG: Your driver calls ioremap() on\n"
-                       "system memory.  This leads to architecturally\n"
-                       "unpredictable behaviour, and ioremap() will fail in\n"
-                       "the next kernel release. Please fix your driver.\n");
-               return NULL;
-       }
-
-       type = get_mem_type(mtype);
-       if (!type)
-               return NULL;
-
-       /*
-        * Page align the mapping size, taking account of any offset.
-        */
-       size = PAGE_ALIGN(offset + size);
-
-       area = get_vm_area_caller(size, VM_IOREMAP, caller);
-       if (!area)
-               return NULL;
-       addr = (unsigned long)area->addr;
-
-       if (!((__pfn_to_phys(pfn) | size | addr) & ~PMD_MASK)) {
-               area->flags |= VM_UNICORE_SECTION_MAPPING;
-               err = remap_area_sections(addr, pfn, size, type);
-       } else
-               err = ioremap_page_range(addr, addr + size, __pfn_to_phys(pfn),
-                                        __pgprot(type->prot_pte));
-
-       if (err) {
-               vunmap((void *)addr);
-               return NULL;
-       }
-
-       flush_cache_vmap(addr, addr + size);
-       return (void __iomem *) (offset + addr);
-}
-
-void __iomem *__uc32_ioremap_caller(unsigned long phys_addr, size_t size,
-       unsigned int mtype, void *caller)
-{
-       unsigned long last_addr;
-       unsigned long offset = phys_addr & ~PAGE_MASK;
-       unsigned long pfn = __phys_to_pfn(phys_addr);
-
-       /*
-        * Don't allow wraparound or zero size
-        */
-       last_addr = phys_addr + size - 1;
-       if (!size || last_addr < phys_addr)
-               return NULL;
-
-       return __uc32_ioremap_pfn_caller(pfn, offset, size, mtype, caller);
-}
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem *
-__uc32_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size,
-                 unsigned int mtype)
-{
-       return __uc32_ioremap_pfn_caller(pfn, offset, size, mtype,
-                       __builtin_return_address(0));
-}
-EXPORT_SYMBOL(__uc32_ioremap_pfn);
-
-void __iomem *
-__uc32_ioremap(unsigned long phys_addr, size_t size)
-{
-       return __uc32_ioremap_caller(phys_addr, size, MT_DEVICE,
-                       __builtin_return_address(0));
-}
-EXPORT_SYMBOL(__uc32_ioremap);
-
-void __uc32_iounmap(volatile void __iomem *io_addr)
-{
-       void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
-       struct vm_struct *vm;
-
-       /*
-        * If this is a section based mapping we need to handle it
-        * specially as the VM subsystem does not know how to handle
-        * such a beast. We need the lock here b/c we need to clear
-        * all the mappings before the area can be reclaimed
-        * by someone else.
-        */
-       vm = find_vm_area(addr);
-       if (vm && (vm->flags & VM_IOREMAP) &&
-               (vm->flags & VM_UNICORE_SECTION_MAPPING))
-               unmap_area_sections((unsigned long)vm->addr, vm->size);
-
-       vunmap(addr);
-}
-EXPORT_SYMBOL(__uc32_iounmap);
diff --git a/arch/unicore32/mm/mm.h b/arch/unicore32/mm/mm.h
deleted file mode 100644 (file)
index f157f5d..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/mm/mm.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <asm/hwdef-copro.h>
-
-/* the upper-most page table pointer */
-extern pmd_t *top_pmd;
-extern int sysctl_overcommit_memory;
-
-#define TOP_PTE(x)     pte_offset_kernel(top_pmd, x)
-
-struct mem_type {
-       unsigned int prot_pte;
-       unsigned int prot_l1;
-       unsigned int prot_sect;
-};
-
-const struct mem_type *get_mem_type(unsigned int type);
-
-extern void __flush_dcache_page(struct address_space *, struct page *);
-extern void hook_fault_code(int nr, int (*fn)
-               (unsigned long, unsigned int, struct pt_regs *),
-               int sig, int code, const char *name);
-
-void __init bootmem_init(void);
-void uc32_mm_memblock_reserve(void);
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
deleted file mode 100644 (file)
index 183d5b0..0000000
+++ /dev/null
@@ -1,513 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/mm/mmu.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/nodemask.h>
-#include <linux/memblock.h>
-#include <linux/fs.h>
-#include <linux/io.h>
-
-#include <asm/cputype.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <linux/sizes.h>
-#include <asm/tlb.h>
-#include <asm/memblock.h>
-
-#include <mach/map.h>
-
-#include "mm.h"
-
-/*
- * empty_zero_page is a special page that is used for
- * zero-initialized data and COW.
- */
-struct page *empty_zero_page;
-EXPORT_SYMBOL(empty_zero_page);
-
-/*
- * The pmd table for the upper-most set of pages.
- */
-pmd_t *top_pmd;
-
-pgprot_t pgprot_user;
-EXPORT_SYMBOL(pgprot_user);
-
-pgprot_t pgprot_kernel;
-EXPORT_SYMBOL(pgprot_kernel);
-
-static int __init noalign_setup(char *__unused)
-{
-       cr_alignment &= ~CR_A;
-       cr_no_alignment &= ~CR_A;
-       set_cr(cr_alignment);
-       return 1;
-}
-__setup("noalign", noalign_setup);
-
-void adjust_cr(unsigned long mask, unsigned long set)
-{
-       unsigned long flags;
-
-       mask &= ~CR_A;
-
-       set &= mask;
-
-       local_irq_save(flags);
-
-       cr_no_alignment = (cr_no_alignment & ~mask) | set;
-       cr_alignment = (cr_alignment & ~mask) | set;
-
-       set_cr((get_cr() & ~mask) | set);
-
-       local_irq_restore(flags);
-}
-
-struct map_desc {
-       unsigned long virtual;
-       unsigned long pfn;
-       unsigned long length;
-       unsigned int type;
-};
-
-#define PROT_PTE_DEVICE                (PTE_PRESENT | PTE_YOUNG |      \
-                               PTE_DIRTY | PTE_READ | PTE_WRITE)
-#define PROT_SECT_DEVICE       (PMD_TYPE_SECT | PMD_PRESENT |  \
-                               PMD_SECT_READ | PMD_SECT_WRITE)
-
-static struct mem_type mem_types[] = {
-       [MT_DEVICE] = {           /* Strongly ordered */
-               .prot_pte       = PROT_PTE_DEVICE,
-               .prot_l1        = PMD_TYPE_TABLE | PMD_PRESENT,
-               .prot_sect      = PROT_SECT_DEVICE,
-       },
-       /*
-        * MT_KUSER: pte for vecpage -- cacheable,
-        *       and sect for unigfx mmap -- noncacheable
-        */
-       [MT_KUSER] = {
-               .prot_pte  = PTE_PRESENT | PTE_YOUNG | PTE_DIRTY |
-                               PTE_CACHEABLE | PTE_READ | PTE_EXEC,
-               .prot_l1   = PMD_TYPE_TABLE | PMD_PRESENT,
-               .prot_sect = PROT_SECT_DEVICE,
-       },
-       [MT_HIGH_VECTORS] = {
-               .prot_pte  = PTE_PRESENT | PTE_YOUNG | PTE_DIRTY |
-                               PTE_CACHEABLE | PTE_READ | PTE_WRITE |
-                               PTE_EXEC,
-               .prot_l1   = PMD_TYPE_TABLE | PMD_PRESENT,
-       },
-       [MT_MEMORY] = {
-               .prot_pte  = PTE_PRESENT | PTE_YOUNG | PTE_DIRTY |
-                               PTE_WRITE | PTE_EXEC,
-               .prot_l1   = PMD_TYPE_TABLE | PMD_PRESENT,
-               .prot_sect = PMD_TYPE_SECT | PMD_PRESENT | PMD_SECT_CACHEABLE |
-                               PMD_SECT_READ | PMD_SECT_WRITE | PMD_SECT_EXEC,
-       },
-       [MT_ROM] = {
-               .prot_sect = PMD_TYPE_SECT | PMD_PRESENT | PMD_SECT_CACHEABLE |
-                               PMD_SECT_READ,
-       },
-};
-
-const struct mem_type *get_mem_type(unsigned int type)
-{
-       return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
-}
-EXPORT_SYMBOL(get_mem_type);
-
-/*
- * Adjust the PMD section entries according to the CPU in use.
- */
-static void __init build_mem_type_table(void)
-{
-       pgprot_user   = __pgprot(PTE_PRESENT | PTE_YOUNG | PTE_CACHEABLE);
-       pgprot_kernel = __pgprot(PTE_PRESENT | PTE_YOUNG |
-                                PTE_DIRTY | PTE_READ | PTE_WRITE |
-                                PTE_EXEC | PTE_CACHEABLE);
-}
-
-#define vectors_base() (vectors_high() ? 0xffff0000 : 0)
-
-static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr,
-               unsigned long prot)
-{
-       if (pmd_none(*pmd)) {
-               size_t size = PTRS_PER_PTE * sizeof(pte_t);
-               pte_t *pte = memblock_alloc(size, size);
-
-               if (!pte)
-                       panic("%s: Failed to allocate %zu bytes align=%zx\n",
-                             __func__, size, size);
-
-               __pmd_populate(pmd, __pa(pte) | prot);
-       }
-       BUG_ON(pmd_bad(*pmd));
-       return pte_offset_kernel(pmd, addr);
-}
-
-static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
-                                 unsigned long end, unsigned long pfn,
-                                 const struct mem_type *type)
-{
-       pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
-       do {
-               set_pte(pte, pfn_pte(pfn, __pgprot(type->prot_pte)));
-               pfn++;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
-}
-
-static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
-                                     unsigned long end, unsigned long phys,
-                                     const struct mem_type *type)
-{
-       pmd_t *pmd = pmd_offset((pud_t *)pgd, addr);
-
-       /*
-        * Try a section mapping - end, addr and phys must all be aligned
-        * to a section boundary.
-        */
-       if (((addr | end | phys) & ~SECTION_MASK) == 0) {
-               pmd_t *p = pmd;
-
-               do {
-                       set_pmd(pmd, __pmd(phys | type->prot_sect));
-                       phys += SECTION_SIZE;
-               } while (pmd++, addr += SECTION_SIZE, addr != end);
-
-               flush_pmd_entry(p);
-       } else {
-               /*
-                * No need to loop; pte's aren't interested in the
-                * individual L1 entries.
-                */
-               alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
-       }
-}
-
-/*
- * Create the page directory entries and any necessary
- * page tables for the mapping specified by `md'.  We
- * are able to cope here with varying sizes and address
- * offsets, and we take full advantage of sections.
- */
-static void __init create_mapping(struct map_desc *md)
-{
-       unsigned long phys, addr, length, end;
-       const struct mem_type *type;
-       pgd_t *pgd;
-
-       if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
-               printk(KERN_WARNING "BUG: not creating mapping for "
-                      "0x%08llx at 0x%08lx in user region\n",
-                      __pfn_to_phys((u64)md->pfn), md->virtual);
-               return;
-       }
-
-       if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
-           md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
-               printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
-                      "overlaps vmalloc space\n",
-                      __pfn_to_phys((u64)md->pfn), md->virtual);
-       }
-
-       type = &mem_types[md->type];
-
-       addr = md->virtual & PAGE_MASK;
-       phys = (unsigned long)__pfn_to_phys(md->pfn);
-       length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
-
-       if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
-               printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
-                      "be mapped using pages, ignoring.\n",
-                      __pfn_to_phys(md->pfn), addr);
-               return;
-       }
-
-       pgd = pgd_offset_k(addr);
-       end = addr + length;
-       do {
-               unsigned long next = pgd_addr_end(addr, end);
-
-               alloc_init_section(pgd, addr, next, phys, type);
-
-               phys += next - addr;
-               addr = next;
-       } while (pgd++, addr != end);
-}
-
-static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);
-
-/*
- * vmalloc=size forces the vmalloc area to be exactly 'size'
- * bytes. This can be used to increase (or decrease) the vmalloc
- * area - the default is 128m.
- */
-static int __init early_vmalloc(char *arg)
-{
-       unsigned long vmalloc_reserve = memparse(arg, NULL);
-
-       if (vmalloc_reserve < SZ_16M) {
-               vmalloc_reserve = SZ_16M;
-               printk(KERN_WARNING
-                       "vmalloc area too small, limiting to %luMB\n",
-                       vmalloc_reserve >> 20);
-       }
-
-       if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
-               vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
-               printk(KERN_WARNING
-                       "vmalloc area is too big, limiting to %luMB\n",
-                       vmalloc_reserve >> 20);
-       }
-
-       vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
-       return 0;
-}
-early_param("vmalloc", early_vmalloc);
-
-static phys_addr_t lowmem_limit __initdata = SZ_1G;
-
-static void __init sanity_check_meminfo(void)
-{
-       int i, j;
-
-       lowmem_limit = __pa(vmalloc_min - 1) + 1;
-       memblock_set_current_limit(lowmem_limit);
-
-       for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
-               struct membank *bank = &meminfo.bank[j];
-               *bank = meminfo.bank[i];
-               j++;
-       }
-       meminfo.nr_banks = j;
-}
-
-static inline void prepare_page_table(void)
-{
-       unsigned long addr;
-       phys_addr_t end;
-
-       /*
-        * Clear out all the mappings below the kernel image.
-        */
-       for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE)
-               pmd_clear(pmd_off_k(addr));
-
-       for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
-               pmd_clear(pmd_off_k(addr));
-
-       /*
-        * Find the end of the first block of lowmem.
-        */
-       end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
-       if (end >= lowmem_limit)
-               end = lowmem_limit;
-
-       /*
-        * Clear out all the kernel space mappings, except for the first
-        * memory bank, up to the end of the vmalloc region.
-        */
-       for (addr = __phys_to_virt(end);
-            addr < VMALLOC_END; addr += PGDIR_SIZE)
-               pmd_clear(pmd_off_k(addr));
-}
-
-/*
- * Reserve the special regions of memory
- */
-void __init uc32_mm_memblock_reserve(void)
-{
-       /*
-        * Reserve the page tables.  These are already in use,
-        * and can only be in node 0.
-        */
-       memblock_reserve(__pa(swapper_pg_dir), PTRS_PER_PGD * sizeof(pgd_t));
-}
-
-/*
- * Set up device the mappings.  Since we clear out the page tables for all
- * mappings above VMALLOC_END, we will remove any debug device mappings.
- * This means you have to be careful how you debug this function, or any
- * called function.  This means you can't use any function or debugging
- * method which may touch any device, otherwise the kernel _will_ crash.
- */
-static void __init devicemaps_init(void)
-{
-       struct map_desc map;
-       unsigned long addr;
-       void *vectors;
-
-       /*
-        * Allocate the vector page early.
-        */
-       vectors = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-       if (!vectors)
-               panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-                     __func__, PAGE_SIZE, PAGE_SIZE);
-
-       for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
-               pmd_clear(pmd_off_k(addr));
-
-       /*
-        * Create a mapping for the machine vectors at the high-vectors
-        * location (0xffff0000).  If we aren't using high-vectors, also
-        * create a mapping at the low-vectors virtual address.
-        */
-       map.pfn = __phys_to_pfn(virt_to_phys(vectors));
-       map.virtual = VECTORS_BASE;
-       map.length = PAGE_SIZE;
-       map.type = MT_HIGH_VECTORS;
-       create_mapping(&map);
-
-       /*
-        * Create a mapping for the kuser page at the special
-        * location (0xbfff0000) to the same vectors location.
-        */
-       map.pfn = __phys_to_pfn(virt_to_phys(vectors));
-       map.virtual = KUSER_VECPAGE_BASE;
-       map.length = PAGE_SIZE;
-       map.type = MT_KUSER;
-       create_mapping(&map);
-
-       /*
-        * Finally flush the caches and tlb to ensure that we're in a
-        * consistent state wrt the writebuffer.  This also ensures that
-        * any write-allocated cache lines in the vector page are written
-        * back.  After this point, we can start to touch devices again.
-        */
-       local_flush_tlb_all();
-       flush_cache_all();
-}
-
-static void __init map_lowmem(void)
-{
-       struct memblock_region *reg;
-
-       /* Map all the lowmem memory banks. */
-       for_each_memblock(memory, reg) {
-               phys_addr_t start = reg->base;
-               phys_addr_t end = start + reg->size;
-               struct map_desc map;
-
-               if (end > lowmem_limit)
-                       end = lowmem_limit;
-               if (start >= end)
-                       break;
-
-               map.pfn = __phys_to_pfn(start);
-               map.virtual = __phys_to_virt(start);
-               map.length = end - start;
-               map.type = MT_MEMORY;
-
-               create_mapping(&map);
-       }
-}
-
-/*
- * paging_init() sets up the page tables, initialises the zone memory
- * maps, and sets up the zero page, bad page and bad page tables.
- */
-void __init paging_init(void)
-{
-       void *zero_page;
-
-       build_mem_type_table();
-       sanity_check_meminfo();
-       prepare_page_table();
-       map_lowmem();
-       devicemaps_init();
-
-       top_pmd = pmd_off_k(0xffff0000);
-
-       /* allocate the zero page. */
-       zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-       if (!zero_page)
-               panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-                     __func__, PAGE_SIZE, PAGE_SIZE);
-
-       bootmem_init();
-
-       empty_zero_page = virt_to_page(zero_page);
-       __flush_dcache_page(NULL, empty_zero_page);
-}
-
-/*
- * In order to soft-boot, we need to insert a 1:1 mapping in place of
- * the user-mode pages.  This will then ensure that we have predictable
- * results when turning the mmu off
- */
-void setup_mm_for_reboot(void)
-{
-       unsigned long base_pmdval;
-       pgd_t *pgd;
-       int i;
-
-       /*
-        * We need to access to user-mode page tables here. For kernel threads
-        * we don't have any user-mode mappings so we use the context that we
-        * "borrowed".
-        */
-       pgd = current->active_mm->pgd;
-
-       base_pmdval = PMD_SECT_WRITE | PMD_SECT_READ | PMD_TYPE_SECT;
-
-       for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) {
-               unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval;
-               pmd_t *pmd;
-
-               pmd = pmd_off(pgd, i << PGDIR_SHIFT);
-               set_pmd(pmd, __pmd(pmdval));
-               flush_pmd_entry(pmd);
-       }
-
-       local_flush_tlb_all();
-}
-
-/*
- * Take care of architecture specific things when placing a new PTE into
- * a page table, or changing an existing PTE.  Basically, there are two
- * things that we need to take care of:
- *
- *  1. If PG_dcache_clean is not set for the page, we need to ensure
- *     that any cache entries for the kernels virtual memory
- *     range are written back to the page.
- *  2. If we have multiple shared mappings of the same space in
- *     an object, we need to deal with the cache aliasing issues.
- *
- * Note that the pte lock will be held.
- */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
-       pte_t *ptep)
-{
-       unsigned long pfn = pte_pfn(*ptep);
-       struct address_space *mapping;
-       struct page *page;
-
-       if (!pfn_valid(pfn))
-               return;
-
-       /*
-        * The zero page is never written to, so never has any dirty
-        * cache lines, and therefore never needs to be flushed.
-        */
-       page = pfn_to_page(pfn);
-       if (page == ZERO_PAGE(0))
-               return;
-
-       mapping = page_mapping_file(page);
-       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
-               __flush_dcache_page(mapping, page);
-       if (mapping)
-               if (vma->vm_flags & VM_EXEC)
-                       __flush_icache_all();
-}
diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c
deleted file mode 100644 (file)
index f01c73e..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/mm/pgd.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/mm.h>
-#include <linux/gfp.h>
-#include <linux/highmem.h>
-
-#include <asm/pgalloc.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-
-#include "mm.h"
-
-#define FIRST_KERNEL_PGD_NR    (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
-
-/*
- * need to get a 4k page for level 1
- */
-pgd_t *get_pgd_slow(struct mm_struct *mm)
-{
-       pgd_t *new_pgd, *init_pgd;
-       pmd_t *new_pmd, *init_pmd;
-       pte_t *new_pte, *init_pte;
-
-       new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 0);
-       if (!new_pgd)
-               goto no_pgd;
-
-       memset(new_pgd, 0, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
-
-       /*
-        * Copy over the kernel and IO PGD entries
-        */
-       init_pgd = pgd_offset_k(0);
-       memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
-                      (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
-
-       clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-
-       if (!vectors_high()) {
-               /*
-                * On UniCore, first page must always be allocated since it
-                * contains the machine vectors.
-                */
-               new_pmd = pmd_alloc(mm, (pud_t *)new_pgd, 0);
-               if (!new_pmd)
-                       goto no_pmd;
-
-               new_pte = pte_alloc_map(mm, new_pmd, 0);
-               if (!new_pte)
-                       goto no_pte;
-
-               init_pmd = pmd_offset((pud_t *)init_pgd, 0);
-               init_pte = pte_offset_map(init_pmd, 0);
-               set_pte(new_pte, *init_pte);
-               pte_unmap(init_pte);
-               pte_unmap(new_pte);
-       }
-
-       return new_pgd;
-
-no_pte:
-       pmd_free(mm, new_pmd);
-       mm_dec_nr_pmds(mm);
-no_pmd:
-       free_pages((unsigned long)new_pgd, 0);
-no_pgd:
-       return NULL;
-}
-
-void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
-{
-       pmd_t *pmd;
-       pgtable_t pte;
-
-       if (!pgd)
-               return;
-
-       /* pgd is always present and good */
-       pmd = pmd_off(pgd, 0);
-       if (pmd_none(*pmd))
-               goto free;
-       if (pmd_bad(*pmd)) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
-               goto free;
-       }
-
-       pte = pmd_pgtable(*pmd);
-       pmd_clear(pmd);
-       pte_free(mm, pte);
-       mm_dec_nr_ptes(mm);
-       pmd_free(mm, pmd);
-       mm_dec_nr_pmds(mm);
-free:
-       free_pages((unsigned long) pgd, 0);
-}
diff --git a/arch/unicore32/mm/proc-macros.S b/arch/unicore32/mm/proc-macros.S
deleted file mode 100644 (file)
index 3b0ae7d..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/mm/proc-macros.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * We need constants.h for:
- *  VMA_VM_MM
- *  VMA_VM_FLAGS
- *  VM_EXEC
- */
-#include <generated/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/memory.h>
-
-/*
- * the cache line sizes of the I and D cache are the same
- */
-#define CACHE_LINESIZE 32
-
-/*
- * This is the maximum size of an area which will be invalidated
- * using the single invalidate entry instructions.  Anything larger
- * than this, and we go for the whole cache.
- *
- * This value should be chosen such that we choose the cheapest
- * alternative.
- */
-#ifdef CONFIG_CPU_UCV2
-#define MAX_AREA_SIZE  0x800           /* 64 cache line */
-#endif
-
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
-       .macro  vma_vm_mm, rd, rn
-       ldw     \rd, [\rn+], #VMA_VM_MM
-       .endm
-
-/*
- * vma_vm_flags - get vma->vm_flags
- */
-       .macro  vma_vm_flags, rd, rn
-       ldw     \rd, [\rn+], #VMA_VM_FLAGS
-       .endm
-
-       .macro  tsk_mm, rd, rn
-       ldw     \rd, [\rn+], #TI_TASK
-       ldw     \rd, [\rd+], #TSK_ACTIVE_MM
-       .endm
-
-/*
- * act_mm - get current->active_mm
- */
-       .macro  act_mm, rd
-       andn    \rd, sp, #8128
-       andn    \rd, \rd, #63
-       ldw     \rd, [\rd+], #TI_TASK
-       ldw     \rd, [\rd+], #TSK_ACTIVE_MM
-       .endm
-
-/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
-       .macro  mmid, rd, rn
-       ldw     \rd, [\rn+], #MM_CONTEXT_ID
-       .endm
-
-/*
- * mask_asid - mask the ASID from the context ID
- */
-       .macro  asid, rd, rn
-       and     \rd, \rn, #255
-       .endm
-
-       .macro  crval, clear, mmuset, ucset
-       .word   \clear
-       .word   \mmuset
-       .endm
-
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-/*
- * va2pa va, pa, tbl, msk, off, err
- *     This macro is used to translate virtual address to its physical address.
- *
- *     va: virtual address
- *     pa: physical address, result is stored in this register
- *     tbl, msk, off:  temp registers, will be destroyed
- *     err: jump to error label if the physical address not exist
- * NOTE: all regs must be different
- */
-       .macro  va2pa, va, pa, tbl, msk, off, err=990f
-       movc    \pa, p0.c2, #0
-       mov     \off, \va >> #22                @ off <- index of 1st page table
-       adr     \tbl, 910f                      @ tbl <- table of 1st page table
-900:                                           @ ---- handle 1, 2 page table
-       add     \pa, \pa, #PAGE_OFFSET          @ pa <- virt addr of page table
-       ldw     \pa, [\pa+], \off << #2         @ pa <- the content of pt
-       cand.a  \pa, #4                         @ test exist bit
-       beq     \err                            @ if not exist
-       and     \off, \pa, #3                   @ off <- the last 2 bits
-       add     \tbl, \tbl, \off << #3          @ cmove table pointer
-       ldw     \msk, [\tbl+], #0               @ get the mask
-       ldw     pc, [\tbl+], #4
-930:                                           @ ---- handle 2nd page table
-       and     \pa, \pa, \msk                  @ pa <- phys addr of 2nd pt
-       mov     \off, \va << #10
-       cntlo   \tbl, \msk                      @ use tbl as temp reg
-       mov     \off, \off >> \tbl
-       mov     \off, \off >> #2                @ off <- index of 2nd pt
-       adr     \tbl, 920f                      @ tbl <- table of 2nd pt
-       b       900b
-910:                                           @ 1st level page table
-       .word   0xfffff000, 930b                @ second level page table
-       .word   0xfffffc00, 930b                @ second level large page table
-       .word   0x00000000, \err                @ invalid
-       .word   0xffc00000, 980f                @ super page
-
-920:                                           @ 2nd level page table
-       .word   0xfffff000, 980f                @ page
-       .word   0xffffc000, 980f                @ middle page
-       .word   0xffff0000, 980f                @ large page
-       .word   0x00000000, \err                @ invalid
-980:
-       andn    \tbl, \va, \msk
-       and     \pa, \pa, \msk
-       or      \pa, \pa, \tbl
-990:
-       .endm
-#endif
-
-       .macro dcacheline_flush, addr, t1, t2
-       mov     \t1, \addr << #20
-       ldw     \t2, =_stext                    @ _stext must ALIGN(4096)
-       add     \t2, \t2, \t1 >> #20
-       ldw     \t1, [\t2+], #0x0000
-       ldw     \t1, [\t2+], #0x1000
-       ldw     \t1, [\t2+], #0x2000
-       ldw     \t1, [\t2+], #0x3000
-       .endm
diff --git a/arch/unicore32/mm/proc-syms.c b/arch/unicore32/mm/proc-syms.c
deleted file mode 100644 (file)
index 6c08161..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/mm/proc-syms.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/module.h>
-#include <linux/mm.h>
-
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/page.h>
-
-EXPORT_SYMBOL(cpu_dcache_clean_area);
-EXPORT_SYMBOL(cpu_set_pte);
-
-EXPORT_SYMBOL(__cpuc_coherent_kern_range);
diff --git a/arch/unicore32/mm/proc-ucv2.S b/arch/unicore32/mm/proc-ucv2.S
deleted file mode 100644 (file)
index 18f8c4f..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/mm/proc-ucv2.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <linux/pgtable.h>
-#include <asm/assembler.h>
-#include <asm/hwcap.h>
-#include <asm/pgtable-hwdef.h>
-
-#include "proc-macros.S"
-
-ENTRY(cpu_proc_fin)
-       stm.w   (lr), [sp-]
-       mov     ip, #PSR_R_BIT | PSR_I_BIT | PRIV_MODE
-       mov.a   asr, ip
-       b.l     __cpuc_flush_kern_all
-       ldm.w   (pc), [sp]+
-
-/*
- *     cpu_reset(loc)
- *
- *     Perform a soft reset of the system.  Put the CPU into the
- *     same state as it would be if it had been reset, and branch
- *     to what would be the reset vector.
- *
- *     - loc   - location to jump to for soft reset
- */
-       .align  5
-ENTRY(cpu_reset)
-       mov     ip, #0
-       movc    p0.c5, ip, #28                  @ Cache invalidate all
-       nop8
-
-       movc    p0.c6, ip, #6                   @ TLB invalidate all
-       nop8
-
-       movc    ip, p0.c1, #0                   @ ctrl register
-       or      ip, ip, #0x2000                 @ vector base address
-       andn    ip, ip, #0x000f                 @ ............idam
-       movc    p0.c1, ip, #0                   @ disable caches and mmu
-       nop
-       mov     pc, r0                          @ jump to loc
-       nop8
-
-/*
- *     cpu_do_idle()
- *
- *     Idle the processor (eg, wait for interrupt).
- *
- *     IRQs are already disabled.
- */
-ENTRY(cpu_do_idle)
-       mov     r0, #0                          @ PCI address
-       .rept   8
-       ldw     r1, [r0]
-       .endr
-       mov     pc, lr
-
-ENTRY(cpu_dcache_clean_area)
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-       csub.a  r1, #MAX_AREA_SIZE
-       bsg     101f
-       mov     r9, #PAGE_SZ
-       sub     r9, r9, #1                      @ PAGE_MASK
-1:     va2pa   r0, r10, r11, r12, r13          @ r10 is PA
-       b       3f
-2:     cand.a  r0, r9
-       beq     1b
-3:     movc    p0.c5, r10, #11                 @ clean D entry
-       nop8
-       add     r0, r0, #CACHE_LINESIZE
-       add     r10, r10, #CACHE_LINESIZE
-       sub.a   r1, r1, #CACHE_LINESIZE
-       bua     2b
-       mov     pc, lr
-#endif
-101:   mov     ip, #0
-       movc    p0.c5, ip, #10                  @ Dcache clean all
-       nop8
-
-       mov     pc, lr
-
-/*
- *     cpu_do_switch_mm(pgd_phys)
- *
- *     Set the translation table base pointer to be pgd_phys
- *
- *     - pgd_phys - physical address of new pgd
- *
- *     It is assumed that:
- *     - we are not using split page tables
- */
-       .align  5
-ENTRY(cpu_do_switch_mm)
-       movc    p0.c2, r0, #0                   @ update page table ptr
-       nop8
-
-       movc    p0.c6, ip, #6                   @ TLB invalidate all
-       nop8
-
-       mov     pc, lr
-
-/*
- *     cpu_set_pte(ptep, pte)
- *
- *     Set a level 2 translation table entry.
- *
- *     - ptep  - pointer to level 2 translation table entry
- *     - pte   - PTE value to store
- */
-       .align  5
-ENTRY(cpu_set_pte)
-       stw     r1, [r0]
-#ifndef CONFIG_CPU_DCACHE_LINE_DISABLE
-       sub     r2, r0, #PAGE_OFFSET
-       movc    p0.c5, r2, #11                          @ Dcache clean line
-       nop8
-#else
-       mov     ip, #0
-       movc    p0.c5, ip, #10                          @ Dcache clean all
-       nop8
-       @dcacheline_flush       r0, r2, ip
-#endif
-       mov     pc, lr
-
diff --git a/arch/unicore32/mm/tlb-ucv2.S b/arch/unicore32/mm/tlb-ucv2.S
deleted file mode 100644 (file)
index 0ce9c6b..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/unicore32/mm/tlb-ucv2.S
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-#include "proc-macros.S"
-
-/*
- *     __cpu_flush_user_tlb_range(start, end, vma)
- *
- *     Invalidate a range of TLB entries in the specified address space.
- *
- *     - start - start address (may not be aligned)
- *     - end   - end address (exclusive, may not be aligned)
- *     - vma   - vma_struct describing address range
- */
-ENTRY(__cpu_flush_user_tlb_range)
-#ifndef        CONFIG_CPU_TLB_SINGLE_ENTRY_DISABLE
-       mov     r0, r0 >> #PAGE_SHIFT           @ align address
-       mov     r0, r0 << #PAGE_SHIFT
-       vma_vm_flags r2, r2                     @ get vma->vm_flags
-1:
-       movc    p0.c6, r0, #3
-       nop8
-
-       cand.a  r2, #VM_EXEC                    @ Executable area ?
-       beq     2f
-
-       movc    p0.c6, r0, #5
-       nop8
-2:
-       add     r0, r0, #PAGE_SZ
-       csub.a  r0, r1
-       beb     1b
-#else
-       movc    p0.c6, r0, #2
-       nop8
-
-       cand.a  r2, #VM_EXEC                    @ Executable area ?
-       beq     2f
-
-       movc    p0.c6, r0, #4
-       nop8
-2:
-#endif
-       mov     pc, lr
-
-/*
- *     __cpu_flush_kern_tlb_range(start,end)
- *
- *     Invalidate a range of kernel TLB entries
- *
- *     - start - start address (may not be aligned)
- *     - end   - end address (exclusive, may not be aligned)
- */
-ENTRY(__cpu_flush_kern_tlb_range)
-#ifndef        CONFIG_CPU_TLB_SINGLE_ENTRY_DISABLE
-       mov     r0, r0 >> #PAGE_SHIFT           @ align address
-       mov     r0, r0 << #PAGE_SHIFT
-1:
-       movc    p0.c6, r0, #3
-       nop8
-
-       movc    p0.c6, r0, #5
-       nop8
-
-       add     r0, r0, #PAGE_SZ
-       csub.a  r0, r1
-       beb     1b
-#else
-       movc    p0.c6, r0, #2
-       nop8
-
-       movc    p0.c6, r0, #4
-       nop8
-#endif
-       mov     pc, lr
-
index 883da0a..4a64395 100644 (file)
@@ -188,6 +188,7 @@ config X86
        select HAVE_KERNEL_LZMA
        select HAVE_KERNEL_LZO
        select HAVE_KERNEL_XZ
+       select HAVE_KERNEL_ZSTD
        select HAVE_KPROBES
        select HAVE_KPROBES_ON_FTRACE
        select HAVE_FUNCTION_ERROR_INJECTION
index 0dd319e..ee1d3c5 100644 (file)
@@ -3,6 +3,9 @@
 config TRACE_IRQFLAGS_SUPPORT
        def_bool y
 
+config TRACE_IRQFLAGS_NMI_SUPPORT
+       def_bool y
+
 config EARLY_PRINTK_USB
        bool
 
index 00e378d..1e634d7 100644 (file)
@@ -47,10 +47,6 @@ export REALMODE_CFLAGS
 # e.g.: obj-y += foo_$(BITS).o
 export BITS
 
-ifdef CONFIG_X86_NEED_RELOCS
-        LDFLAGS_vmlinux := --emit-relocs --discard-none
-endif
-
 #
 # Prevent GCC from generating any FP code by mistake.
 #
@@ -177,17 +173,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
        KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
 endif
 
-KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
-
-#
-# The 64-bit kernel must be aligned to 2MB.  Pass -z max-page-size=0x200000 to
-# the linker to force 2MB page size regardless of the default page size used
-# by the linker.
-#
-ifdef CONFIG_X86_64
-KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
-endif
-
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
 KBUILD_CFLAGS += -Wno-sign-compare
 #
@@ -207,6 +192,23 @@ ifdef CONFIG_RETPOLINE
   endif
 endif
 
+KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+
+ifdef CONFIG_X86_NEED_RELOCS
+LDFLAGS_vmlinux := --emit-relocs --discard-none
+else
+LDFLAGS_vmlinux :=
+endif
+
+#
+# The 64-bit kernel must be aligned to 2MB.  Pass -z max-page-size=0x200000 to
+# the linker to force 2MB page size regardless of the default page size used
+# by the linker.
+#
+ifdef CONFIG_X86_64
+LDFLAGS_vmlinux += -z max-page-size=0x200000
+endif
+
 archscripts: scripts_basic
        $(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
index 5a828fd..c08714a 100644 (file)
@@ -26,7 +26,7 @@ OBJECT_FILES_NON_STANDARD     := y
 KCOV_INSTRUMENT                := n
 
 targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
-       vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
+       vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst
 
 KBUILD_CFLAGS := -m$(BITS) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
@@ -42,6 +42,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
 KBUILD_CFLAGS += -Wno-pointer-sign
 KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS += -D__DISABLE_EXPORTS
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
@@ -145,6 +146,8 @@ $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lzo)
 $(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lz4)
+$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE
+       $(call if_changed,zstd22)
 
 suffix-$(CONFIG_KERNEL_GZIP)   := gz
 suffix-$(CONFIG_KERNEL_BZIP2)  := bz2
@@ -152,6 +155,7 @@ suffix-$(CONFIG_KERNEL_LZMA)        := lzma
 suffix-$(CONFIG_KERNEL_XZ)     := xz
 suffix-$(CONFIG_KERNEL_LZO)    := lzo
 suffix-$(CONFIG_KERNEL_LZ4)    := lz4
+suffix-$(CONFIG_KERNEL_ZSTD)   := zst
 
 quiet_cmd_mkpiggy = MKPIGGY $@
       cmd_mkpiggy = $(obj)/mkpiggy $< > $@
index d7408af..0048269 100644 (file)
  */
 #define BOOT_CTYPE_H
 
-/*
- * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
- * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
- * which is meaningless and will cause compiling error in some cases.
- */
-#define __DISABLE_EXPORTS
-
 #include "misc.h"
 #include "error.h"
 #include "../string.h"
index 9652d5c..39e592d 100644 (file)
@@ -77,6 +77,10 @@ static int lines, cols;
 #ifdef CONFIG_KERNEL_LZ4
 #include "../../../../lib/decompress_unlz4.c"
 #endif
+
+#ifdef CONFIG_KERNEL_ZSTD
+#include "../../../../lib/decompress_unzstd.c"
+#endif
 /*
  * NOTE: When adding a new decompressor, please update the analysis in
  * ../header.S.
index 735ad7f..6dbd7e9 100644 (file)
@@ -539,8 +539,14 @@ pref_address:              .quad LOAD_PHYSICAL_ADDR        # preferred load addr
 # the size-dependent part now grows so fast.
 #
 # extra_bytes = (uncompressed_size >> 8) + 65536
+#
+# ZSTD compressed data grows by at most 3 bytes per 128K, and only has a 22
+# byte fixed overhead but has a maximum block size of 128K, so it needs a
+# larger margin.
+#
+# extra_bytes = (uncompressed_size >> 8) + 131072
 
-#define ZO_z_extra_bytes       ((ZO_z_output_len >> 8) + 65536)
+#define ZO_z_extra_bytes       ((ZO_z_output_len >> 8) + 131072)
 #if ZO_z_output_len > ZO_z_input_len
 # define ZO_z_extract_offset   (ZO_z_output_len + ZO_z_extra_bytes - \
                                 ZO_z_input_len)
index 5509045..d7577fe 100644 (file)
@@ -1,39 +1,29 @@
-# CONFIG_64BIT is not set
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_FHANDLE=y
-CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_SCHED=y
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_SMP=y
 CONFIG_X86_GENERIC=y
 CONFIG_HPET_TIMER=y
-CONFIG_SCHED_SMT=y
-CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
-CONFIG_X86_MCE=y
 CONFIG_X86_REBOOTFIXUPS=y
-CONFIG_MICROCODE=y
 CONFIG_MICROCODE_AMD=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
@@ -41,28 +31,25 @@ CONFIG_HIGHPTE=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_EFI=y
+CONFIG_EFI_STUB=y
 CONFIG_HZ_1000=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
-CONFIG_RANDOMIZE_BASE=y
-CONFIG_RANDOMIZE_MEMORY=y
-# CONFIG_COMPAT_VDSO is not set
 CONFIG_HIBERNATION=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_TRACE_RTC=y
 CONFIG_ACPI_DOCK=y
-CONFIG_CPU_FREQ=y
-# CONFIG_CPU_FREQ_STAT is not set
+CONFIG_ACPI_BGRT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
-CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_X86_ACPI_CPUFREQ=y
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-CONFIG_PCI_MSI=y
-CONFIG_PCCARD=y
-CONFIG_YENTA=y
-CONFIG_HOTPLUG_PCI=y
+CONFIG_EFI_VARS=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_BINFMT_MISC=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -82,16 +69,12 @@ CONFIG_IP_MROUTE=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_TCP_CONG_ADVANCED=y
 # CONFIG_TCP_CONG_BIC is not set
 # CONFIG_TCP_CONG_WESTWOOD is not set
 # CONFIG_TCP_CONG_HTCP is not set
 CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6=y
 CONFIG_INET6_AH=y
 CONFIG_INET6_ESP=y
 CONFIG_NETLABEL=y
@@ -102,6 +85,7 @@ CONFIG_NF_CONNTRACK_FTP=y
 CONFIG_NF_CONNTRACK_IRC=y
 CONFIG_NF_CONNTRACK_SIP=y
 CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_NAT=y
 CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
 CONFIG_NETFILTER_XT_TARGET_NFLOG=y
 CONFIG_NETFILTER_XT_TARGET_SECMARK=y
@@ -109,14 +93,11 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
 CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
 CONFIG_NETFILTER_XT_MATCH_POLICY=y
 CONFIG_NETFILTER_XT_MATCH_STATE=y
-CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_NF_NAT=y
-CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=y
-CONFIG_NF_CONNTRACK_IPV6=y
 CONFIG_IP6_NF_IPTABLES=y
 CONFIG_IP6_NF_MATCH_IPV6HEADER=y
 CONFIG_IP6_NF_FILTER=y
@@ -129,6 +110,12 @@ CONFIG_CFG80211=y
 CONFIG_MAC80211=y
 CONFIG_MAC80211_LEDS=y
 CONFIG_RFKILL=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_MSI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_PCCARD=y
+CONFIG_YENTA=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_DEBUG_DEVRES=y
@@ -170,15 +157,12 @@ CONFIG_8139TOO=y
 # CONFIG_8139TOO_PIO is not set
 CONFIG_R8169=y
 CONFIG_INPUT_POLLDEV=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_JOYSTICK=y
 CONFIG_INPUT_TABLET=y
 CONFIG_INPUT_TOUCHSCREEN=y
 CONFIG_INPUT_MISC=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=32
@@ -187,6 +171,7 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
 CONFIG_SERIAL_8250_DETECT_IRQ=y
 CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_HW_RANDOM=y
 CONFIG_NVRAM=y
 CONFIG_HPET=y
@@ -201,19 +186,15 @@ CONFIG_DRM_I915=y
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
 CONFIG_FB_EFI=y
-# CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_VGACON_SOFT_SCROLLBACK=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
 CONFIG_SOUND=y
 CONFIG_SND=y
+CONFIG_SND_HRTIMER=y
 CONFIG_SND_SEQUENCER=y
 CONFIG_SND_SEQ_DUMMY=y
-CONFIG_SND_MIXER_OSS=y
-CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
-CONFIG_SND_HRTIMER=y
 CONFIG_SND_HDA_INTEL=y
 CONFIG_SND_HDA_HWDEP=y
 CONFIG_HIDRAW=y
@@ -234,17 +215,14 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
 CONFIG_USB_STORAGE=y
-CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
 CONFIG_DMADEVICES=y
 CONFIG_EEEPC_LAPTOP=y
-CONFIG_EFI_VARS=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
@@ -270,27 +248,19 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_UTF8=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_PRINTK_TIME=y
-CONFIG_FRAME_WARN=1024
 CONFIG_MAGIC_SYSRQ=y
-# CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
-CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_AES_586=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_EFI_STUB=y
-CONFIG_ACPI_BGRT=y
index 6149610..f856001 100644 (file)
@@ -1,36 +1,26 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_FHANDLE=y
-CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_SCHED=y
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_SMP=y
-CONFIG_NR_CPUS=64
-CONFIG_SCHED_SMT=y
-CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
-CONFIG_X86_MCE=y
-CONFIG_MICROCODE=y
 CONFIG_MICROCODE_AMD=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
@@ -38,30 +28,28 @@ CONFIG_NUMA=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_EFI_MIXED=y
 CONFIG_HZ_1000=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
-CONFIG_RANDOMIZE_BASE=y
-CONFIG_RANDOMIZE_MEMORY=y
-# CONFIG_COMPAT_VDSO is not set
 CONFIG_HIBERNATION=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_TRACE_RTC=y
 CONFIG_ACPI_DOCK=y
-CONFIG_CPU_FREQ=y
-# CONFIG_CPU_FREQ_STAT is not set
+CONFIG_ACPI_BGRT=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
-CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_X86_ACPI_CPUFREQ=y
-CONFIG_PCI=y
-CONFIG_PCI_MMCONFIG=y
-CONFIG_PCIEPORTBUS=y
-CONFIG_PCCARD=y
-CONFIG_YENTA=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_BINFMT_MISC=y
 CONFIG_IA32_EMULATION=y
+CONFIG_EFI_VARS=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_BINFMT_MISC=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -80,16 +68,12 @@ CONFIG_IP_MROUTE=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_TCP_CONG_ADVANCED=y
 # CONFIG_TCP_CONG_BIC is not set
 # CONFIG_TCP_CONG_WESTWOOD is not set
 # CONFIG_TCP_CONG_HTCP is not set
 CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6=y
 CONFIG_INET6_AH=y
 CONFIG_INET6_ESP=y
 CONFIG_NETLABEL=y
@@ -100,6 +84,7 @@ CONFIG_NF_CONNTRACK_FTP=y
 CONFIG_NF_CONNTRACK_IRC=y
 CONFIG_NF_CONNTRACK_SIP=y
 CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_NAT=y
 CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
 CONFIG_NETFILTER_XT_TARGET_NFLOG=y
 CONFIG_NETFILTER_XT_TARGET_SECMARK=y
@@ -107,14 +92,11 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
 CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
 CONFIG_NETFILTER_XT_MATCH_POLICY=y
 CONFIG_NETFILTER_XT_MATCH_STATE=y
-CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_NF_NAT=y
-CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=y
-CONFIG_NF_CONNTRACK_IPV6=y
 CONFIG_IP6_NF_IPTABLES=y
 CONFIG_IP6_NF_MATCH_IPV6HEADER=y
 CONFIG_IP6_NF_FILTER=y
@@ -127,6 +109,11 @@ CONFIG_CFG80211=y
 CONFIG_MAC80211=y
 CONFIG_MAC80211_LEDS=y
 CONFIG_RFKILL=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_PCCARD=y
+CONFIG_YENTA=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_DEBUG_DEVRES=y
@@ -163,15 +150,12 @@ CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y
 CONFIG_R8169=y
 CONFIG_INPUT_POLLDEV=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_JOYSTICK=y
 CONFIG_INPUT_TABLET=y
 CONFIG_INPUT_TOUCHSCREEN=y
 CONFIG_INPUT_MISC=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=32
@@ -180,6 +164,7 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
 CONFIG_SERIAL_8250_DETECT_IRQ=y
 CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_HW_RANDOM=y
 # CONFIG_HW_RANDOM_INTEL is not set
 # CONFIG_HW_RANDOM_AMD is not set
@@ -196,19 +181,15 @@ CONFIG_DRM_I915=y
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
 CONFIG_FB_EFI=y
-# CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_VGACON_SOFT_SCROLLBACK=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
 CONFIG_SOUND=y
 CONFIG_SND=y
+CONFIG_SND_HRTIMER=y
 CONFIG_SND_SEQUENCER=y
 CONFIG_SND_SEQ_DUMMY=y
-CONFIG_SND_MIXER_OSS=y
-CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
-CONFIG_SND_HRTIMER=y
 CONFIG_SND_HDA_INTEL=y
 CONFIG_SND_HDA_HWDEP=y
 CONFIG_HIDRAW=y
@@ -229,12 +210,10 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
 CONFIG_USB_STORAGE=y
-CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
 CONFIG_DMADEVICES=y
@@ -242,7 +221,6 @@ CONFIG_EEEPC_LAPTOP=y
 CONFIG_AMD_IOMMU=y
 CONFIG_INTEL_IOMMU=y
 # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
-CONFIG_EFI_VARS=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
@@ -268,27 +246,18 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_UTF8=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_PRINTK_TIME=y
 CONFIG_MAGIC_SYSRQ=y
-# CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
-CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
-CONFIG_UNWINDER_ORC=y
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_EFI_STUB=y
-CONFIG_EFI_MIXED=y
-CONFIG_ACPI_BGRT=y
index ec437db..3f0fc7d 100644 (file)
@@ -63,7 +63,6 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/inst.h>
 
 #define VMOVDQ         vmovdqu
 
@@ -127,10 +126,6 @@ ddq_add_8:
 
 /* generate a unique variable for ddq_add_x */
 
-.macro setddq n
-       var_ddq_add = ddq_add_\n
-.endm
-
 /* generate a unique variable for xmm register */
 .macro setxdata n
        var_xdata = %xmm\n
@@ -140,9 +135,7 @@ ddq_add_8:
 
 .macro club name, id
 .altmacro
-       .if \name == DDQ_DATA
-               setddq %\id
-       .elseif \name == XDATA
+       .if \name == XDATA
                setxdata %\id
        .endif
 .noaltmacro
@@ -165,9 +158,8 @@ ddq_add_8:
 
        .set i, 1
        .rept (by - 1)
-               club DDQ_DATA, i
                club XDATA, i
-               vpaddq  var_ddq_add(%rip), xcounter, var_xdata
+               vpaddq  (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
                vptest  ddq_low_msk(%rip), var_xdata
                jnz 1f
                vpaddq  ddq_high_add_1(%rip), var_xdata, var_xdata
@@ -180,8 +172,7 @@ ddq_add_8:
        vmovdqa 1*16(p_keys), xkeyA
 
        vpxor   xkey0, xdata0, xdata0
-       club DDQ_DATA, by
-       vpaddq  var_ddq_add(%rip), xcounter, xcounter
+       vpaddq  (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
        vptest  ddq_low_msk(%rip), xcounter
        jnz     1f
        vpaddq  ddq_high_add_1(%rip), xcounter, xcounter
index 54e7d15..1852b19 100644 (file)
@@ -26,7 +26,6 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/inst.h>
 #include <asm/frame.h>
 #include <asm/nospec-branch.h>
 
@@ -201,7 +200,7 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
        mov     \SUBKEY, %r12
        movdqu  (%r12), \TMP3
        movdqa  SHUF_MASK(%rip), \TMP2
-       PSHUFB_XMM \TMP2, \TMP3
+       pshufb  \TMP2, \TMP3
 
        # precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
 
@@ -263,10 +262,10 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
        movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
 
        movdqa  SHUF_MASK(%rip), %xmm2
-       PSHUFB_XMM %xmm2, %xmm0
+       pshufb %xmm2, %xmm0
        movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
 
-       PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+       PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
        movdqu HashKey(%arg2), %xmm13
 
        CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
@@ -347,7 +346,7 @@ _zero_cipher_left_\@:
        paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
        movdqu %xmm0, CurCount(%arg2)
        movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10, %xmm0
+       pshufb %xmm10, %xmm0
 
        ENCRYPT_SINGLE_BLOCK    %xmm0, %xmm1        # Encrypt(K, Yn)
        movdqu %xmm0, PBlockEncKey(%arg2)
@@ -377,7 +376,7 @@ _large_enough_update_\@:
        # get the appropriate shuffle mask
        movdqu  (%r12), %xmm2
        # shift right 16-r13 bytes
-       PSHUFB_XMM  %xmm2, %xmm1
+       pshufb  %xmm2, %xmm1
 
 _data_read_\@:
        lea ALL_F+16(%rip), %r12
@@ -393,12 +392,12 @@ _data_read_\@:
 .ifc \operation, dec
        pand    %xmm1, %xmm2
        movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10 ,%xmm2
+       pshufb %xmm10 ,%xmm2
 
        pxor %xmm2, %xmm8
 .else
        movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10,%xmm0
+       pshufb %xmm10,%xmm0
 
        pxor    %xmm0, %xmm8
 .endif
@@ -408,17 +407,17 @@ _data_read_\@:
        # GHASH computation for the last <16 byte block
        movdqa SHUF_MASK(%rip), %xmm10
        # shuffle xmm0 back to output as ciphertext
-       PSHUFB_XMM %xmm10, %xmm0
+       pshufb %xmm10, %xmm0
 .endif
 
        # Output %r13 bytes
-       MOVQ_R64_XMM %xmm0, %rax
+       movq %xmm0, %rax
        cmp $8, %r13
        jle _less_than_8_bytes_left_\@
        mov %rax, (%arg3 , %r11, 1)
        add $8, %r11
        psrldq $8, %xmm0
-       MOVQ_R64_XMM %xmm0, %rax
+       movq %xmm0, %rax
        sub $8, %r13
 _less_than_8_bytes_left_\@:
        mov %al,  (%arg3, %r11, 1)
@@ -449,7 +448,7 @@ _partial_done\@:
        movd    %r12d, %xmm15             # len(A) in %xmm15
        mov InLen(%arg2), %r12
        shl     $3, %r12                  # len(C) in bits (*128)
-       MOVQ_R64_XMM    %r12, %xmm1
+       movq    %r12, %xmm1
 
        pslldq  $8, %xmm15                # %xmm15 = len(A)||0x0000000000000000
        pxor    %xmm1, %xmm15             # %xmm15 = len(A)||len(C)
@@ -457,7 +456,7 @@ _partial_done\@:
        GHASH_MUL       %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
        # final GHASH computation
        movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10, %xmm8
+       pshufb %xmm10, %xmm8
 
        movdqu OrigIV(%arg2), %xmm0       # %xmm0 = Y0
        ENCRYPT_SINGLE_BLOCK    %xmm0,  %xmm1     # E(K, Y0)
@@ -470,7 +469,7 @@ _return_T_\@:
        cmp     $8, %r11
        jl      _T_4_\@
 _T_8_\@:
-       MOVQ_R64_XMM    %xmm0, %rax
+       movq    %xmm0, %rax
        mov     %rax, (%r10)
        add     $8, %r10
        sub     $8, %r11
@@ -518,9 +517,9 @@ _return_T_done_\@:
        pshufd    $78, \HK, \TMP3
        pxor      \GH, \TMP2            # TMP2 = a1+a0
        pxor      \HK, \TMP3            # TMP3 = b1+b0
-       PCLMULQDQ 0x11, \HK, \TMP1     # TMP1 = a1*b1
-       PCLMULQDQ 0x00, \HK, \GH       # GH = a0*b0
-       PCLMULQDQ 0x00, \TMP3, \TMP2   # TMP2 = (a0+a1)*(b1+b0)
+       pclmulqdq $0x11, \HK, \TMP1     # TMP1 = a1*b1
+       pclmulqdq $0x00, \HK, \GH       # GH = a0*b0
+       pclmulqdq $0x00, \TMP3, \TMP2   # TMP2 = (a0+a1)*(b1+b0)
        pxor      \GH, \TMP2
        pxor      \TMP1, \TMP2          # TMP2 = (a0*b0)+(a1*b0)
        movdqa    \TMP2, \TMP3
@@ -570,7 +569,7 @@ _return_T_done_\@:
         cmp $8, \DLEN
         jl _read_lt8_\@
         mov (\DPTR), %rax
-        MOVQ_R64_XMM %rax, \XMMDst
+        movq %rax, \XMMDst
         sub $8, \DLEN
         jz _done_read_partial_block_\@
        xor %eax, %eax
@@ -579,7 +578,7 @@ _read_next_byte_\@:
         mov 7(\DPTR, \DLEN, 1), %al
         dec \DLEN
         jnz _read_next_byte_\@
-        MOVQ_R64_XMM %rax, \XMM1
+        movq %rax, \XMM1
        pslldq $8, \XMM1
         por \XMM1, \XMMDst
        jmp _done_read_partial_block_\@
@@ -590,7 +589,7 @@ _read_next_byte_lt8_\@:
         mov -1(\DPTR, \DLEN, 1), %al
         dec \DLEN
         jnz _read_next_byte_lt8_\@
-        MOVQ_R64_XMM %rax, \XMMDst
+        movq %rax, \XMMDst
 _done_read_partial_block_\@:
 .endm
 
@@ -608,7 +607,7 @@ _done_read_partial_block_\@:
        jl         _get_AAD_rest\@
 _get_AAD_blocks\@:
        movdqu     (%r10), \TMP7
-       PSHUFB_XMM   %xmm14, \TMP7 # byte-reflect the AAD data
+       pshufb     %xmm14, \TMP7 # byte-reflect the AAD data
        pxor       \TMP7, \TMP6
        GHASH_MUL  \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
        add        $16, %r10
@@ -624,7 +623,7 @@ _get_AAD_rest\@:
        je         _get_AAD_done\@
 
        READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
-       PSHUFB_XMM   %xmm14, \TMP7 # byte-reflect the AAD data
+       pshufb     %xmm14, \TMP7 # byte-reflect the AAD data
        pxor       \TMP6, \TMP7
        GHASH_MUL  \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
        movdqu \TMP7, \TMP6
@@ -667,7 +666,7 @@ _data_read_\@:                              # Finished reading in data
        # r16-r13 is the number of bytes in plaintext mod 16)
        add     %r13, %r12
        movdqu  (%r12), %xmm2           # get the appropriate shuffle mask
-       PSHUFB_XMM %xmm2, %xmm9         # shift right r13 bytes
+       pshufb  %xmm2, %xmm9            # shift right r13 bytes
 
 .ifc \operation, dec
        movdqa  %xmm1, %xmm3
@@ -689,8 +688,8 @@ _no_extra_mask_1_\@:
 
        pand    %xmm1, %xmm3
        movdqa  SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM      %xmm10, %xmm3
-       PSHUFB_XMM      %xmm2, %xmm3
+       pshufb  %xmm10, %xmm3
+       pshufb  %xmm2, %xmm3
        pxor    %xmm3, \AAD_HASH
 
        cmp     $0, %r10
@@ -724,8 +723,8 @@ _no_extra_mask_2_\@:
        pand    %xmm1, %xmm9
 
        movdqa  SHUF_MASK(%rip), %xmm1
-       PSHUFB_XMM %xmm1, %xmm9
-       PSHUFB_XMM %xmm2, %xmm9
+       pshufb  %xmm1, %xmm9
+       pshufb  %xmm2, %xmm9
        pxor    %xmm9, \AAD_HASH
 
        cmp     $0, %r10
@@ -744,8 +743,8 @@ _encode_done_\@:
 
        movdqa  SHUF_MASK(%rip), %xmm10
        # shuffle xmm9 back to output as ciphertext
-       PSHUFB_XMM      %xmm10, %xmm9
-       PSHUFB_XMM      %xmm2, %xmm9
+       pshufb  %xmm10, %xmm9
+       pshufb  %xmm2, %xmm9
 .endif
        # output encrypted Bytes
        cmp     $0, %r10
@@ -759,14 +758,14 @@ _partial_fill_\@:
        mov     \PLAIN_CYPH_LEN, %r13
 _count_set_\@:
        movdqa  %xmm9, %xmm0
-       MOVQ_R64_XMM    %xmm0, %rax
+       movq    %xmm0, %rax
        cmp     $8, %r13
        jle     _less_than_8_bytes_left_\@
 
        mov     %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
        add     $8, \DATA_OFFSET
        psrldq  $8, %xmm0
-       MOVQ_R64_XMM    %xmm0, %rax
+       movq    %xmm0, %rax
        sub     $8, %r13
 _less_than_8_bytes_left_\@:
        movb    %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
@@ -810,7 +809,7 @@ _partial_block_done_\@:
 .else
        MOVADQ          \XMM0, %xmm\index
 .endif
-       PSHUFB_XMM      %xmm14, %xmm\index      # perform a 16 byte swap
+       pshufb  %xmm14, %xmm\index      # perform a 16 byte swap
        pxor            \TMP2, %xmm\index
 .endr
        lea     0x10(%arg1),%r10
@@ -821,7 +820,7 @@ _partial_block_done_\@:
 aes_loop_initial_\@:
        MOVADQ  (%r10),\TMP1
 .irpc  index, \i_seq
-       AESENC  \TMP1, %xmm\index
+       aesenc  \TMP1, %xmm\index
 .endr
        add     $16,%r10
        sub     $1,%eax
@@ -829,7 +828,7 @@ aes_loop_initial_\@:
 
        MOVADQ  (%r10), \TMP1
 .irpc index, \i_seq
-       AESENCLAST \TMP1, %xmm\index         # Last Round
+       aesenclast \TMP1, %xmm\index         # Last Round
 .endr
 .irpc index, \i_seq
        movdqu     (%arg4 , %r11, 1), \TMP1
@@ -841,7 +840,7 @@ aes_loop_initial_\@:
 .ifc \operation, dec
        movdqa     \TMP1, %xmm\index
 .endif
-       PSHUFB_XMM         %xmm14, %xmm\index
+       pshufb     %xmm14, %xmm\index
 
                # prepare plaintext/ciphertext for GHASH computation
 .endr
@@ -876,19 +875,19 @@ aes_loop_initial_\@:
        MOVADQ     ONE(%RIP),\TMP1
        paddd      \TMP1, \XMM0              # INCR Y0
        MOVADQ     \XMM0, \XMM1
-       PSHUFB_XMM  %xmm14, \XMM1        # perform a 16 byte swap
+       pshufb  %xmm14, \XMM1        # perform a 16 byte swap
 
        paddd      \TMP1, \XMM0              # INCR Y0
        MOVADQ     \XMM0, \XMM2
-       PSHUFB_XMM  %xmm14, \XMM2        # perform a 16 byte swap
+       pshufb  %xmm14, \XMM2        # perform a 16 byte swap
 
        paddd      \TMP1, \XMM0              # INCR Y0
        MOVADQ     \XMM0, \XMM3
-       PSHUFB_XMM %xmm14, \XMM3        # perform a 16 byte swap
+       pshufb %xmm14, \XMM3        # perform a 16 byte swap
 
        paddd      \TMP1, \XMM0              # INCR Y0
        MOVADQ     \XMM0, \XMM4
-       PSHUFB_XMM %xmm14, \XMM4        # perform a 16 byte swap
+       pshufb %xmm14, \XMM4        # perform a 16 byte swap
 
        MOVADQ     0(%arg1),\TMP1
        pxor       \TMP1, \XMM1
@@ -897,17 +896,17 @@ aes_loop_initial_\@:
        pxor       \TMP1, \XMM4
 .irpc index, 1234 # do 4 rounds
        movaps 0x10*\index(%arg1), \TMP1
-       AESENC     \TMP1, \XMM1
-       AESENC     \TMP1, \XMM2
-       AESENC     \TMP1, \XMM3
-       AESENC     \TMP1, \XMM4
+       aesenc     \TMP1, \XMM1
+       aesenc     \TMP1, \XMM2
+       aesenc     \TMP1, \XMM3
+       aesenc     \TMP1, \XMM4
 .endr
 .irpc index, 56789 # do next 5 rounds
        movaps 0x10*\index(%arg1), \TMP1
-       AESENC     \TMP1, \XMM1
-       AESENC     \TMP1, \XMM2
-       AESENC     \TMP1, \XMM3
-       AESENC     \TMP1, \XMM4
+       aesenc     \TMP1, \XMM1
+       aesenc     \TMP1, \XMM2
+       aesenc     \TMP1, \XMM3
+       aesenc     \TMP1, \XMM4
 .endr
        lea        0xa0(%arg1),%r10
        mov        keysize,%eax
@@ -918,7 +917,7 @@ aes_loop_initial_\@:
 aes_loop_pre_\@:
        MOVADQ     (%r10),\TMP2
 .irpc  index, 1234
-       AESENC     \TMP2, %xmm\index
+       aesenc     \TMP2, %xmm\index
 .endr
        add        $16,%r10
        sub        $1,%eax
@@ -926,10 +925,10 @@ aes_loop_pre_\@:
 
 aes_loop_pre_done\@:
        MOVADQ     (%r10), \TMP2
-       AESENCLAST \TMP2, \XMM1
-       AESENCLAST \TMP2, \XMM2
-       AESENCLAST \TMP2, \XMM3
-       AESENCLAST \TMP2, \XMM4
+       aesenclast \TMP2, \XMM1
+       aesenclast \TMP2, \XMM2
+       aesenclast \TMP2, \XMM3
+       aesenclast \TMP2, \XMM4
        movdqu     16*0(%arg4 , %r11 , 1), \TMP1
        pxor       \TMP1, \XMM1
 .ifc \operation, dec
@@ -961,12 +960,12 @@ aes_loop_pre_done\@:
 .endif
 
        add        $64, %r11
-       PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+       pshufb %xmm14, \XMM1 # perform a 16 byte swap
        pxor       \XMMDst, \XMM1
 # combine GHASHed value with the corresponding ciphertext
-       PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
-       PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
-       PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+       pshufb %xmm14, \XMM2 # perform a 16 byte swap
+       pshufb %xmm14, \XMM3 # perform a 16 byte swap
+       pshufb %xmm14, \XMM4 # perform a 16 byte swap
 
 _initial_blocks_done\@:
 
@@ -978,7 +977,7 @@ _initial_blocks_done\@:
 * arg1, %arg3, %arg4 are used as pointers only, not modified
 * %r11 is the data offset value
 */
-.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
+.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
 TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 
        movdqa    \XMM1, \XMM5
@@ -994,7 +993,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        pxor      \XMM5, \TMP6
        paddd     ONE(%rip), \XMM0              # INCR CNT
        movdqu    HashKey_4(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
+       pclmulqdq $0x11, \TMP5, \TMP4           # TMP4 = a1*b1
        movdqa    \XMM0, \XMM1
        paddd     ONE(%rip), \XMM0              # INCR CNT
        movdqa    \XMM0, \XMM2
@@ -1002,51 +1001,51 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        movdqa    \XMM0, \XMM3
        paddd     ONE(%rip), \XMM0              # INCR CNT
        movdqa    \XMM0, \XMM4
-       PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
-       PCLMULQDQ 0x00, \TMP5, \XMM5           # XMM5 = a0*b0
-       PSHUFB_XMM %xmm15, \XMM2        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM3        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM4        # perform a 16 byte swap
+       pshufb %xmm15, \XMM1    # perform a 16 byte swap
+       pclmulqdq $0x00, \TMP5, \XMM5           # XMM5 = a0*b0
+       pshufb %xmm15, \XMM2    # perform a 16 byte swap
+       pshufb %xmm15, \XMM3    # perform a 16 byte swap
+       pshufb %xmm15, \XMM4    # perform a 16 byte swap
 
        pxor      (%arg1), \XMM1
        pxor      (%arg1), \XMM2
        pxor      (%arg1), \XMM3
        pxor      (%arg1), \XMM4
        movdqu    HashKey_4_k(%arg2), \TMP5
-       PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP5, \TMP6       # TMP6 = (a1+a0)*(b1+b0)
        movaps 0x10(%arg1), \TMP1
-       AESENC    \TMP1, \XMM1              # Round 1
-       AESENC    \TMP1, \XMM2
-       AESENC    \TMP1, \XMM3
-       AESENC    \TMP1, \XMM4
+       aesenc    \TMP1, \XMM1              # Round 1
+       aesenc    \TMP1, \XMM2
+       aesenc    \TMP1, \XMM3
+       aesenc    \TMP1, \XMM4
        movaps 0x20(%arg1), \TMP1
-       AESENC    \TMP1, \XMM1              # Round 2
-       AESENC    \TMP1, \XMM2
-       AESENC    \TMP1, \XMM3
-       AESENC    \TMP1, \XMM4
+       aesenc    \TMP1, \XMM1              # Round 2
+       aesenc    \TMP1, \XMM2
+       aesenc    \TMP1, \XMM3
+       aesenc    \TMP1, \XMM4
        movdqa    \XMM6, \TMP1
        pshufd    $78, \XMM6, \TMP2
        pxor      \XMM6, \TMP2
        movdqu    HashKey_3(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
+       pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1 * b1
        movaps 0x30(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1              # Round 3
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
-       PCLMULQDQ 0x00, \TMP5, \XMM6           # XMM6 = a0*b0
+       aesenc    \TMP3, \XMM1              # Round 3
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
+       pclmulqdq $0x00, \TMP5, \XMM6       # XMM6 = a0*b0
        movaps 0x40(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1              # Round 4
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
+       aesenc    \TMP3, \XMM1              # Round 4
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
        movdqu    HashKey_3_k(%arg2), \TMP5
-       PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
        movaps 0x50(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1              # Round 5
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
+       aesenc    \TMP3, \XMM1              # Round 5
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
        pxor      \TMP1, \TMP4
 # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
        pxor      \XMM6, \XMM5
@@ -1058,25 +1057,25 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 
         # Multiply TMP5 * HashKey using karatsuba
 
-       PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1*b1
+       pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
        movaps 0x60(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1              # Round 6
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
-       PCLMULQDQ 0x00, \TMP5, \XMM7           # XMM7 = a0*b0
+       aesenc    \TMP3, \XMM1              # Round 6
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
+       pclmulqdq $0x00, \TMP5, \XMM7       # XMM7 = a0*b0
        movaps 0x70(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1             # Round 7
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
+       aesenc    \TMP3, \XMM1              # Round 7
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
        movdqu    HashKey_2_k(%arg2), \TMP5
-       PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
        movaps 0x80(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1             # Round 8
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
+       aesenc    \TMP3, \XMM1              # Round 8
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
        pxor      \TMP1, \TMP4
 # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
        pxor      \XMM7, \XMM5
@@ -1089,13 +1088,13 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        pshufd    $78, \XMM8, \TMP2
        pxor      \XMM8, \TMP2
        movdqu    HashKey(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
+       pclmulqdq $0x11, \TMP5, \TMP1      # TMP1 = a1*b1
        movaps 0x90(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1            # Round 9
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
-       PCLMULQDQ 0x00, \TMP5, \XMM8          # XMM8 = a0*b0
+       aesenc    \TMP3, \XMM1             # Round 9
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
+       pclmulqdq $0x00, \TMP5, \XMM8      # XMM8 = a0*b0
        lea       0xa0(%arg1),%r10
        mov       keysize,%eax
        shr       $2,%eax                       # 128->4, 192->6, 256->8
@@ -1105,7 +1104,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 aes_loop_par_enc\@:
        MOVADQ    (%r10),\TMP3
 .irpc  index, 1234
-       AESENC    \TMP3, %xmm\index
+       aesenc    \TMP3, %xmm\index
 .endr
        add       $16,%r10
        sub       $1,%eax
@@ -1113,12 +1112,12 @@ aes_loop_par_enc\@:
 
 aes_loop_par_enc_done\@:
        MOVADQ    (%r10), \TMP3
-       AESENCLAST \TMP3, \XMM1           # Round 10
-       AESENCLAST \TMP3, \XMM2
-       AESENCLAST \TMP3, \XMM3
-       AESENCLAST \TMP3, \XMM4
+       aesenclast \TMP3, \XMM1           # Round 10
+       aesenclast \TMP3, \XMM2
+       aesenclast \TMP3, \XMM3
+       aesenclast \TMP3, \XMM4
        movdqu    HashKey_k(%arg2), \TMP5
-       PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
        movdqu    (%arg4,%r11,1), \TMP3
        pxor      \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
        movdqu    16(%arg4,%r11,1), \TMP3
@@ -1131,10 +1130,10 @@ aes_loop_par_enc_done\@:
         movdqu    \XMM2, 16(%arg3,%r11,1)      # Write to the ciphertext buffer
         movdqu    \XMM3, 32(%arg3,%r11,1)      # Write to the ciphertext buffer
         movdqu    \XMM4, 48(%arg3,%r11,1)      # Write to the ciphertext buffer
-       PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM2        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM3        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM4        # perform a 16 byte swap
+       pshufb %xmm15, \XMM1        # perform a 16 byte swap
+       pshufb %xmm15, \XMM2    # perform a 16 byte swap
+       pshufb %xmm15, \XMM3    # perform a 16 byte swap
+       pshufb %xmm15, \XMM4    # perform a 16 byte swap
 
        pxor      \TMP4, \TMP1
        pxor      \XMM8, \XMM5
@@ -1186,7 +1185,7 @@ aes_loop_par_enc_done\@:
 * arg1, %arg3, %arg4 are used as pointers only, not modified
 * %r11 is the data offset value
 */
-.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
+.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
 TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 
        movdqa    \XMM1, \XMM5
@@ -1202,7 +1201,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        pxor      \XMM5, \TMP6
        paddd     ONE(%rip), \XMM0              # INCR CNT
        movdqu    HashKey_4(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
+       pclmulqdq $0x11, \TMP5, \TMP4           # TMP4 = a1*b1
        movdqa    \XMM0, \XMM1
        paddd     ONE(%rip), \XMM0              # INCR CNT
        movdqa    \XMM0, \XMM2
@@ -1210,51 +1209,51 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        movdqa    \XMM0, \XMM3
        paddd     ONE(%rip), \XMM0              # INCR CNT
        movdqa    \XMM0, \XMM4
-       PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
-       PCLMULQDQ 0x00, \TMP5, \XMM5           # XMM5 = a0*b0
-       PSHUFB_XMM %xmm15, \XMM2        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM3        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM4        # perform a 16 byte swap
+       pshufb %xmm15, \XMM1    # perform a 16 byte swap
+       pclmulqdq $0x00, \TMP5, \XMM5           # XMM5 = a0*b0
+       pshufb %xmm15, \XMM2    # perform a 16 byte swap
+       pshufb %xmm15, \XMM3    # perform a 16 byte swap
+       pshufb %xmm15, \XMM4    # perform a 16 byte swap
 
        pxor      (%arg1), \XMM1
        pxor      (%arg1), \XMM2
        pxor      (%arg1), \XMM3
        pxor      (%arg1), \XMM4
        movdqu    HashKey_4_k(%arg2), \TMP5
-       PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP5, \TMP6       # TMP6 = (a1+a0)*(b1+b0)
        movaps 0x10(%arg1), \TMP1
-       AESENC    \TMP1, \XMM1              # Round 1
-       AESENC    \TMP1, \XMM2
-       AESENC    \TMP1, \XMM3
-       AESENC    \TMP1, \XMM4
+       aesenc    \TMP1, \XMM1              # Round 1
+       aesenc    \TMP1, \XMM2
+       aesenc    \TMP1, \XMM3
+       aesenc    \TMP1, \XMM4
        movaps 0x20(%arg1), \TMP1
-       AESENC    \TMP1, \XMM1              # Round 2
-       AESENC    \TMP1, \XMM2
-       AESENC    \TMP1, \XMM3
-       AESENC    \TMP1, \XMM4
+       aesenc    \TMP1, \XMM1              # Round 2
+       aesenc    \TMP1, \XMM2
+       aesenc    \TMP1, \XMM3
+       aesenc    \TMP1, \XMM4
        movdqa    \XMM6, \TMP1
        pshufd    $78, \XMM6, \TMP2
        pxor      \XMM6, \TMP2
        movdqu    HashKey_3(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
+       pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1 * b1
        movaps 0x30(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1              # Round 3
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
-       PCLMULQDQ 0x00, \TMP5, \XMM6           # XMM6 = a0*b0
+       aesenc    \TMP3, \XMM1              # Round 3
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
+       pclmulqdq $0x00, \TMP5, \XMM6       # XMM6 = a0*b0
        movaps 0x40(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1              # Round 4
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
+       aesenc    \TMP3, \XMM1              # Round 4
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
        movdqu    HashKey_3_k(%arg2), \TMP5
-       PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
        movaps 0x50(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1              # Round 5
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
+       aesenc    \TMP3, \XMM1              # Round 5
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
        pxor      \TMP1, \TMP4
 # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
        pxor      \XMM6, \XMM5
@@ -1266,25 +1265,25 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 
         # Multiply TMP5 * HashKey using karatsuba
 
-       PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1*b1
+       pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
        movaps 0x60(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1              # Round 6
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
-       PCLMULQDQ 0x00, \TMP5, \XMM7           # XMM7 = a0*b0
+       aesenc    \TMP3, \XMM1              # Round 6
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
+       pclmulqdq $0x00, \TMP5, \XMM7       # XMM7 = a0*b0
        movaps 0x70(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1             # Round 7
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
+       aesenc    \TMP3, \XMM1              # Round 7
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
        movdqu    HashKey_2_k(%arg2), \TMP5
-       PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
        movaps 0x80(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1             # Round 8
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
+       aesenc    \TMP3, \XMM1              # Round 8
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
        pxor      \TMP1, \TMP4
 # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
        pxor      \XMM7, \XMM5
@@ -1297,13 +1296,13 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
        pshufd    $78, \XMM8, \TMP2
        pxor      \XMM8, \TMP2
        movdqu    HashKey(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
+       pclmulqdq $0x11, \TMP5, \TMP1      # TMP1 = a1*b1
        movaps 0x90(%arg1), \TMP3
-       AESENC    \TMP3, \XMM1            # Round 9
-       AESENC    \TMP3, \XMM2
-       AESENC    \TMP3, \XMM3
-       AESENC    \TMP3, \XMM4
-       PCLMULQDQ 0x00, \TMP5, \XMM8          # XMM8 = a0*b0
+       aesenc    \TMP3, \XMM1             # Round 9
+       aesenc    \TMP3, \XMM2
+       aesenc    \TMP3, \XMM3
+       aesenc    \TMP3, \XMM4
+       pclmulqdq $0x00, \TMP5, \XMM8      # XMM8 = a0*b0
        lea       0xa0(%arg1),%r10
        mov       keysize,%eax
        shr       $2,%eax                       # 128->4, 192->6, 256->8
@@ -1313,7 +1312,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 aes_loop_par_dec\@:
        MOVADQ    (%r10),\TMP3
 .irpc  index, 1234
-       AESENC    \TMP3, %xmm\index
+       aesenc    \TMP3, %xmm\index
 .endr
        add       $16,%r10
        sub       $1,%eax
@@ -1321,12 +1320,12 @@ aes_loop_par_dec\@:
 
 aes_loop_par_dec_done\@:
        MOVADQ    (%r10), \TMP3
-       AESENCLAST \TMP3, \XMM1           # last round
-       AESENCLAST \TMP3, \XMM2
-       AESENCLAST \TMP3, \XMM3
-       AESENCLAST \TMP3, \XMM4
+       aesenclast \TMP3, \XMM1           # last round
+       aesenclast \TMP3, \XMM2
+       aesenclast \TMP3, \XMM3
+       aesenclast \TMP3, \XMM4
        movdqu    HashKey_k(%arg2), \TMP5
-       PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
        movdqu    (%arg4,%r11,1), \TMP3
        pxor      \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
        movdqu    \XMM1, (%arg3,%r11,1)        # Write to plaintext buffer
@@ -1343,10 +1342,10 @@ aes_loop_par_dec_done\@:
        pxor      \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
        movdqu    \XMM4, 48(%arg3,%r11,1)      # Write to plaintext buffer
        movdqa    \TMP3, \XMM4
-       PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM2        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM3        # perform a 16 byte swap
-       PSHUFB_XMM %xmm15, \XMM4        # perform a 16 byte swap
+       pshufb %xmm15, \XMM1        # perform a 16 byte swap
+       pshufb %xmm15, \XMM2    # perform a 16 byte swap
+       pshufb %xmm15, \XMM3    # perform a 16 byte swap
+       pshufb %xmm15, \XMM4    # perform a 16 byte swap
 
        pxor      \TMP4, \TMP1
        pxor      \XMM8, \XMM5
@@ -1402,10 +1401,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
        pshufd    $78, \XMM1, \TMP2
        pxor      \XMM1, \TMP2
        movdqu    HashKey_4(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP6       # TMP6 = a1*b1
-       PCLMULQDQ 0x00, \TMP5, \XMM1       # XMM1 = a0*b0
+       pclmulqdq $0x11, \TMP5, \TMP6       # TMP6 = a1*b1
+       pclmulqdq $0x00, \TMP5, \XMM1       # XMM1 = a0*b0
        movdqu    HashKey_4_k(%arg2), \TMP4
-       PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
        movdqa    \XMM1, \XMMDst
        movdqa    \TMP2, \XMM1              # result in TMP6, XMMDst, XMM1
 
@@ -1415,10 +1414,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
        pshufd    $78, \XMM2, \TMP2
        pxor      \XMM2, \TMP2
        movdqu    HashKey_3(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
-       PCLMULQDQ 0x00, \TMP5, \XMM2       # XMM2 = a0*b0
+       pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
+       pclmulqdq $0x00, \TMP5, \XMM2       # XMM2 = a0*b0
        movdqu    HashKey_3_k(%arg2), \TMP4
-       PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
        pxor      \TMP1, \TMP6
        pxor      \XMM2, \XMMDst
        pxor      \TMP2, \XMM1
@@ -1430,10 +1429,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
        pshufd    $78, \XMM3, \TMP2
        pxor      \XMM3, \TMP2
        movdqu    HashKey_2(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
-       PCLMULQDQ 0x00, \TMP5, \XMM3       # XMM3 = a0*b0
+       pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
+       pclmulqdq $0x00, \TMP5, \XMM3       # XMM3 = a0*b0
        movdqu    HashKey_2_k(%arg2), \TMP4
-       PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
        pxor      \TMP1, \TMP6
        pxor      \XMM3, \XMMDst
        pxor      \TMP2, \XMM1   # results accumulated in TMP6, XMMDst, XMM1
@@ -1443,10 +1442,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
        pshufd    $78, \XMM4, \TMP2
        pxor      \XMM4, \TMP2
        movdqu    HashKey(%arg2), \TMP5
-       PCLMULQDQ 0x11, \TMP5, \TMP1        # TMP1 = a1*b1
-       PCLMULQDQ 0x00, \TMP5, \XMM4       # XMM4 = a0*b0
+       pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
+       pclmulqdq $0x00, \TMP5, \XMM4       # XMM4 = a0*b0
        movdqu    HashKey_k(%arg2), \TMP4
-       PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
+       pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
        pxor      \TMP1, \TMP6
        pxor      \XMM4, \XMMDst
        pxor      \XMM1, \TMP2
@@ -1504,13 +1503,13 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 
 _esb_loop_\@:
        MOVADQ          (%r10),\TMP1
-       AESENC          \TMP1,\XMM0
+       aesenc          \TMP1,\XMM0
        add             $16,%r10
        sub             $1,%eax
        jnz             _esb_loop_\@
 
        MOVADQ          (%r10),\TMP1
-       AESENCLAST      \TMP1,\XMM0
+       aesenclast      \TMP1,\XMM0
 .endm
 /*****************************************************************************
 * void aesni_gcm_dec(void *aes_ctx,    // AES Key schedule. Starts on a 16 byte boundary.
@@ -1849,72 +1848,72 @@ SYM_FUNC_START(aesni_set_key)
        movups 0x10(UKEYP), %xmm2       # other user key
        movaps %xmm2, (TKEYP)
        add $0x10, TKEYP
-       AESKEYGENASSIST 0x1 %xmm2 %xmm1         # round 1
+       aeskeygenassist $0x1, %xmm2, %xmm1      # round 1
        call _key_expansion_256a
-       AESKEYGENASSIST 0x1 %xmm0 %xmm1
+       aeskeygenassist $0x1, %xmm0, %xmm1
        call _key_expansion_256b
-       AESKEYGENASSIST 0x2 %xmm2 %xmm1         # round 2
+       aeskeygenassist $0x2, %xmm2, %xmm1      # round 2
        call _key_expansion_256a
-       AESKEYGENASSIST 0x2 %xmm0 %xmm1
+       aeskeygenassist $0x2, %xmm0, %xmm1
        call _key_expansion_256b
-       AESKEYGENASSIST 0x4 %xmm2 %xmm1         # round 3
+       aeskeygenassist $0x4, %xmm2, %xmm1      # round 3
        call _key_expansion_256a
-       AESKEYGENASSIST 0x4 %xmm0 %xmm1
+       aeskeygenassist $0x4, %xmm0, %xmm1
        call _key_expansion_256b
-       AESKEYGENASSIST 0x8 %xmm2 %xmm1         # round 4
+       aeskeygenassist $0x8, %xmm2, %xmm1      # round 4
        call _key_expansion_256a
-       AESKEYGENASSIST 0x8 %xmm0 %xmm1
+       aeskeygenassist $0x8, %xmm0, %xmm1
        call _key_expansion_256b
-       AESKEYGENASSIST 0x10 %xmm2 %xmm1        # round 5
+       aeskeygenassist $0x10, %xmm2, %xmm1     # round 5
        call _key_expansion_256a
-       AESKEYGENASSIST 0x10 %xmm0 %xmm1
+       aeskeygenassist $0x10, %xmm0, %xmm1
        call _key_expansion_256b
-       AESKEYGENASSIST 0x20 %xmm2 %xmm1        # round 6
+       aeskeygenassist $0x20, %xmm2, %xmm1     # round 6
        call _key_expansion_256a
-       AESKEYGENASSIST 0x20 %xmm0 %xmm1
+       aeskeygenassist $0x20, %xmm0, %xmm1
        call _key_expansion_256b
-       AESKEYGENASSIST 0x40 %xmm2 %xmm1        # round 7
+       aeskeygenassist $0x40, %xmm2, %xmm1     # round 7
        call _key_expansion_256a
        jmp .Ldec_key
 .Lenc_key192:
        movq 0x10(UKEYP), %xmm2         # other user key
-       AESKEYGENASSIST 0x1 %xmm2 %xmm1         # round 1
+       aeskeygenassist $0x1, %xmm2, %xmm1      # round 1
        call _key_expansion_192a
-       AESKEYGENASSIST 0x2 %xmm2 %xmm1         # round 2
+       aeskeygenassist $0x2, %xmm2, %xmm1      # round 2
        call _key_expansion_192b
-       AESKEYGENASSIST 0x4 %xmm2 %xmm1         # round 3
+       aeskeygenassist $0x4, %xmm2, %xmm1      # round 3
        call _key_expansion_192a
-       AESKEYGENASSIST 0x8 %xmm2 %xmm1         # round 4
+       aeskeygenassist $0x8, %xmm2, %xmm1      # round 4
        call _key_expansion_192b
-       AESKEYGENASSIST 0x10 %xmm2 %xmm1        # round 5
+       aeskeygenassist $0x10, %xmm2, %xmm1     # round 5
        call _key_expansion_192a
-       AESKEYGENASSIST 0x20 %xmm2 %xmm1        # round 6
+       aeskeygenassist $0x20, %xmm2, %xmm1     # round 6
        call _key_expansion_192b
-       AESKEYGENASSIST 0x40 %xmm2 %xmm1        # round 7
+       aeskeygenassist $0x40, %xmm2, %xmm1     # round 7
        call _key_expansion_192a
-       AESKEYGENASSIST 0x80 %xmm2 %xmm1        # round 8
+       aeskeygenassist $0x80, %xmm2, %xmm1     # round 8
        call _key_expansion_192b
        jmp .Ldec_key
 .Lenc_key128:
-       AESKEYGENASSIST 0x1 %xmm0 %xmm1         # round 1
+       aeskeygenassist $0x1, %xmm0, %xmm1      # round 1
        call _key_expansion_128
-       AESKEYGENASSIST 0x2 %xmm0 %xmm1         # round 2
+       aeskeygenassist $0x2, %xmm0, %xmm1      # round 2
        call _key_expansion_128
-       AESKEYGENASSIST 0x4 %xmm0 %xmm1         # round 3
+       aeskeygenassist $0x4, %xmm0, %xmm1      # round 3
        call _key_expansion_128
-       AESKEYGENASSIST 0x8 %xmm0 %xmm1         # round 4
+       aeskeygenassist $0x8, %xmm0, %xmm1      # round 4
        call _key_expansion_128
-       AESKEYGENASSIST 0x10 %xmm0 %xmm1        # round 5
+       aeskeygenassist $0x10, %xmm0, %xmm1     # round 5
        call _key_expansion_128
-       AESKEYGENASSIST 0x20 %xmm0 %xmm1        # round 6
+       aeskeygenassist $0x20, %xmm0, %xmm1     # round 6
        call _key_expansion_128
-       AESKEYGENASSIST 0x40 %xmm0 %xmm1        # round 7
+       aeskeygenassist $0x40, %xmm0, %xmm1     # round 7
        call _key_expansion_128
-       AESKEYGENASSIST 0x80 %xmm0 %xmm1        # round 8
+       aeskeygenassist $0x80, %xmm0, %xmm1     # round 8
        call _key_expansion_128
-       AESKEYGENASSIST 0x1b %xmm0 %xmm1        # round 9
+       aeskeygenassist $0x1b, %xmm0, %xmm1     # round 9
        call _key_expansion_128
-       AESKEYGENASSIST 0x36 %xmm0 %xmm1        # round 10
+       aeskeygenassist $0x36, %xmm0, %xmm1     # round 10
        call _key_expansion_128
 .Ldec_key:
        sub $0x10, TKEYP
@@ -1927,7 +1926,7 @@ SYM_FUNC_START(aesni_set_key)
 .align 4
 .Ldec_key_loop:
        movaps (KEYP), %xmm0
-       AESIMC %xmm0 %xmm1
+       aesimc %xmm0, %xmm1
        movaps %xmm1, (UKEYP)
        add $0x10, KEYP
        sub $0x10, UKEYP
@@ -1988,37 +1987,37 @@ SYM_FUNC_START_LOCAL(_aesni_enc1)
        je .Lenc192
        add $0x20, TKEYP
        movaps -0x60(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps -0x50(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
 .align 4
 .Lenc192:
        movaps -0x40(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps -0x30(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
 .align 4
 .Lenc128:
        movaps -0x20(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps -0x10(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps (TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps 0x10(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps 0x20(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps 0x30(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps 0x40(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps 0x50(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps 0x60(TKEYP), KEY
-       AESENC KEY STATE
+       aesenc KEY, STATE
        movaps 0x70(TKEYP), KEY
-       AESENCLAST KEY STATE
+       aesenclast KEY, STATE
        ret
 SYM_FUNC_END(_aesni_enc1)
 
@@ -2054,79 +2053,79 @@ SYM_FUNC_START_LOCAL(_aesni_enc4)
        je .L4enc192
        add $0x20, TKEYP
        movaps -0x60(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps -0x50(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
 #.align 4
 .L4enc192:
        movaps -0x40(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps -0x30(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
 #.align 4
 .L4enc128:
        movaps -0x20(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps -0x10(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps (TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps 0x10(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps 0x20(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps 0x30(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps 0x40(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps 0x50(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps 0x60(TKEYP), KEY
-       AESENC KEY STATE1
-       AESENC KEY STATE2
-       AESENC KEY STATE3
-       AESENC KEY STATE4
+       aesenc KEY, STATE1
+       aesenc KEY, STATE2
+       aesenc KEY, STATE3
+       aesenc KEY, STATE4
        movaps 0x70(TKEYP), KEY
-       AESENCLAST KEY STATE1           # last round
-       AESENCLAST KEY STATE2
-       AESENCLAST KEY STATE3
-       AESENCLAST KEY STATE4
+       aesenclast KEY, STATE1          # last round
+       aesenclast KEY, STATE2
+       aesenclast KEY, STATE3
+       aesenclast KEY, STATE4
        ret
 SYM_FUNC_END(_aesni_enc4)
 
@@ -2178,37 +2177,37 @@ SYM_FUNC_START_LOCAL(_aesni_dec1)
        je .Ldec192
        add $0x20, TKEYP
        movaps -0x60(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps -0x50(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
 .align 4
 .Ldec192:
        movaps -0x40(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps -0x30(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
 .align 4
 .Ldec128:
        movaps -0x20(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps -0x10(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps (TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps 0x10(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps 0x20(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps 0x30(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps 0x40(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps 0x50(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps 0x60(TKEYP), KEY
-       AESDEC KEY STATE
+       aesdec KEY, STATE
        movaps 0x70(TKEYP), KEY
-       AESDECLAST KEY STATE
+       aesdeclast KEY, STATE
        ret
 SYM_FUNC_END(_aesni_dec1)
 
@@ -2244,79 +2243,79 @@ SYM_FUNC_START_LOCAL(_aesni_dec4)
        je .L4dec192
        add $0x20, TKEYP
        movaps -0x60(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps -0x50(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
 .align 4
 .L4dec192:
        movaps -0x40(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps -0x30(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
 .align 4
 .L4dec128:
        movaps -0x20(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps -0x10(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps (TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps 0x10(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps 0x20(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps 0x30(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps 0x40(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps 0x50(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps 0x60(TKEYP), KEY
-       AESDEC KEY STATE1
-       AESDEC KEY STATE2
-       AESDEC KEY STATE3
-       AESDEC KEY STATE4
+       aesdec KEY, STATE1
+       aesdec KEY, STATE2
+       aesdec KEY, STATE3
+       aesdec KEY, STATE4
        movaps 0x70(TKEYP), KEY
-       AESDECLAST KEY STATE1           # last round
-       AESDECLAST KEY STATE2
-       AESDECLAST KEY STATE3
-       AESDECLAST KEY STATE4
+       aesdeclast KEY, STATE1          # last round
+       aesdeclast KEY, STATE2
+       aesdeclast KEY, STATE3
+       aesdeclast KEY, STATE4
        ret
 SYM_FUNC_END(_aesni_dec4)
 
@@ -2599,10 +2598,10 @@ SYM_FUNC_END(aesni_cbc_dec)
 SYM_FUNC_START_LOCAL(_aesni_inc_init)
        movaps .Lbswap_mask, BSWAP_MASK
        movaps IV, CTR
-       PSHUFB_XMM BSWAP_MASK CTR
+       pshufb BSWAP_MASK, CTR
        mov $1, TCTR_LOW
-       MOVQ_R64_XMM TCTR_LOW INC
-       MOVQ_R64_XMM CTR TCTR_LOW
+       movq TCTR_LOW, INC
+       movq CTR, TCTR_LOW
        ret
 SYM_FUNC_END(_aesni_inc_init)
 
@@ -2630,7 +2629,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc)
        psrldq $8, INC
 .Linc_low:
        movaps CTR, IV
-       PSHUFB_XMM BSWAP_MASK IV
+       pshufb BSWAP_MASK, IV
        ret
 SYM_FUNC_END(_aesni_inc)
 
index 0cea332..5fee479 100644 (file)
 ##
 
 #include <linux/linkage.h>
-#include <asm/inst.h>
 
 # constants in mergeable sections, linker can reorder and merge
 .section       .rodata.cst16.POLY, "aM", @progbits, 16
index a38ab25..ca1788b 100644 (file)
@@ -120,10 +120,10 @@ SYM_FUNC_START(chacha_block_xor_ssse3)
        FRAME_BEGIN
 
        # x0..3 = s0..3
-       movdqa          0x00(%rdi),%xmm0
-       movdqa          0x10(%rdi),%xmm1
-       movdqa          0x20(%rdi),%xmm2
-       movdqa          0x30(%rdi),%xmm3
+       movdqu          0x00(%rdi),%xmm0
+       movdqu          0x10(%rdi),%xmm1
+       movdqu          0x20(%rdi),%xmm2
+       movdqu          0x30(%rdi),%xmm3
        movdqa          %xmm0,%xmm8
        movdqa          %xmm1,%xmm9
        movdqa          %xmm2,%xmm10
@@ -205,10 +205,10 @@ SYM_FUNC_START(hchacha_block_ssse3)
        # %edx: nrounds
        FRAME_BEGIN
 
-       movdqa          0x00(%rdi),%xmm0
-       movdqa          0x10(%rdi),%xmm1
-       movdqa          0x20(%rdi),%xmm2
-       movdqa          0x30(%rdi),%xmm3
+       movdqu          0x00(%rdi),%xmm0
+       movdqu          0x10(%rdi),%xmm1
+       movdqu          0x20(%rdi),%xmm2
+       movdqu          0x30(%rdi),%xmm3
 
        mov             %edx,%r8d
        call            chacha_permute
index 2225009..e67a591 100644 (file)
@@ -14,8 +14,6 @@
 #include <linux/module.h>
 #include <asm/simd.h>
 
-#define CHACHA_STATE_ALIGN 16
-
 asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
                                       unsigned int len, int nrounds);
 asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
@@ -124,8 +122,6 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 
 void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
 {
-       state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
-
        if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
                hchacha_block_generic(state, stream, nrounds);
        } else {
@@ -138,8 +134,6 @@ EXPORT_SYMBOL(hchacha_block_arch);
 
 void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
 {
-       state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
-
        chacha_init_generic(state, key, iv);
 }
 EXPORT_SYMBOL(chacha_init_arch);
@@ -147,8 +141,6 @@ EXPORT_SYMBOL(chacha_init_arch);
 void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
                       int nrounds)
 {
-       state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
-
        if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
            bytes <= CHACHA_BLOCK_SIZE)
                return chacha_crypt_generic(state, dst, src, bytes, nrounds);
@@ -170,15 +162,12 @@ EXPORT_SYMBOL(chacha_crypt_arch);
 static int chacha_simd_stream_xor(struct skcipher_request *req,
                                  const struct chacha_ctx *ctx, const u8 *iv)
 {
-       u32 *state, state_buf[16 + 2] __aligned(8);
+       u32 state[CHACHA_STATE_WORDS] __aligned(8);
        struct skcipher_walk walk;
        int err;
 
        err = skcipher_walk_virt(&walk, req, false);
 
-       BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
-       state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
-
        chacha_init_generic(state, ctx->key, iv);
 
        while (walk.nbytes > 0) {
@@ -217,12 +206,10 @@ static int xchacha_simd(struct skcipher_request *req)
 {
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-       u32 *state, state_buf[16 + 2] __aligned(8);
+       u32 state[CHACHA_STATE_WORDS] __aligned(8);
        struct chacha_ctx subctx;
        u8 real_iv[16];
 
-       BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
-       state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
        chacha_init_generic(state, ctx->key, req->iv);
 
        if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
index 9fd28ff..6e7d4c4 100644 (file)
@@ -38,7 +38,6 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/inst.h>
 
 
 .section .rodata
@@ -129,17 +128,17 @@ loop_64:/*  64 bytes Full cache line folding */
 #ifdef __x86_64__
        movdqa  %xmm4, %xmm8
 #endif
-       PCLMULQDQ 00, CONSTANT, %xmm1
-       PCLMULQDQ 00, CONSTANT, %xmm2
-       PCLMULQDQ 00, CONSTANT, %xmm3
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm2
+       pclmulqdq $0x00, CONSTANT, %xmm3
 #ifdef __x86_64__
-       PCLMULQDQ 00, CONSTANT, %xmm4
+       pclmulqdq $0x00, CONSTANT, %xmm4
 #endif
-       PCLMULQDQ 0x11, CONSTANT, %xmm5
-       PCLMULQDQ 0x11, CONSTANT, %xmm6
-       PCLMULQDQ 0x11, CONSTANT, %xmm7
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pclmulqdq $0x11, CONSTANT, %xmm6
+       pclmulqdq $0x11, CONSTANT, %xmm7
 #ifdef __x86_64__
-       PCLMULQDQ 0x11, CONSTANT, %xmm8
+       pclmulqdq $0x11, CONSTANT, %xmm8
 #endif
        pxor    %xmm5, %xmm1
        pxor    %xmm6, %xmm2
@@ -149,8 +148,8 @@ loop_64:/*  64 bytes Full cache line folding */
 #else
        /* xmm8 unsupported for x32 */
        movdqa  %xmm4, %xmm5
-       PCLMULQDQ 00, CONSTANT, %xmm4
-       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm4
+       pclmulqdq $0x11, CONSTANT, %xmm5
        pxor    %xmm5, %xmm4
 #endif
 
@@ -172,20 +171,20 @@ less_64:/*  Folding cache line into 128bit */
        prefetchnta     (BUF)
 
        movdqa  %xmm1, %xmm5
-       PCLMULQDQ 0x00, CONSTANT, %xmm1
-       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
        pxor    %xmm5, %xmm1
        pxor    %xmm2, %xmm1
 
        movdqa  %xmm1, %xmm5
-       PCLMULQDQ 0x00, CONSTANT, %xmm1
-       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
        pxor    %xmm5, %xmm1
        pxor    %xmm3, %xmm1
 
        movdqa  %xmm1, %xmm5
-       PCLMULQDQ 0x00, CONSTANT, %xmm1
-       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
        pxor    %xmm5, %xmm1
        pxor    %xmm4, %xmm1
 
@@ -193,8 +192,8 @@ less_64:/*  Folding cache line into 128bit */
        jb      fold_64
 loop_16:/* Folding rest buffer into 128bit */
        movdqa  %xmm1, %xmm5
-       PCLMULQDQ 0x00, CONSTANT, %xmm1
-       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
        pxor    %xmm5, %xmm1
        pxor    (BUF), %xmm1
        sub     $0x10, LEN
@@ -205,7 +204,7 @@ loop_16:/* Folding rest buffer into 128bit */
 fold_64:
        /* perform the last 64 bit fold, also adds 32 zeroes
         * to the input stream */
-       PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
+       pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
        psrldq  $0x08, %xmm1
        pxor    CONSTANT, %xmm1
 
@@ -220,7 +219,7 @@ fold_64:
 #endif
        psrldq  $0x04, %xmm2
        pand    %xmm3, %xmm1
-       PCLMULQDQ 0x00, CONSTANT, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm1
        pxor    %xmm2, %xmm1
 
        /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
@@ -231,11 +230,11 @@ fold_64:
 #endif
        movdqa  %xmm1, %xmm2
        pand    %xmm3, %xmm1
-       PCLMULQDQ 0x10, CONSTANT, %xmm1
+       pclmulqdq $0x10, CONSTANT, %xmm1
        pand    %xmm3, %xmm1
-       PCLMULQDQ 0x00, CONSTANT, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm1
        pxor    %xmm2, %xmm1
-       PEXTRD  0x01, %xmm1, %eax
+       pextrd  $0x01, %xmm1, %eax
 
        ret
 SYM_FUNC_END(crc32_pclmul_le_16)
index 8501ec4..884dc76 100644 (file)
@@ -43,7 +43,6 @@
  * SOFTWARE.
  */
 
-#include <asm/inst.h>
 #include <linux/linkage.h>
 #include <asm/nospec-branch.h>
 
@@ -170,7 +169,7 @@ continue_block:
 
        ## branch into array
        lea     jump_table(%rip), %bufp
-       movzxw  (%bufp, %rax, 2), len
+       movzwq  (%bufp, %rax, 2), len
        lea     crc_array(%rip), %bufp
        lea     (%bufp, len, 1), %bufp
        JMP_NOSPEC bufp
@@ -225,10 +224,10 @@ LABEL crc_ %i
        subq    %rax, tmp                       # tmp -= rax*24
 
        movq    crc_init, %xmm1                 # CRC for block 1
-       PCLMULQDQ 0x00,%xmm0,%xmm1              # Multiply by K2
+       pclmulqdq $0x00, %xmm0, %xmm1           # Multiply by K2
 
        movq    crc1, %xmm2                     # CRC for block 2
-       PCLMULQDQ 0x10, %xmm0, %xmm2            # Multiply by K1
+       pclmulqdq $0x10, %xmm0, %xmm2           # Multiply by K1
 
        pxor    %xmm2,%xmm1
        movq    %xmm1, %rax
index 8a17621..8acbb65 100644 (file)
@@ -948,10 +948,8 @@ static void store_felem(u64 *b, u64 *f)
 {
        u64 f30 = f[3U];
        u64 top_bit0 = f30 >> (u32)63U;
-       u64 carry0;
        u64 f31;
        u64 top_bit;
-       u64 carry;
        u64 f0;
        u64 f1;
        u64 f2;
@@ -970,11 +968,11 @@ static void store_felem(u64 *b, u64 *f)
        u64 o2;
        u64 o3;
        f[3U] = f30 & (u64)0x7fffffffffffffffU;
-       carry0 = add_scalar(f, f, (u64)19U * top_bit0);
+       add_scalar(f, f, (u64)19U * top_bit0);
        f31 = f[3U];
        top_bit = f31 >> (u32)63U;
        f[3U] = f31 & (u64)0x7fffffffffffffffU;
-       carry = add_scalar(f, f, (u64)19U * top_bit);
+       add_scalar(f, f, (u64)19U * top_bit);
        f0 = f[0U];
        f1 = f[1U];
        f2 = f[2U];
index bb9735f..99ac25e 100644 (file)
@@ -14,7 +14,6 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/inst.h>
 #include <asm/frame.h>
 
 .section       .rodata.cst16.bswap_mask, "aM", @progbits, 16
@@ -51,9 +50,9 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
        pxor DATA, T2
        pxor SHASH, T3
 
-       PCLMULQDQ 0x00 SHASH DATA       # DATA = a0 * b0
-       PCLMULQDQ 0x11 SHASH T1         # T1 = a1 * b1
-       PCLMULQDQ 0x00 T3 T2            # T2 = (a1 + a0) * (b1 + b0)
+       pclmulqdq $0x00, SHASH, DATA    # DATA = a0 * b0
+       pclmulqdq $0x11, SHASH, T1      # T1 = a1 * b1
+       pclmulqdq $0x00, T3, T2         # T2 = (a1 + a0) * (b1 + b0)
        pxor DATA, T2
        pxor T1, T2                     # T2 = a0 * b1 + a1 * b0
 
@@ -95,9 +94,9 @@ SYM_FUNC_START(clmul_ghash_mul)
        movups (%rdi), DATA
        movups (%rsi), SHASH
        movaps .Lbswap_mask, BSWAP
-       PSHUFB_XMM BSWAP DATA
+       pshufb BSWAP, DATA
        call __clmul_gf128mul_ble
-       PSHUFB_XMM BSWAP DATA
+       pshufb BSWAP, DATA
        movups DATA, (%rdi)
        FRAME_END
        ret
@@ -114,18 +113,18 @@ SYM_FUNC_START(clmul_ghash_update)
        movaps .Lbswap_mask, BSWAP
        movups (%rdi), DATA
        movups (%rcx), SHASH
-       PSHUFB_XMM BSWAP DATA
+       pshufb BSWAP, DATA
 .align 4
 .Lupdate_loop:
        movups (%rsi), IN1
-       PSHUFB_XMM BSWAP IN1
+       pshufb BSWAP, IN1
        pxor IN1, DATA
        call __clmul_gf128mul_ble
        sub $16, %rdx
        add $16, %rsi
        cmp $16, %rdx
        jge .Lupdate_loop
-       PSHUFB_XMM BSWAP DATA
+       pshufb BSWAP, DATA
        movups DATA, (%rdi)
 .Lupdate_just_ret:
        FRAME_END
index f092884..54ad189 100644 (file)
@@ -559,8 +559,7 @@ SYSCALL_DEFINE0(ni_syscall)
 }
 
 /**
- * idtentry_enter_cond_rcu - Handle state tracking on idtentry with conditional
- *                          RCU handling
+ * idtentry_enter - Handle state tracking on ordinary idtentries
  * @regs:      Pointer to pt_regs of interrupted context
  *
  * Invokes:
@@ -572,6 +571,9 @@ SYSCALL_DEFINE0(ni_syscall)
  *  - The hardirq tracer to keep the state consistent as low level ASM
  *    entry disabled interrupts.
  *
+ * As a precondition, this requires that the entry came from user mode,
+ * idle, or a kernel context in which RCU is watching.
+ *
  * For kernel mode entries RCU handling is done conditionally. If RCU is
  * watching then the only RCU requirement is to check whether the tick has
  * to be restarted. If RCU is not watching then rcu_irq_enter() has to be
@@ -585,18 +587,21 @@ SYSCALL_DEFINE0(ni_syscall)
  * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
  * would not be possible.
  *
- * Returns: True if RCU has been adjusted on a kernel entry
- *         False otherwise
+ * Returns: An opaque object that must be passed to idtentry_exit()
  *
- * The return value must be fed into the rcu_exit argument of
- * idtentry_exit_cond_rcu().
+ * The return value must be fed into the state argument of
+ * idtentry_exit().
  */
-bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
+noinstr idtentry_state_t idtentry_enter(struct pt_regs *regs)
 {
+       idtentry_state_t ret = {
+               .exit_rcu = false,
+       };
+
        if (user_mode(regs)) {
                check_user_regs(regs);
                enter_from_user_mode();
-               return false;
+               return ret;
        }
 
        /*
@@ -634,7 +639,8 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
                trace_hardirqs_off_finish();
                instrumentation_end();
 
-               return true;
+               ret.exit_rcu = true;
+               return ret;
        }
 
        /*
@@ -649,7 +655,7 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
        trace_hardirqs_off();
        instrumentation_end();
 
-       return false;
+       return ret;
 }
 
 static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
@@ -667,10 +673,9 @@ static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
 }
 
 /**
- * idtentry_exit_cond_rcu - Handle return from exception with conditional RCU
- *                         handling
+ * idtentry_exit - Handle return from exception that used idtentry_enter()
  * @regs:      Pointer to pt_regs (exception entry regs)
- * @rcu_exit:  Invoke rcu_irq_exit() if true
+ * @state:     Return value from matching call to idtentry_enter()
  *
  * Depending on the return target (kernel/user) this runs the necessary
  * preemption and work checks if possible and required and returns to
@@ -679,10 +684,10 @@ static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
  * This is the last action before returning to the low level ASM code which
  * just needs to return to the appropriate context.
  *
- * Counterpart to idtentry_enter_cond_rcu(). The return value of the entry
- * function must be fed into the @rcu_exit argument.
+ * Counterpart to idtentry_enter(). The return value of the entry
+ * function must be fed into the @state argument.
  */
-void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
+noinstr void idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
 {
        lockdep_assert_irqs_disabled();
 
@@ -695,7 +700,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
                 * carefully and needs the same ordering of lockdep/tracing
                 * and RCU as the return to user mode path.
                 */
-               if (rcu_exit) {
+               if (state.exit_rcu) {
                        instrumentation_begin();
                        /* Tell the tracer that IRET will enable interrupts */
                        trace_hardirqs_on_prepare();
@@ -714,7 +719,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
                 * IRQ flags state is correct already. Just tell RCU if it
                 * was not watching on entry.
                 */
-               if (rcu_exit)
+               if (state.exit_rcu)
                        rcu_irq_exit();
        }
 }
@@ -726,7 +731,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
  * Invokes enter_from_user_mode() to establish the proper context for
  * NOHZ_FULL. Otherwise scheduling on exit would not be possible.
  */
-void noinstr idtentry_enter_user(struct pt_regs *regs)
+noinstr void idtentry_enter_user(struct pt_regs *regs)
 {
        check_user_regs(regs);
        enter_from_user_mode();
@@ -744,13 +749,47 @@ void noinstr idtentry_enter_user(struct pt_regs *regs)
  *
  * Counterpart to idtentry_enter_user().
  */
-void noinstr idtentry_exit_user(struct pt_regs *regs)
+noinstr void idtentry_exit_user(struct pt_regs *regs)
 {
        lockdep_assert_irqs_disabled();
 
        prepare_exit_to_usermode(regs);
 }
 
+noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+{
+       bool irq_state = lockdep_hardirqs_enabled();
+
+       __nmi_enter();
+       lockdep_hardirqs_off(CALLER_ADDR0);
+       lockdep_hardirq_enter();
+       rcu_nmi_enter();
+
+       instrumentation_begin();
+       trace_hardirqs_off_finish();
+       ftrace_nmi_enter();
+       instrumentation_end();
+
+       return irq_state;
+}
+
+noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+{
+       instrumentation_begin();
+       ftrace_nmi_exit();
+       if (restore) {
+               trace_hardirqs_on_prepare();
+               lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       }
+       instrumentation_end();
+
+       rcu_nmi_exit();
+       lockdep_hardirq_exit();
+       if (restore)
+               lockdep_hardirqs_on(CALLER_ADDR0);
+       __nmi_exit();
+}
+
 #ifdef CONFIG_XEN_PV
 #ifndef CONFIG_PREEMPTION
 /*
@@ -800,9 +839,10 @@ static void __xen_pv_evtchn_do_upcall(void)
 __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
 {
        struct pt_regs *old_regs;
-       bool inhcall, rcu_exit;
+       bool inhcall;
+       idtentry_state_t state;
 
-       rcu_exit = idtentry_enter_cond_rcu(regs);
+       state = idtentry_enter(regs);
        old_regs = set_irq_regs(regs);
 
        instrumentation_begin();
@@ -812,13 +852,13 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
        set_irq_regs(old_regs);
 
        inhcall = get_and_clear_inhcall();
-       if (inhcall && !WARN_ON_ONCE(rcu_exit)) {
+       if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
                instrumentation_begin();
                idtentry_exit_cond_resched(regs, true);
                instrumentation_end();
                restore_inhcall(inhcall);
        } else {
-               idtentry_exit_cond_rcu(regs, rcu_exit);
+               idtentry_exit(regs, state);
        }
 }
 #endif /* CONFIG_XEN_PV */
index 43b09e9..16a2369 100644 (file)
 #include <asm/cpu_device_id.h>
 #include "../perf_event.h"
 
-#define MSR_F15H_CU_PWR_ACCUMULATOR     0xc001007a
-#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
-#define MSR_F15H_PTSC                  0xc0010280
-
 /* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */
 #define AMD_POWER_EVENT_MASK           0xFFULL
 
index 4103665..1cbf57d 100644 (file)
@@ -71,10 +71,9 @@ u64 x86_perf_event_update(struct perf_event *event)
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - x86_pmu.cntval_bits;
        u64 prev_raw_count, new_raw_count;
-       int idx = hwc->idx;
        u64 delta;
 
-       if (idx == INTEL_PMC_IDX_FIXED_BTS)
+       if (unlikely(!hwc->event_base))
                return 0;
 
        /*
@@ -359,6 +358,7 @@ void x86_release_hardware(void)
        if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
                release_pmc_hardware();
                release_ds_buffers();
+               release_lbr_buffers();
                mutex_unlock(&pmc_reserve_mutex);
        }
 }
@@ -1097,22 +1097,31 @@ static inline void x86_assign_hw_event(struct perf_event *event,
                                struct cpu_hw_events *cpuc, int i)
 {
        struct hw_perf_event *hwc = &event->hw;
+       int idx;
 
-       hwc->idx = cpuc->assign[i];
+       idx = hwc->idx = cpuc->assign[i];
        hwc->last_cpu = smp_processor_id();
        hwc->last_tag = ++cpuc->tags[i];
 
-       if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
+       switch (hwc->idx) {
+       case INTEL_PMC_IDX_FIXED_BTS:
+       case INTEL_PMC_IDX_FIXED_VLBR:
                hwc->config_base = 0;
                hwc->event_base = 0;
-       } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+               break;
+
+       case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
                hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-               hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
-               hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
-       } else {
+               hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
+                               (idx - INTEL_PMC_IDX_FIXED);
+               hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+               break;
+
+       default:
                hwc->config_base = x86_pmu_config_addr(hwc->idx);
                hwc->event_base  = x86_pmu_event_addr(hwc->idx);
                hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
+               break;
        }
 }
 
@@ -1233,7 +1242,7 @@ int x86_perf_event_set_period(struct perf_event *event)
        s64 period = hwc->sample_period;
        int ret = 0, idx = hwc->idx;
 
-       if (idx == INTEL_PMC_IDX_FIXED_BTS)
+       if (unlikely(!hwc->event_base))
                return 0;
 
        /*
@@ -2363,7 +2372,6 @@ static struct pmu pmu = {
 
        .event_idx              = x86_pmu_event_idx,
        .sched_task             = x86_pmu_sched_task,
-       .task_ctx_size          = sizeof(struct x86_perf_task_context),
        .swap_task_ctx          = x86_pmu_swap_task_ctx,
        .check_period           = x86_pmu_check_period,
 
index ca35c8b..5096347 100644 (file)
@@ -2136,8 +2136,35 @@ static inline void intel_pmu_ack_status(u64 ack)
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
-static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+static inline bool event_is_checkpointed(struct perf_event *event)
 {
+       return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
+static inline void intel_set_masks(struct perf_event *event, int idx)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (event->attr.exclude_host)
+               __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+       if (event->attr.exclude_guest)
+               __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
+       if (event_is_checkpointed(event))
+               __set_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
+}
+
+static inline void intel_clear_masks(struct perf_event *event, int idx)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+       __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
+       __clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
+}
+
+static void intel_pmu_disable_fixed(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
        u64 ctrl_val, mask;
 
@@ -2148,30 +2175,22 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
        wrmsrl(hwc->config_base, ctrl_val);
 }
 
-static inline bool event_is_checkpointed(struct perf_event *event)
-{
-       return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
-}
-
 static void intel_pmu_disable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx = hwc->idx;
 
-       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+       if (idx < INTEL_PMC_IDX_FIXED) {
+               intel_clear_masks(event, idx);
+               x86_pmu_disable_event(event);
+       } else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
+               intel_clear_masks(event, idx);
+               intel_pmu_disable_fixed(event);
+       } else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
                intel_pmu_disable_bts();
                intel_pmu_drain_bts_buffer();
-               return;
-       }
-
-       cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
-       cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
-       cpuc->intel_cp_status &= ~(1ull << hwc->idx);
-
-       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
-               intel_pmu_disable_fixed(hwc);
-       else
-               x86_pmu_disable_event(event);
+       } else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+               intel_clear_masks(event, idx);
 
        /*
         * Needs to be called after x86_pmu_disable_event,
@@ -2238,33 +2257,23 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 static void intel_pmu_enable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
-               if (!__this_cpu_read(cpu_hw_events.enabled))
-                       return;
-
-               intel_pmu_enable_bts(hwc->config);
-               return;
-       }
-
-       if (event->attr.exclude_host)
-               cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
-       if (event->attr.exclude_guest)
-               cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
-
-       if (unlikely(event_is_checkpointed(event)))
-               cpuc->intel_cp_status |= (1ull << hwc->idx);
+       int idx = hwc->idx;
 
        if (unlikely(event->attr.precise_ip))
                intel_pmu_pebs_enable(event);
 
-       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+       if (idx < INTEL_PMC_IDX_FIXED) {
+               intel_set_masks(event, idx);
+               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+       } else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
+               intel_set_masks(event, idx);
                intel_pmu_enable_fixed(event);
-               return;
-       }
-
-       __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+       } else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
+               if (!__this_cpu_read(cpu_hw_events.enabled))
+                       return;
+               intel_pmu_enable_bts(hwc->config);
+       } else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+               intel_set_masks(event, idx);
 }
 
 static void intel_pmu_add_event(struct perf_event *event)
@@ -2614,6 +2623,20 @@ intel_bts_constraints(struct perf_event *event)
        return NULL;
 }
 
+/*
+ * Note: matches a fake event, like Fixed2.
+ */
+static struct event_constraint *
+intel_vlbr_constraints(struct perf_event *event)
+{
+       struct event_constraint *c = &vlbr_constraint;
+       /* Hand back the VLBR constraint only when the event's hw.config matches it. */
+       if (unlikely(constraint_match(c, event->hw.config)))
+               return c;
+       /* No match: __intel_get_event_constraints() then tries intel_bts_constraints(). */
+       return NULL;
+}
+
 static int intel_alt_er(int idx, u64 config)
 {
        int alt_idx = idx;
@@ -2804,6 +2827,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 {
        struct event_constraint *c;
 
+       c = intel_vlbr_constraints(event);
+       if (c)
+               return c;
+
        c = intel_bts_constraints(event);
        if (c)
                return c;
@@ -3951,6 +3978,11 @@ static __initconst const struct x86_pmu core_pmu = {
        .cpu_dead               = intel_pmu_cpu_dead,
 
        .check_period           = intel_pmu_check_period,
+
+       .lbr_reset              = intel_pmu_lbr_reset_64,
+       .lbr_read               = intel_pmu_lbr_read_64,
+       .lbr_save               = intel_pmu_lbr_save,
+       .lbr_restore            = intel_pmu_lbr_restore,
 };
 
 static __initconst const struct x86_pmu intel_pmu = {
@@ -3996,6 +4028,11 @@ static __initconst const struct x86_pmu intel_pmu = {
        .check_period           = intel_pmu_check_period,
 
        .aux_output_match       = intel_pmu_aux_output_match,
+
+       .lbr_reset              = intel_pmu_lbr_reset_64,
+       .lbr_read               = intel_pmu_lbr_read_64,
+       .lbr_save               = intel_pmu_lbr_save,
+       .lbr_restore            = intel_pmu_lbr_restore,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -4622,6 +4659,14 @@ __init int intel_pmu_init(void)
                x86_pmu.intel_cap.capabilities = capabilities;
        }
 
+       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) {
+               x86_pmu.lbr_reset = intel_pmu_lbr_reset_32;
+               x86_pmu.lbr_read = intel_pmu_lbr_read_32;
+       }
+
+       if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
+               intel_pmu_arch_lbr_init();
+
        intel_ds_init();
 
        x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
index dc43cc1..86848c5 100644 (file)
@@ -954,7 +954,7 @@ static void adaptive_pebs_record_size_update(void)
        if (pebs_data_cfg & PEBS_DATACFG_XMMS)
                sz += sizeof(struct pebs_xmm);
        if (pebs_data_cfg & PEBS_DATACFG_LBRS)
-               sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry);
+               sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
 
        cpuc->pebs_record_size = sz;
 }
@@ -1595,10 +1595,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
        }
 
        if (format_size & PEBS_DATACFG_LBRS) {
-               struct pebs_lbr *lbr = next_record;
+               struct lbr_entry *lbr = next_record;
                int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
                                        & 0xff) + 1;
-               next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry);
+               next_record = next_record + num_lbr * sizeof(struct lbr_entry);
 
                if (has_branch_stack(event)) {
                        intel_pmu_store_pebs_lbrs(lbr);
index 65113b1..63f58bd 100644 (file)
@@ -8,17 +8,6 @@
 
 #include "../perf_event.h"
 
-enum {
-       LBR_FORMAT_32           = 0x00,
-       LBR_FORMAT_LIP          = 0x01,
-       LBR_FORMAT_EIP          = 0x02,
-       LBR_FORMAT_EIP_FLAGS    = 0x03,
-       LBR_FORMAT_EIP_FLAGS2   = 0x04,
-       LBR_FORMAT_INFO         = 0x05,
-       LBR_FORMAT_TIME         = 0x06,
-       LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
-};
-
 static const enum {
        LBR_EIP_FLAGS           = 1,
        LBR_TSX                 = 2,
@@ -143,8 +132,54 @@ enum {
         X86_BR_IRQ             |\
         X86_BR_INT)
 
+/*
+ * Intel LBR_CTL bits
+ *
+ * Hardware branch filter for Arch LBR
+ */
+#define ARCH_LBR_KERNEL_BIT            1  /* capture at ring0 */
+#define ARCH_LBR_USER_BIT              2  /* capture at ring > 0 */
+#define ARCH_LBR_CALL_STACK_BIT                3  /* enable call stack */
+#define ARCH_LBR_JCC_BIT               16 /* capture conditional branches */
+#define ARCH_LBR_REL_JMP_BIT           17 /* capture relative jumps */
+#define ARCH_LBR_IND_JMP_BIT           18 /* capture indirect jumps */
+#define ARCH_LBR_REL_CALL_BIT          19 /* capture relative calls */
+#define ARCH_LBR_IND_CALL_BIT          20 /* capture indirect calls */
+#define ARCH_LBR_RETURN_BIT            21 /* capture near returns */
+#define ARCH_LBR_OTHER_BRANCH_BIT      22 /* capture other branches */
+
+#define ARCH_LBR_KERNEL                        (1ULL << ARCH_LBR_KERNEL_BIT)
+#define ARCH_LBR_USER                  (1ULL << ARCH_LBR_USER_BIT)
+#define ARCH_LBR_CALL_STACK            (1ULL << ARCH_LBR_CALL_STACK_BIT)
+#define ARCH_LBR_JCC                   (1ULL << ARCH_LBR_JCC_BIT)
+#define ARCH_LBR_REL_JMP               (1ULL << ARCH_LBR_REL_JMP_BIT)
+#define ARCH_LBR_IND_JMP               (1ULL << ARCH_LBR_IND_JMP_BIT)
+#define ARCH_LBR_REL_CALL              (1ULL << ARCH_LBR_REL_CALL_BIT)
+#define ARCH_LBR_IND_CALL              (1ULL << ARCH_LBR_IND_CALL_BIT)
+#define ARCH_LBR_RETURN                        (1ULL << ARCH_LBR_RETURN_BIT)
+#define ARCH_LBR_OTHER_BRANCH          (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)
+
+#define ARCH_LBR_ANY                    \
+       (ARCH_LBR_JCC                   |\
+        ARCH_LBR_REL_JMP               |\
+        ARCH_LBR_IND_JMP               |\
+        ARCH_LBR_REL_CALL              |\
+        ARCH_LBR_IND_CALL              |\
+        ARCH_LBR_RETURN                |\
+        ARCH_LBR_OTHER_BRANCH)
+
+#define ARCH_LBR_CTL_MASK                      0x7f000e
+
 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
 
+static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
+{
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+               return !!(config & ARCH_LBR_CALL_STACK); /* Arch LBR: LBR_CTL bit 3 */
+       /* Without Arch LBR the LBR_CALL_STACK bit is tested instead. */
+       return !!(config & LBR_CALL_STACK);
+}
+
 /*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
  * otherwise it becomes near impossible to get a reliable stack.
@@ -168,33 +203,46 @@ static void __intel_pmu_lbr_enable(bool pmi)
         */
        if (cpuc->lbr_sel)
                lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
-       if (!pmi && cpuc->lbr_sel)
+       if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
                wrmsrl(MSR_LBR_SELECT, lbr_select);
 
        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        orig_debugctl = debugctl;
-       debugctl |= DEBUGCTLMSR_LBR;
+
+       if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+               debugctl |= DEBUGCTLMSR_LBR;
        /*
         * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
         * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
         * may cause superfluous increase/decrease of LBR_TOS.
         */
-       if (!(lbr_select & LBR_CALL_STACK))
+       if (is_lbr_call_stack_bit_set(lbr_select))
+               debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+       else
                debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+
        if (orig_debugctl != debugctl)
                wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+               wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
 }
 
 static void __intel_pmu_lbr_disable(void)
 {
        u64 debugctl;
 
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+               wrmsrl(MSR_ARCH_LBR_CTL, 0);
+               return;
+       }
+
        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 }
 
-static void intel_pmu_lbr_reset_32(void)
+void intel_pmu_lbr_reset_32(void)
 {
        int i;
 
@@ -202,7 +250,7 @@ static void intel_pmu_lbr_reset_32(void)
                wrmsrl(x86_pmu.lbr_from + i, 0);
 }
 
-static void intel_pmu_lbr_reset_64(void)
+void intel_pmu_lbr_reset_64(void)
 {
        int i;
 
@@ -210,10 +258,16 @@ static void intel_pmu_lbr_reset_64(void)
                wrmsrl(x86_pmu.lbr_from + i, 0);
                wrmsrl(x86_pmu.lbr_to   + i, 0);
                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       wrmsrl(MSR_LBR_INFO_0 + i, 0);
+                       wrmsrl(x86_pmu.lbr_info + i, 0);
        }
 }
 
+static void intel_pmu_arch_lbr_reset(void)
+{
+       /* Writing the ARCH_LBR_DEPTH MSR resets all LBR entries to 0 */
+       wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); /* depth written is x86_pmu.lbr_nr */
+}
+
 void intel_pmu_lbr_reset(void)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -221,10 +275,7 @@ void intel_pmu_lbr_reset(void)
        if (!x86_pmu.lbr_nr)
                return;
 
-       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
-               intel_pmu_lbr_reset_32();
-       else
-               intel_pmu_lbr_reset_64();
+       x86_pmu.lbr_reset();
 
        cpuc->last_task_ctx = NULL;
        cpuc->last_log_id = 0;
@@ -308,69 +359,97 @@ static u64 lbr_from_signext_quirk_rd(u64 val)
        return val;
 }
 
-static inline void wrlbr_from(unsigned int idx, u64 val)
+static __always_inline void wrlbr_from(unsigned int idx, u64 val)
 {
        val = lbr_from_signext_quirk_wr(val);
        wrmsrl(x86_pmu.lbr_from + idx, val);
 }
 
-static inline void wrlbr_to(unsigned int idx, u64 val)
+static __always_inline void wrlbr_to(unsigned int idx, u64 val)
 {
        wrmsrl(x86_pmu.lbr_to + idx, val);
 }
 
-static inline u64 rdlbr_from(unsigned int idx)
+static __always_inline void wrlbr_info(unsigned int idx, u64 val)
+{
+       wrmsrl(x86_pmu.lbr_info + idx, val);
+}
+
+static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
 {
        u64 val;
 
+       if (lbr)
+               return lbr->from;
+
        rdmsrl(x86_pmu.lbr_from + idx, val);
 
        return lbr_from_signext_quirk_rd(val);
 }
 
-static inline u64 rdlbr_to(unsigned int idx)
+static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
 {
        u64 val;
 
+       if (lbr)
+               return lbr->to;
+
        rdmsrl(x86_pmu.lbr_to + idx, val);
 
        return val;
 }
 
-static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
+{
+       u64 val;
+       /* A non-NULL @lbr supplies the value from memory; no MSR is read. */
+       if (lbr)
+               return lbr->info;
+       /* Otherwise read the MSR at x86_pmu.lbr_info + @idx. */
+       rdmsrl(x86_pmu.lbr_info + idx, val);
+
+       return val;
+}
+
+static inline void
+wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
+{
+       wrlbr_from(idx, lbr->from); /* write the fields of @lbr into LBR slot @idx */
+       wrlbr_to(idx, lbr->to);
+       if (need_info)
+               wrlbr_info(idx, lbr->info); /* INFO is written only when the caller asks for it */
+}
+
+static inline bool
+rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
+{
+       u64 from = rdlbr_from(idx, NULL); /* NULL entry: read from the MSR, not from a saved entry */
+
+       /* A FROM value of 0 is treated as an invalid entry: don't read it */
+       if (!from)
+               return false; /* *lbr is left untouched */
+
+       lbr->from = from;
+       lbr->to = rdlbr_to(idx, NULL);
+       if (need_info)
+               lbr->info = rdlbr_info(idx, NULL);
+
+       return true; /* slot @idx was copied into *lbr */
+}
+
+void intel_pmu_lbr_restore(void *ctx)
 {
+       bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx = ctx;
        int i;
        unsigned lbr_idx, mask;
-       u64 tos;
-
-       if (task_ctx->lbr_callstack_users == 0 ||
-           task_ctx->lbr_stack_state == LBR_NONE) {
-               intel_pmu_lbr_reset();
-               return;
-       }
-
-       tos = task_ctx->tos;
-       /*
-        * Does not restore the LBR registers, if
-        * - No one else touched them, and
-        * - Did not enter C6
-        */
-       if ((task_ctx == cpuc->last_task_ctx) &&
-           (task_ctx->log_id == cpuc->last_log_id) &&
-           rdlbr_from(tos)) {
-               task_ctx->lbr_stack_state = LBR_NONE;
-               return;
-       }
+       u64 tos = task_ctx->tos;
 
        mask = x86_pmu.lbr_nr - 1;
        for (i = 0; i < task_ctx->valid_lbrs; i++) {
                lbr_idx = (tos - i) & mask;
-               wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
-               wrlbr_to  (lbr_idx, task_ctx->lbr_to[i]);
-
-               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+               wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
        }
 
        for (; i < x86_pmu.lbr_nr; i++) {
@@ -378,49 +457,149 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
                wrlbr_from(lbr_idx, 0);
                wrlbr_to(lbr_idx, 0);
                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
+                       wrlbr_info(lbr_idx, 0);
        }
 
        wrmsrl(x86_pmu.lbr_tos, tos);
-       task_ctx->lbr_stack_state = LBR_NONE;
+
+       if (cpuc->lbr_select)
+               wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
 }
 
-static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+static void intel_pmu_arch_lbr_restore(void *ctx)
 {
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       unsigned lbr_idx, mask;
-       u64 tos, from;
+       struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
+       struct lbr_entry *entries = task_ctx->entries;
        int i;
 
-       if (task_ctx->lbr_callstack_users == 0) {
-               task_ctx->lbr_stack_state = LBR_NONE;
+       /* Fast reset the LBRs before restore if the call stack is not full. */
+       if (!entries[x86_pmu.lbr_nr - 1].from)
+               intel_pmu_arch_lbr_reset();
+
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               if (!entries[i].from)
+                       break;
+               wrlbr_all(&entries[i], i, true);
+       }
+}
+
+/*
+ * Restore the Architecture LBR state from the xsave area in the perf
+ * context data for the task via the XRSTORS instruction.
+ */
+static void intel_pmu_arch_lbr_xrstors(void *ctx)
+{
+       struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+       copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
+static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
+{
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+               return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL); /* Arch LBR: lbr_deep_c_reset set and entry 0 FROM reads 0 */
+       /* Otherwise: FROM at the saved TOS index reads 0 (@ctx is a struct x86_perf_task_context here). */
+       return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
+}
+
+static void __intel_pmu_lbr_restore(void *ctx)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
+           task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
+               intel_pmu_lbr_reset();
+               return;
+       }
+
+       /*
+        * Does not restore the LBR registers, if
+        * - No one else touched them, and
+        * - Was not cleared in Cstate
+        */
+       if ((ctx == cpuc->last_task_ctx) &&
+           (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
+           !lbr_is_reset_in_cstate(ctx)) {
+               task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
                return;
        }
 
+       x86_pmu.lbr_restore(ctx);
+
+       task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
+}
+
+void intel_pmu_lbr_save(void *ctx)
+{
+       bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx = ctx;
+       unsigned lbr_idx, mask;
+       u64 tos;
+       int i;
+
        mask = x86_pmu.lbr_nr - 1;
        tos = intel_pmu_lbr_tos();
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                lbr_idx = (tos - i) & mask;
-               from = rdlbr_from(lbr_idx);
-               if (!from)
+               if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
                        break;
-               task_ctx->lbr_from[i] = from;
-               task_ctx->lbr_to[i]   = rdlbr_to(lbr_idx);
-               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
        }
        task_ctx->valid_lbrs = i;
        task_ctx->tos = tos;
-       task_ctx->lbr_stack_state = LBR_VALID;
 
-       cpuc->last_task_ctx = task_ctx;
-       cpuc->last_log_id = ++task_ctx->log_id;
+       if (cpuc->lbr_select)
+               rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
+}
+
+static void intel_pmu_arch_lbr_save(void *ctx)
+{
+       struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
+       struct lbr_entry *entries = task_ctx->entries;
+       int i;
+       /* Copy LBR slots 0..lbr_nr-1 into the task context, stopping at the first invalid slot. */
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               if (!rdlbr_all(&entries[i], i, true))
+                       break;
+       }
+
+       /* LBR call stack is not full. Reset is required in restore. */
+       if (i < x86_pmu.lbr_nr)
+               entries[x86_pmu.lbr_nr - 1].from = 0; /* marker tested by intel_pmu_arch_lbr_restore() */
+}
+
+/*
+ * Save the Architecture LBR state to the xsave area in the perf
+ * context data for the task via the XSAVES instruction.
+ */
+static void intel_pmu_arch_lbr_xsaves(void *ctx)
+{
+       struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+       copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
+static void __intel_pmu_lbr_save(void *ctx)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (task_context_opt(ctx)->lbr_callstack_users == 0) {
+               task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
+               return;
+       }
+
+       x86_pmu.lbr_save(ctx);
+
+       task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
+
+       cpuc->last_task_ctx = ctx;
+       cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
 }
 
 void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
                                 struct perf_event_context *next)
 {
-       struct x86_perf_task_context *prev_ctx_data, *next_ctx_data;
+       void *prev_ctx_data, *next_ctx_data;
 
        swap(prev->task_ctx_data, next->task_ctx_data);
 
@@ -436,14 +615,14 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
        if (!prev_ctx_data || !next_ctx_data)
                return;
 
-       swap(prev_ctx_data->lbr_callstack_users,
-            next_ctx_data->lbr_callstack_users);
+       swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
+            task_context_opt(next_ctx_data)->lbr_callstack_users);
 }
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
+       void *task_ctx;
 
        if (!cpuc->lbr_users)
                return;
@@ -479,18 +658,19 @@ static inline bool branch_user_callstack(unsigned br_sel)
 
 void intel_pmu_lbr_add(struct perf_event *event)
 {
+       struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
 
        if (!x86_pmu.lbr_nr)
                return;
 
+       if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
+               cpuc->lbr_select = 1;
+
        cpuc->br_sel = event->hw.branch_reg.reg;
 
-       if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
-               task_ctx = event->ctx->task_ctx_data;
-               task_ctx->lbr_callstack_users++;
-       }
+       if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
+               task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
 
        /*
         * Request pmu::sched_task() callback, which will fire inside the
@@ -516,21 +696,44 @@ void intel_pmu_lbr_add(struct perf_event *event)
        perf_sched_cb_inc(event->ctx->pmu);
        if (!cpuc->lbr_users++ && !event->total_time_running)
                intel_pmu_lbr_reset();
+
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
+           kmem_cache && !cpuc->lbr_xsave &&
+           (cpuc->lbr_users != cpuc->lbr_pebs_users))
+               cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL);
+}
+
+void release_lbr_buffers(void)
+{
+       struct kmem_cache *kmem_cache = x86_get_pmu()->task_ctx_cache;
+       struct cpu_hw_events *cpuc;
+       int cpu;
+
+       if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+               return;
+
+       for_each_possible_cpu(cpu) {
+               cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+               if (kmem_cache && cpuc->lbr_xsave) {
+                       kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
+                       cpuc->lbr_xsave = NULL;
+               }
+       }
 }
 
 void intel_pmu_lbr_del(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
 
        if (!x86_pmu.lbr_nr)
                return;
 
        if (branch_user_callstack(cpuc->br_sel) &&
-           event->ctx->task_ctx_data) {
-               task_ctx = event->ctx->task_ctx_data;
-               task_ctx->lbr_callstack_users--;
-       }
+           event->ctx->task_ctx_data)
+               task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
+
+       if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
+               cpuc->lbr_select = 0;
 
        if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
                cpuc->lbr_pebs_users--;
@@ -540,11 +743,19 @@ void intel_pmu_lbr_del(struct perf_event *event)
        perf_sched_cb_dec(event->ctx->pmu);
 }
 
+static inline bool vlbr_exclude_host(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       /* True when the INTEL_PMC_IDX_FIXED_VLBR bit is set in this CPU's guest mask. */
+       return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
+               (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+}
+
 void intel_pmu_lbr_enable_all(bool pmi)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-       if (cpuc->lbr_users)
+       if (cpuc->lbr_users && !vlbr_exclude_host())
                __intel_pmu_lbr_enable(pmi);
 }
 
@@ -552,11 +763,11 @@ void intel_pmu_lbr_disable_all(void)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-       if (cpuc->lbr_users)
+       if (cpuc->lbr_users && !vlbr_exclude_host())
                __intel_pmu_lbr_disable();
 }
 
-static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 {
        unsigned long mask = x86_pmu.lbr_nr - 1;
        u64 tos = intel_pmu_lbr_tos();
@@ -593,7 +804,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
  * is the same as the linear address, allowing us to merge the LIP and EIP
  * LBR formats.
  */
-static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
        bool need_info = false, call_stack = false;
        unsigned long mask = x86_pmu.lbr_nr - 1;
@@ -616,8 +827,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
                u16 cycles = 0;
                int lbr_flags = lbr_desc[lbr_format];
 
-               from = rdlbr_from(lbr_idx);
-               to   = rdlbr_to(lbr_idx);
+               from = rdlbr_from(lbr_idx, NULL);
+               to   = rdlbr_to(lbr_idx, NULL);
 
                /*
                 * Read LBR call stack entries
@@ -629,7 +840,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
                if (lbr_format == LBR_FORMAT_INFO && need_info) {
                        u64 info;
 
-                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
+                       info = rdlbr_info(lbr_idx, NULL);
                        mis = !!(info & LBR_INFO_MISPRED);
                        pred = !mis;
                        in_tx = !!(info & LBR_INFO_IN_TX);
@@ -684,6 +895,93 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
        cpuc->lbr_stack.hw_idx = tos;
 }
 
+static __always_inline int get_lbr_br_type(u64 info)
+{
+       if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
+               return 0; /* not Arch LBR, or x86_pmu.lbr_br_type is clear */
+       /* Extract the branch-type field from the INFO value. */
+       return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+}
+
+static __always_inline bool get_lbr_mispred(u64 info)
+{
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
+               return 0; /* Arch LBR with x86_pmu.lbr_mispred clear: always false */
+       /* Otherwise report the LBR_INFO_MISPRED bit of @info. */
+       return !!(info & LBR_INFO_MISPRED);
+}
+
+static __always_inline bool get_lbr_predicted(u64 info)
+{
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
+               return 0; /* same guard as get_lbr_mispred(): both then report false */
+       /* Otherwise report the inverse of the LBR_INFO_MISPRED bit of @info. */
+       return !(info & LBR_INFO_MISPRED);
+}
+
+static __always_inline u16 get_lbr_cycles(u64 info) /* u16, not bool: the result is a cycle count */
+{
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
+           !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
+               return 0; /* Arch LBR without timed LBR, or the count-valid bit is clear */
+       /* Return the masked count itself; a bool return type would collapse it to 0/1. */
+       return info & LBR_INFO_CYCLES;
+}
+
+static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
+                               struct lbr_entry *entries)
+{
+       struct perf_branch_entry *e;
+       struct lbr_entry *lbr;
+       u64 from, to, info;
+       int i;
+       /* Fill cpuc->lbr_entries[] from @entries when given, otherwise from the LBR MSRs. */
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               lbr = entries ? &entries[i] : NULL; /* NULL makes the rdlbr_*() helpers read MSRs */
+               e = &cpuc->lbr_entries[i];
+
+               from = rdlbr_from(i, lbr);
+               /*
+                * Read LBR entries until invalid entry (0s) is detected.
+                */
+               if (!from)
+                       break;
+
+               to = rdlbr_to(i, lbr);
+               info = rdlbr_info(i, lbr);
+
+               e->from         = from;
+               e->to           = to;
+               e->mispred      = get_lbr_mispred(info);
+               e->predicted    = get_lbr_predicted(info);
+               e->in_tx        = !!(info & LBR_INFO_IN_TX);
+               e->abort        = !!(info & LBR_INFO_ABORT);
+               e->cycles       = get_lbr_cycles(info);
+               e->type         = get_lbr_br_type(info);
+               e->reserved     = 0;
+       }
+
+       cpuc->lbr_stack.nr = i; /* number of entries stored before the loop stopped */
+}
+
+static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
+{
+       intel_pmu_store_lbr(cpuc, NULL);
+}
+
+static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
+{
+       struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;
+       /* No per-CPU xsave buffer: fall back to reading the LBR MSRs directly. */
+       if (!xsave) {
+               intel_pmu_store_lbr(cpuc, NULL);
+               return;
+       }
+       copy_dynamic_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR); /* same save call as intel_pmu_arch_lbr_xsaves() */
+       /* Decode the entries from the buffer just filled instead of from MSRs. */
+       intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
+}
+
 void intel_pmu_lbr_read(void)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -694,13 +992,11 @@ void intel_pmu_lbr_read(void)
         * This could be smarter and actually check the event,
         * but this simple approach seems to work for now.
         */
-       if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users)
+       if (!cpuc->lbr_users || vlbr_exclude_host() ||
+           cpuc->lbr_users == cpuc->lbr_pebs_users)
                return;
 
-       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
-               intel_pmu_lbr_read_32(cpuc);
-       else
-               intel_pmu_lbr_read_64(cpuc);
+       x86_pmu.lbr_read(cpuc);
 
        intel_pmu_lbr_filter(cpuc);
 }
@@ -800,6 +1096,11 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
        reg = &event->hw.branch_reg;
        reg->idx = EXTRA_REG_LBR;
 
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+               reg->config = mask;
+               return 0;
+       }
+
        /*
         * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
         * in suppress mode. So LBR_SELECT should be set to
@@ -1056,6 +1357,27 @@ common_branch_type(int type)
        return PERF_BR_UNKNOWN;
 }
 
+enum {
+       ARCH_LBR_BR_TYPE_JCC                    = 0,
+       ARCH_LBR_BR_TYPE_NEAR_IND_JMP           = 1,
+       ARCH_LBR_BR_TYPE_NEAR_REL_JMP           = 2,
+       ARCH_LBR_BR_TYPE_NEAR_IND_CALL          = 3,
+       ARCH_LBR_BR_TYPE_NEAR_REL_CALL          = 4,
+       ARCH_LBR_BR_TYPE_NEAR_RET               = 5,
+       ARCH_LBR_BR_TYPE_KNOWN_MAX              = ARCH_LBR_BR_TYPE_NEAR_RET,
+
+       ARCH_LBR_BR_TYPE_MAP_MAX                = 16,
+};
+
+static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
+       [ARCH_LBR_BR_TYPE_JCC]                  = X86_BR_JCC,
+       [ARCH_LBR_BR_TYPE_NEAR_IND_JMP]         = X86_BR_IND_JMP,
+       [ARCH_LBR_BR_TYPE_NEAR_REL_JMP]         = X86_BR_JMP,
+       [ARCH_LBR_BR_TYPE_NEAR_IND_CALL]        = X86_BR_IND_CALL,
+       [ARCH_LBR_BR_TYPE_NEAR_REL_CALL]        = X86_BR_CALL,
+       [ARCH_LBR_BR_TYPE_NEAR_RET]             = X86_BR_RET,
+};
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
@@ -1068,7 +1390,7 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 {
        u64 from, to;
        int br_sel = cpuc->br_sel;
-       int i, j, type;
+       int i, j, type, to_plm;
        bool compress = false;
 
        /* if sampling all branches, then nothing to filter */
@@ -1080,8 +1402,19 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 
                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;
+               type = cpuc->lbr_entries[i].type;
 
-               type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+               /*
+                * Parse the branch type recorded in LBR_x_INFO MSR.
+                * Doesn't support OTHER_BRANCH decoding for now.
+                * OTHER_BRANCH branch type still relies on software decoding.
+                */
+               if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
+                   type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
+                       to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+                       type = arch_lbr_br_type_map[type] | to_plm;
+               } else
+                       type = branch_type(from, to, cpuc->lbr_entries[i].abort);
                if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
                        if (cpuc->lbr_entries[i].in_tx)
                                type |= X86_BR_IN_TX;
@@ -1116,32 +1449,18 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
        }
 }
 
-void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
+void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int i;
-
-       cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
 
-       /* Cannot get TOS for large PEBS */
-       if (cpuc->n_pebs == cpuc->n_large_pebs)
+       /* Cannot get TOS for large PEBS and Arch LBR */
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
+           (cpuc->n_pebs == cpuc->n_large_pebs))
                cpuc->lbr_stack.hw_idx = -1ULL;
        else
                cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
 
-       for (i = 0; i < x86_pmu.lbr_nr; i++) {
-               u64 info = lbr->lbr[i].info;
-               struct perf_branch_entry *e = &cpuc->lbr_entries[i];
-
-               e->from         = lbr->lbr[i].from;
-               e->to           = lbr->lbr[i].to;
-               e->mispred      = !!(info & LBR_INFO_MISPRED);
-               e->predicted    = !(info & LBR_INFO_MISPRED);
-               e->in_tx        = !!(info & LBR_INFO_IN_TX);
-               e->abort        = !!(info & LBR_INFO_ABORT);
-               e->cycles       = info & LBR_INFO_CYCLES;
-               e->reserved     = 0;
-       }
+       intel_pmu_store_lbr(cpuc, lbr);
        intel_pmu_lbr_filter(cpuc);
 }
 
@@ -1198,6 +1517,26 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
 };
 
+static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = ARCH_LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = ARCH_LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = ARCH_LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = ARCH_LBR_RETURN |
+                                                 ARCH_LBR_OTHER_BRANCH,
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = ARCH_LBR_REL_CALL |
+                                                 ARCH_LBR_IND_CALL |
+                                                 ARCH_LBR_OTHER_BRANCH,
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = ARCH_LBR_IND_CALL,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = ARCH_LBR_JCC,
+       [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = ARCH_LBR_REL_CALL |
+                                                 ARCH_LBR_IND_CALL |
+                                                 ARCH_LBR_RETURN |
+                                                 ARCH_LBR_CALL_STACK,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = ARCH_LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = ARCH_LBR_REL_CALL,
+};
+
 /* core */
 void __init intel_pmu_lbr_init_core(void)
 {
@@ -1251,9 +1590,17 @@ void __init intel_pmu_lbr_init_snb(void)
         */
 }
 
+static inline struct kmem_cache *
+create_lbr_kmem_cache(size_t size, size_t align)
+{
+       return kmem_cache_create("x86_lbr", size, align, 0, NULL);
+}
+
 /* haswell */
 void intel_pmu_lbr_init_hsw(void)
 {
+       size_t size = sizeof(struct x86_perf_task_context);
+
        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
@@ -1262,6 +1609,8 @@ void intel_pmu_lbr_init_hsw(void)
        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
 
+       x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+
        if (lbr_from_signext_quirk_needed())
                static_branch_enable(&lbr_from_quirk_key);
 }
@@ -1269,14 +1618,19 @@ void intel_pmu_lbr_init_hsw(void)
 /* skylake */
 __init void intel_pmu_lbr_init_skl(void)
 {
+       size_t size = sizeof(struct x86_perf_task_context);
+
        x86_pmu.lbr_nr   = 32;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+       x86_pmu.lbr_info = MSR_LBR_INFO_0;
 
        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
 
+       x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+
        /*
         * SW branch filter usage:
         * - support syscall, sysret capture.
@@ -1343,3 +1697,152 @@ void intel_pmu_lbr_init_knl(void)
        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
                x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
 }
+
+/*
+ * LBR state size is variable based on the max number of registers.
+ * This calculates the expected state size, which should match
+ * what the hardware enumerates for the size of XFEATURE_LBR.
+ */
+static inline unsigned int get_lbr_state_size(void)
+{
+       return sizeof(struct arch_lbr_state) +
+              x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+}
+
+static bool is_arch_lbr_xsave_available(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_XSAVES))
+               return false;
+
+       /*
+        * Check the LBR state with the corresponding software structure.
+        * Disable LBR XSAVES support if the size doesn't match.
+        */
+       if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
+               return false;
+
+       return true;
+}
+
+void __init intel_pmu_arch_lbr_init(void)
+{
+       struct pmu *pmu = x86_get_pmu();
+       union cpuid28_eax eax;
+       union cpuid28_ebx ebx;
+       union cpuid28_ecx ecx;
+       unsigned int unused_edx;
+       bool arch_lbr_xsave;
+       size_t size;
+       u64 lbr_nr;
+
+       /* Arch LBR Capabilities */
+       cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);
+
+       lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
+       if (!lbr_nr)
+               goto clear_arch_lbr;
+
+       /* Apply the max depth of Arch LBR */
+       if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
+               goto clear_arch_lbr;
+
+       x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
+       x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
+       x86_pmu.lbr_lip = eax.split.lbr_lip;
+       x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
+       x86_pmu.lbr_filter = ebx.split.lbr_filter;
+       x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
+       x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
+       x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
+       x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+       x86_pmu.lbr_nr = lbr_nr;
+
+
+       arch_lbr_xsave = is_arch_lbr_xsave_available();
+       if (arch_lbr_xsave) {
+               size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
+                      get_lbr_state_size();
+               pmu->task_ctx_cache = create_lbr_kmem_cache(size,
+                                                           XSAVE_ALIGNMENT);
+       }
+
+       if (!pmu->task_ctx_cache) {
+               arch_lbr_xsave = false;
+
+               size = sizeof(struct x86_perf_task_context_arch_lbr) +
+                      lbr_nr * sizeof(struct lbr_entry);
+               pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+       }
+
+       x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
+       x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
+       x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;
+
+       /* LBR callstack requires both CPL and Branch Filtering support */
+       if (!x86_pmu.lbr_cpl ||
+           !x86_pmu.lbr_filter ||
+           !x86_pmu.lbr_call_stack)
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;
+
+       if (!x86_pmu.lbr_cpl) {
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
+       } else if (!x86_pmu.lbr_filter) {
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
+       }
+
+       x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
+       x86_pmu.lbr_ctl_map  = arch_lbr_ctl_map;
+
+       if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
+               x86_pmu.lbr_ctl_map = NULL;
+
+       x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
+       if (arch_lbr_xsave) {
+               x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
+               x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
+               x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
+               pr_cont("XSAVE ");
+       } else {
+               x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
+               x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
+               x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
+       }
+
+       pr_cont("Architectural LBR, ");
+
+       return;
+
+clear_arch_lbr:
+       clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR);
+}
+
+/**
+ * x86_perf_get_lbr - get the LBR records information
+ *
+ * @lbr: the caller's memory to store the LBR records information
+ *
+ * Returns: 0 indicates the LBR info has been successfully obtained
+ */
+int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
+{
+       int lbr_fmt = x86_pmu.intel_cap.lbr_format;
+
+       lbr->nr = x86_pmu.lbr_nr;
+       lbr->from = x86_pmu.lbr_from;
+       lbr->to = x86_pmu.lbr_to;
+       lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
+
+struct event_constraint vlbr_constraint =
+       __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
+                         FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);
index cf76d66..d5c6d3b 100644 (file)
@@ -16,7 +16,7 @@ struct pci_driver *uncore_pci_driver;
 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
 struct pci_extra_dev *uncore_extra_pci_dev;
-static int max_dies;
+int __uncore_max_dies;
 
 /* mask of cpus that collect uncore events */
 static cpumask_t uncore_cpu_mask;
@@ -108,7 +108,7 @@ struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu
         * The unsigned check also catches the '-1' return value for non
         * existent mappings in the topology map.
         */
-       return dieid < max_dies ? pmu->boxes[dieid] : NULL;
+       return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
 }
 
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -132,6 +132,9 @@ u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
        if (!box->io_addr)
                return 0;
 
+       if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
+               return 0;
+
        return readq(box->io_addr + event->hw.event_base);
 }
 
@@ -843,10 +846,12 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
                        .read           = uncore_pmu_event_read,
                        .module         = THIS_MODULE,
                        .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
+                       .attr_update    = pmu->type->attr_update,
                };
        } else {
                pmu->pmu = *pmu->type->pmu;
                pmu->pmu.attr_groups = pmu->type->attr_groups;
+               pmu->pmu.attr_update = pmu->type->attr_update;
        }
 
        if (pmu->type->num_boxes == 1) {
@@ -877,7 +882,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
 {
        int die;
 
-       for (die = 0; die < max_dies; die++)
+       for (die = 0; die < uncore_max_dies(); die++)
                kfree(pmu->boxes[die]);
        kfree(pmu->boxes);
 }
@@ -887,6 +892,9 @@ static void uncore_type_exit(struct intel_uncore_type *type)
        struct intel_uncore_pmu *pmu = type->pmus;
        int i;
 
+       if (type->cleanup_mapping)
+               type->cleanup_mapping(type);
+
        if (pmu) {
                for (i = 0; i < type->num_boxes; i++, pmu++) {
                        uncore_pmu_unregister(pmu);
@@ -915,7 +923,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
        if (!pmus)
                return -ENOMEM;
 
-       size = max_dies * sizeof(struct intel_uncore_box *);
+       size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
 
        for (i = 0; i < type->num_boxes; i++) {
                pmus[i].func_id = setid ? i : -1;
@@ -954,6 +962,9 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 
        type->pmu_group = &uncore_pmu_attr_group;
 
+       if (type->set_mapping)
+               type->set_mapping(type);
+
        return 0;
 
 err:
@@ -1112,7 +1123,7 @@ static int __init uncore_pci_init(void)
        size_t size;
        int ret;
 
-       size = max_dies * sizeof(struct pci_extra_dev);
+       size = uncore_max_dies() * sizeof(struct pci_extra_dev);
        uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
        if (!uncore_extra_pci_dev) {
                ret = -ENOMEM;
@@ -1514,6 +1525,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &skx_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &skl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &skl_uncore_init),
+       X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &skl_uncore_init),
+       X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &skl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &icl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_uncore_init),
@@ -1539,7 +1552,8 @@ static int __init intel_uncore_init(void)
        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
                return -ENODEV;
 
-       max_dies = topology_max_packages() * topology_max_die_per_package();
+       __uncore_max_dies =
+               topology_max_packages() * topology_max_die_per_package();
 
        uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
        if (uncore_init->pci_init) {
index b469ddd..105fdc6 100644 (file)
@@ -61,6 +61,7 @@ struct intel_uncore_type {
                unsigned msr_offset;
                unsigned mmio_offset;
        };
+       unsigned mmio_map_size;
        unsigned num_shared_regs:8;
        unsigned single_fixed:1;
        unsigned pair_ctr_ctl:1;
@@ -72,7 +73,19 @@ struct intel_uncore_type {
        struct uncore_event_desc *event_descs;
        struct freerunning_counters *freerunning;
        const struct attribute_group *attr_groups[4];
+       const struct attribute_group **attr_update;
        struct pmu *pmu; /* for custom pmu ops */
+       /*
+        * Uncore PMU would store relevant platform topology configuration here
+        * to identify which platform component each PMON block of that type is
+        * supposed to monitor.
+        */
+       u64 *topology;
+       /*
+        * Optional callbacks for managing mapping of Uncore units to PMONs
+        */
+       int (*set_mapping)(struct intel_uncore_type *type);
+       void (*cleanup_mapping)(struct intel_uncore_type *type);
 };
 
 #define pmu_group attr_groups[0]
@@ -169,6 +182,18 @@ int uncore_pcibus_to_physid(struct pci_bus *bus);
 ssize_t uncore_event_show(struct kobject *kobj,
                          struct kobj_attribute *attr, char *buf);
 
+static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev)
+{
+       return container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu);
+}
+
+#define to_device_attribute(n) container_of(n, struct device_attribute, attr)
+#define to_dev_ext_attribute(n)        container_of(n, struct dev_ext_attribute, attr)
+#define attr_to_ext_attr(n)    to_dev_ext_attribute(to_device_attribute(n))
+
+extern int __uncore_max_dies;
+#define uncore_max_dies()      (__uncore_max_dies)
+
 #define INTEL_UNCORE_EVENT_DESC(_name, _config)                        \
 {                                                              \
        .attr   = __ATTR(_name, 0444, uncore_event_show, NULL), \
@@ -196,6 +221,18 @@ static inline bool uncore_pmc_freerunning(int idx)
        return idx == UNCORE_PMC_IDX_FREERUNNING;
 }
 
+static inline bool uncore_mmio_is_valid_offset(struct intel_uncore_box *box,
+                                              unsigned long offset)
+{
+       if (offset < box->pmu->type->mmio_map_size)
+               return true;
+
+       pr_warn_once("perf uncore: Invalid offset 0x%lx exceeds mapped area of %s.\n",
+                    offset, box->pmu->type->name);
+
+       return false;
+}
+
 static inline
 unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box)
 {
index 3de1065..cb94ba8 100644 (file)
 #define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC         0x3ed0
 #define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC       0x3e34
 #define PCI_DEVICE_ID_INTEL_WHL_UD_IMC         0x3e35
+#define PCI_DEVICE_ID_INTEL_CML_H1_IMC         0x9b44
+#define PCI_DEVICE_ID_INTEL_CML_H2_IMC         0x9b54
+#define PCI_DEVICE_ID_INTEL_CML_H3_IMC         0x9b64
+#define PCI_DEVICE_ID_INTEL_CML_U1_IMC         0x9b51
+#define PCI_DEVICE_ID_INTEL_CML_U2_IMC         0x9b61
+#define PCI_DEVICE_ID_INTEL_CML_U3_IMC         0x9b71
+#define PCI_DEVICE_ID_INTEL_CML_S1_IMC         0x9b33
+#define PCI_DEVICE_ID_INTEL_CML_S2_IMC         0x9b43
+#define PCI_DEVICE_ID_INTEL_CML_S3_IMC         0x9b53
+#define PCI_DEVICE_ID_INTEL_CML_S4_IMC         0x9b63
+#define PCI_DEVICE_ID_INTEL_CML_S5_IMC         0x9b73
 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC          0x8a02
 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC         0x8a12
 #define PCI_DEVICE_ID_INTEL_TGL_U1_IMC         0x9a02
@@ -415,6 +426,7 @@ static const struct attribute_group snb_uncore_imc_format_group = {
 
 static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
 {
+       struct intel_uncore_type *type = box->pmu->type;
        struct pci_dev *pdev = box->pci_dev;
        int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
        resource_size_t addr;
@@ -430,7 +442,10 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
 
        addr &= ~(PAGE_SIZE - 1);
 
-       box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
+       box->io_addr = ioremap(addr, type->mmio_map_size);
+       if (!box->io_addr)
+               pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
+
        box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
 }
 
@@ -586,6 +601,7 @@ static struct intel_uncore_type snb_uncore_imc = {
        .num_counters   = 2,
        .num_boxes      = 1,
        .num_freerunning_types  = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+       .mmio_map_size  = SNB_UNCORE_PCI_IMC_MAP_SIZE,
        .freerunning    = snb_uncore_imc_freerunning,
        .event_descs    = snb_uncore_imc_events,
        .format_group   = &snb_uncore_imc_format_group,
@@ -771,6 +787,50 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC),
                .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
        },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H1_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H2_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H3_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U1_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U2_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U3_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S1_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S2_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S3_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S4_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S5_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
        { /* end: all zeroes */ },
 };
 
@@ -863,6 +923,17 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
        IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver),    /* 8th Gen Core U Mobile Quad Core */
        IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core U Mobile Quad Core */
        IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver),    /* 8th Gen Core U Mobile Dual Core */
+       IMC_DEV(CML_H1_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_H2_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_H3_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_U1_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_U2_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_U3_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_S1_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_S2_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_S3_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_S4_IMC, &skl_uncore_pci_driver),
+       IMC_DEV(CML_S5_IMC, &skl_uncore_pci_driver),
        IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver),     /* 10th Gen Core Mobile */
        IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver),    /* 10th Gen Core Mobile */
        {  /* end marker */ }
@@ -1085,11 +1156,13 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
 }
 
 #define TGL_UNCORE_MMIO_IMC_MEM_OFFSET         0x10000
+#define TGL_UNCORE_PCI_IMC_MAP_SIZE            0xe000
 
 static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
 {
        struct pci_dev *pdev = tgl_uncore_get_mc_dev();
        struct intel_uncore_pmu *pmu = box->pmu;
+       struct intel_uncore_type *type = pmu->type;
        resource_size_t addr;
        u32 mch_bar;
 
@@ -1112,7 +1185,9 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
        addr |= ((resource_size_t)mch_bar << 32);
 #endif
 
-       box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
+       box->io_addr = ioremap(addr, type->mmio_map_size);
+       if (!box->io_addr)
+               pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
 }
 
 static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
@@ -1138,6 +1213,7 @@ static struct intel_uncore_type tgl_uncore_imc_free_running = {
        .num_counters           = 3,
        .num_boxes              = 2,
        .num_freerunning_types  = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+       .mmio_map_size          = TGL_UNCORE_PCI_IMC_MAP_SIZE,
        .freerunning            = tgl_uncore_imc_freerunning,
        .ops                    = &tgl_uncore_imc_freerunning_ops,
        .event_descs            = tgl_uncore_imc_events,
index 07652fa..62e88ad 100644 (file)
 #define SKX_CPUNODEID                  0xc0
 #define SKX_GIDNIDMAP                  0xd4
 
+/*
+ * The CPU_BUS_NUMBER MSR returns the values of the respective CPUBUSNO CSR
+ * that BIOS programmed. MSR has package scope.
+ * |  Bit  |  Default  |  Description
+ * | [63]  |    00h    | VALID - When set, indicates the CPU bus
+ *                       numbers have been initialized. (RO)
+ * |[62:48]|    ---    | Reserved
+ * |[47:40]|    00h    | BUS_NUM_5 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(5). (RO)
+ * |[39:32]|    00h    | BUS_NUM_4 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(4). (RO)
+ * |[31:24]|    00h    | BUS_NUM_3 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(3). (RO)
+ * |[23:16]|    00h    | BUS_NUM_2 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(2). (RO)
+ * |[15:8] |    00h    | BUS_NUM_1 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(1). (RO)
+ * | [7:0] |    00h    | BUS_NUM_0 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(0). (RO)
+ */
+#define SKX_MSR_CPU_BUS_NUMBER         0x300
+#define SKX_MSR_CPU_BUS_VALID_BIT      (1ULL << 63)
+#define BUS_NUM_STRIDE                 8
+
 /* SKX CHA */
 #define SKX_CHA_MSR_PMON_BOX_FILTER_TID                (0x1ffULL << 0)
 #define SKX_CHA_MSR_PMON_BOX_FILTER_LINK       (0xfULL << 9)
@@ -3612,6 +3636,170 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
        .read_counter           = uncore_msr_read_counter,
 };
 
+static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die)
+{
+       return pmu->type->topology[die] >> (pmu->pmu_idx * BUS_NUM_STRIDE);
+}
+
+static umode_t
+skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+{
+       struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj));
+
+       /* Root bus 0x00 is valid only for die 0 AND pmu_idx = 0. */
+       return (!skx_iio_stack(pmu, die) && pmu->pmu_idx) ? 0 : attr->mode;
+}
+
+static ssize_t skx_iio_mapping_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct pci_bus *bus = pci_find_next_bus(NULL);
+       struct intel_uncore_pmu *uncore_pmu = dev_to_uncore_pmu(dev);
+       struct dev_ext_attribute *ea = to_dev_ext_attribute(attr);
+       long die = (long)ea->var;
+
+       /*
+        * Current implementation is for single segment configuration hence it's
+        * safe to take the segment value from the first available root bus.
+        */
+       return sprintf(buf, "%04x:%02x\n", pci_domain_nr(bus),
+                                          skx_iio_stack(uncore_pmu, die));
+}
+
+static int skx_msr_cpu_bus_read(int cpu, u64 *topology)
+{
+       u64 msr_value;
+
+       if (rdmsrl_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) ||
+                       !(msr_value & SKX_MSR_CPU_BUS_VALID_BIT))
+               return -ENXIO;
+
+       *topology = msr_value;
+
+       return 0;
+}
+
+static int die_to_cpu(int die)
+{
+       int res = 0, cpu, current_die;
+       /*
+        * Hold cpus_read_lock() so that no CPU can go offline while
+        * cpu_online_mask is being walked.
+        */
+       cpus_read_lock();
+       for_each_online_cpu(cpu) {
+               current_die = topology_logical_die_id(cpu);
+               if (current_die == die) {
+                       res = cpu;
+                       break;
+               }
+       }
+       cpus_read_unlock();
+       return res;
+}
+
+static int skx_iio_get_topology(struct intel_uncore_type *type)
+{
+       int i, ret;
+       struct pci_bus *bus = NULL;
+
+       /*
+        * Verified single-segment environments only; disabled for multiple
+        * segment topologies for now except VMD domains.
+        * VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
+        */
+       while ((bus = pci_find_next_bus(bus))
+               && (!pci_domain_nr(bus) || pci_domain_nr(bus) > 0xffff))
+               ;
+       if (bus)
+               return -EPERM;
+
+       type->topology = kcalloc(uncore_max_dies(), sizeof(u64), GFP_KERNEL);
+       if (!type->topology)
+               return -ENOMEM;
+
+       for (i = 0; i < uncore_max_dies(); i++) {
+               ret = skx_msr_cpu_bus_read(die_to_cpu(i), &type->topology[i]);
+               if (ret) {
+                       kfree(type->topology);
+                       type->topology = NULL;
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static struct attribute_group skx_iio_mapping_group = {
+       .is_visible     = skx_iio_mapping_visible,
+};
+
+static const struct attribute_group *skx_iio_attr_update[] = {
+       &skx_iio_mapping_group,
+       NULL,
+};
+
+static int skx_iio_set_mapping(struct intel_uncore_type *type)
+{
+       char buf[64];
+       int ret;
+       long die = -1;
+       struct attribute **attrs = NULL;
+       struct dev_ext_attribute *eas = NULL;
+
+       ret = skx_iio_get_topology(type);
+       if (ret)
+               return ret;
+
+       /* One more for NULL. */
+       attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL);
+       if (!attrs)
+               goto err;
+
+       eas = kcalloc(uncore_max_dies(), sizeof(*eas), GFP_KERNEL);
+       if (!eas)
+               goto err;
+
+       for (die = 0; die < uncore_max_dies(); die++) {
+               sprintf(buf, "die%ld", die);
+               sysfs_attr_init(&eas[die].attr.attr);
+               eas[die].attr.attr.name = kstrdup(buf, GFP_KERNEL);
+               if (!eas[die].attr.attr.name)
+                       goto err;
+               eas[die].attr.attr.mode = 0444;
+               eas[die].attr.show = skx_iio_mapping_show;
+               eas[die].attr.store = NULL;
+               eas[die].var = (void *)die;
+               attrs[die] = &eas[die].attr.attr;
+       }
+       skx_iio_mapping_group.attrs = attrs;
+
+       return 0;
+err:
+       for (; die >= 0; die--)
+               kfree(eas[die].attr.attr.name);
+       kfree(eas);
+       kfree(attrs);
+       kfree(type->topology);
+       type->attr_update = NULL;
+       return -ENOMEM;
+}
+
+static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
+{
+       struct attribute **attr = skx_iio_mapping_group.attrs;
+
+       if (!attr)
+               return;
+
+       for (; *attr; attr++)
+               kfree((*attr)->name);
+       kfree(attr_to_ext_attr(*skx_iio_mapping_group.attrs));
+       kfree(skx_iio_mapping_group.attrs);
+       skx_iio_mapping_group.attrs = NULL;
+       kfree(type->topology);
+}
+
 static struct intel_uncore_type skx_uncore_iio = {
        .name                   = "iio",
        .num_counters           = 4,
@@ -3626,6 +3814,9 @@ static struct intel_uncore_type skx_uncore_iio = {
        .constraints            = skx_uncore_iio_constraints,
        .ops                    = &skx_uncore_iio_ops,
        .format_group           = &skx_uncore_iio_format_group,
+       .attr_update            = skx_iio_attr_update,
+       .set_mapping            = skx_iio_set_mapping,
+       .cleanup_mapping        = skx_iio_cleanup_mapping,
 };
 
 enum perf_uncore_iio_freerunning_type_id {
@@ -4421,6 +4612,7 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
                                       unsigned int box_ctl, int mem_offset)
 {
        struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid);
+       struct intel_uncore_type *type = box->pmu->type;
        resource_size_t addr;
        u32 pci_dword;
 
@@ -4435,9 +4627,11 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
 
        addr += box_ctl;
 
-       box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE);
-       if (!box->io_addr)
+       box->io_addr = ioremap(addr, type->mmio_map_size);
+       if (!box->io_addr) {
+               pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
                return;
+       }
 
        writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
 }
@@ -4480,6 +4674,9 @@ static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box,
        if (!box->io_addr)
                return;
 
+       if (!uncore_mmio_is_valid_offset(box, hwc->config_base))
+               return;
+
        writel(hwc->config | SNBEP_PMON_CTL_EN,
               box->io_addr + hwc->config_base);
 }
@@ -4492,6 +4689,9 @@ static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box,
        if (!box->io_addr)
                return;
 
+       if (!uncore_mmio_is_valid_offset(box, hwc->config_base))
+               return;
+
        writel(hwc->config, box->io_addr + hwc->config_base);
 }
 
@@ -4530,6 +4730,7 @@ static struct intel_uncore_type snr_uncore_imc = {
        .event_mask     = SNBEP_PMON_RAW_EVENT_MASK,
        .box_ctl        = SNR_IMC_MMIO_PMON_BOX_CTL,
        .mmio_offset    = SNR_IMC_MMIO_OFFSET,
+       .mmio_map_size  = SNR_IMC_MMIO_SIZE,
        .ops            = &snr_uncore_mmio_ops,
        .format_group   = &skx_uncore_format_group,
 };
@@ -4570,6 +4771,7 @@ static struct intel_uncore_type snr_uncore_imc_free_running = {
        .num_counters           = 3,
        .num_boxes              = 1,
        .num_freerunning_types  = SNR_IMC_FREERUNNING_TYPE_MAX,
+       .mmio_map_size          = SNR_IMC_MMIO_SIZE,
        .freerunning            = snr_imc_freerunning,
        .ops                    = &snr_uncore_imc_freerunning_ops,
        .event_descs            = snr_uncore_imc_freerunning_events,
@@ -4987,6 +5189,7 @@ static struct intel_uncore_type icx_uncore_imc = {
        .event_mask     = SNBEP_PMON_RAW_EVENT_MASK,
        .box_ctl        = SNR_IMC_MMIO_PMON_BOX_CTL,
        .mmio_offset    = SNR_IMC_MMIO_OFFSET,
+       .mmio_map_size  = SNR_IMC_MMIO_SIZE,
        .ops            = &icx_uncore_mmio_ops,
        .format_group   = &skx_uncore_format_group,
 };
@@ -5044,6 +5247,7 @@ static struct intel_uncore_type icx_uncore_imc_free_running = {
        .num_counters           = 5,
        .num_boxes              = 4,
        .num_freerunning_types  = ICX_IMC_FREERUNNING_TYPE_MAX,
+       .mmio_map_size          = SNR_IMC_MMIO_SIZE,
        .freerunning            = icx_imc_freerunning,
        .ops                    = &icx_uncore_imc_freerunning_ops,
        .event_descs            = icx_uncore_imc_freerunning_events,
index e17a3d8..7b68ab5 100644 (file)
@@ -78,6 +78,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
 #define PERF_X86_EVENT_LARGE_PEBS      0x0400 /* use large PEBS */
 #define PERF_X86_EVENT_PEBS_VIA_PT     0x0800 /* use PT buffer for PEBS */
 #define PERF_X86_EVENT_PAIR            0x1000 /* Large Increment per Cycle */
+#define PERF_X86_EVENT_LBR_SELECT      0x2000 /* Save/Restore MSR_LBR_SELECT */
 
 struct amd_nb {
        int nb_id;  /* NorthBridge id */
@@ -178,6 +179,17 @@ struct intel_excl_cntrs {
 struct x86_perf_task_context;
 #define MAX_LBR_ENTRIES                32
 
+/*
+ * LBR format identifiers. LBR_FORMAT_MAX_KNOWN aliases the highest value
+ * listed here (LBR_FORMAT_TIME) and must be updated when a format is added.
+ */
+enum {
+       LBR_FORMAT_32           = 0x00,
+       LBR_FORMAT_LIP          = 0x01,
+       LBR_FORMAT_EIP          = 0x02,
+       LBR_FORMAT_EIP_FLAGS    = 0x03,
+       LBR_FORMAT_EIP_FLAGS2   = 0x04,
+       LBR_FORMAT_INFO         = 0x05,
+       LBR_FORMAT_TIME         = 0x06,
+       LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
+};
+
 enum {
        X86_PERF_KFREE_SHARED = 0,
        X86_PERF_KFREE_EXCL   = 1,
@@ -233,10 +245,15 @@ struct cpu_hw_events {
        int                             lbr_pebs_users;
        struct perf_branch_stack        lbr_stack;
        struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
-       struct er_account               *lbr_sel;
+       union {
+               struct er_account               *lbr_sel;
+               struct er_account               *lbr_ctl;
+       };
        u64                             br_sel;
-       struct x86_perf_task_context    *last_task_ctx;
+       void                            *last_task_ctx;
        int                             last_log_id;
+       int                             lbr_select;
+       void                            *lbr_xsave;
 
        /*
         * Intel host/guest exclude bits
@@ -673,13 +690,37 @@ struct x86_pmu {
        /*
         * Intel LBR
         */
-       unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
-       int             lbr_nr;                    /* hardware stack size */
-       u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
-       const int       *lbr_sel_map;              /* lbr_select mappings */
+       unsigned int    lbr_tos, lbr_from, lbr_to,
+                       lbr_info, lbr_nr;          /* LBR base regs and size */
+       union {
+               u64     lbr_sel_mask;              /* LBR_SELECT valid bits */
+               u64     lbr_ctl_mask;              /* LBR_CTL valid bits */
+       };
+       union {
+               const int       *lbr_sel_map;      /* lbr_select mappings */
+               int             *lbr_ctl_map;      /* LBR_CTL mappings */
+       };
        bool            lbr_double_abort;          /* duplicated lbr aborts */
        bool            lbr_pt_coexist;            /* (LBR|BTS) may coexist with PT */
 
+       /*
+        * Intel Architectural LBR CPUID Enumeration
+        */
+       unsigned int    lbr_depth_mask:8;
+       unsigned int    lbr_deep_c_reset:1;
+       unsigned int    lbr_lip:1;
+       unsigned int    lbr_cpl:1;
+       unsigned int    lbr_filter:1;
+       unsigned int    lbr_call_stack:1;
+       unsigned int    lbr_mispred:1;
+       unsigned int    lbr_timed_lbr:1;
+       unsigned int    lbr_br_type:1;
+
+       void            (*lbr_reset)(void);
+       void            (*lbr_read)(struct cpu_hw_events *cpuc);
+       void            (*lbr_save)(void *ctx);
+       void            (*lbr_restore)(void *ctx);
+
        /*
         * Intel PT/LBR/BTS are exclusive
         */
@@ -718,17 +759,46 @@ struct x86_pmu {
        int (*aux_output_match) (struct perf_event *event);
 };
 
-struct x86_perf_task_context {
-       u64 lbr_from[MAX_LBR_ENTRIES];
-       u64 lbr_to[MAX_LBR_ENTRIES];
-       u64 lbr_info[MAX_LBR_ENTRIES];
-       int tos;
-       int valid_lbrs;
+struct x86_perf_task_context_opt {
        int lbr_callstack_users;
        int lbr_stack_state;
        int log_id;
 };
 
+struct x86_perf_task_context {
+       u64 lbr_sel;
+       int tos;
+       int valid_lbrs;
+       struct x86_perf_task_context_opt opt;
+       struct lbr_entry lbr[MAX_LBR_ENTRIES];
+};
+
+struct x86_perf_task_context_arch_lbr {
+       struct x86_perf_task_context_opt opt;
+       struct lbr_entry entries[];
+};
+
+/*
+ * Add padding to guarantee the 64-byte alignment of the state buffer.
+ *
+ * The structure is dynamically allocated. The size of the LBR state may vary
+ * based on the number of LBR registers.
+ *
+ * Do not put anything after the LBR state.
+ */
+struct x86_perf_task_context_arch_lbr_xsave {
+       struct x86_perf_task_context_opt                opt;
+
+       union {
+               struct xregs_state                      xsave;
+               struct {
+                       struct fxregs_state             i387;
+                       struct xstate_header            header;
+                       struct arch_lbr_state           lbr;
+               } __attribute__ ((packed, aligned (XSAVE_ALIGNMENT)));
+       };
+};
+
 #define x86_add_quirk(func_)                                           \
 do {                                                                   \
        static struct x86_pmu_quirk __quirk __initdata = {              \
@@ -777,6 +847,14 @@ static struct perf_pmu_events_ht_attr event_attr_##v = {           \
 struct pmu *x86_get_pmu(void);
 extern struct x86_pmu x86_pmu __read_mostly;
 
+/*
+ * Return the x86_perf_task_context_opt embedded in an opaque task context.
+ * The context is interpreted as the arch LBR layout when the CPU has
+ * X86_FEATURE_ARCH_LBR, and as struct x86_perf_task_context otherwise.
+ */
+static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
+{
+       struct x86_perf_task_context_arch_lbr *arch_ctx = ctx;
+       struct x86_perf_task_context *legacy_ctx = ctx;
+
+       if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+               return &legacy_ctx->opt;
+
+       return &arch_ctx->opt;
+}
+
 static inline bool x86_pmu_has_lbr_callstack(void)
 {
        return  x86_pmu.lbr_sel_map &&
@@ -989,7 +1067,10 @@ void release_ds_buffers(void);
 
 void reserve_ds_buffers(void);
 
+void release_lbr_buffers(void);
+
 extern struct event_constraint bts_constraint;
+extern struct event_constraint vlbr_constraint;
 
 void intel_pmu_enable_bts(u64 config);
 
@@ -1041,7 +1122,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
 
 void intel_pmu_auto_reload_read(struct perf_event *event);
 
-void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
+void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
 
 void intel_ds_init(void);
 
@@ -1054,6 +1135,10 @@ u64 lbr_from_signext_quirk_wr(u64 val);
 
 void intel_pmu_lbr_reset(void);
 
+void intel_pmu_lbr_reset_32(void);
+
+void intel_pmu_lbr_reset_64(void);
+
 void intel_pmu_lbr_add(struct perf_event *event);
 
 void intel_pmu_lbr_del(struct perf_event *event);
@@ -1064,6 +1149,14 @@ void intel_pmu_lbr_disable_all(void);
 
 void intel_pmu_lbr_read(void);
 
+void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc);
+
+void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc);
+
+void intel_pmu_lbr_save(void *ctx);
+
+void intel_pmu_lbr_restore(void *ctx);
+
 void intel_pmu_lbr_init_core(void);
 
 void intel_pmu_lbr_init_nhm(void);
@@ -1080,6 +1173,8 @@ void intel_pmu_lbr_init_skl(void);
 
 void intel_pmu_lbr_init_knl(void);
 
+void intel_pmu_arch_lbr_init(void);
+
 void intel_pmu_pebs_data_source_nhm(void);
 
 void intel_pmu_pebs_data_source_skl(bool pmem);
@@ -1115,6 +1210,10 @@ static inline void release_ds_buffers(void)
 {
 }
 
+/*
+ * Empty inline stand-in for release_lbr_buffers().
+ * NOTE(review): presumably selected when the real implementation is not
+ * built; confirm against the enclosing preprocessor conditional.
+ */
+static inline void release_lbr_buffers(void)
+{
+}
+
 static inline int intel_pmu_init(void)
 {
        return 0;
index 0f2bf59..68b3882 100644 (file)
@@ -787,7 +787,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &model_hsx),
        X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &model_skl),
        X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &model_skl),
-       X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
+       X86_MATCH_VENDOR_FAM(AMD,       0x17,           &model_amd_fam17h),
+       X86_MATCH_VENDOR_FAM(HYGON,     0x18,           &model_amd_fam17h),
        {},
 };
 MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
index 898fa1a..e68827e 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Zhoaxin PMU; like Intel Architectural PerfMon-v2
+ * Zhaoxin PMU; like Intel Architectural PerfMon-v2
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
index 0f63585..5c15f95 100644 (file)
        _ASM_PTR (entry);                                       \
        .popsection
 
-#else
+#else /* ! __ASSEMBLY__ */
 # define _EXPAND_EXTABLE_HANDLE(x) #x
 # define _ASM_EXTABLE_HANDLE(from, to, handler)                        \
        " .pushsection \"__ex_table\",\"a\"\n"                  \
        _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
 
 /* For C file, we already have NOKPROBE_SYMBOL macro */
-#endif
 
-#ifndef __ASSEMBLY__
 /*
  * This output constraint should be used for any inline asm which has a "call"
  * instruction.  Otherwise the asm may be inserted before the frame pointer
  */
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
-#endif
+#endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_ASM_H */
index bf35e47..b6cac6e 100644 (file)
@@ -14,8 +14,6 @@
  * resource counting etc..
  */
 
-#define ATOMIC_INIT(i) { (i) }
-
 /**
  * arch_atomic_read - read atomic variable
  * @v: pointer of type atomic_t
index 680c320..9191280 100644 (file)
 # error "Invalid value for CONFIG_PHYSICAL_ALIGN"
 #endif
 
-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
 # define BOOT_HEAP_SIZE                0x400000
-#else /* !CONFIG_KERNEL_BZIP2 */
+#elif defined(CONFIG_KERNEL_ZSTD)
+/*
+ * Zstd needs to allocate the ZSTD_DCtx in order to decompress the kernel.
+ * The ZSTD_DCtx is ~160KB, so set the heap size to 192KB because it is a
+ * round number and to allow some slack.
+ */
+# define BOOT_HEAP_SIZE                 0x30000
+#else
 # define BOOT_HEAP_SIZE                 0x10000
 #endif
 
index 0281895..297fa12 100644 (file)
@@ -3,6 +3,7 @@
 #define _ASM_X86_BUG_H
 
 #include <linux/stringify.h>
+#include <linux/instrumentation.h>
 
 /*
  * Despite that some emulators terminate on UD2, we use it for WARN().
index 1a2eafc..0a7fe03 100644 (file)
@@ -3,7 +3,7 @@
 #define _ASM_X86_CMPXCHG_32_H
 
 /*
- * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you
+ * Note: if you use set64_bit(), __cmpxchg64(), or their variants,
  *       you need to test for the feature in boot_cpu_data.
  */
 
index adf45cf..2901d5d 100644 (file)
@@ -96,6 +96,7 @@
 #define X86_FEATURE_SYSCALL32          ( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32         ( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD           ( 3*32+16) /* REP microcode works well */
+/* free                                        ( 3*32+17) */
 #define X86_FEATURE_LFENCE_RDTSC       ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER          ( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL               ( 3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_EXTD_APICID                ( 3*32+26) /* Extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM            ( 3*32+27) /* AMD multi-node processor */
 #define X86_FEATURE_APERFMPERF         ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+/* free                                        ( 3*32+29) */
 #define X86_FEATURE_NONSTOP_TSC_S3     ( 3*32+30) /* TSC doesn't stop in S3 state */
 #define X86_FEATURE_TSC_KNOWN_FREQ     ( 3*32+31) /* TSC has known frequency */
 
 #define X86_FEATURE_TSX_FORCE_ABORT    (18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE          (18*32+14) /* SERIALIZE instruction */
 #define X86_FEATURE_PCONFIG            (18*32+18) /* Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR           (18*32+19) /* Intel ARCH LBR */
 #define X86_FEATURE_SPEC_CTRL          (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP                (18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_FLUSH_L1D          (18*32+28) /* Flush L1D cache */
index 9b8cb50..b8f1dc0 100644 (file)
@@ -74,16 +74,26 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
 #else
 # include <asm-generic/div64.h>
 
-static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+/*
+ * Will generate an #DE when the result doesn't fit u64, could fix with an
+ * __ex_table[] entry when it becomes an issue.
+ */
+static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
 {
        u64 q;
 
        asm ("mulq %2; divq %3" : "=a" (q)
-                               : "a" (a), "rm" ((u64)mul), "rm" ((u64)div)
+                               : "a" (a), "rm" (mul), "rm" (div)
                                : "rdx");
 
        return q;
 }
+#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+
+/*
+ * 32-bit multiplier/divisor variant: the u32 arguments are widened to u64
+ * by the call and the work is done by mul_u64_u64_div_u64().
+ */
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+{
+       return mul_u64_u64_div_u64(a, mul, div);
+}
 #define mul_u64_u32_div        mul_u64_u32_div
 
 #endif /* CONFIG_X86_32 */
index 845e748..6b10cda 100644 (file)
@@ -274,7 +274,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
  */
 static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
 {
-       u64 mask = -1;
+       u64 mask = xfeatures_mask_all;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;
@@ -320,7 +320,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
  */
 static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
 {
-       u64 mask = -1;
+       u64 mask = xfeatures_mask_all;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;
@@ -356,6 +356,9 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
  */
 static inline int copy_xregs_to_user(struct xregs_state __user *buf)
 {
+       u64 mask = xfeatures_mask_user();
+       u32 lmask = mask;
+       u32 hmask = mask >> 32;
        int err;
 
        /*
@@ -367,7 +370,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf)
                return -EFAULT;
 
        stac();
-       XSTATE_OP(XSAVE, buf, -1, -1, err);
+       XSTATE_OP(XSAVE, buf, lmask, hmask, err);
        clac();
 
        return err;
@@ -408,43 +411,7 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
        return err;
 }
 
-/*
- * These must be called with preempt disabled. Returns
- * 'true' if the FPU state is still intact and we can
- * keep registers active.
- *
- * The legacy FNSAVE instruction cleared all FPU state
- * unconditionally, so registers are essentially destroyed.
- * Modern FPU state can be kept in registers, if there are
- * no pending FP exceptions.
- */
-static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
-{
-       if (likely(use_xsave())) {
-               copy_xregs_to_kernel(&fpu->state.xsave);
-
-               /*
-                * AVX512 state is tracked here because its use is
-                * known to slow the max clock speed of the core.
-                */
-               if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
-                       fpu->avx512_timestamp = jiffies;
-               return 1;
-       }
-
-       if (likely(use_fxsr())) {
-               copy_fxregs_to_kernel(fpu);
-               return 1;
-       }
-
-       /*
-        * Legacy FPU register saving, FNSAVE always clears FPU registers,
-        * so we have to mark them inactive:
-        */
-       asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
-
-       return 0;
-}
+extern int copy_fpregs_to_fpstate(struct fpu *fpu);
 
 static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
 {
index f098f6c..c87364e 100644 (file)
@@ -114,6 +114,12 @@ enum xfeature {
        XFEATURE_Hi16_ZMM,
        XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
        XFEATURE_PKRU,
+       XFEATURE_RSRVD_COMP_10,
+       XFEATURE_RSRVD_COMP_11,
+       XFEATURE_RSRVD_COMP_12,
+       XFEATURE_RSRVD_COMP_13,
+       XFEATURE_RSRVD_COMP_14,
+       XFEATURE_LBR,
 
        XFEATURE_MAX,
 };
@@ -128,6 +134,7 @@ enum xfeature {
 #define XFEATURE_MASK_Hi16_ZMM         (1 << XFEATURE_Hi16_ZMM)
 #define XFEATURE_MASK_PT               (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
 #define XFEATURE_MASK_PKRU             (1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_LBR              (1 << XFEATURE_LBR)
 
 #define XFEATURE_MASK_FPSSE            (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
 #define XFEATURE_MASK_AVX512           (XFEATURE_MASK_OPMASK \
@@ -229,6 +236,26 @@ struct pkru_state {
        u32                             pad;
 } __packed;
 
+/*
+ * State component 15: Architectural LBR configuration state.
+ * The size of Arch LBR state depends on the number of LBRs (lbr_depth).
+ */
+
+struct lbr_entry {
+       u64 from;
+       u64 to;
+       u64 info;
+};
+
+struct arch_lbr_state {
+       u64 lbr_ctl;
+       u64 lbr_depth;
+       u64 ler_from;
+       u64 ler_to;
+       u64 ler_info;
+       struct lbr_entry                entries[];
+} __packed;
+
 struct xstate_header {
        u64                             xfeatures;
        u64                             xcomp_bv;
index 422d836..1559554 100644 (file)
@@ -21,6 +21,8 @@
 #define XSAVE_YMM_SIZE     256
 #define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
 
+#define XSAVE_ALIGNMENT     64
+
 /* All currently supported user features */
 #define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
                                      XFEATURE_MASK_SSE | \
 /* All currently supported supervisor features */
 #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
 
+/*
+ * A supervisor state component may not always contain valuable information,
+ * and its size may be huge. Saving/restoring such supervisor state components
+ * at each context switch can cause high CPU and space overhead, which should
+ * be avoided. Such supervisor state components should only be saved/restored
+ * on demand. The on-demand dynamic supervisor features are set in this mask.
+ *
+ * Unlike the existing supported supervisor features, a dynamic supervisor
+ * feature does not allocate a buffer in task->fpu, and the corresponding
+ * supervisor state component cannot be saved/restored at each context switch.
+ *
+ * To support a dynamic supervisor feature, a developer should follow the
+ * dos and don'ts as below:
+ * - Do dynamically allocate a buffer for the supervisor state component.
+ * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the
+ *   state component to/from the buffer.
+ * - Don't set the bit corresponding to the dynamic supervisor feature in
+ *   IA32_XSS at run time, since it has been set at boot time.
+ */
+#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR)
+
 /*
  * Unsupported supervisor features. When a supervisor feature in this mask is
  * supported in the future, move it to the supported supervisor feature mask.
@@ -43,6 +66,7 @@
 
 /* All supervisor states including supported and unsupported states. */
 #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
+                                     XFEATURE_MASK_DYNAMIC | \
                                      XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
 
 #ifdef CONFIG_X86_64
@@ -63,6 +87,14 @@ static inline u64 xfeatures_mask_user(void)
        return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
 }
 
+/*
+ * Return the dynamic supervisor feature mask for this system:
+ * XFEATURE_MASK_DYNAMIC, minus the LBR bit when the boot CPU does not
+ * report X86_FEATURE_ARCH_LBR.
+ */
+static inline u64 xfeatures_mask_dynamic(void)
+{
+       u64 dyn_mask = XFEATURE_MASK_DYNAMIC;
+
+       if (!boot_cpu_has(X86_FEATURE_ARCH_LBR))
+               dyn_mask &= ~XFEATURE_MASK_LBR;
+
+       return dyn_mask;
+}
+
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
 extern void __init update_regset_xstate_info(unsigned int size,
@@ -71,11 +103,15 @@ extern void __init update_regset_xstate_info(unsigned int size,
 void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
 const void *get_xsave_field_ptr(int xfeature_nr);
 int using_compacted_format(void);
+int xfeature_size(int xfeature_nr);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
 void copy_supervisor_to_kernel(struct xregs_state *xsave);
+void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
+void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
+
 
 /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
 int validate_user_xstate_header(const struct xstate_header *hdr);
index 80d3b30..d74128c 100644 (file)
 void idtentry_enter_user(struct pt_regs *regs);
 void idtentry_exit_user(struct pt_regs *regs);
 
-bool idtentry_enter_cond_rcu(struct pt_regs *regs);
-void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);
+typedef struct idtentry_state {
+       bool exit_rcu;
+} idtentry_state_t;
+
+idtentry_state_t idtentry_enter(struct pt_regs *regs);
+void idtentry_exit(struct pt_regs *regs, idtentry_state_t state);
+
+bool idtentry_enter_nmi(struct pt_regs *regs);
+void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
 
 /**
  * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
@@ -54,12 +61,12 @@ static __always_inline void __##func(struct pt_regs *regs);         \
                                                                        \
 __visible noinstr void func(struct pt_regs *regs)                      \
 {                                                                      \
-       bool rcu_exit = idtentry_enter_cond_rcu(regs);                  \
+       idtentry_state_t state = idtentry_enter(regs);                  \
                                                                        \
        instrumentation_begin();                                        \
        __##func (regs);                                                \
        instrumentation_end();                                          \
-       idtentry_exit_cond_rcu(regs, rcu_exit);                         \
+       idtentry_exit(regs, state);                                     \
 }                                                                      \
                                                                        \
 static __always_inline void __##func(struct pt_regs *regs)
@@ -101,12 +108,12 @@ static __always_inline void __##func(struct pt_regs *regs,                \
 __visible noinstr void func(struct pt_regs *regs,                      \
                            unsigned long error_code)                   \
 {                                                                      \
-       bool rcu_exit = idtentry_enter_cond_rcu(regs);                  \
+       idtentry_state_t state = idtentry_enter(regs);                  \
                                                                        \
        instrumentation_begin();                                        \
        __##func (regs, error_code);                                    \
        instrumentation_end();                                          \
-       idtentry_exit_cond_rcu(regs, rcu_exit);                         \
+       idtentry_exit(regs, state);                                     \
 }                                                                      \
                                                                        \
 static __always_inline void __##func(struct pt_regs *regs,             \
@@ -199,7 +206,7 @@ static __always_inline void __##func(struct pt_regs *regs, u8 vector);      \
 __visible noinstr void func(struct pt_regs *regs,                      \
                            unsigned long error_code)                   \
 {                                                                      \
-       bool rcu_exit = idtentry_enter_cond_rcu(regs);                  \
+       idtentry_state_t state = idtentry_enter(regs);                  \
                                                                        \
        instrumentation_begin();                                        \
        irq_enter_rcu();                                                \
@@ -207,7 +214,7 @@ __visible noinstr void func(struct pt_regs *regs,                   \
        __##func (regs, (u8)error_code);                                \
        irq_exit_rcu();                                                 \
        instrumentation_end();                                          \
-       idtentry_exit_cond_rcu(regs, rcu_exit);                         \
+       idtentry_exit(regs, state);                                     \
 }                                                                      \
                                                                        \
 static __always_inline void __##func(struct pt_regs *regs, u8 vector)
@@ -241,7 +248,7 @@ static void __##func(struct pt_regs *regs);                         \
                                                                        \
 __visible noinstr void func(struct pt_regs *regs)                      \
 {                                                                      \
-       bool rcu_exit = idtentry_enter_cond_rcu(regs);                  \
+       idtentry_state_t state = idtentry_enter(regs);                  \
                                                                        \
        instrumentation_begin();                                        \
        irq_enter_rcu();                                                \
@@ -249,7 +256,7 @@ __visible noinstr void func(struct pt_regs *regs)                   \
        run_on_irqstack_cond(__##func, regs, regs);                     \
        irq_exit_rcu();                                                 \
        instrumentation_end();                                          \
-       idtentry_exit_cond_rcu(regs, rcu_exit);                         \
+       idtentry_exit(regs, state);                                     \
 }                                                                      \
                                                                        \
 static noinline void __##func(struct pt_regs *regs)
@@ -270,7 +277,7 @@ static __always_inline void __##func(struct pt_regs *regs);         \
                                                                        \
 __visible noinstr void func(struct pt_regs *regs)                      \
 {                                                                      \
-       bool rcu_exit = idtentry_enter_cond_rcu(regs);                  \
+       idtentry_state_t state = idtentry_enter(regs);                  \
                                                                        \
        instrumentation_begin();                                        \
        __irq_enter_raw();                                              \
@@ -278,7 +285,7 @@ __visible noinstr void func(struct pt_regs *regs)                   \
        __##func (regs);                                                \
        __irq_exit_raw();                                               \
        instrumentation_end();                                          \
-       idtentry_exit_cond_rcu(regs, rcu_exit);                         \
+       idtentry_exit(regs, state);                                     \
 }                                                                      \
                                                                        \
 static __always_inline void __##func(struct pt_regs *regs)
index f5a796d..438ccd4 100644 (file)
@@ -12,7 +12,6 @@
 
 #define REG_TYPE_R32           0
 #define REG_TYPE_R64           1
-#define REG_TYPE_XMM           2
 #define REG_TYPE_INVALID       100
 
        .macro R32_NUM opd r32
 #endif
        .endm
 
-       .macro XMM_NUM opd xmm
-       \opd = REG_NUM_INVALID
-       .ifc \xmm,%xmm0
-       \opd = 0
-       .endif
-       .ifc \xmm,%xmm1
-       \opd = 1
-       .endif
-       .ifc \xmm,%xmm2
-       \opd = 2
-       .endif
-       .ifc \xmm,%xmm3
-       \opd = 3
-       .endif
-       .ifc \xmm,%xmm4
-       \opd = 4
-       .endif
-       .ifc \xmm,%xmm5
-       \opd = 5
-       .endif
-       .ifc \xmm,%xmm6
-       \opd = 6
-       .endif
-       .ifc \xmm,%xmm7
-       \opd = 7
-       .endif
-       .ifc \xmm,%xmm8
-       \opd = 8
-       .endif
-       .ifc \xmm,%xmm9
-       \opd = 9
-       .endif
-       .ifc \xmm,%xmm10
-       \opd = 10
-       .endif
-       .ifc \xmm,%xmm11
-       \opd = 11
-       .endif
-       .ifc \xmm,%xmm12
-       \opd = 12
-       .endif
-       .ifc \xmm,%xmm13
-       \opd = 13
-       .endif
-       .ifc \xmm,%xmm14
-       \opd = 14
-       .endif
-       .ifc \xmm,%xmm15
-       \opd = 15
-       .endif
-       .endm
-
        .macro REG_TYPE type reg
        R32_NUM reg_type_r32 \reg
        R64_NUM reg_type_r64 \reg
-       XMM_NUM reg_type_xmm \reg
        .if reg_type_r64 <> REG_NUM_INVALID
        \type = REG_TYPE_R64
        .elseif reg_type_r32 <> REG_NUM_INVALID
        \type = REG_TYPE_R32
-       .elseif reg_type_xmm <> REG_NUM_INVALID
-       \type = REG_TYPE_XMM
        .else
        \type = REG_TYPE_INVALID
        .endif
        .endm
 
-       .macro PFX_OPD_SIZE
-       .byte 0x66
-       .endm
-
        .macro PFX_REX opd1 opd2 W=0
        .if ((\opd1 | \opd2) & 8) || \W
        .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
        .macro MODRM mod opd1 opd2
        .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
        .endm
-
-       .macro PSHUFB_XMM xmm1 xmm2
-       XMM_NUM pshufb_opd1 \xmm1
-       XMM_NUM pshufb_opd2 \xmm2
-       PFX_OPD_SIZE
-       PFX_REX pshufb_opd1 pshufb_opd2
-       .byte 0x0f, 0x38, 0x00
-       MODRM 0xc0 pshufb_opd1 pshufb_opd2
-       .endm
-
-       .macro PCLMULQDQ imm8 xmm1 xmm2
-       XMM_NUM clmul_opd1 \xmm1
-       XMM_NUM clmul_opd2 \xmm2
-       PFX_OPD_SIZE
-       PFX_REX clmul_opd1 clmul_opd2
-       .byte 0x0f, 0x3a, 0x44
-       MODRM 0xc0 clmul_opd1 clmul_opd2
-       .byte \imm8
-       .endm
-
-       .macro PEXTRD imm8 xmm gpr
-       R32_NUM extrd_opd1 \gpr
-       XMM_NUM extrd_opd2 \xmm
-       PFX_OPD_SIZE
-       PFX_REX extrd_opd1 extrd_opd2
-       .byte 0x0f, 0x3a, 0x16
-       MODRM 0xc0 extrd_opd1 extrd_opd2
-       .byte \imm8
-       .endm
-
-       .macro AESKEYGENASSIST rcon xmm1 xmm2
-       XMM_NUM aeskeygen_opd1 \xmm1
-       XMM_NUM aeskeygen_opd2 \xmm2
-       PFX_OPD_SIZE
-       PFX_REX aeskeygen_opd1 aeskeygen_opd2
-       .byte 0x0f, 0x3a, 0xdf
-       MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2
-       .byte \rcon
-       .endm
-
-       .macro AESIMC xmm1 xmm2
-       XMM_NUM aesimc_opd1 \xmm1
-       XMM_NUM aesimc_opd2 \xmm2
-       PFX_OPD_SIZE
-       PFX_REX aesimc_opd1 aesimc_opd2
-       .byte 0x0f, 0x38, 0xdb
-       MODRM 0xc0 aesimc_opd1 aesimc_opd2
-       .endm
-
-       .macro AESENC xmm1 xmm2
-       XMM_NUM aesenc_opd1 \xmm1
-       XMM_NUM aesenc_opd2 \xmm2
-       PFX_OPD_SIZE
-       PFX_REX aesenc_opd1 aesenc_opd2
-       .byte 0x0f, 0x38, 0xdc
-       MODRM 0xc0 aesenc_opd1 aesenc_opd2
-       .endm
-
-       .macro AESENCLAST xmm1 xmm2
-       XMM_NUM aesenclast_opd1 \xmm1
-       XMM_NUM aesenclast_opd2 \xmm2
-       PFX_OPD_SIZE
-       PFX_REX aesenclast_opd1 aesenclast_opd2
-       .byte 0x0f, 0x38, 0xdd
-       MODRM 0xc0 aesenclast_opd1 aesenclast_opd2
-       .endm
-
-       .macro AESDEC xmm1 xmm2
-       XMM_NUM aesdec_opd1 \xmm1
-       XMM_NUM aesdec_opd2 \xmm2
-       PFX_OPD_SIZE
-       PFX_REX aesdec_opd1 aesdec_opd2
-       .byte 0x0f, 0x38, 0xde
-       MODRM 0xc0 aesdec_opd1 aesdec_opd2
-       .endm
-
-       .macro AESDECLAST xmm1 xmm2
-       XMM_NUM aesdeclast_opd1 \xmm1
-       XMM_NUM aesdeclast_opd2 \xmm2
-       PFX_OPD_SIZE
-       PFX_REX aesdeclast_opd1 aesdeclast_opd2
-       .byte 0x0f, 0x38, 0xdf
-       MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
-       .endm
-
-       .macro MOVQ_R64_XMM opd1 opd2
-       REG_TYPE movq_r64_xmm_opd1_type \opd1
-       .if movq_r64_xmm_opd1_type == REG_TYPE_XMM
-       XMM_NUM movq_r64_xmm_opd1 \opd1
-       R64_NUM movq_r64_xmm_opd2 \opd2
-       .else
-       R64_NUM movq_r64_xmm_opd1 \opd1
-       XMM_NUM movq_r64_xmm_opd2 \opd2
-       .endif
-       PFX_OPD_SIZE
-       PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1
-       .if movq_r64_xmm_opd1_type == REG_TYPE_XMM
-       .byte 0x0f, 0x7e
-       .else
-       .byte 0x0f, 0x6e
-       .endif
-       MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
-       .endm
 #endif
 
 #endif
index fd20a23..a1a26f6 100644 (file)
@@ -99,7 +99,6 @@ struct IR_IO_APIC_route_entry {
 struct irq_alloc_info;
 struct ioapic_domain_cfg;
 
-#define IOAPIC_AUTO                    -1
 #define IOAPIC_EDGE                    0
 #define IOAPIC_LEVEL                   1
 
index 247ab14..d1514e7 100644 (file)
@@ -36,8 +36,9 @@ extern void die(const char *, struct pt_regs *,long);
 void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
-extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
-extern void show_iret_regs(struct pt_regs *regs);
+extern void __show_regs(struct pt_regs *regs, enum show_regs_mode,
+                       const char *log_lvl);
+extern void show_iret_regs(struct pt_regs *regs, const char *log_lvl);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
index 073eb7a..143bc9a 100644 (file)
@@ -66,6 +66,8 @@ struct arch_specific_insn {
         */
        bool boostable;
        bool if_modifier;
+       /* Number of bytes of text poked */
+       int tp_len;
 };
 
 struct arch_optimized_insn {
index 848ce43..5049f6c 100644 (file)
@@ -43,9 +43,10 @@ void __init sme_enable(struct boot_params *bp);
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
 int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
 
+void __init mem_encrypt_free_decrypted_mem(void);
+
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
-void __init mem_encrypt_free_decrypted_mem(void);
 
 bool sme_active(void);
 bool sev_active(void);
@@ -77,6 +78,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 
+static inline void mem_encrypt_free_decrypted_mem(void) { }
+
 #define __bss_decrypted
 
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
index e8370e6..b87b8f8 100644 (file)
 #define LBR_INFO_MISPRED               BIT_ULL(63)
 #define LBR_INFO_IN_TX                 BIT_ULL(62)
 #define LBR_INFO_ABORT                 BIT_ULL(61)
+#define LBR_INFO_CYC_CNT_VALID         BIT_ULL(60)
 #define LBR_INFO_CYCLES                        0xffff
+#define LBR_INFO_BR_TYPE_OFFSET                56
+#define LBR_INFO_BR_TYPE               (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+
+#define MSR_ARCH_LBR_CTL               0x000014ce
+#define ARCH_LBR_CTL_LBREN             BIT(0)
+#define ARCH_LBR_CTL_CPL_OFFSET                1
+#define ARCH_LBR_CTL_CPL               (0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
+#define ARCH_LBR_CTL_STACK_OFFSET      3
+#define ARCH_LBR_CTL_STACK             (0x1ull << ARCH_LBR_CTL_STACK_OFFSET)
+#define ARCH_LBR_CTL_FILTER_OFFSET     16
+#define ARCH_LBR_CTL_FILTER            (0x7full << ARCH_LBR_CTL_FILTER_OFFSET)
+#define MSR_ARCH_LBR_DEPTH             0x000014cf
+#define MSR_ARCH_LBR_FROM_0            0x00001500
+#define MSR_ARCH_LBR_TO_0              0x00001600
+#define MSR_ARCH_LBR_INFO_0            0x00001200
 
 #define MSR_IA32_PEBS_ENABLE           0x000003f1
 #define MSR_PEBS_DATA_CFG              0x000003f2
 #define MSR_AMD64_PATCH_LEVEL          0x0000008b
 #define MSR_AMD64_TSC_RATIO            0xc0000104
 #define MSR_AMD64_NB_CFG               0xc001001f
-#define MSR_AMD64_CPUID_FN_1           0xc0011004
 #define MSR_AMD64_PATCH_LOADER         0xc0010020
 #define MSR_AMD_PERF_CTL               0xc0010062
 #define MSR_AMD_PERF_STATUS            0xc0010063
 #define MSR_AMD64_OSVW_STATUS          0xc0010141
 #define MSR_AMD_PPIN_CTL               0xc00102f0
 #define MSR_AMD_PPIN                   0xc00102f1
+#define MSR_AMD64_CPUID_FN_1           0xc0011004
 #define MSR_AMD64_LS_CFG               0xc0011020
 #define MSR_AMD64_DC_CFG               0xc0011022
 #define MSR_AMD64_BU_CFG2              0xc001102a
 #define MSR_F16H_DR0_ADDR_MASK         0xc0011027
 
 /* Fam 15h MSRs */
+#define MSR_F15H_CU_PWR_ACCUMULATOR     0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
 #define MSR_F15H_PERF_CTL              0xc0010200
 #define MSR_F15H_PERF_CTL0             MSR_F15H_PERF_CTL
 #define MSR_F15H_PERF_CTL1             (MSR_F15H_PERF_CTL + 2)
index 2278797..a3c33b7 100644 (file)
@@ -4,33 +4,15 @@
 
 #ifdef CONFIG_X86_64
 #define __percpu_seg           gs
-#define __percpu_mov_op                movq
 #else
 #define __percpu_seg           fs
-#define __percpu_mov_op                movl
 #endif
 
 #ifdef __ASSEMBLY__
 
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *    var - variable name
- *    reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- *    PER_CPU(cpu_gdt_descr, %ebx)
- */
 #ifdef CONFIG_SMP
-#define PER_CPU(var, reg)                                              \
-       __percpu_mov_op %__percpu_seg:this_cpu_off, reg;                \
-       lea var(reg), reg
 #define PER_CPU_VAR(var)       %__percpu_seg:var
 #else /* ! SMP */
-#define PER_CPU(var, reg)      __percpu_mov_op $var, reg
 #define PER_CPU_VAR(var)       var
 #endif /* SMP */
 
 
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(qual, op, var, val)               \
-do {                                                   \
-       typedef typeof(var) pto_T__;                    \
-       if (0) {                                        \
-               pto_T__ pto_tmp__;                      \
-               pto_tmp__ = (val);                      \
-               (void)pto_tmp__;                        \
-       }                                               \
-       switch (sizeof(var)) {                          \
-       case 1:                                         \
-               asm qual (op "b %1,"__percpu_arg(0)     \
-                   : "+m" (var)                        \
-                   : "qi" ((pto_T__)(val)));           \
-               break;                                  \
-       case 2:                                         \
-               asm qual (op "w %1,"__percpu_arg(0)     \
-                   : "+m" (var)                        \
-                   : "ri" ((pto_T__)(val)));           \
-               break;                                  \
-       case 4:                                         \
-               asm qual (op "l %1,"__percpu_arg(0)     \
-                   : "+m" (var)                        \
-                   : "ri" ((pto_T__)(val)));           \
-               break;                                  \
-       case 8:                                         \
-               asm qual (op "q %1,"__percpu_arg(0)     \
-                   : "+m" (var)                        \
-                   : "re" ((pto_T__)(val)));           \
-               break;                                  \
-       default: __bad_percpu_size();                   \
-       }                                               \
+
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
+
+#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val) ((u64)(val))
+
+#define __pcpu_op1_1(op, dst) op "b " dst
+#define __pcpu_op1_2(op, dst) op "w " dst
+#define __pcpu_op1_4(op, dst) op "l " dst
+#define __pcpu_op1_8(op, dst) op "q " dst
+
+#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
+#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
+#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
+#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
+
+#define __pcpu_reg_1(mod, x) mod "q" (x)
+#define __pcpu_reg_2(mod, x) mod "r" (x)
+#define __pcpu_reg_4(mod, x) mod "r" (x)
+#define __pcpu_reg_8(mod, x) mod "r" (x)
+
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
+
+#define percpu_to_op(size, qual, op, _var, _val)                       \
+do {                                                                   \
+       __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);        \
+       if (0) {                                                        \
+               typeof(_var) pto_tmp__;                                 \
+               pto_tmp__ = (_val);                                     \
+               (void)pto_tmp__;                                        \
+       }                                                               \
+       asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var]))   \
+           : [var] "+m" (_var)                                         \
+           : [val] __pcpu_reg_imm_##size(pto_val__));                  \
 } while (0)
 
+#define percpu_unary_op(size, qual, op, _var)                          \
+({                                                                     \
+       asm qual (__pcpu_op1_##size(op, __percpu_arg([var]))            \
+           : [var] "+m" (_var));                                       \
+})
+
 /*
  * Generate a percpu add to memory instruction and optimize code
  * if one is added or subtracted.
  */
-#define percpu_add_op(qual, var, val)                                  \
+#define percpu_add_op(size, qual, var, val)                            \
 do {                                                                   \
-       typedef typeof(var) pao_T__;                                    \
        const int pao_ID__ = (__builtin_constant_p(val) &&              \
                              ((val) == 1 || (val) == -1)) ?            \
                                (int)(val) : 0;                         \
        if (0) {                                                        \
-               pao_T__ pao_tmp__;                                      \
+               typeof(var) pao_tmp__;                                  \
                pao_tmp__ = (val);                                      \
                (void)pao_tmp__;                                        \
        }                                                               \
-       switch (sizeof(var)) {                                          \
-       case 1:                                                         \
-               if (pao_ID__ == 1)                                      \
-                       asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
-               else if (pao_ID__ == -1)                                \
-                       asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
-               else                                                    \
-                       asm qual ("addb %1, "__percpu_arg(0)            \
-                           : "+m" (var)                                \
-                           : "qi" ((pao_T__)(val)));                   \
-               break;                                                  \
-       case 2:                                                         \
-               if (pao_ID__ == 1)                                      \
-                       asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
-               else if (pao_ID__ == -1)                                \
-                       asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
-               else                                                    \
-                       asm qual ("addw %1, "__percpu_arg(0)            \
-                           : "+m" (var)                                \
-                           : "ri" ((pao_T__)(val)));                   \
-               break;                                                  \
-       case 4:                                                         \
-               if (pao_ID__ == 1)                                      \
-                       asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
-               else if (pao_ID__ == -1)                                \
-                       asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
-               else                                                    \
-                       asm qual ("addl %1, "__percpu_arg(0)            \
-                           : "+m" (var)                                \
-                           : "ri" ((pao_T__)(val)));                   \
-               break;                                                  \
-       case 8:                                                         \
-               if (pao_ID__ == 1)                                      \
-                       asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
-               else if (pao_ID__ == -1)                                \
-                       asm qual ("decq "__percpu_arg(0) : "+m" (var)); \
-               else                                                    \
-                       asm qual ("addq %1, "__percpu_arg(0)            \
-                           : "+m" (var)                                \
-                           : "re" ((pao_T__)(val)));                   \
-               break;                                                  \
-       default: __bad_percpu_size();                                   \
-       }                                                               \
+       if (pao_ID__ == 1)                                              \
+               percpu_unary_op(size, qual, "inc", var);                \
+       else if (pao_ID__ == -1)                                        \
+               percpu_unary_op(size, qual, "dec", var);                \
+       else                                                            \
+               percpu_to_op(size, qual, "add", var, val);              \
 } while (0)
 
-#define percpu_from_op(qual, op, var)                  \
-({                                                     \
-       typeof(var) pfo_ret__;                          \
-       switch (sizeof(var)) {                          \
-       case 1:                                         \
-               asm qual (op "b "__percpu_arg(1)",%0"   \
-                   : "=q" (pfo_ret__)                  \
-                   : "m" (var));                       \
-               break;                                  \
-       case 2:                                         \
-               asm qual (op "w "__percpu_arg(1)",%0"   \
-                   : "=r" (pfo_ret__)                  \
-                   : "m" (var));                       \
-               break;                                  \
-       case 4:                                         \
-               asm qual (op "l "__percpu_arg(1)",%0"   \
-                   : "=r" (pfo_ret__)                  \
-                   : "m" (var));                       \
-               break;                                  \
-       case 8:                                         \
-               asm qual (op "q "__percpu_arg(1)",%0"   \
-                   : "=r" (pfo_ret__)                  \
-                   : "m" (var));                       \
-               break;                                  \
-       default: __bad_percpu_size();                   \
-       }                                               \
-       pfo_ret__;                                      \
-})
-
-#define percpu_stable_op(op, var)                      \
-({                                                     \
-       typeof(var) pfo_ret__;                          \
-       switch (sizeof(var)) {                          \
-       case 1:                                         \
-               asm(op "b "__percpu_arg(P1)",%0"        \
-                   : "=q" (pfo_ret__)                  \
-                   : "p" (&(var)));                    \
-               break;                                  \
-       case 2:                                         \
-               asm(op "w "__percpu_arg(P1)",%0"        \
-                   : "=r" (pfo_ret__)                  \
-                   : "p" (&(var)));                    \
-               break;                                  \
-       case 4:                                         \
-               asm(op "l "__percpu_arg(P1)",%0"        \
-                   : "=r" (pfo_ret__)                  \
-                   : "p" (&(var)));                    \
-               break;                                  \
-       case 8:                                         \
-               asm(op "q "__percpu_arg(P1)",%0"        \
-                   : "=r" (pfo_ret__)                  \
-                   : "p" (&(var)));                    \
-               break;                                  \
-       default: __bad_percpu_size();                   \
-       }                                               \
-       pfo_ret__;                                      \
+#define percpu_from_op(size, qual, op, _var)                           \
+({                                                                     \
+       __pcpu_type_##size pfo_val__;                                   \
+       asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]")  \
+           : [val] __pcpu_reg_##size("=", pfo_val__)                   \
+           : [var] "m" (_var));                                        \
+       (typeof(_var))(unsigned long) pfo_val__;                        \
 })
 
-#define percpu_unary_op(qual, op, var)                 \
-({                                                     \
-       switch (sizeof(var)) {                          \
-       case 1:                                         \
-               asm qual (op "b "__percpu_arg(0)        \
-                   : "+m" (var));                      \
-               break;                                  \
-       case 2:                                         \
-               asm qual (op "w "__percpu_arg(0)        \
-                   : "+m" (var));                      \
-               break;                                  \
-       case 4:                                         \
-               asm qual (op "l "__percpu_arg(0)        \
-                   : "+m" (var));                      \
-               break;                                  \
-       case 8:                                         \
-               asm qual (op "q "__percpu_arg(0)        \
-                   : "+m" (var));                      \
-               break;                                  \
-       default: __bad_percpu_size();                   \
-       }                                               \
+#define percpu_stable_op(size, op, _var)                               \
+({                                                                     \
+       __pcpu_type_##size pfo_val__;                                   \
+       asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")       \
+           : [val] __pcpu_reg_##size("=", pfo_val__)                   \
+           : [var] "p" (&(_var)));                                     \
+       (typeof(_var))(unsigned long) pfo_val__;                        \
 })
 
 /*
  * Add return operation
  */
-#define percpu_add_return_op(qual, var, val)                           \
+#define percpu_add_return_op(size, qual, _var, _val)                   \
 ({                                                                     \
-       typeof(var) paro_ret__ = val;                                   \
-       switch (sizeof(var)) {                                          \
-       case 1:                                                         \
-               asm qual ("xaddb %0, "__percpu_arg(1)                   \
-                           : "+q" (paro_ret__), "+m" (var)             \
-                           : : "memory");                              \
-               break;                                                  \
-       case 2:                                                         \
-               asm qual ("xaddw %0, "__percpu_arg(1)                   \
-                           : "+r" (paro_ret__), "+m" (var)             \
-                           : : "memory");                              \
-               break;                                                  \
-       case 4:                                                         \
-               asm qual ("xaddl %0, "__percpu_arg(1)                   \
-                           : "+r" (paro_ret__), "+m" (var)             \
-                           : : "memory");                              \
-               break;                                                  \
-       case 8:                                                         \
-               asm qual ("xaddq %0, "__percpu_arg(1)                   \
-                           : "+re" (paro_ret__), "+m" (var)            \
-                           : : "memory");                              \
-               break;                                                  \
-       default: __bad_percpu_size();                                   \
-       }                                                               \
-       paro_ret__ += val;                                              \
-       paro_ret__;                                                     \
+       __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val);       \
+       asm qual (__pcpu_op2_##size("xadd", "%[tmp]",                   \
+                                    __percpu_arg([var]))               \
+                 : [tmp] __pcpu_reg_##size("+", paro_tmp__),           \
+                   [var] "+m" (_var)                                   \
+                 : : "memory");                                        \
+       (typeof(_var))(unsigned long) (paro_tmp__ + _val);              \
 })
 
 /*
@@ -299,85 +176,38 @@ do {                                                                      \
  * expensive due to the implied lock prefix.  The processor cannot prefetch
  * cachelines if xchg is used.
  */
-#define percpu_xchg_op(qual, var, nval)                                        \
+#define percpu_xchg_op(size, qual, _var, _nval)                                \
 ({                                                                     \
-       typeof(var) pxo_ret__;                                          \
-       typeof(var) pxo_new__ = (nval);                                 \
-       switch (sizeof(var)) {                                          \
-       case 1:                                                         \
-               asm qual ("\n\tmov "__percpu_arg(1)",%%al"              \
-                   "\n1:\tcmpxchgb %2, "__percpu_arg(1)                \
-                   "\n\tjnz 1b"                                        \
-                           : "=&a" (pxo_ret__), "+m" (var)             \
-                           : "q" (pxo_new__)                           \
-                           : "memory");                                \
-               break;                                                  \
-       case 2:                                                         \
-               asm qual ("\n\tmov "__percpu_arg(1)",%%ax"              \
-                   "\n1:\tcmpxchgw %2, "__percpu_arg(1)                \
-                   "\n\tjnz 1b"                                        \
-                           : "=&a" (pxo_ret__), "+m" (var)             \
-                           : "r" (pxo_new__)                           \
-                           : "memory");                                \
-               break;                                                  \
-       case 4:                                                         \
-               asm qual ("\n\tmov "__percpu_arg(1)",%%eax"             \
-                   "\n1:\tcmpxchgl %2, "__percpu_arg(1)                \
-                   "\n\tjnz 1b"                                        \
-                           : "=&a" (pxo_ret__), "+m" (var)             \
-                           : "r" (pxo_new__)                           \
-                           : "memory");                                \
-               break;                                                  \
-       case 8:                                                         \
-               asm qual ("\n\tmov "__percpu_arg(1)",%%rax"             \
-                   "\n1:\tcmpxchgq %2, "__percpu_arg(1)                \
-                   "\n\tjnz 1b"                                        \
-                           : "=&a" (pxo_ret__), "+m" (var)             \
-                           : "r" (pxo_new__)                           \
-                           : "memory");                                \
-               break;                                                  \
-       default: __bad_percpu_size();                                   \
-       }                                                               \
-       pxo_ret__;                                                      \
+       __pcpu_type_##size pxo_old__;                                   \
+       __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval);       \
+       asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]),         \
+                                   "%[oval]")                          \
+                 "\n1:\t"                                              \
+                 __pcpu_op2_##size("cmpxchg", "%[nval]",               \
+                                   __percpu_arg([var]))                \
+                 "\n\tjnz 1b"                                          \
+                 : [oval] "=&a" (pxo_old__),                           \
+                   [var] "+m" (_var)                                   \
+                 : [nval] __pcpu_reg_##size(, pxo_new__)               \
+                 : "memory");                                          \
+       (typeof(_var))(unsigned long) pxo_old__;                        \
 })
 
 /*
  * cmpxchg has no such implied lock semantics as a result it is much
  * more efficient for cpu local operations.
  */
-#define percpu_cmpxchg_op(qual, var, oval, nval)                       \
+#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval)              \
 ({                                                                     \
-       typeof(var) pco_ret__;                                          \
-       typeof(var) pco_old__ = (oval);                                 \
-       typeof(var) pco_new__ = (nval);                                 \
-       switch (sizeof(var)) {                                          \
-       case 1:                                                         \
-               asm qual ("cmpxchgb %2, "__percpu_arg(1)                \
-                           : "=a" (pco_ret__), "+m" (var)              \
-                           : "q" (pco_new__), "0" (pco_old__)          \
-                           : "memory");                                \
-               break;                                                  \
-       case 2:                                                         \
-               asm qual ("cmpxchgw %2, "__percpu_arg(1)                \
-                           : "=a" (pco_ret__), "+m" (var)              \
-                           : "r" (pco_new__), "0" (pco_old__)          \
-                           : "memory");                                \
-               break;                                                  \
-       case 4:                                                         \
-               asm qual ("cmpxchgl %2, "__percpu_arg(1)                \
-                           : "=a" (pco_ret__), "+m" (var)              \
-                           : "r" (pco_new__), "0" (pco_old__)          \
-                           : "memory");                                \
-               break;                                                  \
-       case 8:                                                         \
-               asm qual ("cmpxchgq %2, "__percpu_arg(1)                \
-                           : "=a" (pco_ret__), "+m" (var)              \
-                           : "r" (pco_new__), "0" (pco_old__)          \
-                           : "memory");                                \
-               break;                                                  \
-       default: __bad_percpu_size();                                   \
-       }                                                               \
-       pco_ret__;                                                      \
+       __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval);       \
+       __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval);       \
+       asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]",               \
+                                   __percpu_arg([var]))                \
+                 : [oval] "+a" (pco_old__),                            \
+                   [var] "+m" (_var)                                   \
+                 : [nval] __pcpu_reg_##size(, pco_new__)               \
+                 : "memory");                                          \
+       (typeof(_var))(unsigned long) pco_old__;                        \
 })
 
 /*
@@ -389,24 +219,28 @@ do {                                                                      \
  * per-thread variables implemented as per-cpu variables and thus
  * stable for the duration of the respective task.
  */
-#define this_cpu_read_stable(var)      percpu_stable_op("mov", var)
-
-#define raw_cpu_read_1(pcp)            percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_2(pcp)            percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_4(pcp)            percpu_from_op(, "mov", pcp)
-
-#define raw_cpu_write_1(pcp, val)      percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val)      percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val)      percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val)                percpu_add_op(, (pcp), val)
-#define raw_cpu_add_2(pcp, val)                percpu_add_op(, (pcp), val)
-#define raw_cpu_add_4(pcp, val)                percpu_add_op(, (pcp), val)
-#define raw_cpu_and_1(pcp, val)                percpu_to_op(, "and", (pcp), val)
-#define raw_cpu_and_2(pcp, val)                percpu_to_op(, "and", (pcp), val)
-#define raw_cpu_and_4(pcp, val)                percpu_to_op(, "and", (pcp), val)
-#define raw_cpu_or_1(pcp, val)         percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_or_2(pcp, val)         percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_or_4(pcp, val)         percpu_to_op(, "or", (pcp), val)
+#define this_cpu_read_stable_1(pcp)    percpu_stable_op(1, "mov", pcp)
+#define this_cpu_read_stable_2(pcp)    percpu_stable_op(2, "mov", pcp)
+#define this_cpu_read_stable_4(pcp)    percpu_stable_op(4, "mov", pcp)
+#define this_cpu_read_stable_8(pcp)    percpu_stable_op(8, "mov", pcp)
+#define this_cpu_read_stable(pcp)      __pcpu_size_call_return(this_cpu_read_stable_, pcp)
+
+#define raw_cpu_read_1(pcp)            percpu_from_op(1, , "mov", pcp)
+#define raw_cpu_read_2(pcp)            percpu_from_op(2, , "mov", pcp)
+#define raw_cpu_read_4(pcp)            percpu_from_op(4, , "mov", pcp)
+
+#define raw_cpu_write_1(pcp, val)      percpu_to_op(1, , "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val)      percpu_to_op(2, , "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val)      percpu_to_op(4, , "mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val)                percpu_add_op(1, , (pcp), val)
+#define raw_cpu_add_2(pcp, val)                percpu_add_op(2, , (pcp), val)
+#define raw_cpu_add_4(pcp, val)                percpu_add_op(4, , (pcp), val)
+#define raw_cpu_and_1(pcp, val)                percpu_to_op(1, , "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val)                percpu_to_op(2, , "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val)                percpu_to_op(4, , "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val)         percpu_to_op(1, , "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val)         percpu_to_op(2, , "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val)         percpu_to_op(4, , "or", (pcp), val)
 
 /*
  * raw_cpu_xchg() can use a load-store since it is not required to be
@@ -423,38 +257,38 @@ do {                                                                      \
 #define raw_cpu_xchg_2(pcp, val)       raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_4(pcp, val)       raw_percpu_xchg_op(pcp, val)
 
-#define this_cpu_read_1(pcp)           percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_read_2(pcp)           percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_read_4(pcp)           percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_write_1(pcp, val)     percpu_to_op(volatile, "mov", (pcp), val)
-#define this_cpu_write_2(pcp, val)     percpu_to_op(volatile, "mov", (pcp), val)
-#define this_cpu_write_4(pcp, val)     percpu_to_op(volatile, "mov", (pcp), val)
-#define this_cpu_add_1(pcp, val)       percpu_add_op(volatile, (pcp), val)
-#define this_cpu_add_2(pcp, val)       percpu_add_op(volatile, (pcp), val)
-#define this_cpu_add_4(pcp, val)       percpu_add_op(volatile, (pcp), val)
-#define this_cpu_and_1(pcp, val)       percpu_to_op(volatile, "and", (pcp), val)
-#define this_cpu_and_2(pcp, val)       percpu_to_op(volatile, "and", (pcp), val)
-#define this_cpu_and_4(pcp, val)       percpu_to_op(volatile, "and", (pcp), val)
-#define this_cpu_or_1(pcp, val)                percpu_to_op(volatile, "or", (pcp), val)
-#define this_cpu_or_2(pcp, val)                percpu_to_op(volatile, "or", (pcp), val)
-#define this_cpu_or_4(pcp, val)                percpu_to_op(volatile, "or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval)     percpu_xchg_op(volatile, pcp, nval)
-#define this_cpu_xchg_2(pcp, nval)     percpu_xchg_op(volatile, pcp, nval)
-#define this_cpu_xchg_4(pcp, nval)     percpu_xchg_op(volatile, pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val)         percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_2(pcp, val)         percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_4(pcp, val)         percpu_add_return_op(, pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval)     percpu_cmpxchg_op(, pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval)     percpu_cmpxchg_op(, pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval)     percpu_cmpxchg_op(, pcp, oval, nval)
-
-#define this_cpu_add_return_1(pcp, val)                percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_add_return_2(pcp, val)                percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_add_return_4(pcp, val)                percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval)    percpu_cmpxchg_op(volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval)    percpu_cmpxchg_op(volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval)    percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_read_1(pcp)           percpu_from_op(1, volatile, "mov", pcp)
+#define this_cpu_read_2(pcp)           percpu_from_op(2, volatile, "mov", pcp)
+#define this_cpu_read_4(pcp)           percpu_from_op(4, volatile, "mov", pcp)
+#define this_cpu_write_1(pcp, val)     percpu_to_op(1, volatile, "mov", (pcp), val)
+#define this_cpu_write_2(pcp, val)     percpu_to_op(2, volatile, "mov", (pcp), val)
+#define this_cpu_write_4(pcp, val)     percpu_to_op(4, volatile, "mov", (pcp), val)
+#define this_cpu_add_1(pcp, val)       percpu_add_op(1, volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val)       percpu_add_op(2, volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val)       percpu_add_op(4, volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val)       percpu_to_op(1, volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val)       percpu_to_op(2, volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val)       percpu_to_op(4, volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val)                percpu_to_op(1, volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val)                percpu_to_op(2, volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val)                percpu_to_op(4, volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval)     percpu_xchg_op(1, volatile, pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)     percpu_xchg_op(2, volatile, pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)     percpu_xchg_op(4, volatile, pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val)         percpu_add_return_op(1, , pcp, val)
+#define raw_cpu_add_return_2(pcp, val)         percpu_add_return_op(2, , pcp, val)
+#define raw_cpu_add_return_4(pcp, val)         percpu_add_return_op(4, , pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval)     percpu_cmpxchg_op(1, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval)     percpu_cmpxchg_op(2, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval)     percpu_cmpxchg_op(4, , pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val)                percpu_add_return_op(1, volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val)                percpu_add_return_op(2, volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val)                percpu_add_return_op(4, volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval)    percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval)    percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval)    percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
 
 #ifdef CONFIG_X86_CMPXCHG64
 #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)            \
@@ -478,23 +312,23 @@ do {                                                                      \
  * 32 bit must fall back to generic operations.
  */
 #ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp)                    percpu_from_op(, "mov", pcp)
-#define raw_cpu_write_8(pcp, val)              percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val)                        percpu_add_op(, (pcp), val)
-#define raw_cpu_and_8(pcp, val)                        percpu_to_op(, "and", (pcp), val)
-#define raw_cpu_or_8(pcp, val)                 percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val)         percpu_add_return_op(, pcp, val)
+#define raw_cpu_read_8(pcp)                    percpu_from_op(8, , "mov", pcp)
+#define raw_cpu_write_8(pcp, val)              percpu_to_op(8, , "mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val)                        percpu_add_op(8, , (pcp), val)
+#define raw_cpu_and_8(pcp, val)                        percpu_to_op(8, , "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val)                 percpu_to_op(8, , "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val)         percpu_add_return_op(8, , pcp, val)
 #define raw_cpu_xchg_8(pcp, nval)              raw_percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval)     percpu_cmpxchg_op(, pcp, oval, nval)
-
-#define this_cpu_read_8(pcp)                   percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_write_8(pcp, val)             percpu_to_op(volatile, "mov", (pcp), val)
-#define this_cpu_add_8(pcp, val)               percpu_add_op(volatile, (pcp), val)
-#define this_cpu_and_8(pcp, val)               percpu_to_op(volatile, "and", (pcp), val)
-#define this_cpu_or_8(pcp, val)                        percpu_to_op(volatile, "or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val)                percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval)             percpu_xchg_op(volatile, pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval)    percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval)     percpu_cmpxchg_op(8, , pcp, oval, nval)
+
+#define this_cpu_read_8(pcp)                   percpu_from_op(8, volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val)             percpu_to_op(8, volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val)               percpu_add_op(8, volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val)               percpu_to_op(8, volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val)                        percpu_to_op(8, volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val)                percpu_add_return_op(8, volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval)             percpu_xchg_op(8, volatile, pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval)    percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
 
 /*
  * Pretty complex macro to generate cmpxchg16 instruction.  The instruction
index e855e9c..0c1b137 100644 (file)
@@ -142,6 +142,46 @@ union cpuid10_edx {
        unsigned int full;
 };
 
+/*
+ * Intel Architectural LBR CPUID detection/enumeration details:
+ */
+union cpuid28_eax {
+       struct {
+               /* Supported LBR depth values */
+               unsigned int    lbr_depth_mask:8;
+               unsigned int    reserved:22;
+               /* Deep C-state Reset */
+               unsigned int    lbr_deep_c_reset:1;
+               /* IP values contain LIP */
+               unsigned int    lbr_lip:1;
+       } split;
+       unsigned int            full;
+};
+
+union cpuid28_ebx {
+       struct {
+               /* CPL Filtering Supported */
+               unsigned int    lbr_cpl:1;
+               /* Branch Filtering Supported */
+               unsigned int    lbr_filter:1;
+               /* Call-stack Mode Supported */
+               unsigned int    lbr_call_stack:1;
+       } split;
+       unsigned int            full;
+};
+
+union cpuid28_ecx {
+       struct {
+               /* Mispredict Bit Supported */
+               unsigned int    lbr_mispred:1;
+               /* Timed LBRs Supported */
+               unsigned int    lbr_timed_lbr:1;
+               /* Branch Type Field Supported */
+               unsigned int    lbr_br_type:1;
+       } split;
+       unsigned int            full;
+};
+
 struct x86_pmu_capability {
        int             version;
        int             num_counters_gp;
@@ -192,9 +232,29 @@ struct x86_pmu_capability {
 #define GLOBAL_STATUS_UNC_OVF                          BIT_ULL(61)
 #define GLOBAL_STATUS_ASIF                             BIT_ULL(60)
 #define GLOBAL_STATUS_COUNTERS_FROZEN                  BIT_ULL(59)
-#define GLOBAL_STATUS_LBRS_FROZEN                      BIT_ULL(58)
+#define GLOBAL_STATUS_LBRS_FROZEN_BIT                  58
+#define GLOBAL_STATUS_LBRS_FROZEN                      BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
 #define GLOBAL_STATUS_TRACE_TOPAPMI                    BIT_ULL(55)
 
+/*
+ * We model guest LBR event tracing as another fixed-mode PMC like BTS.
+ *
+ * We choose bit 58 because it's used to indicate LBR stack frozen state
+ * for architectural perfmon v4, also we unconditionally mask that bit in
+ * the handle_pmi_common(), so it'll never be set in the overflow handling.
+ *
+ * With this fake counter assigned, the guest LBR event user (such as KVM),
+ * can program the LBR registers on its own, and we don't actually do anything
+ * with them in the host context.
+ */
+#define INTEL_PMC_IDX_FIXED_VLBR       (GLOBAL_STATUS_LBRS_FROZEN_BIT)
+
+/*
+ * Pseudo-encoding the guest LBR event as event=0x00,umask=0x1b,
+ * since it would claim bit 58 which is effectively Fixed26.
+ */
+#define INTEL_FIXED_VLBR_EVENT 0x1b00
+
 /*
  * Adaptive PEBS v4
  */
@@ -222,14 +282,6 @@ struct pebs_xmm {
        u64 xmm[16*2];  /* two entries for each register */
 };
 
-struct pebs_lbr_entry {
-       u64 from, to, info;
-};
-
-struct pebs_lbr {
-       struct pebs_lbr_entry lbr[0]; /* Variable length */
-};
-
 /*
  * IBS cpuid feature detection
  */
@@ -333,6 +385,13 @@ struct perf_guest_switch_msr {
        u64 host, guest;
 };
 
+struct x86_pmu_lbr {
+       unsigned int    nr;
+       unsigned int    from;
+       unsigned int    to;
+       unsigned int    info;
+};
+
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
 extern int x86_perf_rdpmc_index(struct perf_event *event);
@@ -348,12 +407,17 @@ static inline void perf_check_microcode(void) { }
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
 #else
 static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
        *nr = 0;
        return NULL;
 }
+static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
+{
+       return -1;
+}
 #endif
 
 #ifdef CONFIG_CPU_SUP_INTEL
index 76aa21e..b836138 100644 (file)
@@ -999,15 +999,12 @@ extern int direct_gbpages;
 void init_mem_mapping(void);
 void early_alloc_pgt_buf(void);
 extern void memblock_find_dma_reserve(void);
-
-
-#ifdef CONFIG_X86_64
-extern pgd_t trampoline_pgd_entry;
-
 void __init poking_init(void);
-
 unsigned long init_memory_mapping(unsigned long start,
                                  unsigned long end, pgprot_t prot);
+
+#ifdef CONFIG_X86_64
+extern pgd_t trampoline_pgd_entry;
 #endif
 
 /* local pte updates need not use xchg for locking */
index 1b68d24..d2af8c4 100644 (file)
@@ -175,16 +175,13 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
  * and a page entry and page directory to the page they refer to.
  */
 
-/*
- * Level 4 access.
- */
-#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
+/* PGD - Level 4 access */
 
-/* PUD - Level3 access */
+/* PUD - Level 3 access */
 
-/* PMD  - Level 2 access */
+/* PMD - Level 2 access */
 
-/* PTE - Level 1 access. */
+/* PTE - Level 1 access */
 
 /*
  * Encode and de-code a swap entry
index 1992187..6bfc878 100644 (file)
  *    field of the struct page
  *
  * SECTION_SIZE_BITS           2^n: size of each section
- * MAX_PHYSADDR_BITS           2^n: max size of physical address space
- * MAX_PHYSMEM_BITS            2^n: how much memory we can have in that space
+ * MAX_PHYSMEM_BITS            2^n: max size of physical address space
  *
  */
 
 #ifdef CONFIG_X86_32
 # ifdef CONFIG_X86_PAE
 #  define SECTION_SIZE_BITS    29
-#  define MAX_PHYSADDR_BITS    36
 #  define MAX_PHYSMEM_BITS     36
 # else
 #  define SECTION_SIZE_BITS    26
-#  define MAX_PHYSADDR_BITS    32
 #  define MAX_PHYSMEM_BITS     32
 # endif
 #else /* CONFIG_X86_32 */
 # define SECTION_SIZE_BITS     27 /* matt - 128 is convenient right now */
-# define MAX_PHYSADDR_BITS     (pgtable_l5_enabled() ? 52 : 44)
 # define MAX_PHYSMEM_BITS      (pgtable_l5_enabled() ? 52 : 46)
 #endif
 
index 79d8d54..f423457 100644 (file)
@@ -193,7 +193,7 @@ static inline void sched_clear_itmt_support(void)
 }
 #endif /* CONFIG_SCHED_MC_PRIO */
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
 #include <asm/cpufeature.h>
 
 DECLARE_STATIC_KEY_FALSE(arch_scale_freq_key);
index 8a0c25c..b7b2624 100644 (file)
@@ -7,9 +7,6 @@
 
 #include <asm/processor.h>
 
-#define NS_SCALE       10 /* 2^10, carefully chosen */
-#define US_SCALE       32 /* 2^32, arbitralrily chosen */
-
 /*
  * Standard way to access the cycle counter.
  */
index 18dfa07..2f3e8f2 100644 (file)
@@ -314,11 +314,14 @@ do {                                                                      \
 
 #define __get_user_size(x, ptr, size, retval)                          \
 do {                                                                   \
+       unsigned char x_u8__;                                           \
+                                                                       \
        retval = 0;                                                     \
        __chk_user_ptr(ptr);                                            \
        switch (size) {                                                 \
        case 1:                                                         \
-               __get_user_asm(x, ptr, retval, "b", "=q");              \
+               __get_user_asm(x_u8__, ptr, retval, "b", "=q");         \
+               (x) = x_u8__;                                           \
                break;                                                  \
        case 2:                                                         \
                __get_user_asm(x, ptr, retval, "w", "=r");              \
index 60ca0af..5738c36 100644 (file)
@@ -682,7 +682,7 @@ static inline int uv_node_to_blade_id(int nid)
        return nid;
 }
 
-/* Convert a cpu number to the the UV blade number */
+/* Convert a CPU number to the UV blade number */
 static inline int uv_cpu_to_blade_id(int cpu)
 {
        return uv_node_to_blade_id(cpu_to_node(cpu));
index 8669c6b..600a141 100644 (file)
@@ -255,7 +255,7 @@ struct boot_params {
  *     currently supportd through this PV boot path.
  * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform
  *     systems which do not have the PCI legacy interfaces.
- * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC for
+ * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC
  *     for settop boxes and media devices, the use of a subarch for CE4100
  *     is more of a hack...
  */
index 6e63231..3abc131 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/perf_event.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
@@ -54,7 +55,7 @@ __setup("noreplace-smp", setup_noreplace_smp);
 #define DPRINTK(fmt, args...)                                          \
 do {                                                                   \
        if (debug_alternative)                                          \
-               printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args);   \
+               printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args);            \
 } while (0)
 
 #define DUMP_BYTES(buf, len, fmt, args...)                             \
@@ -65,7 +66,7 @@ do {                                                                  \
                if (!(len))                                             \
                        break;                                          \
                                                                        \
-               printk(KERN_DEBUG fmt, ##args);                         \
+               printk(KERN_DEBUG pr_fmt(fmt), ##args);                 \
                for (j = 0; j < (len) - 1; j++)                         \
                        printk(KERN_CONT "%02hhx ", buf[j]);            \
                printk(KERN_CONT "%02hhx\n", buf[j]);                   \
@@ -1002,6 +1003,7 @@ struct text_poke_loc {
        s32 rel32;
        u8 opcode;
        const u8 text[POKE_MAX_OPCODE_SIZE];
+       u8 old;
 };
 
 struct bp_patching_desc {
@@ -1169,8 +1171,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
        /*
         * First step: add a int3 trap to the address that will be patched.
         */
-       for (i = 0; i < nr_entries; i++)
+       for (i = 0; i < nr_entries; i++) {
+               tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
                text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
+       }
 
        text_poke_sync();
 
@@ -1178,14 +1182,45 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
         * Second step: update all but the first byte of the patched range.
         */
        for (do_sync = 0, i = 0; i < nr_entries; i++) {
+               u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
                int len = text_opcode_size(tp[i].opcode);
 
                if (len - INT3_INSN_SIZE > 0) {
+                       memcpy(old + INT3_INSN_SIZE,
+                              text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+                              len - INT3_INSN_SIZE);
                        text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
                                  (const char *)tp[i].text + INT3_INSN_SIZE,
                                  len - INT3_INSN_SIZE);
                        do_sync++;
                }
+
+               /*
+                * Emit a perf event to record the text poke, primarily to
+                * support Intel PT decoding which must walk the executable code
+                * to reconstruct the trace. The flow up to here is:
+                *   - write INT3 byte
+                *   - IPI-SYNC
+                *   - write instruction tail
+                * At this point the actual control flow will be through the
+                * INT3 and handler and not hit the old or new instruction.
+                * Intel PT outputs FUP/TIP packets for the INT3, so the flow
+                * can still be decoded. Subsequently:
+                *   - emit RECORD_TEXT_POKE with the new instruction
+                *   - IPI-SYNC
+                *   - write first byte
+                *   - IPI-SYNC
+                * So before the text poke event timestamp, the decoder will see
+                * either the old instruction flow or FUP/TIP of INT3. After the
+                * text poke event timestamp, the decoder will see either the
+                * new instruction flow or FUP/TIP of INT3. Thus decoders can
+                * use the timestamp as the point at which to modify the
+                * executable code.
+                * The old instruction is recorded so that the event can be
+                * processed forwards or backwards.
+                */
+               perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
+                                    tp[i].text, len);
        }
 
        if (do_sync) {
index 81ffcfb..21325a4 100644 (file)
@@ -2335,8 +2335,22 @@ static int mp_irqdomain_create(int ioapic)
 
+/*
+ * Remove the irqdomain of IOAPIC @idx, if one is set, and free its fwnode
+ * when the domain configuration has no device (cfg->dev is NULL).
+ */
 static void ioapic_destroy_irqdomain(int idx)
 {
+       struct ioapic_domain_cfg *cfg = &ioapics[idx].irqdomain_cfg;
+       struct fwnode_handle *fn;
+
        if (ioapics[idx].irqdomain) {
+               /*
+                * Fetch the fwnode only once the domain is known to be
+                * non-NULL, and before the domain is removed.
+                */
+               fn = ioapics[idx].irqdomain->fwnode;
                irq_domain_remove(ioapics[idx].irqdomain);
+               if (!cfg->dev)
+                       irq_domain_free_fwnode(fn);
                ioapics[idx].irqdomain = NULL;
        }
 }
index 7649da2..dae32d9 100644 (file)
@@ -560,6 +560,10 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
                 * as that can corrupt the affinity move state.
                 */
                irqd_set_handle_enforce_irqctx(irqd);
+
+               /* Don't invoke affinity setter on deactivated interrupts */
+               irqd_set_affinity_on_activate(irqd);
+
                /*
                 * Legacy vectors are already assigned when the IOAPIC
                 * takes them over. They stay on the same vector. This is
index 7401cc1..48ce445 100644 (file)
@@ -133,15 +133,15 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
        show_opcodes(regs, loglvl);
 }
 
-void show_iret_regs(struct pt_regs *regs)
+void show_iret_regs(struct pt_regs *regs, const char *log_lvl)
 {
-       show_ip(regs, KERN_DEFAULT);
-       printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+       show_ip(regs, log_lvl);
+       printk("%sRSP: %04x:%016lx EFLAGS: %08lx", log_lvl, (int)regs->ss,
                regs->sp, regs->flags);
 }
 
 static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
-                                 bool partial)
+                                 bool partial, const char *log_lvl)
 {
        /*
         * These on_stack() checks aren't strictly necessary: the unwind code
@@ -153,7 +153,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
         * they can be printed in the right context.
         */
        if (!partial && on_stack(info, regs, sizeof(*regs))) {
-               __show_regs(regs, SHOW_REGS_SHORT);
+               __show_regs(regs, SHOW_REGS_SHORT, log_lvl);
 
        } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
                                       IRET_FRAME_SIZE)) {
@@ -162,7 +162,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
                 * full pt_regs might not have been saved yet.  In that case
                 * just print the iret frame.
                 */
-               show_iret_regs(regs);
+               show_iret_regs(regs, log_lvl);
        }
 }
 
@@ -217,7 +217,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        printk("%s <%s>\n", log_lvl, stack_name);
 
                if (regs)
-                       show_regs_if_on_stack(&stack_info, regs, partial);
+                       show_regs_if_on_stack(&stack_info, regs, partial, log_lvl);
 
                /*
                 * Scan the stack, printing any text addresses we find.  At the
@@ -278,7 +278,7 @@ next:
                        /* if the frame has entry regs, print them */
                        regs = unwind_get_entry_regs(&state, &partial);
                        if (regs)
-                               show_regs_if_on_stack(&stack_info, regs, partial);
+                               show_regs_if_on_stack(&stack_info, regs, partial, log_lvl);
                }
 
                if (stack_name)
@@ -352,7 +352,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
        oops_exit();
 
        /* Executive summary in case the oops scrolled away */
-       __show_regs(&exec_summary_regs, SHOW_REGS_ALL);
+       __show_regs(&exec_summary_regs, SHOW_REGS_ALL, KERN_DEFAULT);
 
        if (!signr)
                return;
@@ -444,9 +444,12 @@ void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr)
 
 void show_regs(struct pt_regs *regs)
 {
+       enum show_regs_mode print_kernel_regs;
+
        show_regs_print_info(KERN_DEFAULT);
 
-       __show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL);
+       print_kernel_regs = user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL;
+       __show_regs(regs, print_kernel_regs, KERN_DEFAULT);
 
        /*
         * When in-kernel, we also print out the stack at the time of the fault..
index 15247b9..eb86a2b 100644 (file)
@@ -82,6 +82,45 @@ bool irq_fpu_usable(void)
 }
 EXPORT_SYMBOL(irq_fpu_usable);
 
+/*
+ * Must be called with preemption disabled. Returns 1
+ * if the FPU state is still intact and we can keep the
+ * registers active, 0 if they have been destroyed.
+ *
+ * The legacy FNSAVE instruction cleared all FPU state
+ * unconditionally, so registers are essentially destroyed.
+ * Modern FPU state can be kept in registers, if there are
+ * no pending FP exceptions.
+ */
+int copy_fpregs_to_fpstate(struct fpu *fpu)
+{
+       if (likely(use_xsave())) {
+               copy_xregs_to_kernel(&fpu->state.xsave);
+
+               /*
+                * AVX512 state is tracked here because its use is
+                * known to slow the max clock speed of the core.
+                */
+               if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+                       fpu->avx512_timestamp = jiffies;
+               return 1;
+       }
+
+       if (likely(use_fxsr())) {
+               copy_fxregs_to_kernel(fpu);
+               return 1;
+       }
+
+       /*
+        * Legacy FPU register saving, FNSAVE always clears FPU registers,
+        * so we have to mark them inactive:
+        */
+       asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
+
+       return 0;
+}
+EXPORT_SYMBOL(copy_fpregs_to_fpstate);
+
 void kernel_fpu_begin(void)
 {
        preempt_disable();
index ad3a2b3..be2a68a 100644 (file)
@@ -233,8 +233,10 @@ void fpu__init_cpu_xstate(void)
        /*
         * MSR_IA32_XSS sets supervisor states managed by XSAVES.
         */
-       if (boot_cpu_has(X86_FEATURE_XSAVES))
-               wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+       if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+               wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+                                    xfeatures_mask_dynamic());
+       }
 }
 
 static bool xfeature_enabled(enum xfeature xfeature)
@@ -486,7 +488,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
        return ebx;
 }
 
-static int xfeature_size(int xfeature_nr)
+int xfeature_size(int xfeature_nr)
 {
        u32 eax, ebx, ecx, edx;
 
@@ -598,7 +600,8 @@ static void check_xstate_against_struct(int nr)
         */
        if ((nr < XFEATURE_YMM) ||
            (nr >= XFEATURE_MAX) ||
-           (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
+           (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
+           ((nr >= XFEATURE_RSRVD_COMP_10) && (nr <= XFEATURE_LBR))) {
                WARN_ONCE(1, "no structure for xstate: %d\n", nr);
                XSTATE_WARN_ON(1);
        }
@@ -847,8 +850,10 @@ void fpu__resume_cpu(void)
         * Restore IA32_XSS. The same CPUID bit enumerates support
         * of XSAVES and MSR_IA32_XSS.
         */
-       if (boot_cpu_has(X86_FEATURE_XSAVES))
-               wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+       if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+               wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
+                                    xfeatures_mask_dynamic());
+       }
 }
 
 /*
@@ -1356,6 +1361,78 @@ void copy_supervisor_to_kernel(struct xregs_state *xstate)
        }
 }
 
+/**
+ * copy_dynamic_supervisor_to_kernel() - Save dynamic supervisor states to
+ *                                       an xsave area
+ * @xstate: A pointer to an xsave area
+ * @mask: Represents the dynamic supervisor features saved into the xsave area
+ *
+ * Only the dynamic supervisor states set in the mask are saved into the xsave
+ * area (see the comment on XFEATURE_MASK_DYNAMIC for the details of dynamic
+ * supervisor features). Besides the dynamic supervisor states, the legacy
+ * region and XSAVE header are also saved into the xsave area. The supervisor
+ * features in XFEATURE_MASK_SUPERVISOR_SUPPORTED and
+ * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not saved.
+ *
+ * The xsave area must be 64-byte aligned.
+ */
+void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask)
+{
+       u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
+       u32 lmask, hmask;
+       int err;
+
+       if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
+               return;
+
+       if (WARN_ON_FPU(!dynamic_mask))
+               return;
+
+       lmask = dynamic_mask;           /* low 32 bits of the 64-bit mask */
+       hmask = dynamic_mask >> 32;     /* high 32 bits of the 64-bit mask */
+
+       XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+
+       /* Should never fault when copying to a kernel buffer */
+       WARN_ON_FPU(err);
+}
+
+/**
+ * copy_kernel_to_dynamic_supervisor() - Restore dynamic supervisor states from
+ *                                       an xsave area
+ * @xstate: A pointer to an xsave area
+ * @mask: Represents the dynamic supervisor features restored from the xsave area
+ *
+ * Only the dynamic supervisor states set in the mask are restored from the
+ * xsave area (see the comment on XFEATURE_MASK_DYNAMIC for the details of
+ * dynamic supervisor features). Besides the dynamic supervisor states, the
+ * legacy region and XSAVE header are also restored from the xsave area. The
+ * supervisor features in XFEATURE_MASK_SUPERVISOR_SUPPORTED and
+ * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not restored.
+ *
+ * The xsave area must be 64-byte aligned.
+ */
+void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask)
+{
+       u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
+       u32 lmask, hmask;
+       int err;
+
+       if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
+               return;
+
+       if (WARN_ON_FPU(!dynamic_mask))
+               return;
+
+       lmask = dynamic_mask;           /* low 32 bits of the 64-bit mask */
+       hmask = dynamic_mask >> 32;     /* high 32 bits of the 64-bit mask */
+
+       XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+
+       /* Should never fault when copying from a kernel buffer */
+       WARN_ON_FPU(err);
+}
+
 #ifdef CONFIG_PROC_PID_ARCH_STATUS
 /*
  * Report the amount of time elapsed in millisecond since last AVX512
index f3c7625..282b4ee 100644 (file)
@@ -207,7 +207,7 @@ spurious_8259A_irq:
                 * lets ACK and report it. [once per IRQ]
                 */
                if (!(spurious_irq_mask & irqmask)) {
-                       printk(KERN_DEBUG
+                       printk_deferred(KERN_DEBUG
                               "spurious 8259A interrupt: IRQ%d.\n", irq);
                        spurious_irq_mask |= irqmask;
                }
index 0db2120..7ecf9ba 100644 (file)
@@ -160,7 +160,7 @@ static const __initconst struct idt_data apic_idts[] = {
 /* Must be page-aligned because the real IDT is used in the cpu entry area */
 static gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
 
-struct desc_ptr idt_descr __ro_after_init = {
+static struct desc_ptr idt_descr __ro_after_init = {
        .size           = IDT_TABLE_SIZE - 1,
        .address        = (unsigned long) idt_table,
 };
index ada39dd..fdadc37 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/hardirq.h>
 #include <linux/preempt.h>
 #include <linux/sched/debug.h>
+#include <linux/perf_event.h>
 #include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
@@ -472,6 +473,9 @@ static int arch_copy_kprobe(struct kprobe *p)
        /* Also, displacement change doesn't affect the first byte */
        p->opcode = buf[0];
 
+       p->ainsn.tp_len = len;
+       perf_event_text_poke(p->ainsn.insn, NULL, 0, buf, len);
+
        /* OK, write back the instruction(s) into ROX insn buffer */
        text_poke(p->ainsn.insn, buf, len);
 
@@ -503,12 +507,18 @@ int arch_prepare_kprobe(struct kprobe *p)
 
 void arch_arm_kprobe(struct kprobe *p)
 {
-       text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1);
+       u8 int3 = INT3_INSN_OPCODE;
+
+       text_poke(p->addr, &int3, 1);
        text_poke_sync();
+       perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
 }
 
 void arch_disarm_kprobe(struct kprobe *p)
 {
+       u8 int3 = INT3_INSN_OPCODE;
+
+       perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
        text_poke(p->addr, &p->opcode, 1);
        text_poke_sync();
 }
@@ -516,6 +526,9 @@ void arch_disarm_kprobe(struct kprobe *p)
 void arch_remove_kprobe(struct kprobe *p)
 {
        if (p->ainsn.insn) {
+               /* Record the perf event before freeing the slot */
+               perf_event_text_poke(p->ainsn.insn, p->ainsn.insn,
+                                    p->ainsn.tp_len, NULL, 0);
                free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
                p->ainsn.insn = NULL;
        }
index 7af4c61..40f3804 100644 (file)
@@ -6,6 +6,7 @@
  * Copyright (C) Hitachi Ltd., 2012
  */
 #include <linux/kprobes.h>
+#include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -352,8 +353,15 @@ int arch_within_optimized_kprobe(struct optimized_kprobe *op,
 static
 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 {
-       if (op->optinsn.insn) {
-               free_optinsn_slot(op->optinsn.insn, dirty);
+       u8 *slot = op->optinsn.insn;
+       if (slot) {
+               int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;
+
+               /* Record the perf event before freeing the slot */
+               if (dirty)
+                       perf_event_text_poke(slot, slot, len, NULL, 0);
+
+               free_optinsn_slot(slot, dirty);
                op->optinsn.insn = NULL;
                op->optinsn.size = 0;
        }
@@ -424,8 +432,15 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
                           (u8 *)op->kp.addr + op->optinsn.size);
        len += JMP32_INSN_SIZE;
 
+       /*
+        * Note len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
+        * used in __arch_remove_optimized_kprobe().
+        */
+
        /* We have to use text_poke() for instruction buffer because it is RO */
+       perf_event_text_poke(slot, NULL, 0, buf, len);
        text_poke(slot, buf, len);
+
        ret = 0;
 out:
        kfree(buf);
@@ -477,10 +492,23 @@ void arch_optimize_kprobes(struct list_head *oplist)
  */
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-       arch_arm_kprobe(&op->kp);
-       text_poke(op->kp.addr + INT3_INSN_SIZE,
-                 op->optinsn.copied_insn, DISP32_SIZE);
+       u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
+       u8 old[JMP32_INSN_SIZE];
+       u8 *addr = op->kp.addr;
+
+       memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
+       memcpy(new + INT3_INSN_SIZE,
+              op->optinsn.copied_insn,
+              JMP32_INSN_SIZE - INT3_INSN_SIZE);
+
+       text_poke(addr, new, INT3_INSN_SIZE);
        text_poke_sync();
+       text_poke(addr + INT3_INSN_SIZE,
+                 new + INT3_INSN_SIZE,
+                 JMP32_INSN_SIZE - INT3_INSN_SIZE);
+       text_poke_sync();
+
+       perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
 }
 
 /*
index df63786..3f78482 100644 (file)
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
 noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
        u32 reason = kvm_read_and_reset_apf_flags();
-       bool rcu_exit;
+       idtentry_state_t state;
 
        switch (reason) {
        case KVM_PV_REASON_PAGE_NOT_PRESENT:
@@ -243,7 +243,7 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
                return false;
        }
 
-       rcu_exit = idtentry_enter_cond_rcu(regs);
+       state = idtentry_enter(regs);
        instrumentation_begin();
 
        /*
@@ -264,7 +264,7 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
        }
 
        instrumentation_end();
-       idtentry_exit_cond_rcu(regs, rcu_exit);
+       idtentry_exit(regs, state);
        return true;
 }
 
index d7c5e44..4fc9954 100644 (file)
@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
        __this_cpu_write(last_nmi_rip, regs->ip);
 
        instrumentation_begin();
-       trace_hardirqs_off_finish();
 
        handled = nmi_handle(NMI_LOCAL, regs);
        __this_cpu_add(nmi_stats.normal, handled);
@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
                unknown_nmi_error(reason, regs);
 
 out:
-       if (regs->flags & X86_EFLAGS_IF)
-               trace_hardirqs_on_prepare();
        instrumentation_end();
 }
 
@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
 
 DEFINE_IDTENTRY_RAW(exc_nmi)
 {
+       bool irq_state;
+
        if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
                return;
 
@@ -491,14 +490,14 @@ nmi_restart:
 
        this_cpu_write(nmi_dr7, local_db_save());
 
-       nmi_enter();
+       irq_state = idtentry_enter_nmi(regs);
 
        inc_irq_stat(__nmi_count);
 
        if (!ignore_nmis)
                default_do_nmi(regs);
 
-       nmi_exit();
+       idtentry_exit_nmi(regs, irq_state);
 
        local_db_restore(this_cpu_read(nmi_dr7));
 
index acfd6d2..4f2f54e 100644 (file)
@@ -56,7 +56,8 @@
 
 #include "process.h"
 
-void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
+                const char *log_lvl)
 {
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
        unsigned long d0, d1, d2, d3, d6, d7;
@@ -67,14 +68,14 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
        else
                savesegment(gs, gs);
 
-       show_ip(regs, KERN_DEFAULT);
+       show_ip(regs, log_lvl);
 
-       printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-               regs->ax, regs->bx, regs->cx, regs->dx);
-       printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
-               regs->si, regs->di, regs->bp, regs->sp);
-       printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
-              (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);
+       printk("%sEAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+               log_lvl, regs->ax, regs->bx, regs->cx, regs->dx);
+       printk("%sESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+               log_lvl, regs->si, regs->di, regs->bp, regs->sp);
+       printk("%sDS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
+              log_lvl, (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);
 
        if (mode != SHOW_REGS_ALL)
                return;
@@ -83,8 +84,8 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
        cr2 = read_cr2();
        cr3 = __read_cr3();
        cr4 = __read_cr4();
-       printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
-                       cr0, cr2, cr3, cr4);
+       printk("%sCR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+               log_lvl, cr0, cr2, cr3, cr4);
 
        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
@@ -98,10 +99,10 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
            (d6 == DR6_RESERVED) && (d7 == 0x400))
                return;
 
-       printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
-                       d0, d1, d2, d3);
-       printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
-                       d6, d7);
+       printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+               log_lvl, d0, d1, d2, d3);
+       printk("%sDR6: %08lx DR7: %08lx\n",
+               log_lvl, d6, d7);
 }
 
 void release_thread(struct task_struct *dead_task)
index 9a97415..04d201a 100644 (file)
 #include "process.h"
 
 /* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
+                const char *log_lvl)
 {
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, es;
 
-       show_iret_regs(regs);
+       show_iret_regs(regs, log_lvl);
 
        if (regs->orig_ax != -1)
                pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
        else
                pr_cont("\n");
 
-       printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
-              regs->ax, regs->bx, regs->cx);
-       printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
-              regs->dx, regs->si, regs->di);
-       printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
-              regs->bp, regs->r8, regs->r9);
-       printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
-              regs->r10, regs->r11, regs->r12);
-       printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
-              regs->r13, regs->r14, regs->r15);
+       printk("%sRAX: %016lx RBX: %016lx RCX: %016lx\n",
+              log_lvl, regs->ax, regs->bx, regs->cx);
+       printk("%sRDX: %016lx RSI: %016lx RDI: %016lx\n",
+              log_lvl, regs->dx, regs->si, regs->di);
+       printk("%sRBP: %016lx R08: %016lx R09: %016lx\n",
+              log_lvl, regs->bp, regs->r8, regs->r9);
+       printk("%sR10: %016lx R11: %016lx R12: %016lx\n",
+              log_lvl, regs->r10, regs->r11, regs->r12);
+       printk("%sR13: %016lx R14: %016lx R15: %016lx\n",
+              log_lvl, regs->r13, regs->r14, regs->r15);
 
        if (mode == SHOW_REGS_SHORT)
                return;
@@ -93,8 +94,8 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
        if (mode == SHOW_REGS_USER) {
                rdmsrl(MSR_FS_BASE, fs);
                rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
-               printk(KERN_DEFAULT "FS:  %016lx GS:  %016lx\n",
-                      fs, shadowgs);
+               printk("%sFS:  %016lx GS:  %016lx\n",
+                      log_lvl, fs, shadowgs);
                return;
        }
 
@@ -112,12 +113,12 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
        cr3 = __read_cr3();
        cr4 = __read_cr4();
 
-       printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
-              fs, fsindex, gs, gsindex, shadowgs);
-       printk(KERN_DEFAULT "CS:  %04lx DS: %04x ES: %04x CR0: %016lx\n", regs->cs, ds,
-                       es, cr0);
-       printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
-                       cr4);
+       printk("%sFS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+              log_lvl, fs, fsindex, gs, gsindex, shadowgs);
+       printk("%sCS:  %04lx DS: %04x ES: %04x CR0: %016lx\n",
+               log_lvl, regs->cs, ds, es, cr0);
+       printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
+               log_lvl, cr2, cr3, cr4);
 
        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
@@ -129,14 +130,14 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
        /* Only print out debug registers if they are in their non-default state. */
        if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
            (d6 == DR6_RESERVED) && (d7 == 0x400))) {
-               printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
-                      d0, d1, d2);
-               printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
-                      d3, d6, d7);
+               printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n",
+                      log_lvl, d0, d1, d2);
+               printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n",
+                      log_lvl, d3, d6, d7);
        }
 
        if (boot_cpu_has(X86_FEATURE_OSPKE))
-               printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
+               printk("%sPKRU: %08x\n", log_lvl, read_pkru());
 }
 
 void release_thread(struct task_struct *dead_task)
index a11bd53..27aa04a 100644 (file)
@@ -55,6 +55,7 @@
 #include <linux/cpuidle.h>
 #include <linux/numa.h>
 #include <linux/pgtable.h>
+#include <linux/overflow.h>
 
 #include <asm/acpi.h>
 #include <asm/desc.h>
@@ -1767,6 +1768,7 @@ void native_play_dead(void)
 
 #endif
 
+#ifdef CONFIG_X86_64
 /*
  * APERF/MPERF frequency ratio computation.
  *
@@ -1965,6 +1967,7 @@ static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
 static bool intel_set_max_freq_ratio(void)
 {
        u64 base_freq, turbo_freq;
+       u64 turbo_ratio;
 
        if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
                goto out;
@@ -1990,15 +1993,23 @@ out:
        /*
         * Some hypervisors advertise X86_FEATURE_APERFMPERF
         * but then fill all MSR's with zeroes.
+        * Some CPUs have turbo boost but don't declare any turbo ratio
+        * in MSR_TURBO_RATIO_LIMIT.
         */
-       if (!base_freq) {
-               pr_debug("Couldn't determine cpu base frequency, necessary for scale-invariant accounting.\n");
+       if (!base_freq || !turbo_freq) {
+               pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
                return false;
        }
 
-       arch_turbo_freq_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE,
-                                       base_freq);
+       turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
+       if (!turbo_ratio) {
+               pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
+               return false;
+       }
+
+       arch_turbo_freq_ratio = turbo_ratio;
        arch_set_max_freq_ratio(turbo_disabled());
+
        return true;
 }
 
@@ -2038,11 +2049,19 @@ static void init_freq_invariance(bool secondary)
        }
 }
 
+static void disable_freq_invariance_workfn(struct work_struct *work)
+{
+       static_branch_disable(&arch_scale_freq_key);
+}
+
+static DECLARE_WORK(disable_freq_invariance_work,
+                   disable_freq_invariance_workfn);
+
 DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
 
 void arch_scale_freq_tick(void)
 {
-       u64 freq_scale;
+       u64 freq_scale = SCHED_CAPACITY_SCALE;
        u64 aperf, mperf;
        u64 acnt, mcnt;
 
@@ -2054,19 +2073,32 @@ void arch_scale_freq_tick(void)
 
        acnt = aperf - this_cpu_read(arch_prev_aperf);
        mcnt = mperf - this_cpu_read(arch_prev_mperf);
-       if (!mcnt)
-               return;
 
        this_cpu_write(arch_prev_aperf, aperf);
        this_cpu_write(arch_prev_mperf, mperf);
 
-       acnt <<= 2*SCHED_CAPACITY_SHIFT;
-       mcnt *= arch_max_freq_ratio;
+       if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
+               goto error;
+
+       if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+               goto error;
 
        freq_scale = div64_u64(acnt, mcnt);
+       if (!freq_scale)
+               goto error;
 
        if (freq_scale > SCHED_CAPACITY_SCALE)
                freq_scale = SCHED_CAPACITY_SCALE;
 
        this_cpu_write(arch_freq_scale, freq_scale);
+       return;
+
+error:
+       pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
+       schedule_work(&disable_freq_invariance_work);
+}
+#else
+static inline void init_freq_invariance(bool secondary)
+{
 }
+#endif /* CONFIG_X86_64 */
index b7cb3e0..8493f55 100644 (file)
@@ -245,7 +245,7 @@ static noinstr bool handle_bug(struct pt_regs *regs)
 
 DEFINE_IDTENTRY_RAW(exc_invalid_op)
 {
-       bool rcu_exit;
+       idtentry_state_t state;
 
        /*
         * We use UD2 as a short encoding for 'CALL __WARN', as such
@@ -255,11 +255,11 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op)
        if (!user_mode(regs) && handle_bug(regs))
                return;
 
-       rcu_exit = idtentry_enter_cond_rcu(regs);
+       state = idtentry_enter(regs);
        instrumentation_begin();
        handle_invalid_op(regs);
        instrumentation_end();
-       idtentry_exit_cond_rcu(regs, rcu_exit);
+       idtentry_exit(regs, state);
 }
 
 DEFINE_IDTENTRY(exc_coproc_segment_overrun)
@@ -405,7 +405,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
        }
 #endif
 
-       nmi_enter();
+       idtentry_enter_nmi(regs);
        instrumentation_begin();
        notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
@@ -651,15 +651,12 @@ DEFINE_IDTENTRY_RAW(exc_int3)
                instrumentation_end();
                idtentry_exit_user(regs);
        } else {
-               nmi_enter();
+               bool irq_state = idtentry_enter_nmi(regs);
                instrumentation_begin();
-               trace_hardirqs_off_finish();
                if (!do_int3(regs))
                        die("int3", regs, 0);
-               if (regs->flags & X86_EFLAGS_IF)
-                       trace_hardirqs_on_prepare();
                instrumentation_end();
-               nmi_exit();
+               idtentry_exit_nmi(regs, irq_state);
        }
 }
 
@@ -867,9 +864,8 @@ out:
 static __always_inline void exc_debug_kernel(struct pt_regs *regs,
                                             unsigned long dr6)
 {
-       nmi_enter();
+       bool irq_state = idtentry_enter_nmi(regs);
        instrumentation_begin();
-       trace_hardirqs_off_finish();
 
        /*
         * If something gets miswired and we end up here for a user mode
@@ -886,10 +882,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 
        handle_debug(regs, dr6, false);
 
-       if (regs->flags & X86_EFLAGS_IF)
-               trace_hardirqs_on_prepare();
        instrumentation_end();
-       nmi_exit();
+       idtentry_exit_nmi(regs, irq_state);
 }
 
 static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -905,6 +899,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
        instrumentation_begin();
 
        handle_debug(regs, dr6, true);
+
        instrumentation_end();
        idtentry_exit_user(regs);
 }
index 5bf72fc..4ce2ddd 100644 (file)
@@ -2195,7 +2195,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
 
-       if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
+       if (!kvm_apic_present(vcpu) || apic_lvtt_oneshot(apic) ||
                        apic_lvtt_period(apic))
                return;
 
index c0da4dd..5bbf761 100644 (file)
@@ -1090,7 +1090,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        svm->nested.vmcb = 0;
        svm->vcpu.arch.hflags = 0;
 
-       if (pause_filter_count) {
+       if (!kvm_pause_in_guest(svm->vcpu.kvm)) {
                control->pause_filter_count = pause_filter_count;
                if (pause_filter_thresh)
                        control->pause_filter_thresh = pause_filter_thresh;
@@ -2693,7 +2693,7 @@ static int pause_interception(struct vcpu_svm *svm)
        struct kvm_vcpu *vcpu = &svm->vcpu;
        bool in_kernel = (svm_get_cpl(vcpu) == 0);
 
-       if (pause_filter_thresh)
+       if (!kvm_pause_in_guest(vcpu->kvm))
                grow_ple_window(vcpu);
 
        kvm_vcpu_on_spin(vcpu, in_kernel);
@@ -3780,7 +3780,7 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
-       if (pause_filter_thresh)
+       if (!kvm_pause_in_guest(vcpu->kvm))
                shrink_ple_window(vcpu);
 }
 
@@ -3958,6 +3958,9 @@ static void svm_vm_destroy(struct kvm *kvm)
 
 static int svm_vm_init(struct kvm *kvm)
 {
+       if (!pause_filter_count || !pause_filter_thresh)
+               kvm->arch.pause_in_guest = true;
+
        if (avic) {
                int ret = avic_vm_init(kvm);
                if (ret)
index d4a4cec..11e4df5 100644 (file)
@@ -6079,6 +6079,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
            ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
                return -EINVAL;
 
+       if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
+               return -EINVAL;
+
        /*
         * SMM temporarily disables VMX, so we cannot be in guest mode,
         * nor can VMLAUNCH/VMRESUME be pending.  Outside SMM, SMM flags
@@ -6108,9 +6111,16 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
        if (ret)
                return ret;
 
-       /* Empty 'VMXON' state is permitted */
-       if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12))
-               return 0;
+       /* Empty 'VMXON' state is permitted if no VMCS is loaded */
+       if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
+               /* See vmx_has_valid_vmcs12.  */
+               if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
+                   (kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
+                   (kvm_state->hdr.vmx.vmcs12_pa != -1ull))
+                       return -EINVAL;
+               else
+                       return 0;
+       }
 
        if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
                if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
index 758bccc..197148d 100644 (file)
@@ -47,6 +47,11 @@ static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
        return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
 }
 
+/*
+ * Note: the same condition is checked against the state provided by userspace
+ * in vmx_set_nested_state; if it is satisfied, the nested state must include
+ * the VMCS12.
+ */
 static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
index 1ead568..5e41949 100644 (file)
@@ -1377,7 +1377,7 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
 DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
 {
        unsigned long address = read_cr2();
-       bool rcu_exit;
+       idtentry_state_t state;
 
        prefetchw(&current->mm->mmap_lock);
 
@@ -1412,11 +1412,11 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
         * code reenabled RCU to avoid subsequent wreckage which helps
         * debugability.
         */
-       rcu_exit = idtentry_enter_cond_rcu(regs);
+       state = idtentry_enter(regs);
 
        instrumentation_begin();
        handle_page_fault(regs, error_code, address);
        instrumentation_end();
 
-       idtentry_exit_cond_rcu(regs, rcu_exit);
+       idtentry_exit(regs, state);
 }
index 001dd7d..c7a4760 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/cpufeature.h>
 #include <asm/pti.h>
 #include <asm/text-patching.h>
+#include <asm/memtype.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -912,8 +913,6 @@ void free_kernel_image_pages(const char *what, void *begin, void *end)
                set_memory_np_noalias(begin_ul, len_pages);
 }
 
-void __weak mem_encrypt_free_decrypted_mem(void) { }
-
 void __ref free_initmem(void)
 {
        e820__reallocate_tables();
index 4a781cf..9f1177e 100644 (file)
@@ -376,7 +376,6 @@ bool force_dma_unencrypted(struct device *dev)
        return false;
 }
 
-/* Architecture __weak replacement functions */
 void __init mem_encrypt_free_decrypted_mem(void)
 {
        unsigned long vaddr, vaddr_end, npages;
@@ -401,6 +400,7 @@ void __init mem_encrypt_free_decrypted_mem(void)
        free_init_pages("unused decrypted", vaddr, vaddr_end);
 }
 
+/* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void)
 {
        if (!sme_me_mask)
index 77e0430..d1b2a88 100644 (file)
@@ -135,7 +135,7 @@ static inline void cpa_inc_2m_checked(void)
 
 static inline void cpa_inc_4k_install(void)
 {
-       cpa_4k_install++;
+       data_race(cpa_4k_install++);
 }
 
 static inline void cpa_inc_lp_sameprot(int level)
index c46b9f2..2aab43a 100644 (file)
@@ -873,7 +873,7 @@ static void xen_load_sp0(unsigned long sp0)
 static void xen_invalidate_io_bitmap(void)
 {
        struct physdev_set_iobitmap iobitmap = {
-               .bitmap = 0,
+               .bitmap = NULL,
                .nr_ports = 0,
        };
 
index 3e7c613..744c2f4 100644 (file)
@@ -19,8 +19,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-#define ATOMIC_INIT(i) { (i) }
-
 /*
  * This Xtensa implementation assumes that the right mechanism
  * for exclusion is for locking interrupts to level EXCM_LEVEL.
index 49322b6..3447556 100644 (file)
@@ -101,9 +101,9 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector,
        spin_unlock(&dev->lock);
 }
 
-static blk_qc_t simdisk_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t simdisk_submit_bio(struct bio *bio)
 {
-       struct simdisk *dev = q->queuedata;
+       struct simdisk *dev = bio->bi_disk->private_data;
        struct bio_vec bvec;
        struct bvec_iter iter;
        sector_t sector = bio->bi_iter.bi_sector;
@@ -127,8 +127,6 @@ static int simdisk_open(struct block_device *bdev, fmode_t mode)
        struct simdisk *dev = bdev->bd_disk->private_data;
 
        spin_lock(&dev->lock);
-       if (!dev->users)
-               check_disk_change(bdev);
        ++dev->users;
        spin_unlock(&dev->lock);
        return 0;
@@ -144,6 +142,7 @@ static void simdisk_release(struct gendisk *disk, fmode_t mode)
 
 static const struct block_device_operations simdisk_ops = {
        .owner          = THIS_MODULE,
+       .submit_bio     = simdisk_submit_bio,
        .open           = simdisk_open,
        .release        = simdisk_release,
 };
@@ -267,14 +266,12 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
        spin_lock_init(&dev->lock);
        dev->users = 0;
 
-       dev->queue = blk_alloc_queue(simdisk_make_request, NUMA_NO_NODE);
+       dev->queue = blk_alloc_queue(NUMA_NO_NODE);
        if (dev->queue == NULL) {
                pr_err("blk_alloc_queue failed\n");
                goto out_alloc_queue;
        }
 
-       dev->queue->queuedata = dev;
-
        dev->gd = alloc_disk(SIMDISK_MINORS);
        if (dev->gd == NULL) {
                pr_err("alloc_disk failed\n");
index 7871916..8d841f5 100644 (file)
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
                        blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
-                       blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+                       blk-exec.o blk-merge.o blk-timeout.o \
                        blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
                        blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
                        genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
index 50c8f03..a4c0bec 100644 (file)
@@ -4714,7 +4714,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
         * some unlucky request wait for as long as the device
         * wishes.
         *
-        * Of course, serving one request at at time may cause loss of
+        * Of course, serving one request at a time may cause loss of
         * throughput.
         */
        if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0)
index a7366c0..c63ba04 100644 (file)
@@ -234,8 +234,12 @@ fallback:
 
 void bio_uninit(struct bio *bio)
 {
-       bio_disassociate_blkg(bio);
-
+#ifdef CONFIG_BLK_CGROUP
+       if (bio->bi_blkg) {
+               blkg_put(bio->bi_blkg);
+               bio->bi_blkg = NULL;
+       }
+#endif
        if (bio_integrity(bio))
                bio_integrity_free(bio);
 
@@ -354,7 +358,7 @@ static void bio_alloc_rescue(struct work_struct *work)
                if (!bio)
                        break;
 
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
        }
 }
 
@@ -412,19 +416,19 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  *   submit the previously allocated bio for IO before attempting to allocate
  *   a new one. Failure to do so can cause deadlocks under memory pressure.
  *
- *   Note that when running under generic_make_request() (i.e. any block
+ *   Note that when running under submit_bio_noacct() (i.e. any block
  *   driver), bios are not submitted until after you return - see the code in
- *   generic_make_request() that converts recursion into iteration, to prevent
+ *   submit_bio_noacct() that converts recursion into iteration, to prevent
  *   stack overflows.
  *
  *   This would normally mean allocating multiple bios under
- *   generic_make_request() would be susceptible to deadlocks, but we have
+ *   submit_bio_noacct() would be susceptible to deadlocks, but we have
  *   deadlock avoidance code that resubmits any blocked bios from a rescuer
  *   thread.
  *
  *   However, we do not guarantee forward progress for allocations from other
  *   mempools. Doing multiple allocations from the same mempool under
- *   generic_make_request() should be avoided - instead, use bio_set's front_pad
+ *   submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
  *   for per bio allocations.
  *
  *   RETURNS:
@@ -444,9 +448,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
                if (nr_iovecs > UIO_MAXIOV)
                        return NULL;
 
-               p = kmalloc(sizeof(struct bio) +
-                           nr_iovecs * sizeof(struct bio_vec),
-                           gfp_mask);
+               p = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
                front_pad = 0;
                inline_vecs = nr_iovecs;
        } else {
@@ -455,14 +457,14 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
                                 nr_iovecs > 0))
                        return NULL;
                /*
-                * generic_make_request() converts recursion to iteration; this
+                * submit_bio_noacct() converts recursion to iteration; this
                 * means if we're running beneath it, any bios we allocate and
                 * submit will not be submitted (and thus freed) until after we
                 * return.
                 *
                 * This exposes us to a potential deadlock if we allocate
                 * multiple bios from the same bio_set() while running
-                * underneath generic_make_request(). If we were to allocate
+                * underneath submit_bio_noacct(). If we were to allocate
                 * multiple bios (say a stacking block driver that was splitting
                 * bios), we would deadlock if we exhausted the mempool's
                 * reserve.
@@ -860,7 +862,7 @@ EXPORT_SYMBOL(bio_add_pc_page);
  * @same_page: return if the segment has been merged inside the same page
  *
  * Try to add the data at @page + @off to the last bvec of @bio.  This is a
- * useful optimisation for file systems with a block size smaller than the
+ * useful optimisation for file systems with a block size smaller than the
  * page size.
  *
  * Warn if (@len, @off) crosses pages in case that @same_page is true.
@@ -986,7 +988,7 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
  * Pins pages from *iter and appends them to @bio's bvec array. The
  * pages will have to be released using put_page() when done.
  * For multi-segment *iter, this function only adds pages from the
- * the next non-empty segment of the iov iterator.
+ * next non-empty segment of the iov iterator.
  */
 static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
@@ -1625,141 +1627,6 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
 }
 EXPORT_SYMBOL(bioset_init_from_src);
 
-#ifdef CONFIG_BLK_CGROUP
-
-/**
- * bio_disassociate_blkg - puts back the blkg reference if associated
- * @bio: target bio
- *
- * Helper to disassociate the blkg from @bio if a blkg is associated.
- */
-void bio_disassociate_blkg(struct bio *bio)
-{
-       if (bio->bi_blkg) {
-               blkg_put(bio->bi_blkg);
-               bio->bi_blkg = NULL;
-       }
-}
-EXPORT_SYMBOL_GPL(bio_disassociate_blkg);
-
-/**
- * __bio_associate_blkg - associate a bio with the a blkg
- * @bio: target bio
- * @blkg: the blkg to associate
- *
- * This tries to associate @bio with the specified @blkg.  Association failure
- * is handled by walking up the blkg tree.  Therefore, the blkg associated can
- * be anything between @blkg and the root_blkg.  This situation only happens
- * when a cgroup is dying and then the remaining bios will spill to the closest
- * alive blkg.
- *
- * A reference will be taken on the @blkg and will be released when @bio is
- * freed.
- */
-static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
-{
-       bio_disassociate_blkg(bio);
-
-       bio->bi_blkg = blkg_tryget_closest(blkg);
-}
-
-/**
- * bio_associate_blkg_from_css - associate a bio with a specified css
- * @bio: target bio
- * @css: target css
- *
- * Associate @bio with the blkg found by combining the css's blkg and the
- * request_queue of the @bio.  This falls back to the queue's root_blkg if
- * the association fails with the css.
- */
-void bio_associate_blkg_from_css(struct bio *bio,
-                                struct cgroup_subsys_state *css)
-{
-       struct request_queue *q = bio->bi_disk->queue;
-       struct blkcg_gq *blkg;
-
-       rcu_read_lock();
-
-       if (!css || !css->parent)
-               blkg = q->root_blkg;
-       else
-               blkg = blkg_lookup_create(css_to_blkcg(css), q);
-
-       __bio_associate_blkg(bio, blkg);
-
-       rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
-
-#ifdef CONFIG_MEMCG
-/**
- * bio_associate_blkg_from_page - associate a bio with the page's blkg
- * @bio: target bio
- * @page: the page to lookup the blkcg from
- *
- * Associate @bio with the blkg from @page's owning memcg and the respective
- * request_queue.  If cgroup_e_css returns %NULL, fall back to the queue's
- * root_blkg.
- */
-void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
-{
-       struct cgroup_subsys_state *css;
-
-       if (!page->mem_cgroup)
-               return;
-
-       rcu_read_lock();
-
-       css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
-       bio_associate_blkg_from_css(bio, css);
-
-       rcu_read_unlock();
-}
-#endif /* CONFIG_MEMCG */
-
-/**
- * bio_associate_blkg - associate a bio with a blkg
- * @bio: target bio
- *
- * Associate @bio with the blkg found from the bio's css and request_queue.
- * If one is not found, bio_lookup_blkg() creates the blkg.  If a blkg is
- * already associated, the css is reused and association redone as the
- * request_queue may have changed.
- */
-void bio_associate_blkg(struct bio *bio)
-{
-       struct cgroup_subsys_state *css;
-
-       rcu_read_lock();
-
-       if (bio->bi_blkg)
-               css = &bio_blkcg(bio)->css;
-       else
-               css = blkcg_css();
-
-       bio_associate_blkg_from_css(bio, css);
-
-       rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkg);
-
-/**
- * bio_clone_blkg_association - clone blkg association from src to dst bio
- * @dst: destination bio
- * @src: source bio
- */
-void bio_clone_blkg_association(struct bio *dst, struct bio *src)
-{
-       rcu_read_lock();
-
-       if (src->bi_blkg)
-               __bio_associate_blkg(dst, src->bi_blkg);
-
-       rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
-#endif /* CONFIG_BLK_CGROUP */
-
 static void __init biovec_init_slabs(void)
 {
        int i;
index 0ecc897..619a79b 100644 (file)
@@ -95,9 +95,6 @@ static void __blkg_release(struct rcu_head *rcu)
        css_put(&blkg->blkcg->css);
        if (blkg->parent)
                blkg_put(blkg->parent);
-
-       wb_congested_put(blkg->wb_congested);
-
        blkg_free(blkg);
 }
 
@@ -227,7 +224,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
                                    struct blkcg_gq *new_blkg)
 {
        struct blkcg_gq *blkg;
-       struct bdi_writeback_congested *wb_congested;
        int i, ret;
 
        WARN_ON_ONCE(!rcu_read_lock_held());
@@ -245,31 +241,22 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
                goto err_free_blkg;
        }
 
-       wb_congested = wb_congested_get_create(q->backing_dev_info,
-                                              blkcg->css.id,
-                                              GFP_NOWAIT | __GFP_NOWARN);
-       if (!wb_congested) {
-               ret = -ENOMEM;
-               goto err_put_css;
-       }
-
        /* allocate */
        if (!new_blkg) {
                new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
                if (unlikely(!new_blkg)) {
                        ret = -ENOMEM;
-                       goto err_put_congested;
+                       goto err_put_css;
                }
        }
        blkg = new_blkg;
-       blkg->wb_congested = wb_congested;
 
        /* link parent */
        if (blkcg_parent(blkcg)) {
                blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
                if (WARN_ON_ONCE(!blkg->parent)) {
                        ret = -ENODEV;
-                       goto err_put_congested;
+                       goto err_put_css;
                }
                blkg_get(blkg->parent);
        }
@@ -306,8 +293,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
        blkg_put(blkg);
        return ERR_PTR(ret);
 
-err_put_congested:
-       wb_congested_put(wb_congested);
 err_put_css:
        css_put(&blkcg->css);
 err_free_blkg:
@@ -316,30 +301,35 @@ err_free_blkg:
 }
 
 /**
- * __blkg_lookup_create - lookup blkg, try to create one if not there
+ * blkg_lookup_create - lookup blkg, try to create one if not there
  * @blkcg: blkcg of interest
  * @q: request_queue of interest
  *
  * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
  * create one.  blkg creation is performed recursively from blkcg_root such
  * that all non-root blkg's have access to the parent blkg.  This function
- * should be called under RCU read lock and @q->queue_lock.
+ * should be called under RCU read lock and takes @q->queue_lock.
  *
  * Returns the blkg or the closest blkg if blkg_create() fails as it walks
  * down from root.
  */
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-                                     struct request_queue *q)
+static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+               struct request_queue *q)
 {
        struct blkcg_gq *blkg;
+       unsigned long flags;
 
        WARN_ON_ONCE(!rcu_read_lock_held());
-       lockdep_assert_held(&q->queue_lock);
 
-       blkg = __blkg_lookup(blkcg, q, true);
+       blkg = blkg_lookup(blkcg, q);
        if (blkg)
                return blkg;
 
+       spin_lock_irqsave(&q->queue_lock, flags);
+       blkg = __blkg_lookup(blkcg, q, true);
+       if (blkg)
+               goto found;
+
        /*
         * Create blkgs walking down from blkcg_root to @blkcg, so that all
         * non-root blkgs have access to their parents.  Returns the closest
@@ -362,34 +352,16 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
                }
 
                blkg = blkg_create(pos, q, NULL);
-               if (IS_ERR(blkg))
-                       return ret_blkg;
+               if (IS_ERR(blkg)) {
+                       blkg = ret_blkg;
+                       break;
+               }
                if (pos == blkcg)
-                       return blkg;
-       }
-}
-
-/**
- * blkg_lookup_create - find or create a blkg
- * @blkcg: target block cgroup
- * @q: target request_queue
- *
- * This looks up or creates the blkg representing the unique pair
- * of the blkcg and the request_queue.
- */
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-                                   struct request_queue *q)
-{
-       struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
-
-       if (unlikely(!blkg)) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&q->queue_lock, flags);
-               blkg = __blkg_lookup_create(blkcg, q);
-               spin_unlock_irqrestore(&q->queue_lock, flags);
+                       break;
        }
 
+found:
+       spin_unlock_irqrestore(&q->queue_lock, flags);
        return blkg;
 }
 
@@ -739,12 +711,137 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx)
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
 
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+       int i;
+
+       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+               dst->bytes[i] = src->bytes[i];
+               dst->ios[i] = src->ios[i];
+       }
+}
+
+static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+       int i;
+
+       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+               dst->bytes[i] += src->bytes[i];
+               dst->ios[i] += src->ios[i];
+       }
+}
+
+static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+       int i;
+
+       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+               dst->bytes[i] -= src->bytes[i];
+               dst->ios[i] -= src->ios[i];
+       }
+}
+
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+       struct blkcg *blkcg = css_to_blkcg(css);
+       struct blkcg_gq *blkg;
+
+       rcu_read_lock();
+
+       hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+               struct blkcg_gq *parent = blkg->parent;
+               struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
+               struct blkg_iostat cur, delta;
+               unsigned int seq;
+
+               /* fetch the current per-cpu values */
+               do {
+                       seq = u64_stats_fetch_begin(&bisc->sync);
+                       blkg_iostat_set(&cur, &bisc->cur);
+               } while (u64_stats_fetch_retry(&bisc->sync, seq));
+
+               /* propagate percpu delta to global */
+               u64_stats_update_begin(&blkg->iostat.sync);
+               blkg_iostat_set(&delta, &cur);
+               blkg_iostat_sub(&delta, &bisc->last);
+               blkg_iostat_add(&blkg->iostat.cur, &delta);
+               blkg_iostat_add(&bisc->last, &delta);
+               u64_stats_update_end(&blkg->iostat.sync);
+
+               /* propagate global delta to parent */
+               if (parent) {
+                       u64_stats_update_begin(&parent->iostat.sync);
+                       blkg_iostat_set(&delta, &blkg->iostat.cur);
+                       blkg_iostat_sub(&delta, &blkg->iostat.last);
+                       blkg_iostat_add(&parent->iostat.cur, &delta);
+                       blkg_iostat_add(&blkg->iostat.last, &delta);
+                       u64_stats_update_end(&parent->iostat.sync);
+               }
+       }
+
+       rcu_read_unlock();
+}
+
+/*
+ * The rstat algorithms intentionally don't handle the root cgroup to avoid
+ * incurring overhead when no cgroups are defined. For that reason,
+ * cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
+ * iostat in the root cgroup's blkcg_gq.
+ *
+ * However, we would like to re-use the printing code between the root and
+ * non-root cgroups to the extent possible. For that reason, we simulate
+ * flushing the root cgroup's stats by explicitly filling in the iostat
+ * with disk level statistics.
+ */
+static void blkcg_fill_root_iostats(void)
+{
+       struct class_dev_iter iter;
+       struct device *dev;
+
+       class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+       while ((dev = class_dev_iter_next(&iter))) {
+               struct gendisk *disk = dev_to_disk(dev);
+               struct hd_struct *part = disk_get_part(disk, 0);
+               struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue);
+               struct blkg_iostat tmp;
+               int cpu;
+
+               memset(&tmp, 0, sizeof(tmp));
+               for_each_possible_cpu(cpu) {
+                       struct disk_stats *cpu_dkstats;
+
+                       cpu_dkstats = per_cpu_ptr(part->dkstats, cpu);
+                       tmp.ios[BLKG_IOSTAT_READ] +=
+                               cpu_dkstats->ios[STAT_READ];
+                       tmp.ios[BLKG_IOSTAT_WRITE] +=
+                               cpu_dkstats->ios[STAT_WRITE];
+                       tmp.ios[BLKG_IOSTAT_DISCARD] +=
+                               cpu_dkstats->ios[STAT_DISCARD];
+                       // convert sectors to bytes
+                       tmp.bytes[BLKG_IOSTAT_READ] +=
+                               cpu_dkstats->sectors[STAT_READ] << 9;
+                       tmp.bytes[BLKG_IOSTAT_WRITE] +=
+                               cpu_dkstats->sectors[STAT_WRITE] << 9;
+                       tmp.bytes[BLKG_IOSTAT_DISCARD] +=
+                               cpu_dkstats->sectors[STAT_DISCARD] << 9;
+
+                       u64_stats_update_begin(&blkg->iostat.sync);
+                       blkg_iostat_set(&blkg->iostat.cur, &tmp);
+                       u64_stats_update_end(&blkg->iostat.sync);
+               }
+       }
+}
+
 static int blkcg_print_stat(struct seq_file *sf, void *v)
 {
        struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
        struct blkcg_gq *blkg;
 
-       cgroup_rstat_flush(blkcg->css.cgroup);
+       if (!seq_css(sf)->parent)
+               blkcg_fill_root_iostats();
+       else
+               cgroup_rstat_flush(blkcg->css.cgroup);
+
        rcu_read_lock();
 
        hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
@@ -833,7 +930,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 static struct cftype blkcg_files[] = {
        {
                .name = "stat",
-               .flags = CFTYPE_NOT_ON_ROOT,
                .seq_show = blkcg_print_stat,
        },
        { }     /* terminate */
@@ -1025,7 +1121,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
  * blkcg_init_queue - initialize blkcg part of request queue
  * @q: request_queue to initialize
  *
- * Called from __blk_alloc_queue(). Responsible for initializing blkcg
+ * Called from blk_alloc_queue(). Responsible for initializing blkcg
  * part of new request_queue @q.
  *
  * RETURNS:
@@ -1114,77 +1210,6 @@ static int blkcg_can_attach(struct cgroup_taskset *tset)
        return ret;
 }
 
-static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
-       int i;
-
-       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
-               dst->bytes[i] = src->bytes[i];
-               dst->ios[i] = src->ios[i];
-       }
-}
-
-static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
-       int i;
-
-       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
-               dst->bytes[i] += src->bytes[i];
-               dst->ios[i] += src->ios[i];
-       }
-}
-
-static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
-       int i;
-
-       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
-               dst->bytes[i] -= src->bytes[i];
-               dst->ios[i] -= src->ios[i];
-       }
-}
-
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
-{
-       struct blkcg *blkcg = css_to_blkcg(css);
-       struct blkcg_gq *blkg;
-
-       rcu_read_lock();
-
-       hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
-               struct blkcg_gq *parent = blkg->parent;
-               struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
-               struct blkg_iostat cur, delta;
-               unsigned seq;
-
-               /* fetch the current per-cpu values */
-               do {
-                       seq = u64_stats_fetch_begin(&bisc->sync);
-                       blkg_iostat_set(&cur, &bisc->cur);
-               } while (u64_stats_fetch_retry(&bisc->sync, seq));
-
-               /* propagate percpu delta to global */
-               u64_stats_update_begin(&blkg->iostat.sync);
-               blkg_iostat_set(&delta, &cur);
-               blkg_iostat_sub(&delta, &bisc->last);
-               blkg_iostat_add(&blkg->iostat.cur, &delta);
-               blkg_iostat_add(&bisc->last, &delta);
-               u64_stats_update_end(&blkg->iostat.sync);
-
-               /* propagate global delta to parent */
-               if (parent) {
-                       u64_stats_update_begin(&parent->iostat.sync);
-                       blkg_iostat_set(&delta, &blkg->iostat.cur);
-                       blkg_iostat_sub(&delta, &blkg->iostat.last);
-                       blkg_iostat_add(&parent->iostat.cur, &delta);
-                       blkg_iostat_add(&blkg->iostat.last, &delta);
-                       u64_stats_update_end(&parent->iostat.sync);
-               }
-       }
-
-       rcu_read_unlock();
-}
-
 static void blkcg_bind(struct cgroup_subsys_state *root_css)
 {
        int i;
@@ -1727,6 +1752,139 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
        atomic64_add(delta, &blkg->delay_nsec);
 }
 
+/**
+ * blkg_tryget_closest - try and get a blkg ref on the closest blkg
+ * @bio: target bio
+ * @css: target css
+ *
+ * As the failure mode here is to walk up the blkg tree, this ensures that the
+ * blkg->parent pointers are always valid.  This returns the blkg that it ended
+ * up taking a reference on or %NULL if no reference was taken.
+ */
+static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
+               struct cgroup_subsys_state *css)
+{
+       struct blkcg_gq *blkg, *ret_blkg = NULL;
+
+       rcu_read_lock();
+       blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue);
+       while (blkg) {
+               if (blkg_tryget(blkg)) {
+                       ret_blkg = blkg;
+                       break;
+               }
+               blkg = blkg->parent;
+       }
+       rcu_read_unlock();
+
+       return ret_blkg;
+}
+
+/**
+ * bio_associate_blkg_from_css - associate a bio with a specified css
+ * @bio: target bio
+ * @css: target css
+ *
+ * Associate @bio with the blkg found by combining the css's blkg and the
+ * request_queue of the @bio.  An association failure is handled by walking up
+ * the blkg tree.  Therefore, the blkg associated can be anything between @blkg
+ * and q->root_blkg.  This situation only happens when a cgroup is dying and
+ * then the remaining bios will spill to the closest alive blkg.
+ *
+ * A reference will be taken on the blkg and will be released when @bio is
+ * freed.
+ */
+void bio_associate_blkg_from_css(struct bio *bio,
+                                struct cgroup_subsys_state *css)
+{
+       if (bio->bi_blkg)
+               blkg_put(bio->bi_blkg);
+
+       if (css && css->parent) {
+               bio->bi_blkg = blkg_tryget_closest(bio, css);
+       } else {
+               blkg_get(bio->bi_disk->queue->root_blkg);
+               bio->bi_blkg = bio->bi_disk->queue->root_blkg;
+       }
+}
+EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
+
+/**
+ * bio_associate_blkg - associate a bio with a blkg
+ * @bio: target bio
+ *
+ * Associate @bio with the blkg found from the bio's css and request_queue.
+ * If one is not found, blkg_lookup_create() creates the blkg.  If a blkg is
+ * already associated, the css is reused and association redone as the
+ * request_queue may have changed.
+ */
+void bio_associate_blkg(struct bio *bio)
+{
+       struct cgroup_subsys_state *css;
+
+       rcu_read_lock();
+
+       if (bio->bi_blkg)
+               css = &bio_blkcg(bio)->css;
+       else
+               css = blkcg_css();
+
+       bio_associate_blkg_from_css(bio, css);
+
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(bio_associate_blkg);
+
+/**
+ * bio_clone_blkg_association - clone blkg association from src to dst bio
+ * @dst: destination bio
+ * @src: source bio
+ */
+void bio_clone_blkg_association(struct bio *dst, struct bio *src)
+{
+       if (src->bi_blkg) {
+               if (dst->bi_blkg)
+                       blkg_put(dst->bi_blkg);
+               blkg_get(src->bi_blkg);
+               dst->bi_blkg = src->bi_blkg;
+       }
+}
+EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
+
+static int blk_cgroup_io_type(struct bio *bio)
+{
+       if (op_is_discard(bio->bi_opf))
+               return BLKG_IOSTAT_DISCARD;
+       if (op_is_write(bio->bi_opf))
+               return BLKG_IOSTAT_WRITE;
+       return BLKG_IOSTAT_READ;
+}
+
+void blk_cgroup_bio_start(struct bio *bio)
+{
+       int rwd = blk_cgroup_io_type(bio), cpu;
+       struct blkg_iostat_set *bis;
+
+       cpu = get_cpu();
+       bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
+       u64_stats_update_begin(&bis->sync);
+
+       /*
+        * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
+        * bio and we would have already accounted for the size of the bio.
+        */
+       if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
+               bio_set_flag(bio, BIO_CGROUP_ACCT);
+               bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
+       }
+       bis->cur.ios[rwd]++;
+
+       u64_stats_update_end(&bis->sync);
+       if (cgroup_subsys_on_dfl(io_cgrp_subsys))
+               cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu);
+       put_cpu();
+}
+
 static int __init blkcg_init(void)
 {
        blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
index 03252af..d9d6326 100644 (file)
@@ -51,9 +51,7 @@
 #include "blk-pm.h"
 #include "blk-rq-qos.h"
 
-#ifdef CONFIG_DEBUG_FS
 struct dentry *blk_debugfs_root;
-#endif
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -285,7 +283,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags);
  *     A block device may call blk_sync_queue to ensure that any
  *     such activity is cancelled, thus allowing it to release resources
  *     that the callbacks might use. The caller must already have made sure
- *     that its ->make_request_fn will not re-add plugging prior to calling
+ *     that its ->submit_bio will not re-add plugging prior to calling
  *     this function.
  *
  *     This function does not cancel any asynchronous activity arising
@@ -321,6 +319,16 @@ void blk_clear_pm_only(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_clear_pm_only);
 
+/**
+ * blk_put_queue - decrement the request_queue refcount
+ * @q: the request_queue structure to decrement the refcount for
+ *
+ * Decrements the refcount of the request_queue kobject. When this reaches 0
+ * we'll have blk_release_queue() called.
+ *
+ * Context: Any context, but the last reference must not be dropped from
+ *          atomic context.
+ */
 void blk_put_queue(struct request_queue *q)
 {
        kobject_put(&q->kobj);
@@ -352,9 +360,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
  *
  * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
  * put it.  All future requests will be failed immediately with -ENODEV.
+ *
+ * Context: can sleep
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
+       /* cannot be called from atomic context */
+       might_sleep();
+
        WARN_ON_ONCE(blk_queue_registered(q));
 
        /* mark @q DYING, no new request or merges will be allowed afterwards */
@@ -497,7 +510,7 @@ static void blk_timeout_work(struct work_struct *work)
 {
 }
 
-struct request_queue *__blk_alloc_queue(int node_id)
+struct request_queue *blk_alloc_queue(int node_id)
 {
        struct request_queue *q;
        int ret;
@@ -540,9 +553,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
 
        kobject_init(&q->kobj, &blk_queue_ktype);
 
-#ifdef CONFIG_BLK_DEV_IO_TRACE
-       mutex_init(&q->blk_trace_mutex);
-#endif
+       mutex_init(&q->debugfs_mutex);
        mutex_init(&q->sysfs_lock);
        mutex_init(&q->sysfs_dir_lock);
        spin_lock_init(&q->queue_lock);
@@ -564,6 +575,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
 
        blk_queue_dma_alignment(q, 511);
        blk_set_default_limits(&q->limits);
+       q->nr_requests = BLKDEV_MAX_RQ;
 
        return q;
 
@@ -581,23 +593,16 @@ fail_q:
        kmem_cache_free(blk_requestq_cachep, q);
        return NULL;
 }
-
-struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id)
-{
-       struct request_queue *q;
-
-       if (WARN_ON_ONCE(!make_request))
-               return NULL;
-
-       q = __blk_alloc_queue(node_id);
-       if (!q)
-               return NULL;
-       q->make_request_fn = make_request;
-       q->nr_requests = BLKDEV_MAX_RQ;
-       return q;
-}
 EXPORT_SYMBOL(blk_alloc_queue);
 
+/**
+ * blk_get_queue - increment the request_queue refcount
+ * @q: the request_queue structure to increment the refcount for
+ *
+ * Increment the refcount of the request_queue kobject.
+ *
+ * Context: Any context.
+ */
 bool blk_get_queue(struct request_queue *q)
 {
        if (likely(!blk_queue_dying(q))) {
@@ -850,8 +855,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
                        return false;
 
                WARN_ONCE(1,
-                      "generic_make_request: Trying to write "
-                       "to read-only block-device %s (partno %d)\n",
+                      "Trying to write to read-only block-device %s (partno %d)\n",
                        bio_devname(bio, b), part->partno);
                /* Older lvm-tools actually trigger this */
                return false;
@@ -952,24 +956,17 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
        return BLK_STS_OK;
 }
 
-static noinline_for_stack bool
-generic_make_request_checks(struct bio *bio)
+static noinline_for_stack bool submit_bio_checks(struct bio *bio)
 {
-       struct request_queue *q;
-       int nr_sectors = bio_sectors(bio);
+       struct request_queue *q = bio->bi_disk->queue;
        blk_status_t status = BLK_STS_IOERR;
-       char b[BDEVNAME_SIZE];
+       struct blk_plug *plug;
 
        might_sleep();
 
-       q = bio->bi_disk->queue;
-       if (unlikely(!q)) {
-               printk(KERN_ERR
-                      "generic_make_request: Trying to access "
-                       "nonexistent block-device %s (%Lu)\n",
-                       bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
-               goto end_io;
-       }
+       plug = blk_mq_plug(q, bio);
+       if (plug && plug->nowait)
+               bio->bi_opf |= REQ_NOWAIT;
 
        /*
         * For a REQ_NOWAIT based request, return -EOPNOTSUPP
@@ -992,14 +989,13 @@ generic_make_request_checks(struct bio *bio)
        }
 
        /*
-        * Filter flush bio's early so that make_request based
-        * drivers without flush support don't have to worry
-        * about them.
+        * Filter flush bio's early so that bio based drivers without flush
+        * support don't have to worry about them.
         */
        if (op_is_flush(bio->bi_opf) &&
            !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
                bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
-               if (!nr_sectors) {
+               if (!bio_sectors(bio)) {
                        status = BLK_STS_OK;
                        goto end_io;
                }
@@ -1054,8 +1050,13 @@ generic_make_request_checks(struct bio *bio)
        if (unlikely(!current->io_context))
                create_task_io_context(current, GFP_ATOMIC, q->node);
 
-       if (!blkcg_bio_issue_check(q, bio))
+       if (blk_throtl_bio(bio)) {
+               blkcg_bio_issue_init(bio);
                return false;
+       }
+
+       blk_cgroup_bio_start(bio);
+       blkcg_bio_issue_init(bio);
 
        if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
                trace_block_bio_queue(q, bio);
@@ -1074,138 +1075,144 @@ end_io:
        return false;
 }
 
-static blk_qc_t do_make_request(struct bio *bio)
+static blk_qc_t __submit_bio(struct bio *bio)
 {
-       struct request_queue *q = bio->bi_disk->queue;
+       struct gendisk *disk = bio->bi_disk;
        blk_qc_t ret = BLK_QC_T_NONE;
 
        if (blk_crypto_bio_prep(&bio)) {
-               if (!q->make_request_fn)
-                       return blk_mq_make_request(q, bio);
-               ret = q->make_request_fn(q, bio);
+               if (!disk->fops->submit_bio)
+                       return blk_mq_submit_bio(bio);
+               ret = disk->fops->submit_bio(bio);
        }
-       blk_queue_exit(q);
+       blk_queue_exit(disk->queue);
        return ret;
 }
 
-/**
- * generic_make_request - re-submit a bio to the block device layer for I/O
- * @bio:  The bio describing the location in memory and on the device.
+/*
+ * The loop in this function may be a bit non-obvious, and so deserves some
+ * explanation:
  *
- * This is a version of submit_bio() that shall only be used for I/O that is
- * resubmitted to lower level drivers by stacking block drivers.  All file
- * systems and other upper level users of the block layer should use
- * submit_bio() instead.
+ *  - Before entering the loop, bio->bi_next is NULL (as all callers ensure
+ *    that), so we have a list with a single bio.
+ *  - We pretend that we have just taken it off a longer list, so we assign
+ *    bio_list to a pointer to the bio_list_on_stack, thus initialising the
+ *    bio_list of new bios to be added.  ->submit_bio() may indeed add some more
+ *    bios through a recursive call to submit_bio_noacct.  If it did, we find a
+ *    non-NULL value in bio_list and re-enter the loop from the top.
+ *  - In this case we really did just take the bio off the top of the list (no
+ *    pretending) and so remove it from bio_list, and call into ->submit_bio()
+ *    again.
+ *
+ * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
+ * bio_list_on_stack[1] contains bios that were submitted before the current
+ *     ->submit_bio, but that haven't been processed yet.
  */
-blk_qc_t generic_make_request(struct bio *bio)
+static blk_qc_t __submit_bio_noacct(struct bio *bio)
 {
-       /*
-        * bio_list_on_stack[0] contains bios submitted by the current
-        * make_request_fn.
-        * bio_list_on_stack[1] contains bios that were submitted before
-        * the current make_request_fn, but that haven't been processed
-        * yet.
-        */
        struct bio_list bio_list_on_stack[2];
        blk_qc_t ret = BLK_QC_T_NONE;
 
-       if (!generic_make_request_checks(bio))
-               goto out;
-
-       /*
-        * We only want one ->make_request_fn to be active at a time, else
-        * stack usage with stacked devices could be a problem.  So use
-        * current->bio_list to keep a list of requests submited by a
-        * make_request_fn function.  current->bio_list is also used as a
-        * flag to say if generic_make_request is currently active in this
-        * task or not.  If it is NULL, then no make_request is active.  If
-        * it is non-NULL, then a make_request is active, and new requests
-        * should be added at the tail
-        */
-       if (current->bio_list) {
-               bio_list_add(&current->bio_list[0], bio);
-               goto out;
-       }
-
-       /* following loop may be a bit non-obvious, and so deserves some
-        * explanation.
-        * Before entering the loop, bio->bi_next is NULL (as all callers
-        * ensure that) so we have a list with a single bio.
-        * We pretend that we have just taken it off a longer list, so
-        * we assign bio_list to a pointer to the bio_list_on_stack,
-        * thus initialising the bio_list of new bios to be
-        * added.  ->make_request() may indeed add some more bios
-        * through a recursive call to generic_make_request.  If it
-        * did, we find a non-NULL value in bio_list and re-enter the loop
-        * from the top.  In this case we really did just take the bio
-        * of the top of the list (no pretending) and so remove it from
-        * bio_list, and call into ->make_request() again.
-        */
        BUG_ON(bio->bi_next);
+
        bio_list_init(&bio_list_on_stack[0]);
        current->bio_list = bio_list_on_stack;
+
        do {
                struct request_queue *q = bio->bi_disk->queue;
+               struct bio_list lower, same;
+
+               if (unlikely(bio_queue_enter(bio) != 0))
+                       continue;
 
-               if (likely(bio_queue_enter(bio) == 0)) {
-                       struct bio_list lower, same;
+               /*
+                * Create a fresh bio_list for all subordinate requests.
+                */
+               bio_list_on_stack[1] = bio_list_on_stack[0];
+               bio_list_init(&bio_list_on_stack[0]);
 
-                       /* Create a fresh bio_list for all subordinate requests */
-                       bio_list_on_stack[1] = bio_list_on_stack[0];
-                       bio_list_init(&bio_list_on_stack[0]);
-                       ret = do_make_request(bio);
+               ret = __submit_bio(bio);
 
-                       /* sort new bios into those for a lower level
-                        * and those for the same level
-                        */
-                       bio_list_init(&lower);
-                       bio_list_init(&same);
-                       while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
-                               if (q == bio->bi_disk->queue)
-                                       bio_list_add(&same, bio);
-                               else
-                                       bio_list_add(&lower, bio);
-                       /* now assemble so we handle the lowest level first */
-                       bio_list_merge(&bio_list_on_stack[0], &lower);
-                       bio_list_merge(&bio_list_on_stack[0], &same);
-                       bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+               /*
+                * Sort new bios into those for a lower level and those for the
+                * same level.
+                */
+               bio_list_init(&lower);
+               bio_list_init(&same);
+               while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+                       if (q == bio->bi_disk->queue)
+                               bio_list_add(&same, bio);
+                       else
+                               bio_list_add(&lower, bio);
+
+               /*
+                * Now assemble so we handle the lowest level first.
+                */
+               bio_list_merge(&bio_list_on_stack[0], &lower);
+               bio_list_merge(&bio_list_on_stack[0], &same);
+               bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+       } while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+
+       current->bio_list = NULL;
+       return ret;
+}
+
+static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
+{
+       struct bio_list bio_list[2] = { };
+       blk_qc_t ret = BLK_QC_T_NONE;
+
+       current->bio_list = bio_list;
+
+       do {
+               struct gendisk *disk = bio->bi_disk;
+
+               if (unlikely(bio_queue_enter(bio) != 0))
+                       continue;
+
+               if (!blk_crypto_bio_prep(&bio)) {
+                       blk_queue_exit(disk->queue);
+                       ret = BLK_QC_T_NONE;
+                       continue;
                }
-               bio = bio_list_pop(&bio_list_on_stack[0]);
-       } while (bio);
-       current->bio_list = NULL; /* deactivate */
 
-out:
+               ret = blk_mq_submit_bio(bio);
+       } while ((bio = bio_list_pop(&bio_list[0])));
+
+       current->bio_list = NULL;
        return ret;
 }
-EXPORT_SYMBOL(generic_make_request);
 
 /**
- * direct_make_request - hand a buffer directly to its device driver for I/O
+ * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
  *
- * This function behaves like generic_make_request(), but does not protect
- * against recursion.  Must only be used if the called driver is known
- * to be blk-mq based.
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers.  All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
  */
-blk_qc_t direct_make_request(struct bio *bio)
+blk_qc_t submit_bio_noacct(struct bio *bio)
 {
-       struct request_queue *q = bio->bi_disk->queue;
-
-       if (WARN_ON_ONCE(q->make_request_fn)) {
-               bio_io_error(bio);
-               return BLK_QC_T_NONE;
-       }
-       if (!generic_make_request_checks(bio))
-               return BLK_QC_T_NONE;
-       if (unlikely(bio_queue_enter(bio)))
+       if (!submit_bio_checks(bio))
                return BLK_QC_T_NONE;
-       if (!blk_crypto_bio_prep(&bio)) {
-               blk_queue_exit(q);
+
+       /*
+        * We only want one ->submit_bio to be active at a time, else stack
+        * usage with stacked devices could be a problem.  Use current->bio_list
+        * to collect a list of requests submitted by a ->submit_bio method while
+        * it is active, and then process them after it returned.
+        */
+       if (current->bio_list) {
+               bio_list_add(&current->bio_list[0], bio);
                return BLK_QC_T_NONE;
        }
-       return blk_mq_make_request(q, bio);
+
+       if (!bio->bi_disk->fops->submit_bio)
+               return __submit_bio_noacct_mq(bio);
+       return __submit_bio_noacct(bio);
 }
-EXPORT_SYMBOL_GPL(direct_make_request);
+EXPORT_SYMBOL(submit_bio_noacct);
 
 /**
  * submit_bio - submit a bio to the block device layer for I/O
@@ -1266,13 +1273,13 @@ blk_qc_t submit_bio(struct bio *bio)
                blk_qc_t ret;
 
                psi_memstall_enter(&pflags);
-               ret = generic_make_request(bio);
+               ret = submit_bio_noacct(bio);
                psi_memstall_leave(&pflags);
 
                return ret;
        }
 
-       return generic_make_request(bio);
+       return submit_bio_noacct(bio);
 }
 EXPORT_SYMBOL(submit_bio);
 
@@ -1800,6 +1807,7 @@ void blk_start_plug(struct blk_plug *plug)
        INIT_LIST_HEAD(&plug->cb_list);
        plug->rq_count = 0;
        plug->multiple_queues = false;
+       plug->nowait = false;
 
        /*
         * Store ordering should not be needed here, since a potential
@@ -1908,9 +1916,7 @@ int __init blk_dev_init(void)
        blk_requestq_cachep = kmem_cache_create("request_queue",
                        sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
 
-#ifdef CONFIG_DEBUG_FS
        blk_debugfs_root = debugfs_create_dir("block", NULL);
-#endif
 
        return 0;
 }
index 6e49688..c162b75 100644 (file)
@@ -228,7 +228,7 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr)
                        return false;
                }
                bio_chain(split_bio, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                *bio_ptr = split_bio;
        }
 
index 6533c9b..2d5e600 100644 (file)
@@ -239,7 +239,7 @@ void __blk_crypto_free_request(struct request *rq)
  * kernel crypto API. When the crypto API fallback is used for encryption,
  * blk-crypto may choose to split the bio into 2 - the first one that will
  * continue to be processed and the second one that will be resubmitted via
- * generic_make_request. A bounce bio will be allocated to encrypt the contents
+ * submit_bio_noacct. A bounce bio will be allocated to encrypt the contents
  * of the aforementioned "first one", and *bio_ptr will be updated to this
  * bounce bio.
  *
index 15ae015..6e1543c 100644 (file)
@@ -219,7 +219,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
        struct request *rq, *n;
        unsigned long flags = 0;
        struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
-       struct blk_mq_hw_ctx *hctx;
 
        blk_account_io_flush(flush_rq);
 
@@ -235,13 +234,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
        if (fq->rq_status != BLK_STS_OK)
                error = fq->rq_status;
 
-       hctx = flush_rq->mq_hctx;
        if (!q->elevator) {
-               blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
-               flush_rq->tag = -1;
+               flush_rq->tag = BLK_MQ_NO_TAG;
        } else {
                blk_mq_put_driver_tag(flush_rq);
-               flush_rq->internal_tag = -1;
+               flush_rq->internal_tag = BLK_MQ_NO_TAG;
        }
 
        running = &fq->flush_queue[fq->flush_running_idx];
@@ -286,13 +283,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
        if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
                return;
 
-       /* C2 and C3
-        *
-        * For blk-mq + scheduling, we can risk having all driver tags
-        * assigned to empty flushes, and we deadlock if we are expecting
-        * other requests to make progress. Don't defer for that case.
-        */
-       if (!list_empty(&fq->flush_data_in_flight) && q->elevator &&
+       /* C2 and C3 */
+       if (!list_empty(&fq->flush_data_in_flight) &&
            time_before(jiffies,
                        fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
                return;
@@ -316,13 +308,10 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
        flush_rq->mq_ctx = first_rq->mq_ctx;
        flush_rq->mq_hctx = first_rq->mq_hctx;
 
-       if (!q->elevator) {
-               fq->orig_rq = first_rq;
+       if (!q->elevator)
                flush_rq->tag = first_rq->tag;
-               blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq);
-       } else {
+       else
                flush_rq->internal_tag = first_rq->internal_tag;
-       }
 
        flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
        flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
index 9df50fb..57299f8 100644 (file)
@@ -96,15 +96,7 @@ static void ioc_release_fn(struct work_struct *work)
 {
        struct io_context *ioc = container_of(work, struct io_context,
                                              release_work);
-       unsigned long flags;
-
-       /*
-        * Exiting icq may call into put_io_context() through elevator
-        * which will trigger lockdep warning.  The ioc's are guaranteed to
-        * be different, use a different locking subclass here.  Use
-        * irqsave variant as there's no spin_lock_irq_nested().
-        */
-       spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+       spin_lock_irq(&ioc->lock);
 
        while (!hlist_empty(&ioc->icq_list)) {
                struct io_cq *icq = hlist_entry(ioc->icq_list.first,
@@ -115,13 +107,27 @@ static void ioc_release_fn(struct work_struct *work)
                        ioc_destroy_icq(icq);
                        spin_unlock(&q->queue_lock);
                } else {
-                       spin_unlock_irqrestore(&ioc->lock, flags);
-                       cpu_relax();
-                       spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+                       /* Make sure q and icq cannot be freed. */
+                       rcu_read_lock();
+
+                       /* Re-acquire the locks in the correct order. */
+                       spin_unlock(&ioc->lock);
+                       spin_lock(&q->queue_lock);
+                       spin_lock(&ioc->lock);
+
+                       /*
+                        * The icq may have been destroyed when the ioc lock
+                        * was released.
+                        */
+                       if (!(icq->flags & ICQ_DESTROYED))
+                               ioc_destroy_icq(icq);
+
+                       spin_unlock(&q->queue_lock);
+                       rcu_read_unlock();
                }
        }
 
-       spin_unlock_irqrestore(&ioc->lock, flags);
+       spin_unlock_irq(&ioc->lock);
 
        kmem_cache_free(iocontext_cachep, ioc);
 }
@@ -170,7 +176,6 @@ void put_io_context(struct io_context *ioc)
  */
 void put_io_context_active(struct io_context *ioc)
 {
-       unsigned long flags;
        struct io_cq *icq;
 
        if (!atomic_dec_and_test(&ioc->active_ref)) {
@@ -178,19 +183,14 @@ void put_io_context_active(struct io_context *ioc)
                return;
        }
 
-       /*
-        * Need ioc lock to walk icq_list and q lock to exit icq.  Perform
-        * reverse double locking.  Read comment in ioc_release_fn() for
-        * explanation on the nested locking annotation.
-        */
-       spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+       spin_lock_irq(&ioc->lock);
        hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
                if (icq->flags & ICQ_EXITED)
                        continue;
 
                ioc_exit_icq(icq);
        }
-       spin_unlock_irqrestore(&ioc->lock, flags);
+       spin_unlock_irq(&ioc->lock);
 
        put_io_context(ioc);
 }
index 8ac4aad..521c29b 100644 (file)
@@ -1370,7 +1370,7 @@ static void ioc_timer_fn(struct timer_list *timer)
         * should have woken up in the last period and expire idle iocgs.
         */
        list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
-               if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
+               if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
                    !iocg_is_idle(iocg))
                        continue;
 
@@ -2045,8 +2045,7 @@ static struct blkg_policy_data *ioc_pd_alloc(gfp_t gfp, struct request_queue *q,
        int levels = blkcg->css.cgroup->level + 1;
        struct ioc_gq *iocg;
 
-       iocg = kzalloc_node(sizeof(*iocg) + levels * sizeof(iocg->ancestors[0]),
-                           gfp, q->node);
+       iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, q->node);
        if (!iocg)
                return NULL;
 
index c128d50..f90429c 100644 (file)
@@ -591,7 +591,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
        struct rq_wait *rqw;
        struct iolatency_grp *iolat;
        u64 window_start;
-       u64 now = ktime_to_ns(ktime_get());
+       u64 now;
        bool issue_as_root = bio_issue_as_root_blkg(bio);
        bool enabled = false;
        int inflight = 0;
@@ -608,6 +608,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
        if (!enabled)
                return;
 
+       now = ktime_to_ns(ktime_get());
        while (blkg && blkg->parent) {
                iolat = blkg_to_lat(blkg);
                if (!iolat) {
index 5f2c429..019e09b 100644 (file)
@@ -29,7 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        struct request_queue *q = bdev_get_queue(bdev);
        struct bio *bio = *biop;
        unsigned int op;
-       sector_t bs_mask;
+       sector_t bs_mask, part_offset = 0;
 
        if (!q)
                return -ENXIO;
@@ -54,9 +54,34 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        if (!nr_sects)
                return -EINVAL;
 
+       /* In case the discard request is in a partition */
+       if (bdev->bd_partno)
+               part_offset = bdev->bd_part->start_sect;
+
        while (nr_sects) {
-               sector_t req_sects = min_t(sector_t, nr_sects,
-                               bio_allowed_max_sectors(q));
+               sector_t granularity_aligned_lba, req_sects;
+               sector_t sector_mapped = sector + part_offset;
+
+               granularity_aligned_lba = round_up(sector_mapped,
+                               q->limits.discard_granularity >> SECTOR_SHIFT);
+
+               /*
+                * Check whether the discard bio starts at a discard_granularity
+                * aligned LBA,
+                * - If no: set (granularity_aligned_lba - sector_mapped) to
+                *   bi_size of the first split bio, then the second bio will
+                *   start at a discard_granularity aligned LBA on the device.
+                * - If yes: use bio_aligned_discard_max_sectors() as the max
+                *   possible bi_size of the first split bio. Then when this bio
+                *   is split in the device driver, the split ones are very likely
+                *   to be aligned to discard_granularity of the device's queue.
+                */
+               if (granularity_aligned_lba == sector_mapped)
+                       req_sects = min_t(sector_t, nr_sects,
+                                         bio_aligned_discard_max_sectors(q));
+               else
+                       req_sects = min_t(sector_t, nr_sects,
+                                         granularity_aligned_lba - sector_mapped);
 
                WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
 
index f0b0bae..5196dc1 100644 (file)
@@ -283,20 +283,20 @@ split:
 
 /**
  * __blk_queue_split - split a bio and submit the second half
- * @q:       [in] request queue pointer
  * @bio:     [in, out] bio to be split
  * @nr_segs: [out] number of segments in the first bio
  *
  * Split a bio into two bios, chain the two bios, submit the second half and
  * store a pointer to the first half in *@bio. If the second bio is still too
  * big it will be split by a recursive call to this function. Since this
- * function may allocate a new bio from @q->bio_split, it is the responsibility
- * of the caller to ensure that @q is only released after processing of the
+ * function may allocate a new bio from @bio->bi_disk->queue->bio_split, it is
+ * the responsibility of the caller to ensure that
+ * @bio->bi_disk->queue->bio_split is only released after processing of the
  * split bio has finished.
  */
-void __blk_queue_split(struct request_queue *q, struct bio **bio,
-               unsigned int *nr_segs)
+void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
 {
+       struct request_queue *q = (*bio)->bi_disk->queue;
        struct bio *split = NULL;
 
        switch (bio_op(*bio)) {
@@ -338,27 +338,26 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
 
                bio_chain(split, *bio);
                trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
-               generic_make_request(*bio);
+               submit_bio_noacct(*bio);
                *bio = split;
        }
 }
 
 /**
  * blk_queue_split - split a bio and submit the second half
- * @q:   [in] request queue pointer
  * @bio: [in, out] bio to be split
  *
  * Split a bio into two bios, chains the two bios, submit the second half and
  * store a pointer to the first half in *@bio. Since this function may allocate
- * a new bio from @q->bio_split, it is the responsibility of the caller to
- * ensure that @q is only released after processing of the split bio has
- * finished.
+ * a new bio from @bio->bi_disk->queue->bio_split, it is the responsibility of
+ * the caller to ensure that @bio->bi_disk->queue->bio_split is only released
+ * after processing of the split bio has finished.
  */
-void blk_queue_split(struct request_queue *q, struct bio **bio)
+void blk_queue_split(struct bio **bio)
 {
        unsigned int nr_segs;
 
-       __blk_queue_split(q, bio, &nr_segs);
+       __blk_queue_split(bio, &nr_segs);
 }
 EXPORT_SYMBOL(blk_queue_split);
 
@@ -793,6 +792,8 @@ static struct request *attempt_merge(struct request_queue *q,
         */
        blk_account_io_merge_request(next);
 
+       trace_block_rq_merge(q, next);
+
        /*
         * ownership of bio passed from next to req, return 'next' for
         * the caller to free
index e0b2bc1..3f09bcb 100644 (file)
@@ -404,8 +404,7 @@ static bool hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
        const struct show_busy_params *params = data;
 
        if (rq->mq_hctx == params->hctx)
-               __blk_mq_debugfs_rq_show(params->m,
-                                        list_entry_rq(&rq->queuelist));
+               __blk_mq_debugfs_rq_show(params->m, rq);
 
        return true;
 }
@@ -827,9 +826,6 @@ void blk_mq_debugfs_register(struct request_queue *q)
        struct blk_mq_hw_ctx *hctx;
        int i;
 
-       q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
-                                           blk_debugfs_root);
-
        debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
 
        /*
@@ -860,9 +856,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
 
 void blk_mq_debugfs_unregister(struct request_queue *q)
 {
-       debugfs_remove_recursive(q->debugfs_dir);
        q->sched_debugfs_dir = NULL;
-       q->debugfs_dir = NULL;
 }
 
 static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
index fdcc2c1..a19cdf1 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/blk-mq.h>
+#include <linux/list_sort.h>
 
 #include <trace/events/block.h>
 
@@ -80,6 +81,35 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
        blk_mq_run_hw_queue(hctx, true);
 }
 
+static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+       struct request *rqa = container_of(a, struct request, queuelist);
+       struct request *rqb = container_of(b, struct request, queuelist);
+
+       return rqa->mq_hctx > rqb->mq_hctx;
+}
+
+static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
+{
+       struct blk_mq_hw_ctx *hctx =
+               list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
+       struct request *rq;
+       LIST_HEAD(hctx_list);
+       unsigned int count = 0;
+
+       list_for_each_entry(rq, rq_list, queuelist) {
+               if (rq->mq_hctx != hctx) {
+                       list_cut_before(&hctx_list, rq_list, &rq->queuelist);
+                       goto dispatch;
+               }
+               count++;
+       }
+       list_splice_tail_init(rq_list, &hctx_list);
+
+dispatch:
+       return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
+}
+
 #define BLK_MQ_BUDGET_DELAY    3               /* ms units */
 
 /*
@@ -90,12 +120,20 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
  * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
  * be run again.  This is necessary to avoid starving flushes.
  */
-static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 {
        struct request_queue *q = hctx->queue;
        struct elevator_queue *e = q->elevator;
+       bool multi_hctxs = false, run_queue = false;
+       bool dispatched = false, busy = false;
+       unsigned int max_dispatch;
        LIST_HEAD(rq_list);
-       int ret = 0;
+       int count = 0;
+
+       if (hctx->dispatch_busy)
+               max_dispatch = 1;
+       else
+               max_dispatch = hctx->queue->nr_requests;
 
        do {
                struct request *rq;
@@ -104,16 +142,16 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
                        break;
 
                if (!list_empty_careful(&hctx->dispatch)) {
-                       ret = -EAGAIN;
+                       busy = true;
                        break;
                }
 
-               if (!blk_mq_get_dispatch_budget(hctx))
+               if (!blk_mq_get_dispatch_budget(q))
                        break;
 
                rq = e->type->ops.dispatch_request(hctx);
                if (!rq) {
-                       blk_mq_put_dispatch_budget(hctx);
+                       blk_mq_put_dispatch_budget(q);
                        /*
                         * We're releasing without dispatching. Holding the
                         * budget could have blocked any "hctx"s with the
@@ -121,7 +159,7 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
                         * no guarantee anyone will kick the queue.  Kick it
                         * ourselves.
                         */
-                       blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
+                       run_queue = true;
                        break;
                }
 
@@ -130,8 +168,42 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
                 * if this rq won't be queued to driver via .queue_rq()
                 * in blk_mq_dispatch_rq_list().
                 */
-               list_add(&rq->queuelist, &rq_list);
-       } while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+               list_add_tail(&rq->queuelist, &rq_list);
+               if (rq->mq_hctx != hctx)
+                       multi_hctxs = true;
+       } while (++count < max_dispatch);
+
+       if (!count) {
+               if (run_queue)
+                       blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
+       } else if (multi_hctxs) {
+               /*
+                * Requests from different hctx may be dequeued from some
+                * schedulers, such as bfq and deadline.
+                *
+                * Sort the requests in the list according to their hctx,
+                * and dispatch the requests of one hctx as a batch at a time.
+                */
+               list_sort(NULL, &rq_list, sched_rq_cmp);
+               do {
+                       dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
+               } while (!list_empty(&rq_list));
+       } else {
+               dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
+       }
+
+       if (busy)
+               return -EAGAIN;
+       return !!dispatched;
+}
+
+static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+{
+       int ret;
+
+       do {
+               ret = __blk_mq_do_dispatch_sched(hctx);
+       } while (ret == 1);
 
        return ret;
 }
@@ -153,7 +225,7 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
  * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
  *
  * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
- * to be run again.  This is necessary to avoid starving flushes.
+ * be run again.  This is necessary to avoid starving flushes.
  */
 static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
 {
@@ -161,10 +233,9 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
        LIST_HEAD(rq_list);
        struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
        int ret = 0;
+       struct request *rq;
 
        do {
-               struct request *rq;
-
                if (!list_empty_careful(&hctx->dispatch)) {
                        ret = -EAGAIN;
                        break;
@@ -173,12 +244,12 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
                if (!sbitmap_any_bit_set(&hctx->ctx_map))
                        break;
 
-               if (!blk_mq_get_dispatch_budget(hctx))
+               if (!blk_mq_get_dispatch_budget(q))
                        break;
 
                rq = blk_mq_dequeue_from_ctx(hctx, ctx);
                if (!rq) {
-                       blk_mq_put_dispatch_budget(hctx);
+                       blk_mq_put_dispatch_budget(q);
                        /*
                         * We're releasing without dispatching. Holding the
                         * budget could have blocked any "hctx"s with the
@@ -200,7 +271,7 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
                /* round robin for fair dispatch */
                ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
 
-       } while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+       } while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
 
        WRITE_ONCE(hctx->dispatch_from, ctx);
        return ret;
@@ -240,7 +311,7 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
         */
        if (!list_empty(&rq_list)) {
                blk_mq_sched_mark_restart_hctx(hctx);
-               if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
+               if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
                        if (has_sched_dispatch)
                                ret = blk_mq_do_dispatch_sched(hctx);
                        else
@@ -253,7 +324,7 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
                ret = blk_mq_do_dispatch_ctx(hctx);
        } else {
                blk_mq_flush_busy_ctxs(hctx, &rq_list);
-               blk_mq_dispatch_rq_list(q, &rq_list, false);
+               blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
        }
 
        return ret;
index ae722f8..32d82e2 100644 (file)
@@ -56,43 +56,12 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
        blk_mq_tag_wakeup_all(tags, false);
 }
 
-/*
- * For shared tag users, we track the number of currently active users
- * and attempt to provide a fair share of the tag depth for each of them.
- */
-static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
-                                 struct sbitmap_queue *bt)
-{
-       unsigned int depth, users;
-
-       if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
-               return true;
-       if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-               return true;
-
-       /*
-        * Don't try dividing an ant
-        */
-       if (bt->sb.depth == 1)
-               return true;
-
-       users = atomic_read(&hctx->tags->active_queues);
-       if (!users)
-               return true;
-
-       /*
-        * Allow at least some tags
-        */
-       depth = max((bt->sb.depth + users - 1) / users, 4U);
-       return atomic_read(&hctx->nr_active) < depth;
-}
-
 static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
                            struct sbitmap_queue *bt)
 {
-       if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
-           !hctx_may_queue(data->hctx, bt))
+       if (!data->q->elevator && !hctx_may_queue(data->hctx, bt))
                return BLK_MQ_NO_TAG;
+
        if (data->shallow_depth)
                return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
        else
@@ -191,33 +160,6 @@ found_tag:
        return tag + tag_offset;
 }
 
-bool __blk_mq_get_driver_tag(struct request *rq)
-{
-       struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
-       unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
-       bool shared = blk_mq_tag_busy(rq->mq_hctx);
-       int tag;
-
-       if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
-               bt = &rq->mq_hctx->tags->breserved_tags;
-               tag_offset = 0;
-       }
-
-       if (!hctx_may_queue(rq->mq_hctx, bt))
-               return false;
-       tag = __sbitmap_queue_get(bt);
-       if (tag == BLK_MQ_NO_TAG)
-               return false;
-
-       rq->tag = tag + tag_offset;
-       if (shared) {
-               rq->rq_flags |= RQF_MQ_INFLIGHT;
-               atomic_inc(&rq->mq_hctx->nr_active);
-       }
-       rq->mq_hctx->tags->rqs[rq->tag] = rq;
-       return true;
-}
-
 void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
                    unsigned int tag)
 {
index 2e4ef51..b1acac5 100644 (file)
@@ -51,14 +51,6 @@ enum {
        BLK_MQ_TAG_MAX          = BLK_MQ_NO_TAG - 1,
 };
 
-bool __blk_mq_get_driver_tag(struct request *rq);
-static inline bool blk_mq_get_driver_tag(struct request *rq)
-{
-       if (rq->tag != BLK_MQ_NO_TAG)
-               return true;
-       return __blk_mq_get_driver_tag(rq);
-}
-
 extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
 extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
 
@@ -79,15 +71,34 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
- * This helper should only be used for flush request to share tag
- * with the request cloned from, and both the two requests can't be
- * in flight at the same time. The caller has to make sure the tag
- * can't be freed.
+ * For shared tag users, we track the number of currently active users
+ * and attempt to provide a fair share of the tag depth for each of them.
  */
-static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
-               unsigned int tag, struct request *rq)
+static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
+                                 struct sbitmap_queue *bt)
 {
-       hctx->tags->rqs[tag] = rq;
+       unsigned int depth, users;
+
+       if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
+               return true;
+       if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+               return true;
+
+       /*
+        * Don't try dividing an ant
+        */
+       if (bt->sb.depth == 1)
+               return true;
+
+       users = atomic_read(&hctx->tags->active_queues);
+       if (!users)
+               return true;
+
+       /*
+        * Allow at least some tags
+        */
+       depth = max((bt->sb.depth + users - 1) / users, 4U);
+       return atomic_read(&hctx->nr_active) < depth;
 }
 
 static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
index 4e0d173..0015a18 100644 (file)
@@ -41,6 +41,8 @@
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
 
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 
@@ -275,26 +277,20 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 {
        struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
        struct request *rq = tags->static_rqs[tag];
-       req_flags_t rq_flags = 0;
 
-       if (data->flags & BLK_MQ_REQ_INTERNAL) {
+       if (data->q->elevator) {
                rq->tag = BLK_MQ_NO_TAG;
                rq->internal_tag = tag;
        } else {
-               if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
-                       rq_flags = RQF_MQ_INFLIGHT;
-                       atomic_inc(&data->hctx->nr_active);
-               }
                rq->tag = tag;
                rq->internal_tag = BLK_MQ_NO_TAG;
-               data->hctx->tags->rqs[rq->tag] = rq;
        }
 
        /* csd/requeue_work/fifo_time is initialized before use */
        rq->q = data->q;
        rq->mq_ctx = data->ctx;
        rq->mq_hctx = data->hctx;
-       rq->rq_flags = rq_flags;
+       rq->rq_flags = 0;
        rq->cmd_flags = data->cmd_flags;
        if (data->flags & BLK_MQ_REQ_PREEMPT)
                rq->rq_flags |= RQF_PREEMPT;
@@ -362,8 +358,6 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
                data->flags |= BLK_MQ_REQ_NOWAIT;
 
        if (e) {
-               data->flags |= BLK_MQ_REQ_INTERNAL;
-
                /*
                 * Flush requests are special and go directly to the
                 * dispatch list. Don't include reserved tags in the
@@ -378,7 +372,7 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
 retry:
        data->ctx = blk_mq_get_ctx(q);
        data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
-       if (!(data->flags & BLK_MQ_REQ_INTERNAL))
+       if (!e)
                blk_mq_tag_busy(data->hctx);
 
        /*
@@ -394,7 +388,7 @@ retry:
                /*
                 * Give up the CPU and sleep for a random short time to ensure
                 * that thread using a realtime scheduling class are migrated
-                * off the the CPU, and thus off the hctx that is going away.
+                * off the CPU, and thus off the hctx that is going away.
                 */
                msleep(3);
                goto retry;
@@ -474,9 +468,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
        cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
        data.ctx = __blk_mq_get_ctx(q, cpu);
 
-       if (q->elevator)
-               data.flags |= BLK_MQ_REQ_INTERNAL;
-       else
+       if (!q->elevator)
                blk_mq_tag_busy(data.hctx);
 
        ret = -EWOULDBLOCK;
@@ -552,8 +544,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
                blk_stat_add(rq, now);
        }
 
-       if (rq->internal_tag != BLK_MQ_NO_TAG)
-               blk_mq_sched_completed_request(rq, now);
+       blk_mq_sched_completed_request(rq, now);
 
        blk_account_io_done(rq, now);
 
@@ -574,71 +565,139 @@ void blk_mq_end_request(struct request *rq, blk_status_t error)
 }
 EXPORT_SYMBOL(blk_mq_end_request);
 
-static void __blk_mq_complete_request_remote(void *data)
+/*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
+static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 {
-       struct request *rq = data;
-       struct request_queue *q = rq->q;
+       struct list_head *cpu_list, local_list;
 
-       q->mq_ops->complete(rq);
+       local_irq_disable();
+       cpu_list = this_cpu_ptr(&blk_cpu_done);
+       list_replace_init(cpu_list, &local_list);
+       local_irq_enable();
+
+       while (!list_empty(&local_list)) {
+               struct request *rq;
+
+               rq = list_entry(local_list.next, struct request, ipi_list);
+               list_del_init(&rq->ipi_list);
+               rq->q->mq_ops->complete(rq);
+       }
 }
 
-/**
- * blk_mq_force_complete_rq() - Force complete the request, bypassing any error
- *                             injection that could drop the completion.
- * @rq: Request to be force completed
- *
- * Drivers should use blk_mq_complete_request() to complete requests in their
- * normal IO path. For timeout error recovery, drivers may call this forced
- * completion routine after they've reclaimed timed out requests to bypass
- * potentially subsequent fake timeouts.
- */
-void blk_mq_force_complete_rq(struct request *rq)
+static void blk_mq_trigger_softirq(struct request *rq)
 {
-       struct blk_mq_ctx *ctx = rq->mq_ctx;
-       struct request_queue *q = rq->q;
-       bool shared = false;
-       int cpu;
+       struct list_head *list;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       list = this_cpu_ptr(&blk_cpu_done);
+       list_add_tail(&rq->ipi_list, list);
+
+       /*
+        * If the list only contains our just added request, signal a raise of
+        * the softirq.  If there are already entries there, someone already
+        * raised the irq but it hasn't run yet.
+        */
+       if (list->next == &rq->ipi_list)
+               raise_softirq_irqoff(BLOCK_SOFTIRQ);
+       local_irq_restore(flags);
+}
+
+static int blk_softirq_cpu_dead(unsigned int cpu)
+{
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       local_irq_disable();
+       list_splice_init(&per_cpu(blk_cpu_done, cpu),
+                        this_cpu_ptr(&blk_cpu_done));
+       raise_softirq_irqoff(BLOCK_SOFTIRQ);
+       local_irq_enable();
+
+       return 0;
+}
+
+
+static void __blk_mq_complete_request_remote(void *data)
+{
+       struct request *rq = data;
 
-       WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
        /*
-        * Most of single queue controllers, there is only one irq vector
-        * for handling IO completion, and the only irq's affinity is set
-        * as all possible CPUs. On most of ARCHs, this affinity means the
-        * irq is handled on one specific CPU.
+        * Most single-queue controllers have only one irq vector for
+        * handling I/O completion, and that irq's affinity is set to all
+        * possible CPUs.  On most architectures, this affinity means the
+        * irq is handled on one specific CPU.
         *
-        * So complete IO reqeust in softirq context in case of single queue
-        * for not degrading IO performance by irqsoff latency.
+        * So complete I/O requests in softirq context in case of single queue
+        * devices to avoid degrading I/O performance due to irqsoff latency.
         */
-       if (q->nr_hw_queues == 1) {
-               __blk_complete_request(rq);
-               return;
-       }
+       if (rq->q->nr_hw_queues == 1)
+               blk_mq_trigger_softirq(rq);
+       else
+               rq->q->mq_ops->complete(rq);
+}
+
+static inline bool blk_mq_complete_need_ipi(struct request *rq)
+{
+       int cpu = raw_smp_processor_id();
+
+       if (!IS_ENABLED(CONFIG_SMP) ||
+           !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
+               return false;
+
+       /* same CPU or cache domain?  Complete locally */
+       if (cpu == rq->mq_ctx->cpu ||
+           (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
+            cpus_share_cache(cpu, rq->mq_ctx->cpu)))
+               return false;
+
+       /* don't try to IPI to an offline CPU */
+       return cpu_online(rq->mq_ctx->cpu);
+}
+
+bool blk_mq_complete_request_remote(struct request *rq)
+{
+       WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
 
        /*
         * For a polled request, always complete locally; it's pointless
         * to redirect the completion.
         */
-       if ((rq->cmd_flags & REQ_HIPRI) ||
-           !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
-               q->mq_ops->complete(rq);
-               return;
-       }
-
-       cpu = get_cpu();
-       if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
-               shared = cpus_share_cache(cpu, ctx->cpu);
+       if (rq->cmd_flags & REQ_HIPRI)
+               return false;
 
-       if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
+       if (blk_mq_complete_need_ipi(rq)) {
                rq->csd.func = __blk_mq_complete_request_remote;
                rq->csd.info = rq;
                rq->csd.flags = 0;
-               smp_call_function_single_async(ctx->cpu, &rq->csd);
+               smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
        } else {
-               q->mq_ops->complete(rq);
+               if (rq->q->nr_hw_queues > 1)
+                       return false;
+               blk_mq_trigger_softirq(rq);
        }
-       put_cpu();
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
+
+/**
+ * blk_mq_complete_request - end I/O on a request
+ * @rq:                the request being processed
+ *
+ * Description:
+ *     Complete a request by invoking or scheduling the ->complete operation.
+ **/
+void blk_mq_complete_request(struct request *rq)
+{
+       if (!blk_mq_complete_request_remote(rq))
+               rq->q->mq_ops->complete(rq);
 }
-EXPORT_SYMBOL_GPL(blk_mq_force_complete_rq);
+EXPORT_SYMBOL(blk_mq_complete_request);
 
 static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
        __releases(hctx->srcu)
@@ -660,23 +719,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
                *srcu_idx = srcu_read_lock(hctx->srcu);
 }
 
-/**
- * blk_mq_complete_request - end I/O on a request
- * @rq:                the request being processed
- *
- * Description:
- *     Ends all I/O on a request. It does not handle partial completions.
- *     The actual completion happens out-of-order, through a IPI handler.
- **/
-bool blk_mq_complete_request(struct request *rq)
-{
-       if (unlikely(blk_should_fake_timeout(rq->q)))
-               return false;
-       blk_mq_force_complete_rq(rq);
-       return true;
-}
-EXPORT_SYMBOL(blk_mq_complete_request);
-
 /**
  * blk_mq_start_request - Start processing a request
  * @rq: Pointer to request to be started
@@ -1052,6 +1094,45 @@ static inline unsigned int queued_to_index(unsigned int queued)
        return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
 }
 
+static bool __blk_mq_get_driver_tag(struct request *rq)
+{
+       struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
+       unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
+       int tag;
+
+       blk_mq_tag_busy(rq->mq_hctx);
+
+       if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
+               bt = &rq->mq_hctx->tags->breserved_tags;
+               tag_offset = 0;
+       }
+
+       if (!hctx_may_queue(rq->mq_hctx, bt))
+               return false;
+       tag = __sbitmap_queue_get(bt);
+       if (tag == BLK_MQ_NO_TAG)
+               return false;
+
+       rq->tag = tag + tag_offset;
+       return true;
+}
+
+static bool blk_mq_get_driver_tag(struct request *rq)
+{
+       struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+       if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
+               return false;
+
+       if ((hctx->flags & BLK_MQ_F_TAG_SHARED) &&
+                       !(rq->rq_flags & RQF_MQ_INFLIGHT)) {
+               rq->rq_flags |= RQF_MQ_INFLIGHT;
+               atomic_inc(&hctx->nr_active);
+       }
+       hctx->tags->rqs[rq->tag] = rq;
+       return true;
+}
+
 static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
                                int flags, void *key)
 {
@@ -1204,25 +1285,70 @@ static void blk_mq_handle_zone_resource(struct request *rq,
        __blk_mq_requeue_request(rq);
 }
 
+enum prep_dispatch {
+       PREP_DISPATCH_OK,
+       PREP_DISPATCH_NO_TAG,
+       PREP_DISPATCH_NO_BUDGET,
+};
+
+static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
+                                                 bool need_budget)
+{
+       struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+       if (need_budget && !blk_mq_get_dispatch_budget(rq->q)) {
+               blk_mq_put_driver_tag(rq);
+               return PREP_DISPATCH_NO_BUDGET;
+       }
+
+       if (!blk_mq_get_driver_tag(rq)) {
+               /*
+                * The initial allocation attempt failed, so we need to
+                * rerun the hardware queue when a tag is freed. The
+                * waitqueue takes care of that. If the queue is run
+                * before we add this entry back on the dispatch list,
+                * we'll re-run it below.
+                */
+               if (!blk_mq_mark_tag_wait(hctx, rq)) {
+                       /*
+                        * Budgets that were not acquired by this function are
+                        * released together when the partial dispatch is handled.
+                        */
+                       if (need_budget)
+                               blk_mq_put_dispatch_budget(rq->q);
+                       return PREP_DISPATCH_NO_TAG;
+               }
+       }
+
+       return PREP_DISPATCH_OK;
+}
+
+/* release budgets allocated before calling blk_mq_dispatch_rq_list() */
+static void blk_mq_release_budgets(struct request_queue *q,
+               unsigned int nr_budgets)
+{
+       int i;
+
+       for (i = 0; i < nr_budgets; i++)
+               blk_mq_put_dispatch_budget(q);
+}
+
 /*
  * Returns true if we did some work AND can potentially do more.
  */
-bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
-                            bool got_budget)
+bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
+                            unsigned int nr_budgets)
 {
-       struct blk_mq_hw_ctx *hctx;
+       enum prep_dispatch prep;
+       struct request_queue *q = hctx->queue;
        struct request *rq, *nxt;
-       bool no_tag = false;
        int errors, queued;
        blk_status_t ret = BLK_STS_OK;
-       bool no_budget_avail = false;
        LIST_HEAD(zone_list);
 
        if (list_empty(list))
                return false;
 
-       WARN_ON(!list_is_singular(list) && got_budget);
-
        /*
         * Now process all the entries, sending them to the driver.
         */
@@ -1232,32 +1358,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 
                rq = list_first_entry(list, struct request, queuelist);
 
-               hctx = rq->mq_hctx;
-               if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
-                       blk_mq_put_driver_tag(rq);
-                       no_budget_avail = true;
+               WARN_ON_ONCE(hctx != rq->mq_hctx);
+               prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
+               if (prep != PREP_DISPATCH_OK)
                        break;
-               }
-
-               if (!blk_mq_get_driver_tag(rq)) {
-                       /*
-                        * The initial allocation attempt failed, so we need to
-                        * rerun the hardware queue when a tag is freed. The
-                        * waitqueue takes care of that. If the queue is run
-                        * before we add this entry back on the dispatch list,
-                        * we'll re-run it below.
-                        */
-                       if (!blk_mq_mark_tag_wait(hctx, rq)) {
-                               blk_mq_put_dispatch_budget(hctx);
-                               /*
-                                * For non-shared tags, the RESTART check
-                                * will suffice.
-                                */
-                               if (hctx->flags & BLK_MQ_F_TAG_SHARED)
-                                       no_tag = true;
-                               break;
-                       }
-               }
 
                list_del_init(&rq->queuelist);
 
@@ -1274,31 +1378,35 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                        bd.last = !blk_mq_get_driver_tag(nxt);
                }
 
+               /*
+                * Once the request is queued to the low-level driver, its
+                * budget no longer needs to be accounted for here.
+                */
+               if (nr_budgets)
+                       nr_budgets--;
                ret = q->mq_ops->queue_rq(hctx, &bd);
-               if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
-                       blk_mq_handle_dev_resource(rq, list);
+               switch (ret) {
+               case BLK_STS_OK:
+                       queued++;
                        break;
-               } else if (ret == BLK_STS_ZONE_RESOURCE) {
+               case BLK_STS_RESOURCE:
+               case BLK_STS_DEV_RESOURCE:
+                       blk_mq_handle_dev_resource(rq, list);
+                       goto out;
+               case BLK_STS_ZONE_RESOURCE:
                        /*
                         * Move the request to zone_list and keep going through
                         * the dispatch list to find more requests the drive can
                         * accept.
                         */
                        blk_mq_handle_zone_resource(rq, &zone_list);
-                       if (list_empty(list))
-                               break;
-                       continue;
-               }
-
-               if (unlikely(ret != BLK_STS_OK)) {
+                       break;
+               default:
                        errors++;
                        blk_mq_end_request(rq, BLK_STS_IOERR);
-                       continue;
                }
-
-               queued++;
        } while (!list_empty(list));
-
+out:
        if (!list_empty(&zone_list))
                list_splice_tail_init(&zone_list, list);
 
@@ -1310,6 +1418,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
         */
        if (!list_empty(list)) {
                bool needs_restart;
+               /* For non-shared tags, the RESTART check will suffice */
+               bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
+                        (hctx->flags & BLK_MQ_F_TAG_SHARED);
+               bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
+
+               blk_mq_release_budgets(q, nr_budgets);
 
                /*
                 * If we didn't flush the entire list, we could have told
@@ -1361,13 +1475,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
        } else
                blk_mq_update_dispatch_busy(hctx, false);
 
-       /*
-        * If the host/device is unable to accept more work, inform the
-        * caller of that.
-        */
-       if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
-               return false;
-
        return (queued + errors) != 0;
 }
 
@@ -1896,11 +2003,11 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
        if (q->elevator && !bypass_insert)
                goto insert;
 
-       if (!blk_mq_get_dispatch_budget(hctx))
+       if (!blk_mq_get_dispatch_budget(q))
                goto insert;
 
        if (!blk_mq_get_driver_tag(rq)) {
-               blk_mq_put_dispatch_budget(hctx);
+               blk_mq_put_dispatch_budget(q);
                goto insert;
        }
 
@@ -2005,8 +2112,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 }
 
 /**
- * blk_mq_make_request - Create and send a request to block device.
- * @q: Request queue pointer.
+ * blk_mq_submit_bio - Create and send a request to block device.
  * @bio: Bio pointer.
  *
  * Builds up a request structure from @bio and sends it to the device. The
@@ -2020,8 +2126,9 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
  *
  * Returns: Request queue cookie.
  */
-blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
+blk_qc_t blk_mq_submit_bio(struct bio *bio)
 {
+       struct request_queue *q = bio->bi_disk->queue;
        const int is_sync = op_is_sync(bio->bi_opf);
        const int is_flush_fua = op_is_flush(bio->bi_opf);
        struct blk_mq_alloc_data data = {
@@ -2035,7 +2142,7 @@ blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
        blk_status_t ret;
 
        blk_queue_bounce(q, &bio);
-       __blk_queue_split(q, &bio, &nr_segs);
+       __blk_queue_split(&bio, &nr_segs);
 
        if (!bio_integrity_prep(bio))
                goto queue_exit;
@@ -2146,7 +2253,7 @@ queue_exit:
        blk_queue_exit(q);
        return BLK_QC_T_NONE;
 }
-EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */
+EXPORT_SYMBOL_GPL(blk_mq_submit_bio); /* only for request based dm */
 
 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                     unsigned int hctx_idx)
@@ -2792,7 +2899,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
        struct blk_mq_tag_set *set = q->tag_set;
 
        mutex_lock(&set->tag_list_lock);
-       list_del_rcu(&q->tag_set_list);
+       list_del(&q->tag_set_list);
        if (list_is_singular(&set->tag_list)) {
                /* just transitioned to unshared */
                set->flags &= ~BLK_MQ_F_TAG_SHARED;
@@ -2819,7 +2926,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
        }
        if (set->flags & BLK_MQ_F_TAG_SHARED)
                queue_set_hctx_shared(q, true);
-       list_add_tail_rcu(&q->tag_set_list, &set->tag_list);
+       list_add_tail(&q->tag_set_list, &set->tag_list);
 
        mutex_unlock(&set->tag_list_lock);
 }
@@ -2886,7 +2993,7 @@ struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
 {
        struct request_queue *uninit_q, *q;
 
-       uninit_q = __blk_alloc_queue(set->numa_node);
+       uninit_q = blk_alloc_queue(set->numa_node);
        if (!uninit_q)
                return ERR_PTR(-ENOMEM);
        uninit_q->queuedata = queuedata;
@@ -3760,6 +3867,15 @@ EXPORT_SYMBOL(blk_mq_rq_cpu);
 
 static int __init blk_mq_init(void)
 {
+       int i;
+
+       for_each_possible_cpu(i)
+               INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+       open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
+
+       cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
+                                 "block/softirq:dead", NULL,
+                                 blk_softirq_cpu_dead);
        cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
                                blk_mq_hctx_notify_dead);
        cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
index b3ce0f3..863a2f3 100644 (file)
@@ -40,7 +40,8 @@ struct blk_mq_ctx {
 void blk_mq_exit_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
-bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
+bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
+                            unsigned int);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
                                bool kick_requeue_list);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
@@ -159,7 +160,7 @@ struct blk_mq_alloc_data {
 
 static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
 {
-       if (data->flags & BLK_MQ_REQ_INTERNAL)
+       if (data->q->elevator)
                return data->hctx->sched_tags;
 
        return data->hctx->tags;
@@ -179,20 +180,16 @@ unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part);
 void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
                         unsigned int inflight[2]);
 
-static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx)
+static inline void blk_mq_put_dispatch_budget(struct request_queue *q)
 {
-       struct request_queue *q = hctx->queue;
-
        if (q->mq_ops->put_budget)
-               q->mq_ops->put_budget(hctx);
+               q->mq_ops->put_budget(q);
 }
 
-static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
+static inline bool blk_mq_get_dispatch_budget(struct request_queue *q)
 {
-       struct request_queue *q = hctx->queue;
-
        if (q->mq_ops->get_budget)
-               return q->mq_ops->get_budget(hctx);
+               return q->mq_ops->get_budget(q);
        return true;
 }
 
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
deleted file mode 100644 (file)
index 6e7ec87..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Functions related to softirq rq completions
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/sched/topology.h>
-
-#include "blk.h"
-
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
-
-/*
- * Softirq action handler - move entries to local list and loop over them
- * while passing them to the queue registered handler.
- */
-static __latent_entropy void blk_done_softirq(struct softirq_action *h)
-{
-       struct list_head *cpu_list, local_list;
-
-       local_irq_disable();
-       cpu_list = this_cpu_ptr(&blk_cpu_done);
-       list_replace_init(cpu_list, &local_list);
-       local_irq_enable();
-
-       while (!list_empty(&local_list)) {
-               struct request *rq;
-
-               rq = list_entry(local_list.next, struct request, ipi_list);
-               list_del_init(&rq->ipi_list);
-               rq->q->mq_ops->complete(rq);
-       }
-}
-
-#ifdef CONFIG_SMP
-static void trigger_softirq(void *data)
-{
-       struct request *rq = data;
-       struct list_head *list;
-
-       list = this_cpu_ptr(&blk_cpu_done);
-       list_add_tail(&rq->ipi_list, list);
-
-       if (list->next == &rq->ipi_list)
-               raise_softirq_irqoff(BLOCK_SOFTIRQ);
-}
-
-/*
- * Setup and invoke a run of 'trigger_softirq' on the given cpu.
- */
-static int raise_blk_irq(int cpu, struct request *rq)
-{
-       if (cpu_online(cpu)) {
-               call_single_data_t *data = &rq->csd;
-
-               data->func = trigger_softirq;
-               data->info = rq;
-               data->flags = 0;
-
-               smp_call_function_single_async(cpu, data);
-               return 0;
-       }
-
-       return 1;
-}
-#else /* CONFIG_SMP */
-static int raise_blk_irq(int cpu, struct request *rq)
-{
-       return 1;
-}
-#endif
-
-static int blk_softirq_cpu_dead(unsigned int cpu)
-{
-       /*
-        * If a CPU goes away, splice its entries to the current CPU
-        * and trigger a run of the softirq
-        */
-       local_irq_disable();
-       list_splice_init(&per_cpu(blk_cpu_done, cpu),
-                        this_cpu_ptr(&blk_cpu_done));
-       raise_softirq_irqoff(BLOCK_SOFTIRQ);
-       local_irq_enable();
-
-       return 0;
-}
-
-void __blk_complete_request(struct request *req)
-{
-       struct request_queue *q = req->q;
-       int cpu, ccpu = req->mq_ctx->cpu;
-       unsigned long flags;
-       bool shared = false;
-
-       BUG_ON(!q->mq_ops->complete);
-
-       local_irq_save(flags);
-       cpu = smp_processor_id();
-
-       /*
-        * Select completion CPU
-        */
-       if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) {
-               if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
-                       shared = cpus_share_cache(cpu, ccpu);
-       } else
-               ccpu = cpu;
-
-       /*
-        * If current CPU and requested CPU share a cache, run the softirq on
-        * the current CPU. One might concern this is just like
-        * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
-        * running in interrupt handler, and currently I/O controller doesn't
-        * support multiple interrupts, so current CPU is unique actually. This
-        * avoids IPI sending from current CPU to the first CPU of a group.
-        */
-       if (ccpu == cpu || shared) {
-               struct list_head *list;
-do_local:
-               list = this_cpu_ptr(&blk_cpu_done);
-               list_add_tail(&req->ipi_list, list);
-
-               /*
-                * if the list only contains our just added request,
-                * signal a raise of the softirq. If there are already
-                * entries there, someone already raised the irq but it
-                * hasn't run yet.
-                */
-               if (list->next == &req->ipi_list)
-                       raise_softirq_irqoff(BLOCK_SOFTIRQ);
-       } else if (raise_blk_irq(ccpu, req))
-               goto do_local;
-
-       local_irq_restore(flags);
-}
-
-static __init int blk_softirq_init(void)
-{
-       int i;
-
-       for_each_possible_cpu(i)
-               INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
-
-       open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
-       cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
-                                 "block/softirq:dead", NULL,
-                                 blk_softirq_cpu_dead);
-       return 0;
-}
-subsys_initcall(blk_softirq_init);
index 02643e1..be67952 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/blktrace_api.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-cgroup.h>
+#include <linux/debugfs.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -873,22 +874,32 @@ static void blk_exit_queue(struct request_queue *q)
        bdi_put(q->backing_dev_info);
 }
 
-
 /**
- * __blk_release_queue - release a request queue
- * @work: pointer to the release_work member of the request queue to be released
+ * blk_release_queue - releases all allocated resources of the request_queue
+ * @kobj: pointer to a kobject, whose container is a request_queue
+ *
+ * This function releases all allocated resources of the request queue.
+ *
+ * The struct request_queue refcount is incremented with blk_get_queue() and
+ * decremented with blk_put_queue(). Once the refcount reaches 0 this function
+ * is called.
+ *
+ * For drivers that have a request_queue on a gendisk and added with
+ * __device_add_disk() the refcount to request_queue will reach 0 with
+ * the last put_disk() called by the driver. For drivers which don't use
+ * __device_add_disk() this happens with blk_cleanup_queue().
+ *
+ * Drivers exist which depend on the release of the request_queue being
+ * synchronous, so it should not be deferred.
  *
- * Description:
- *     This function is called when a block device is being unregistered. The
- *     process of releasing a request queue starts with blk_cleanup_queue, which
- *     set the appropriate flags and then calls blk_put_queue, that decrements
- *     the reference counter of the request queue. Once the reference counter
- *     of the request queue reaches zero, blk_release_queue is called to release
- *     all allocated resources of the request queue.
+ * Context: can sleep
  */
-static void __blk_release_queue(struct work_struct *work)
+static void blk_release_queue(struct kobject *kobj)
 {
-       struct request_queue *q = container_of(work, typeof(*q), release_work);
+       struct request_queue *q =
+               container_of(kobj, struct request_queue, kobj);
+
+       might_sleep();
 
        if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
                blk_stat_remove_callback(q, q->poll_cb);
@@ -907,6 +918,9 @@ static void __blk_release_queue(struct work_struct *work)
                blk_mq_release(q);
 
        blk_trace_shutdown(q);
+       mutex_lock(&q->debugfs_mutex);
+       debugfs_remove_recursive(q->debugfs_dir);
+       mutex_unlock(&q->debugfs_mutex);
 
        if (queue_is_mq(q))
                blk_mq_debugfs_unregister(q);
@@ -917,15 +931,6 @@ static void __blk_release_queue(struct work_struct *work)
        call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
-static void blk_release_queue(struct kobject *kobj)
-{
-       struct request_queue *q =
-               container_of(kobj, struct request_queue, kobj);
-
-       INIT_WORK(&q->release_work, __blk_release_queue);
-       schedule_work(&q->release_work);
-}
-
 static const struct sysfs_ops queue_sysfs_ops = {
        .show   = queue_attr_show,
        .store  = queue_attr_store,
@@ -988,6 +993,11 @@ int blk_register_queue(struct gendisk *disk)
                goto unlock;
        }
 
+       mutex_lock(&q->debugfs_mutex);
+       q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
+                                           blk_debugfs_root);
+       mutex_unlock(&q->debugfs_mutex);
+
        if (queue_is_mq(q)) {
                __blk_mq_register_dev(dev, q);
                blk_mq_debugfs_register(q);
index 209fdd8..fee3325 100644 (file)
@@ -1339,8 +1339,8 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
 
        if (!bio_list_empty(&bio_list_on_stack)) {
                blk_start_plug(&plug);
-               while((bio = bio_list_pop(&bio_list_on_stack)))
-                       generic_make_request(bio);
+               while ((bio = bio_list_pop(&bio_list_on_stack)))
+                       submit_bio_noacct(bio);
                blk_finish_plug(&plug);
        }
 }
@@ -2158,17 +2158,18 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 }
 #endif
 
-bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
-                   struct bio *bio)
+bool blk_throtl_bio(struct bio *bio)
 {
+       struct request_queue *q = bio->bi_disk->queue;
+       struct blkcg_gq *blkg = bio->bi_blkg;
        struct throtl_qnode *qn = NULL;
-       struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
+       struct throtl_grp *tg = blkg_to_tg(blkg);
        struct throtl_service_queue *sq;
        bool rw = bio_data_dir(bio);
        bool throttled = false;
        struct throtl_data *td = tg->td;
 
-       WARN_ON_ONCE(!rcu_read_lock_held());
+       rcu_read_lock();
 
        /* see throtl_charge_bio() */
        if (bio_flagged(bio, BIO_THROTTLED))
@@ -2273,6 +2274,7 @@ out:
        if (throttled || !td->track_bio_latency)
                bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
 #endif
+       rcu_read_unlock();
        return throttled;
 }
 
index 8aa68fa..1b8de04 100644 (file)
@@ -20,13 +20,11 @@ static int __init setup_fail_io_timeout(char *str)
 }
 __setup("fail_io_timeout=", setup_fail_io_timeout);
 
-int blk_should_fake_timeout(struct request_queue *q)
+bool __blk_should_fake_timeout(struct request_queue *q)
 {
-       if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
-               return 0;
-
        return should_fail(&fail_io_timeout, 1);
 }
+EXPORT_SYMBOL_GPL(__blk_should_fake_timeout);
 
 static int __init fail_io_timeout_debugfs(void)
 {
@@ -70,7 +68,7 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
 #endif /* CONFIG_FAIL_IO_TIMEOUT */
 
 /**
- * blk_abort_request -- Request request recovery for the specified command
+ * blk_abort_request - Request recovery for the specified command
  * @req:       pointer to the request of interest
  *
  * This function requests that the block layer start recovery for the
@@ -90,11 +88,29 @@ void blk_abort_request(struct request *req)
 }
 EXPORT_SYMBOL_GPL(blk_abort_request);
 
+static unsigned long blk_timeout_mask __read_mostly;
+
+static int __init blk_timeout_init(void)
+{
+       blk_timeout_mask = roundup_pow_of_two(HZ) - 1;
+       return 0;
+}
+
+late_initcall(blk_timeout_init);
+
+/*
+ * Just a rough estimate, we don't care about specific values for timeouts.
+ */
+static inline unsigned long blk_round_jiffies(unsigned long j)
+{
+       return (j + blk_timeout_mask) + 1;
+}
+
 unsigned long blk_rq_timeout(unsigned long timeout)
 {
        unsigned long maxt;
 
-       maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT);
+       maxt = blk_round_jiffies(jiffies + BLK_MAX_TIMEOUT);
        if (time_after(timeout, maxt))
                timeout = maxt;
 
@@ -131,7 +147,7 @@ void blk_add_timer(struct request *req)
         * than an existing one, modify the timer. Round up to next nearest
         * second.
         */
-       expiry = blk_rq_timeout(round_jiffies_up(expiry));
+       expiry = blk_rq_timeout(blk_round_jiffies(expiry));
 
        if (!timer_pending(&q->timeout) ||
            time_before(expiry, q->timeout.expires)) {
index b5d1f0f..49e2928 100644 (file)
@@ -14,9 +14,7 @@
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT                (5 * HZ)
 
-#ifdef CONFIG_DEBUG_FS
 extern struct dentry *blk_debugfs_root;
-#endif
 
 struct blk_flush_queue {
        unsigned int            flush_pending_idx:1;
@@ -27,11 +25,6 @@ struct blk_flush_queue {
        struct list_head        flush_data_in_flight;
        struct request          *flush_rq;
 
-       /*
-        * flush_rq shares tag with this rq, both can't be active
-        * at the same time
-        */
-       struct request          *orig_rq;
        struct lock_class_key   key;
        spinlock_t              mq_flush_lock;
 };
@@ -223,21 +216,11 @@ ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
                char *buf);
 ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
                const char *buf, size_t count);
-
-#ifdef CONFIG_FAIL_IO_TIMEOUT
-int blk_should_fake_timeout(struct request_queue *);
 ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
                                const char *, size_t);
-#else
-static inline int blk_should_fake_timeout(struct request_queue *q)
-{
-       return 0;
-}
-#endif
 
-void __blk_queue_split(struct request_queue *q, struct bio **bio,
-               unsigned int *nr_segs);
+void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
 int ll_back_merge_fn(struct request *req, struct bio *bio,
                unsigned int nr_segs);
 int ll_front_merge_fn(struct request *req,  struct bio *bio,
@@ -281,6 +264,20 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
        return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
 }
 
+/*
+ * The max bio size which is aligned to q->limits.discard_granularity. This
+ * is a hint for splitting large discard bios in the generic block layer, so
+ * that if the device driver needs to split a discard bio into smaller ones,
+ * their bi_size can most likely be aligned easily to the discard_granularity
+ * of the device's queue.
+ */
+static inline unsigned int bio_aligned_discard_max_sectors(
+                                       struct request_queue *q)
+{
+       return round_down(UINT_MAX, q->limits.discard_granularity) >>
+                       SECTOR_SHIFT;
+}
+
 /*
  * Internal io_context interface
  */
@@ -299,10 +296,12 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
 extern int blk_throtl_init(struct request_queue *q);
 extern void blk_throtl_exit(struct request_queue *q);
 extern void blk_throtl_register_queue(struct request_queue *q);
+bool blk_throtl_bio(struct bio *bio);
 #else /* CONFIG_BLK_DEV_THROTTLING */
 static inline int blk_throtl_init(struct request_queue *q) { return 0; }
 static inline void blk_throtl_exit(struct request_queue *q) { }
 static inline void blk_throtl_register_queue(struct request_queue *q) { }
+static inline bool blk_throtl_bio(struct bio *bio) { return false; }
 #endif /* CONFIG_BLK_DEV_THROTTLING */
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
@@ -434,8 +433,6 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
 #endif
 }
 
-struct request_queue *__blk_alloc_queue(int node_id);
-
 int bio_add_hw_page(struct request_queue *q, struct bio *bio,
                struct page *page, unsigned int len, unsigned int offset,
                unsigned int max_sectors, bool *same_page);
index c3aaed0..431be88 100644 (file)
@@ -309,7 +309,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        if (!passthrough && sectors < bio_sectors(*bio_orig)) {
                bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
                bio_chain(bio, *bio_orig);
-               generic_make_request(*bio_orig);
+               submit_bio_noacct(*bio_orig);
                *bio_orig = bio;
        }
        bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
index 6cbb792..fb7b347 100644 (file)
@@ -181,9 +181,12 @@ EXPORT_SYMBOL_GPL(bsg_job_get);
 void bsg_job_done(struct bsg_job *job, int result,
                  unsigned int reply_payload_rcv_len)
 {
+       struct request *rq = blk_mq_rq_from_pdu(job);
+
        job->result = result;
        job->reply_payload_rcv_len = reply_payload_rcv_len;
-       blk_mq_complete_request(blk_mq_rq_from_pdu(job));
+       if (likely(!blk_should_fake_timeout(rq->q)))
+               blk_mq_complete_request(rq);
 }
 EXPORT_SYMBOL_GPL(bsg_job_done);
 
index 4eab3d7..90ed7a2 100644 (file)
@@ -95,8 +95,8 @@ static inline bool elv_support_features(unsigned int elv_features,
  * @name: Elevator name to test
  * @required_features: Features that the elevator must provide
  *
- * Return true is the elevator @e name matches @name and if @e provides all the
- * the feratures spcified by @required_features.
+ * Return true if the elevator @e name matches @name and if @e provides all
+ * the features specified by @required_features.
  */
 static bool elevator_match(const struct elevator_type *e, const char *name,
                           unsigned int required_features)
index 1a76593..99c6464 100644 (file)
@@ -38,8 +38,6 @@ static struct kobject *block_depr;
 static DEFINE_SPINLOCK(ext_devt_lock);
 static DEFINE_IDR(ext_devt_idr);
 
-static const struct device_type disk_type;
-
 static void disk_check_events(struct disk_events *ev,
                              unsigned int *clearing_ptr);
 static void disk_alloc_events(struct gendisk *disk);
@@ -876,11 +874,32 @@ static void invalidate_partition(struct gendisk *disk, int partno)
        bdput(bdev);
 }
 
+/**
+ * del_gendisk - remove the gendisk
+ * @disk: the struct gendisk to remove
+ *
+ * Removes the gendisk and all its associated resources. This deletes the
+ * partitions associated with the gendisk, and unregisters the associated
+ * request_queue.
+ *
+ * This is the counterpart to the respective __device_add_disk() call.
+ *
+ * The final removal of the struct gendisk happens when its refcount reaches 0
+ * with put_disk(), which should be called after del_gendisk(), if
+ * __device_add_disk() was used.
+ *
+ * Drivers exist which depend on the release of the gendisk being synchronous,
+ * so it should not be deferred.
+ *
+ * Context: can sleep
+ */
 void del_gendisk(struct gendisk *disk)
 {
        struct disk_part_iter piter;
        struct hd_struct *part;
 
+       might_sleep();
+
        blk_integrity_del(disk);
        disk_del_events(disk);
 
@@ -971,11 +990,15 @@ static ssize_t disk_badblocks_store(struct device *dev,
  *
  * This function gets the structure containing partitioning
  * information for the given device @devt.
+ *
+ * Context: can sleep
  */
 struct gendisk *get_gendisk(dev_t devt, int *partno)
 {
        struct gendisk *disk = NULL;
 
+       might_sleep();
+
        if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
                struct kobject *kobj;
 
@@ -1514,10 +1537,31 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
        return 0;
 }
 
+/**
+ * disk_release - releases all allocated resources of the gendisk
+ * @dev: the device representing this disk
+ *
+ * This function releases all allocated resources of the gendisk.
+ *
+ * The struct gendisk refcount is incremented with get_gendisk() or
+ * get_disk_and_module(), and its refcount is decremented with
+ * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this
+ * function is called.
+ *
+ * Drivers which used __device_add_disk() have a gendisk with a request_queue
+ * assigned. Since the request_queue sits on top of the gendisk for these
+ * drivers we also call blk_put_queue() for them, and we expect the
+ * request_queue refcount to reach 0 at this point, and so the request_queue
+ * will also be freed prior to the disk.
+ *
+ * Context: can sleep
+ */
 static void disk_release(struct device *dev)
 {
        struct gendisk *disk = dev_to_disk(dev);
 
+       might_sleep();
+
        blk_free_devt(dev->devt);
        disk_release_events(disk);
        kfree(disk->random);
@@ -1541,7 +1585,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
        return NULL;
 }
 
-static const struct device_type disk_type = {
+const struct device_type disk_type = {
        .name           = "disk",
        .groups         = disk_attr_groups,
        .release        = disk_release,
@@ -1727,6 +1771,15 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 }
 EXPORT_SYMBOL(__alloc_disk_node);
 
+/**
+ * get_disk_and_module - increments the gendisk and gendisk fops module refcount
+ * @disk: the struct gendisk to increment the refcount for
+ *
+ * This increments the refcount for the struct gendisk, and the gendisk's
+ * fops module owner.
+ *
+ * Context: Any context.
+ */
 struct kobject *get_disk_and_module(struct gendisk *disk)
 {
        struct module *owner;
@@ -1747,6 +1800,16 @@ struct kobject *get_disk_and_module(struct gendisk *disk)
 }
 EXPORT_SYMBOL(get_disk_and_module);
 
+/**
+ * put_disk - decrements the gendisk refcount
+ * @disk: the struct gendisk to decrement the refcount for
+ *
+ * This decrements the refcount for the struct gendisk. When this reaches 0
+ * we'll have disk_release() called.
+ *
+ * Context: Any context, but the last reference must not be dropped from
+ *          atomic context.
+ */
 void put_disk(struct gendisk *disk)
 {
        if (disk)
@@ -1754,9 +1817,15 @@ void put_disk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(put_disk);
 
-/*
+/**
+ * put_disk_and_module - decrements the module and gendisk refcount
+ * @disk: the struct gendisk to decrement the refcount for
+ *
  * This is a counterpart of get_disk_and_module() and thus also of
  * get_gendisk().
+ *
+ * Context: Any context, but the last reference must not be dropped from
+ *          atomic context.
  */
 void put_disk_and_module(struct gendisk *disk)
 {
@@ -1985,18 +2054,12 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
  */
 unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
 {
-       const struct block_device_operations *bdops = disk->fops;
        struct disk_events *ev = disk->ev;
        unsigned int pending;
        unsigned int clearing = mask;
 
-       if (!ev) {
-               /* for drivers still using the old ->media_changed method */
-               if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
-                   bdops->media_changed && bdops->media_changed(disk))
-                       return DISK_EVENT_MEDIA_CHANGE;
+       if (!ev)
                return 0;
-       }
 
        disk_block_events(disk);
 
index 78951e3..e62a98a 100644 (file)
@@ -619,8 +619,6 @@ int blk_drop_partitions(struct block_device *bdev)
        struct disk_part_iter piter;
        struct hd_struct *part;
 
-       if (!disk_part_scan_enabled(bdev->bd_disk))
-               return 0;
        if (bdev->bd_part_count)
                return -EBUSY;
 
index 091c0a0..1b57419 100644 (file)
@@ -548,7 +548,7 @@ config CRYPTO_XCBC
        select CRYPTO_MANAGER
        help
          XCBC: Keyed-Hashing with encryption algorithm
-               http://www.ietf.org/rfc/rfc3566.txt
+               https://www.ietf.org/rfc/rfc3566.txt
                http://csrc.nist.gov/encryption/modes/proposedmodes/
                 xcbc-mac/xcbc-mac-spec.pdf
 
@@ -561,7 +561,7 @@ config CRYPTO_VMAC
          very high speed on 64-bit architectures.
 
          See also:
-         <http://fastcrypto.org/vmac>
+         <https://fastcrypto.org/vmac>
 
 comment "Digest"
 
@@ -816,7 +816,7 @@ config CRYPTO_RMD128
          RIPEMD-160 should be used.
 
          Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel.
-         See <http://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
+         See <https://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
 
 config CRYPTO_RMD160
        tristate "RIPEMD-160 digest algorithm"
@@ -833,7 +833,7 @@ config CRYPTO_RMD160
          against RIPEMD-160.
 
          Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel.
-         See <http://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
+         See <https://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
 
 config CRYPTO_RMD256
        tristate "RIPEMD-256 digest algorithm"
@@ -845,7 +845,7 @@ config CRYPTO_RMD256
          (than RIPEMD-128).
 
          Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel.
-         See <http://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
+         See <https://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
 
 config CRYPTO_RMD320
        tristate "RIPEMD-320 digest algorithm"
@@ -857,7 +857,7 @@ config CRYPTO_RMD320
          (than RIPEMD-160).
 
          Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel.
-         See <http://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
+         See <https://homes.esat.kuleuven.be/~bosselae/ripemd160.html>
 
 config CRYPTO_SHA1
        tristate "SHA1 digest algorithm"
@@ -1045,7 +1045,7 @@ config CRYPTO_TGR192
          Tiger was developed by Ross Anderson and Eli Biham.
 
          See also:
-         <http://www.cs.technion.ac.il/~biham/Reports/Tiger/>.
+         <https://www.cs.technion.ac.il/~biham/Reports/Tiger/>.
 
 config CRYPTO_WP512
        tristate "Whirlpool digest algorithms"
@@ -1221,7 +1221,7 @@ config CRYPTO_BLOWFISH
          designed for use on "large microprocessors".
 
          See also:
-         <http://www.schneier.com/blowfish.html>
+         <https://www.schneier.com/blowfish.html>
 
 config CRYPTO_BLOWFISH_COMMON
        tristate
@@ -1230,7 +1230,7 @@ config CRYPTO_BLOWFISH_COMMON
          generic c and the assembler implementations.
 
          See also:
-         <http://www.schneier.com/blowfish.html>
+         <https://www.schneier.com/blowfish.html>
 
 config CRYPTO_BLOWFISH_X86_64
        tristate "Blowfish cipher algorithm (x86_64)"
@@ -1245,7 +1245,7 @@ config CRYPTO_BLOWFISH_X86_64
          designed for use on "large microprocessors".
 
          See also:
-         <http://www.schneier.com/blowfish.html>
+         <https://www.schneier.com/blowfish.html>
 
 config CRYPTO_CAMELLIA
        tristate "Camellia cipher algorithms"
@@ -1441,10 +1441,10 @@ config CRYPTO_SALSA20
          Salsa20 stream cipher algorithm.
 
          Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
-         Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
+         Stream Cipher Project. See <https://www.ecrypt.eu.org/stream/>
 
          The Salsa20 stream cipher algorithm is designed by Daniel J.
-         Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
+         Bernstein <djb@cr.yp.to>. See <https://cr.yp.to/snuffle.html>
 
 config CRYPTO_CHACHA20
        tristate "ChaCha stream cipher algorithms"
@@ -1456,7 +1456,7 @@ config CRYPTO_CHACHA20
          ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
          Bernstein and further specified in RFC7539 for use in IETF protocols.
          This is the portable C implementation of ChaCha20.  See also:
-         <http://cr.yp.to/chacha/chacha-20080128.pdf>
+         <https://cr.yp.to/chacha/chacha-20080128.pdf>
 
          XChaCha20 is the application of the XSalsa20 construction to ChaCha20
          rather than to Salsa20.  XChaCha20 extends ChaCha20's nonce length
@@ -1509,7 +1509,7 @@ config CRYPTO_SERPENT
          variant of Serpent for compatibility with old kerneli.org code.
 
          See also:
-         <http://www.cl.cam.ac.uk/~rja14/serpent.html>
+         <https://www.cl.cam.ac.uk/~rja14/serpent.html>
 
 config CRYPTO_SERPENT_SSE2_X86_64
        tristate "Serpent cipher algorithm (x86_64/SSE2)"
@@ -1528,7 +1528,7 @@ config CRYPTO_SERPENT_SSE2_X86_64
          blocks parallel using SSE2 instruction set.
 
          See also:
-         <http://www.cl.cam.ac.uk/~rja14/serpent.html>
+         <https://www.cl.cam.ac.uk/~rja14/serpent.html>
 
 config CRYPTO_SERPENT_SSE2_586
        tristate "Serpent cipher algorithm (i586/SSE2)"
@@ -1547,7 +1547,7 @@ config CRYPTO_SERPENT_SSE2_586
          blocks parallel using SSE2 instruction set.
 
          See also:
-         <http://www.cl.cam.ac.uk/~rja14/serpent.html>
+         <https://www.cl.cam.ac.uk/~rja14/serpent.html>
 
 config CRYPTO_SERPENT_AVX_X86_64
        tristate "Serpent cipher algorithm (x86_64/AVX)"
@@ -1567,7 +1567,7 @@ config CRYPTO_SERPENT_AVX_X86_64
          eight blocks parallel using the AVX instruction set.
 
          See also:
-         <http://www.cl.cam.ac.uk/~rja14/serpent.html>
+         <https://www.cl.cam.ac.uk/~rja14/serpent.html>
 
 config CRYPTO_SERPENT_AVX2_X86_64
        tristate "Serpent cipher algorithm (x86_64/AVX2)"
@@ -1583,7 +1583,7 @@ config CRYPTO_SERPENT_AVX2_X86_64
          blocks parallel using AVX2 instruction set.
 
          See also:
-         <http://www.cl.cam.ac.uk/~rja14/serpent.html>
+         <https://www.cl.cam.ac.uk/~rja14/serpent.html>
 
 config CRYPTO_SM4
        tristate "SM4 cipher algorithm"
@@ -1640,7 +1640,7 @@ config CRYPTO_TWOFISH
          bits.
 
          See also:
-         <http://www.schneier.com/twofish.html>
+         <https://www.schneier.com/twofish.html>
 
 config CRYPTO_TWOFISH_COMMON
        tristate
@@ -1662,7 +1662,7 @@ config CRYPTO_TWOFISH_586
          bits.
 
          See also:
-         <http://www.schneier.com/twofish.html>
+         <https://www.schneier.com/twofish.html>
 
 config CRYPTO_TWOFISH_X86_64
        tristate "Twofish cipher algorithm (x86_64)"
@@ -1678,7 +1678,7 @@ config CRYPTO_TWOFISH_X86_64
          bits.
 
          See also:
-         <http://www.schneier.com/twofish.html>
+         <https://www.schneier.com/twofish.html>
 
 config CRYPTO_TWOFISH_X86_64_3WAY
        tristate "Twofish cipher algorithm (x86_64, 3-way parallel)"
@@ -1699,7 +1699,7 @@ config CRYPTO_TWOFISH_X86_64_3WAY
          blocks parallel, utilizing resources of out-of-order CPUs better.
 
          See also:
-         <http://www.schneier.com/twofish.html>
+         <https://www.schneier.com/twofish.html>
 
 config CRYPTO_TWOFISH_AVX_X86_64
        tristate "Twofish cipher algorithm (x86_64/AVX)"
@@ -1722,7 +1722,7 @@ config CRYPTO_TWOFISH_AVX_X86_64
          eight blocks parallel using the AVX Instruction Set.
 
          See also:
-         <http://www.schneier.com/twofish.html>
+         <https://www.schneier.com/twofish.html>
 
 comment "Compression"
 
index 84a7672..c32c720 100644 (file)
@@ -109,6 +109,14 @@ struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_acomp);
 
+struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
+                                       u32 mask, int node)
+{
+       return crypto_alloc_tfm_node(alg_name, &crypto_acomp_type, type, mask,
+                               node);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node);
+
 struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp)
 {
        struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
index cf2b9f4..7fbdc32 100644 (file)
@@ -490,7 +490,6 @@ static bool adiantum_supported_algorithms(struct skcipher_alg *streamcipher_alg,
 
 static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        const char *nhpoly1305_name;
        struct skcipher_instance *inst;
@@ -500,14 +499,9 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
        struct shash_alg *hash_alg;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
        if (!inst)
@@ -565,8 +559,6 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
                     hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = streamcipher_alg->base.cra_flags &
-                                  CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE;
        inst->alg.base.cra_ctxsize = sizeof(struct adiantum_tfm_ctx);
        inst->alg.base.cra_alignmask = streamcipher_alg->base.cra_alignmask |
index 28fc323..5882ed4 100644 (file)
@@ -635,6 +635,7 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
 
        if (!ctx->used)
                ctx->merge = 0;
+       ctx->init = ctx->more;
 }
 EXPORT_SYMBOL_GPL(af_alg_pull_tsgl);
 
@@ -734,9 +735,10 @@ EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup);
  *
  * @sk socket of connection to user space
  * @flags If MSG_DONTWAIT is set, then only report if function would sleep
+ * @min Set to minimum request size if partial requests are allowed.
  * @return 0 when writable memory is available, < 0 upon error
  */
-int af_alg_wait_for_data(struct sock *sk, unsigned flags)
+int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min)
 {
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct alg_sock *ask = alg_sk(sk);
@@ -754,7 +756,9 @@ int af_alg_wait_for_data(struct sock *sk, unsigned flags)
                if (signal_pending(current))
                        break;
                timeout = MAX_SCHEDULE_TIMEOUT;
-               if (sk_wait_event(sk, &timeout, (ctx->used || !ctx->more),
+               if (sk_wait_event(sk, &timeout,
+                                 ctx->init && (!ctx->more ||
+                                               (min && ctx->used >= min)),
                                  &wait)) {
                        err = 0;
                        break;
@@ -843,10 +847,11 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
        }
 
        lock_sock(sk);
-       if (!ctx->more && ctx->used) {
+       if (ctx->init && (init || !ctx->more)) {
                err = -EINVAL;
                goto unlock;
        }
+       ctx->init = true;
 
        if (init) {
                ctx->enc = enc;
index 92abdf6..fdabf26 100644 (file)
@@ -690,6 +690,8 @@ int crypto_grab_spawn(struct crypto_spawn *spawn, struct crypto_instance *inst,
                spawn->mask = mask;
                spawn->next = inst->spawns;
                inst->spawns = spawn;
+               inst->alg.cra_flags |=
+                       (alg->cra_flags & CRYPTO_ALG_INHERITED_FLAGS);
                err = 0;
        }
        up_write(&crypto_alg_sem);
@@ -816,7 +818,23 @@ struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb)
 }
 EXPORT_SYMBOL_GPL(crypto_get_attr_type);
 
-int crypto_check_attr_type(struct rtattr **tb, u32 type)
+/**
+ * crypto_check_attr_type() - check algorithm type and compute inherited mask
+ * @tb: the template parameters
+ * @type: the algorithm type the template would be instantiated as
+ * @mask_ret: (output) the mask that should be passed to crypto_grab_*()
+ *           to restrict the flags of any inner algorithms
+ *
+ * Validate that the algorithm type the user requested is compatible with the
+ * one the template would actually be instantiated as.  E.g., if the user is
+ * doing crypto_alloc_shash("cbc(aes)", ...), this would return an error because
+ * the "cbc" template creates an "skcipher" algorithm, not an "shash" algorithm.
+ *
+ * Also compute the mask to use to restrict the flags of any inner algorithms.
+ *
+ * Return: 0 on success; -errno on failure
+ */
+int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret)
 {
        struct crypto_attr_type *algt;
 
@@ -827,6 +845,7 @@ int crypto_check_attr_type(struct rtattr **tb, u32 type)
        if ((algt->type ^ type) & algt->mask)
                return -EINVAL;
 
+       *mask_ret = crypto_algt_inherited_mask(algt);
        return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_check_attr_type);
index 0ae000a..d48d215 100644 (file)
@@ -106,8 +106,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
        size_t usedpages = 0;           /* [in]  RX bufs to be used from user */
        size_t processed = 0;           /* [in]  TX bufs to be consumed */
 
-       if (!ctx->used) {
-               err = af_alg_wait_for_data(sk, flags);
+       if (!ctx->init || ctx->more) {
+               err = af_alg_wait_for_data(sk, flags, 0);
                if (err)
                        return err;
        }
index ec5567c..a51ba22 100644 (file)
@@ -61,8 +61,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
        int err = 0;
        size_t len = 0;
 
-       if (!ctx->used) {
-               err = af_alg_wait_for_data(sk, flags);
+       if (!ctx->init || (ctx->more && ctx->used < bs)) {
+               err = af_alg_wait_for_data(sk, flags, bs);
                if (err)
                        return err;
        }
index edcf690..5d8fe60 100644 (file)
@@ -433,8 +433,9 @@ err:
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_base);
 
-void *crypto_create_tfm(struct crypto_alg *alg,
-                       const struct crypto_type *frontend)
+void *crypto_create_tfm_node(struct crypto_alg *alg,
+                       const struct crypto_type *frontend,
+                       int node)
 {
        char *mem;
        struct crypto_tfm *tfm = NULL;
@@ -445,12 +446,13 @@ void *crypto_create_tfm(struct crypto_alg *alg,
        tfmsize = frontend->tfmsize;
        total = tfmsize + sizeof(*tfm) + frontend->extsize(alg);
 
-       mem = kzalloc(total, GFP_KERNEL);
+       mem = kzalloc_node(total, GFP_KERNEL, node);
        if (mem == NULL)
                goto out_err;
 
        tfm = (struct crypto_tfm *)(mem + tfmsize);
        tfm->__crt_alg = alg;
+       tfm->node = node;
 
        err = frontend->init_tfm(tfm);
        if (err)
@@ -472,7 +474,7 @@ out_err:
 out:
        return mem;
 }
-EXPORT_SYMBOL_GPL(crypto_create_tfm);
+EXPORT_SYMBOL_GPL(crypto_create_tfm_node);
 
 struct crypto_alg *crypto_find_alg(const char *alg_name,
                                   const struct crypto_type *frontend,
@@ -490,11 +492,13 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
 EXPORT_SYMBOL_GPL(crypto_find_alg);
 
 /*
- *     crypto_alloc_tfm - Locate algorithm and allocate transform
+ *     crypto_alloc_tfm_node - Locate algorithm and allocate transform
  *     @alg_name: Name of algorithm
  *     @frontend: Frontend algorithm type
  *     @type: Type of algorithm
  *     @mask: Mask for type comparison
+ *     @node: NUMA node in which users desire to put requests; if node is
+ *             NUMA_NO_NODE, users have no special requirement.
  *
  *     crypto_alloc_tfm() will first attempt to locate an already loaded
  *     algorithm.  If that fails and the kernel supports dynamically loadable
@@ -509,8 +513,10 @@ EXPORT_SYMBOL_GPL(crypto_find_alg);
  *
  *     In case of error the return value is an error pointer.
  */
-void *crypto_alloc_tfm(const char *alg_name,
-                      const struct crypto_type *frontend, u32 type, u32 mask)
+
+void *crypto_alloc_tfm_node(const char *alg_name,
+                      const struct crypto_type *frontend, u32 type, u32 mask,
+                      int node)
 {
        void *tfm;
        int err;
@@ -524,7 +530,7 @@ void *crypto_alloc_tfm(const char *alg_name,
                        goto err;
                }
 
-               tfm = crypto_create_tfm(alg, frontend);
+               tfm = crypto_create_tfm_node(alg, frontend, node);
                if (!IS_ERR(tfm))
                        return tfm;
 
@@ -542,7 +548,7 @@ err:
 
        return ERR_PTR(err);
 }
-EXPORT_SYMBOL_GPL(crypto_alloc_tfm);
+EXPORT_SYMBOL_GPL(crypto_alloc_tfm_node);
 
 /*
  *     crypto_destroy_tfm - Free crypto transform
index 775e713..670bf1a 100644 (file)
@@ -372,7 +372,6 @@ static void crypto_authenc_free(struct aead_instance *inst)
 static int crypto_authenc_create(struct crypto_template *tmpl,
                                 struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct aead_instance *inst;
        struct authenc_instance_ctx *ctx;
@@ -381,14 +380,9 @@ static int crypto_authenc_create(struct crypto_template *tmpl,
        struct skcipher_alg *enc;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -423,8 +417,6 @@ static int crypto_authenc_create(struct crypto_template *tmpl,
                     enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = (auth_base->cra_flags |
-                                   enc->base.cra_flags) & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = enc->base.cra_priority * 10 +
                                      auth_base->cra_priority;
        inst->alg.base.cra_blocksize = enc->base.cra_blocksize;
index 149b70d..b60e61b 100644 (file)
@@ -390,7 +390,6 @@ static void crypto_authenc_esn_free(struct aead_instance *inst)
 static int crypto_authenc_esn_create(struct crypto_template *tmpl,
                                     struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct aead_instance *inst;
        struct authenc_esn_instance_ctx *ctx;
@@ -399,14 +398,9 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl,
        struct skcipher_alg *enc;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -437,8 +431,6 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl,
                     enc->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = (auth_base->cra_flags |
-                                   enc->base.cra_flags) & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = enc->base.cra_priority * 10 +
                                      auth_base->cra_priority;
        inst->alg.base.cra_blocksize = enc->base.cra_blocksize;
index 0ffd8d9..a2ffe60 100644 (file)
@@ -8,7 +8,7 @@
  *
  * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
  * - OpenSSL license   : https://www.openssl.org/source/license.html
- * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ * - Apache 2.0        : https://www.apache.org/licenses/LICENSE-2.0
  *
  * More information about the BLAKE2 hash function can be found at
  * https://blake2.net.
index 9a5783e..0b9f409 100644 (file)
@@ -6,7 +6,7 @@
 
 /*
  * Algorithm Specification
- *  http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
+ *  https://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
  */
 
 /*
index d1fb01b..494d709 100644 (file)
@@ -447,7 +447,6 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
                                    const char *ctr_name,
                                    const char *mac_name)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct aead_instance *inst;
        struct ccm_instance_ctx *ictx;
@@ -455,14 +454,9 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
        struct hash_alg_common *mac;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
        if (!inst)
@@ -470,7 +464,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
        ictx = aead_instance_ctx(inst);
 
        err = crypto_grab_ahash(&ictx->mac, aead_crypto_instance(inst),
-                               mac_name, 0, CRYPTO_ALG_ASYNC);
+                               mac_name, 0, mask | CRYPTO_ALG_ASYNC);
        if (err)
                goto err_free_inst;
        mac = crypto_spawn_ahash_alg(&ictx->mac);
@@ -507,7 +501,6 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
                     mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = (mac->base.cra_priority +
                                       ctr->base.cra_priority) / 2;
        inst->alg.base.cra_blocksize = 1;
@@ -712,21 +705,15 @@ static void crypto_rfc4309_free(struct aead_instance *inst)
 static int crypto_rfc4309_create(struct crypto_template *tmpl,
                                 struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct aead_instance *inst;
        struct crypto_aead_spawn *spawn;
        struct aead_alg *alg;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
@@ -759,7 +746,6 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl,
            CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = 1;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
@@ -878,9 +864,10 @@ static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        struct shash_instance *inst;
        struct crypto_cipher_spawn *spawn;
        struct crypto_alg *alg;
+       u32 mask;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask);
        if (err)
                return err;
 
@@ -890,7 +877,7 @@ static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        spawn = shash_instance_ctx(inst);
 
        err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
-                                crypto_attr_alg_name(tb[1]), 0, 0);
+                                crypto_attr_alg_name(tb[1]), 0, mask);
        if (err)
                goto err_free_inst;
        alg = crypto_spawn_cipher_alg(spawn);
index ccaea5c..97bbb13 100644 (file)
@@ -555,7 +555,6 @@ static void chachapoly_free(struct aead_instance *inst)
 static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
                             const char *name, unsigned int ivsize)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct aead_instance *inst;
        struct chachapoly_instance_ctx *ctx;
@@ -566,14 +565,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
        if (ivsize > CHACHAPOLY_IV_SIZE)
                return -EINVAL;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -613,8 +607,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
                     poly->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = (chacha->base.cra_flags |
-                                   poly->base.cra_flags) & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = (chacha->base.cra_priority +
                                       poly->base.cra_priority) / 2;
        inst->alg.base.cra_blocksize = 1;
index 143a654..df36be1 100644 (file)
@@ -225,9 +225,10 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        struct crypto_cipher_spawn *spawn;
        struct crypto_alg *alg;
        unsigned long alignmask;
+       u32 mask;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask);
        if (err)
                return err;
 
@@ -237,7 +238,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        spawn = shash_instance_ctx(inst);
 
        err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
-                                crypto_attr_alg_name(tb[1]), 0, 0);
+                                crypto_attr_alg_name(tb[1]), 0, mask);
        if (err)
                goto err_free_inst;
        alg = crypto_spawn_cipher_alg(spawn);
index 2832122..a1bea0f 100644 (file)
@@ -191,17 +191,20 @@ static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm)
        return ictx->queue;
 }
 
-static inline void cryptd_check_internal(struct rtattr **tb, u32 *type,
-                                        u32 *mask)
+static void cryptd_type_and_mask(struct crypto_attr_type *algt,
+                                u32 *type, u32 *mask)
 {
-       struct crypto_attr_type *algt;
+       /*
+        * cryptd is allowed to wrap internal algorithms, but in that case the
+        * resulting cryptd instance will be marked as internal as well.
+        */
+       *type = algt->type & CRYPTO_ALG_INTERNAL;
+       *mask = algt->mask & CRYPTO_ALG_INTERNAL;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return;
+       /* No point in cryptd wrapping an algorithm that's already async. */
+       *mask |= CRYPTO_ALG_ASYNC;
 
-       *type |= algt->type & CRYPTO_ALG_INTERNAL;
-       *mask |= algt->mask & CRYPTO_ALG_INTERNAL;
+       *mask |= crypto_algt_inherited_mask(algt);
 }
 
 static int cryptd_init_instance(struct crypto_instance *inst,
@@ -364,6 +367,7 @@ static void cryptd_skcipher_free(struct skcipher_instance *inst)
 
 static int cryptd_create_skcipher(struct crypto_template *tmpl,
                                  struct rtattr **tb,
+                                 struct crypto_attr_type *algt,
                                  struct cryptd_queue *queue)
 {
        struct skcipherd_instance_ctx *ctx;
@@ -373,10 +377,7 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl,
        u32 mask;
        int err;
 
-       type = 0;
-       mask = CRYPTO_ALG_ASYNC;
-
-       cryptd_check_internal(tb, &type, &mask);
+       cryptd_type_and_mask(algt, &type, &mask);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -395,9 +396,8 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl,
        if (err)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
-                                  (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
-
+       inst->alg.base.cra_flags |= CRYPTO_ALG_ASYNC |
+               (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
        inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
        inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
        inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
@@ -633,16 +633,17 @@ static void cryptd_hash_free(struct ahash_instance *inst)
 }
 
 static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
+                             struct crypto_attr_type *algt,
                              struct cryptd_queue *queue)
 {
        struct hashd_instance_ctx *ctx;
        struct ahash_instance *inst;
        struct shash_alg *alg;
-       u32 type = 0;
-       u32 mask = 0;
+       u32 type;
+       u32 mask;
        int err;
 
-       cryptd_check_internal(tb, &type, &mask);
+       cryptd_type_and_mask(algt, &type, &mask);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -661,10 +662,9 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
        if (err)
                goto err_free_inst;
 
-       inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
-               (alg->base.cra_flags & (CRYPTO_ALG_INTERNAL |
+       inst->alg.halg.base.cra_flags |= CRYPTO_ALG_ASYNC |
+               (alg->base.cra_flags & (CRYPTO_ALG_INTERNAL|
                                        CRYPTO_ALG_OPTIONAL_KEY));
-
        inst->alg.halg.digestsize = alg->digestsize;
        inst->alg.halg.statesize = alg->statesize;
        inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx);
@@ -820,16 +820,17 @@ static void cryptd_aead_free(struct aead_instance *inst)
 
 static int cryptd_create_aead(struct crypto_template *tmpl,
                              struct rtattr **tb,
+                             struct crypto_attr_type *algt,
                              struct cryptd_queue *queue)
 {
        struct aead_instance_ctx *ctx;
        struct aead_instance *inst;
        struct aead_alg *alg;
-       u32 type = 0;
-       u32 mask = CRYPTO_ALG_ASYNC;
+       u32 type;
+       u32 mask;
        int err;
 
-       cryptd_check_internal(tb, &type, &mask);
+       cryptd_type_and_mask(algt, &type, &mask);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -848,8 +849,8 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
        if (err)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
-                                  (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
+       inst->alg.base.cra_flags |= CRYPTO_ALG_ASYNC |
+               (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
        inst->alg.base.cra_ctxsize = sizeof(struct cryptd_aead_ctx);
 
        inst->alg.ivsize = crypto_aead_alg_ivsize(alg);
@@ -884,11 +885,11 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
 
        switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
        case CRYPTO_ALG_TYPE_SKCIPHER:
-               return cryptd_create_skcipher(tmpl, tb, &queue);
+               return cryptd_create_skcipher(tmpl, tb, algt, &queue);
        case CRYPTO_ALG_TYPE_HASH:
-               return cryptd_create_hash(tmpl, tb, &queue);
+               return cryptd_create_hash(tmpl, tb, algt, &queue);
        case CRYPTO_ALG_TYPE_AEAD:
-               return cryptd_create_aead(tmpl, tb, &queue);
+               return cryptd_create_aead(tmpl, tb, algt, &queue);
        }
 
        return -EINVAL;
index 31ac4ae..c39fcff 100644 (file)
@@ -256,29 +256,20 @@ static void crypto_rfc3686_free(struct skcipher_instance *inst)
 static int crypto_rfc3686_create(struct crypto_template *tmpl,
                                 struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        struct skcipher_instance *inst;
        struct skcipher_alg *alg;
        struct crypto_skcipher_spawn *spawn;
        u32 mask;
-
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
-               return -EINVAL;
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
                return -ENOMEM;
 
-       mask = crypto_requires_sync(algt->type, algt->mask) |
-               crypto_requires_off(algt->type, algt->mask,
-                                   CRYPTO_ALG_NEED_FALLBACK);
-
        spawn = skcipher_instance_ctx(inst);
 
        err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst),
@@ -310,8 +301,6 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
        inst->alg.base.cra_blocksize = 1;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
-
        inst->alg.ivsize = CTR_RFC3686_IV_SIZE;
        inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
        inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
index 5e005c4..3766d47 100644 (file)
@@ -325,19 +325,13 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
        struct crypto_skcipher_spawn *spawn;
        struct skcipher_instance *inst;
-       struct crypto_attr_type *algt;
        struct skcipher_alg *alg;
        u32 mask;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
@@ -364,7 +358,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
        if (err)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
index 566f624..cd4f320 100644 (file)
@@ -9,6 +9,7 @@
 #include <crypto/internal/kpp.h>
 #include <crypto/kpp.h>
 #include <crypto/dh.h>
+#include <linux/fips.h>
 #include <linux/mpi.h>
 
 struct dh_ctx {
@@ -179,6 +180,43 @@ static int dh_compute_value(struct kpp_request *req)
        if (ret)
                goto err_free_base;
 
+       if (fips_enabled) {
+               /* SP800-56A rev3 5.7.1.1 check: Validation of shared secret */
+               if (req->src) {
+                       MPI pone;
+
+                       /* z <= 1 */
+                       if (mpi_cmp_ui(val, 1) < 1) {
+                               ret = -EBADMSG;
+                               goto err_free_base;
+                       }
+
+                       /* z == p - 1 */
+                       pone = mpi_alloc(0);
+
+                       if (!pone) {
+                               ret = -ENOMEM;
+                               goto err_free_base;
+                       }
+
+                       ret = mpi_sub_ui(pone, ctx->p, 1);
+                       if (!ret && !mpi_cmp(pone, val))
+                               ret = -EBADMSG;
+
+                       mpi_free(pone);
+
+                       if (ret)
+                               goto err_free_base;
+
+               /* SP800-56A rev 3 5.6.2.1.3 key check */
+               } else {
+                       if (dh_is_pubkey_valid(ctx, val)) {
+                               ret = -EAGAIN;
+                               goto err_free_val;
+                       }
+               }
+       }
+
        ret = mpi_write_to_sgl(val, req->dst, req->dst_len, &sign);
        if (ret)
                goto err_free_base;
index 02d35be..8acf843 100644 (file)
@@ -940,7 +940,7 @@ static bool ecc_point_is_zero(const struct ecc_point *point)
 }
 
 /* Point multiplication algorithm using Montgomery's ladder with co-Z
- * coordinates. From http://eprint.iacr.org/2011/338.pdf
+ * coordinates. From https://eprint.iacr.org/2011/338.pdf
  */
 
 /* Double in place */
@@ -1404,7 +1404,9 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
        }
 
        ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits);
-       if (ecc_point_is_zero(pk)) {
+
+       /* SP800-56A rev 3 5.6.2.1.3 key check */
+       if (ecc_is_pubkey_valid_full(curve, pk)) {
                ret = -EAGAIN;
                goto err_free_point;
        }
@@ -1452,6 +1454,33 @@ int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
 }
 EXPORT_SYMBOL(ecc_is_pubkey_valid_partial);
 
+/* SP800-56A section 5.6.2.3.3 full verification */
+int ecc_is_pubkey_valid_full(const struct ecc_curve *curve,
+                            struct ecc_point *pk)
+{
+       struct ecc_point *nQ;
+
+       /* Checks 1 through 3 */
+       int ret = ecc_is_pubkey_valid_partial(curve, pk);
+
+       if (ret)
+               return ret;
+
+       /* Check 4: Verify that nQ is the zero point. */
+       nQ = ecc_alloc_point(pk->ndigits);
+       if (!nQ)
+               return -ENOMEM;
+
+       ecc_point_mult(nQ, pk, curve->n, NULL, curve, pk->ndigits);
+       if (!ecc_point_is_zero(nQ))
+               ret = -EINVAL;
+
+       ecc_free_point(nQ);
+
+       return ret;
+}
+EXPORT_SYMBOL(ecc_is_pubkey_valid_full);
+
 int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
                              const u64 *private_key, const u64 *public_key,
                              u64 *secret)
@@ -1495,11 +1524,16 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 
        ecc_point_mult(product, pk, priv, rand_z, curve, ndigits);
 
-       ecc_swap_digits(product->x, secret, ndigits);
-
-       if (ecc_point_is_zero(product))
+       if (ecc_point_is_zero(product)) {
                ret = -EFAULT;
+               goto err_validity;
+       }
+
+       ecc_swap_digits(product->x, secret, ndigits);
 
+err_validity:
+       memzero_explicit(priv, sizeof(priv));
+       memzero_explicit(rand_z, sizeof(rand_z));
        ecc_free_point(product);
 err_alloc_product:
        ecc_free_point(pk);
index ab0eb70..d4e546b 100644 (file)
@@ -147,6 +147,20 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
                                struct ecc_point *pk);
 
+/**
+ * ecc_is_pubkey_valid_full() - Full public key validation
+ *
+ * @curve:             elliptic curve domain parameters
+ * @pk:                        public key as a point
+ *
+ * Validate public key according to SP800-56A section 5.6.2.3.3 ECC Full
+ * Public-Key Validation Routine.
+ *
+ * Return: 0 if valid, -EINVAL if validation fails, -ENOMEM if out of memory.
+ */
+int ecc_is_pubkey_valid_full(const struct ecc_curve *curve,
+                            struct ecc_point *pk);
+
 /**
  * vli_is_zero() - Determine is vli is zero
  *
index 4a2f02b..6968666 100644 (file)
@@ -115,7 +115,7 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
        struct aead_instance *inst;
        int err;
 
-       inst = aead_geniv_alloc(tmpl, tb, 0, 0);
+       inst = aead_geniv_alloc(tmpl, tb);
 
        if (IS_ERR(inst))
                return PTR_ERR(inst);
index a7f45db..d012be2 100644 (file)
@@ -466,7 +466,7 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
                return PTR_ERR(shash_name);
 
        type = algt->type & algt->mask;
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       mask = crypto_algt_inherited_mask(algt);
 
        switch (type) {
        case CRYPTO_ALG_TYPE_SKCIPHER:
@@ -525,7 +525,7 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
        /* Synchronous hash, e.g., "sha256" */
        _hash_alg = crypto_alg_mod_lookup(shash_name,
                                          CRYPTO_ALG_TYPE_SHASH,
-                                         CRYPTO_ALG_TYPE_MASK);
+                                         CRYPTO_ALG_TYPE_MASK | mask);
        if (IS_ERR(_hash_alg)) {
                err = PTR_ERR(_hash_alg);
                goto out_drop_skcipher;
@@ -557,7 +557,12 @@ static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
                     hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
                goto out_free_hash;
 
-       base->cra_flags         = block_base->cra_flags & CRYPTO_ALG_ASYNC;
+       /*
+        * hash_alg wasn't gotten via crypto_grab*(), so we need to inherit its
+        * flags manually.
+        */
+       base->cra_flags        |= (hash_alg->base.cra_flags &
+                                  CRYPTO_ALG_INHERITED_FLAGS);
        base->cra_blocksize     = block_base->cra_blocksize;
        base->cra_ctxsize       = sizeof(struct essiv_tfm_ctx);
        base->cra_alignmask     = block_base->cra_alignmask;
index 0103d28..3a36a95 100644 (file)
@@ -578,7 +578,6 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
                                    const char *ctr_name,
                                    const char *ghash_name)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct aead_instance *inst;
        struct gcm_instance_ctx *ctx;
@@ -586,14 +585,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
        struct hash_alg_common *ghash;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -635,8 +629,6 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
            CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = (ghash->base.cra_flags |
-                                   ctr->base.cra_flags) & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = (ghash->base.cra_priority +
                                       ctr->base.cra_priority) / 2;
        inst->alg.base.cra_blocksize = 1;
@@ -835,21 +827,15 @@ static void crypto_rfc4106_free(struct aead_instance *inst)
 static int crypto_rfc4106_create(struct crypto_template *tmpl,
                                 struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct aead_instance *inst;
        struct crypto_aead_spawn *spawn;
        struct aead_alg *alg;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
@@ -882,7 +868,6 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
            CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = 1;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
@@ -1057,21 +1042,15 @@ static void crypto_rfc4543_free(struct aead_instance *inst)
 static int crypto_rfc4543_create(struct crypto_template *tmpl,
                                struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct aead_instance *inst;
        struct aead_alg *alg;
        struct crypto_rfc4543_instance_ctx *ctx;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -1104,7 +1083,6 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
            CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = 1;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
index 6a90c52..bee4621 100644 (file)
@@ -39,22 +39,19 @@ static void aead_geniv_free(struct aead_instance *inst)
 }
 
 struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
-                                      struct rtattr **tb, u32 type, u32 mask)
+                                      struct rtattr **tb)
 {
        struct crypto_aead_spawn *spawn;
-       struct crypto_attr_type *algt;
        struct aead_instance *inst;
        struct aead_alg *alg;
        unsigned int ivsize;
        unsigned int maxauthsize;
+       u32 mask;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return ERR_CAST(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return ERR_PTR(-EINVAL);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
+       if (err)
+               return ERR_PTR(err);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
@@ -62,11 +59,8 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
 
        spawn = aead_instance_ctx(inst);
 
-       /* Ignore async algorithms if necessary. */
-       mask |= crypto_requires_sync(algt->type, algt->mask);
-
        err = crypto_grab_aead(spawn, aead_crypto_instance(inst),
-                              crypto_attr_alg_name(tb[1]), type, mask);
+                              crypto_attr_alg_name(tb[1]), 0, mask);
        if (err)
                goto err_free_inst;
 
@@ -89,7 +83,6 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
            CRYPTO_MAX_ALG_NAME)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
index e38bfb9..25856aa 100644 (file)
@@ -168,11 +168,12 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        struct crypto_shash_spawn *spawn;
        struct crypto_alg *alg;
        struct shash_alg *salg;
+       u32 mask;
        int err;
        int ds;
        int ss;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask);
        if (err)
                return err;
 
@@ -182,7 +183,7 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        spawn = shash_instance_ctx(inst);
 
        err = crypto_grab_shash(spawn, shash_crypto_instance(inst),
-                               crypto_attr_alg_name(tb[1]), 0, 0);
+                               crypto_attr_alg_name(tb[1]), 0, mask);
        if (err)
                goto err_free_inst;
        salg = crypto_spawn_shash_alg(spawn);
index ff06a3b..1b92a5a 100644 (file)
@@ -68,13 +68,28 @@ void crypto_remove_final(struct list_head *list);
 void crypto_shoot_alg(struct crypto_alg *alg);
 struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
                                      u32 mask);
-void *crypto_create_tfm(struct crypto_alg *alg,
-                       const struct crypto_type *frontend);
+void *crypto_create_tfm_node(struct crypto_alg *alg,
+                       const struct crypto_type *frontend, int node);
+
+static inline void *crypto_create_tfm(struct crypto_alg *alg,
+                       const struct crypto_type *frontend)
+{
+       return crypto_create_tfm_node(alg, frontend, NUMA_NO_NODE);
+}
+
 struct crypto_alg *crypto_find_alg(const char *alg_name,
                                   const struct crypto_type *frontend,
                                   u32 type, u32 mask);
-void *crypto_alloc_tfm(const char *alg_name,
-                      const struct crypto_type *frontend, u32 type, u32 mask);
+
+void *crypto_alloc_tfm_node(const char *alg_name,
+                      const struct crypto_type *frontend, u32 type, u32 mask,
+                      int node);
+
+static inline void *crypto_alloc_tfm(const char *alg_name,
+                      const struct crypto_type *frontend, u32 type, u32 mask)
+{
+       return crypto_alloc_tfm_node(alg_name, frontend, type, mask, NUMA_NO_NODE);
+}
 
 int crypto_probing_notify(unsigned long val, void *v);
 
index 57f4a1a..6e147c4 100644 (file)
@@ -7,7 +7,7 @@
  * Design
  * ======
  *
- * See http://www.chronox.de/jent.html
+ * See https://www.chronox.de/jent.html
  *
  * License
  * =======
@@ -47,7 +47,7 @@
 
 /*
  * This Jitterentropy RNG is based on the jitterentropy library
- * version 2.2.0 provided at http://www.chronox.de/jent.html
+ * version 2.2.0 provided at https://www.chronox.de/jent.html
  */
 
 #ifdef __OPTIMIZE__
index 5b07a7c..bcf09fb 100644 (file)
@@ -9,7 +9,7 @@
  */
 /* This implementation is checked against the test vectors in the above
  * document and by a test vector provided by Ken Buchanan at
- * http://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html
+ * https://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html
  *
  * The test vectors are included in the testing module tcrypt.[ch] */
 
@@ -27,7 +27,7 @@
 
 #define LRW_BLOCK_SIZE 16
 
-struct priv {
+struct lrw_tfm_ctx {
        struct crypto_skcipher *child;
 
        /*
@@ -49,12 +49,12 @@ struct priv {
        be128 mulinc[128];
 };
 
-struct rctx {
+struct lrw_request_ctx {
        be128 t;
        struct skcipher_request subreq;
 };
 
-static inline void setbit128_bbe(void *b, int bit)
+static inline void lrw_setbit128_bbe(void *b, int bit)
 {
        __set_bit(bit ^ (0x80 -
 #ifdef __BIG_ENDIAN
@@ -65,10 +65,10 @@ static inline void setbit128_bbe(void *b, int bit)
                        ), b);
 }
 
-static int setkey(struct crypto_skcipher *parent, const u8 *key,
-                 unsigned int keylen)
+static int lrw_setkey(struct crypto_skcipher *parent, const u8 *key,
+                     unsigned int keylen)
 {
-       struct priv *ctx = crypto_skcipher_ctx(parent);
+       struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(parent);
        struct crypto_skcipher *child = ctx->child;
        int err, bsize = LRW_BLOCK_SIZE;
        const u8 *tweak = key + keylen - bsize;
@@ -92,7 +92,7 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 
        /* initialize optimization table */
        for (i = 0; i < 128; i++) {
-               setbit128_bbe(&tmp, i);
+               lrw_setbit128_bbe(&tmp, i);
                ctx->mulinc[i] = tmp;
                gf128mul_64k_bbe(&ctx->mulinc[i], ctx->table);
        }
@@ -108,10 +108,10 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
  * For example:
  *
  * u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
- * int i = next_index(&counter);
+ * int i = lrw_next_index(&counter);
  * // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 }
  */
-static int next_index(u32 *counter)
+static int lrw_next_index(u32 *counter)
 {
        int i, res = 0;
 
@@ -135,14 +135,14 @@ static int next_index(u32 *counter)
  * We compute the tweak masks twice (both before and after the ECB encryption or
  * decryption) to avoid having to allocate a temporary buffer and/or make
  * mutliple calls to the 'ecb(..)' instance, which usually would be slower than
- * just doing the next_index() calls again.
+ * just doing the lrw_next_index() calls again.
  */
-static int xor_tweak(struct skcipher_request *req, bool second_pass)
+static int lrw_xor_tweak(struct skcipher_request *req, bool second_pass)
 {
        const int bs = LRW_BLOCK_SIZE;
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-       struct priv *ctx = crypto_skcipher_ctx(tfm);
-       struct rctx *rctx = skcipher_request_ctx(req);
+       const struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct lrw_request_ctx *rctx = skcipher_request_ctx(req);
        be128 t = rctx->t;
        struct skcipher_walk w;
        __be32 *iv;
@@ -178,7 +178,8 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass)
 
                        /* T <- I*Key2, using the optimization
                         * discussed in the specification */
-                       be128_xor(&t, &t, &ctx->mulinc[next_index(counter)]);
+                       be128_xor(&t, &t,
+                                 &ctx->mulinc[lrw_next_index(counter)]);
                } while ((avail -= bs) >= bs);
 
                if (second_pass && w.nbytes == w.total) {
@@ -194,38 +195,40 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass)
        return err;
 }
 
-static int xor_tweak_pre(struct skcipher_request *req)
+static int lrw_xor_tweak_pre(struct skcipher_request *req)
 {
-       return xor_tweak(req, false);
+       return lrw_xor_tweak(req, false);
 }
 
-static int xor_tweak_post(struct skcipher_request *req)
+static int lrw_xor_tweak_post(struct skcipher_request *req)
 {
-       return xor_tweak(req, true);
+       return lrw_xor_tweak(req, true);
 }
 
-static void crypt_done(struct crypto_async_request *areq, int err)
+static void lrw_crypt_done(struct crypto_async_request *areq, int err)
 {
        struct skcipher_request *req = areq->data;
 
        if (!err) {
-               struct rctx *rctx = skcipher_request_ctx(req);
+               struct lrw_request_ctx *rctx = skcipher_request_ctx(req);
 
                rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-               err = xor_tweak_post(req);
+               err = lrw_xor_tweak_post(req);
        }
 
        skcipher_request_complete(req, err);
 }
 
-static void init_crypt(struct skcipher_request *req)
+static void lrw_init_crypt(struct skcipher_request *req)
 {
-       struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
-       struct rctx *rctx = skcipher_request_ctx(req);
+       const struct lrw_tfm_ctx *ctx =
+               crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+       struct lrw_request_ctx *rctx = skcipher_request_ctx(req);
        struct skcipher_request *subreq = &rctx->subreq;
 
        skcipher_request_set_tfm(subreq, ctx->child);
-       skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
+       skcipher_request_set_callback(subreq, req->base.flags, lrw_crypt_done,
+                                     req);
        /* pass req->iv as IV (will be used by xor_tweak, ECB will ignore it) */
        skcipher_request_set_crypt(subreq, req->dst, req->dst,
                                   req->cryptlen, req->iv);
@@ -237,33 +240,33 @@ static void init_crypt(struct skcipher_request *req)
        gf128mul_64k_bbe(&rctx->t, ctx->table);
 }
 
-static int encrypt(struct skcipher_request *req)
+static int lrw_encrypt(struct skcipher_request *req)
 {
-       struct rctx *rctx = skcipher_request_ctx(req);
+       struct lrw_request_ctx *rctx = skcipher_request_ctx(req);
        struct skcipher_request *subreq = &rctx->subreq;
 
-       init_crypt(req);
-       return xor_tweak_pre(req) ?:
+       lrw_init_crypt(req);
+       return lrw_xor_tweak_pre(req) ?:
                crypto_skcipher_encrypt(subreq) ?:
-               xor_tweak_post(req);
+               lrw_xor_tweak_post(req);
 }
 
-static int decrypt(struct skcipher_request *req)
+static int lrw_decrypt(struct skcipher_request *req)
 {
-       struct rctx *rctx = skcipher_request_ctx(req);
+       struct lrw_request_ctx *rctx = skcipher_request_ctx(req);
        struct skcipher_request *subreq = &rctx->subreq;
 
-       init_crypt(req);
-       return xor_tweak_pre(req) ?:
+       lrw_init_crypt(req);
+       return lrw_xor_tweak_pre(req) ?:
                crypto_skcipher_decrypt(subreq) ?:
-               xor_tweak_post(req);
+               lrw_xor_tweak_post(req);
 }
 
-static int init_tfm(struct crypto_skcipher *tfm)
+static int lrw_init_tfm(struct crypto_skcipher *tfm)
 {
        struct skcipher_instance *inst = skcipher_alg_instance(tfm);
        struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst);
-       struct priv *ctx = crypto_skcipher_ctx(tfm);
+       struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_skcipher *cipher;
 
        cipher = crypto_spawn_skcipher(spawn);
@@ -273,45 +276,39 @@ static int init_tfm(struct crypto_skcipher *tfm)
        ctx->child = cipher;
 
        crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(cipher) +
-                                        sizeof(struct rctx));
+                                        sizeof(struct lrw_request_ctx));
 
        return 0;
 }
 
-static void exit_tfm(struct crypto_skcipher *tfm)
+static void lrw_exit_tfm(struct crypto_skcipher *tfm)
 {
-       struct priv *ctx = crypto_skcipher_ctx(tfm);
+       struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
 
        if (ctx->table)
                gf128mul_free_64k(ctx->table);
        crypto_free_skcipher(ctx->child);
 }
 
-static void crypto_lrw_free(struct skcipher_instance *inst)
+static void lrw_free_instance(struct skcipher_instance *inst)
 {
        crypto_drop_skcipher(skcipher_instance_ctx(inst));
        kfree(inst);
 }
 
-static int create(struct crypto_template *tmpl, struct rtattr **tb)
+static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
        struct crypto_skcipher_spawn *spawn;
        struct skcipher_instance *inst;
-       struct crypto_attr_type *algt;
        struct skcipher_alg *alg;
        const char *cipher_name;
        char ecb_name[CRYPTO_MAX_ALG_NAME];
        u32 mask;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
+       if (err)
+               return err;
 
        cipher_name = crypto_attr_alg_name(tb[1]);
        if (IS_ERR(cipher_name))
@@ -379,7 +376,6 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
        } else
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
@@ -391,43 +387,43 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
        inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) +
                                LRW_BLOCK_SIZE;
 
-       inst->alg.base.cra_ctxsize = sizeof(struct priv);
+       inst->alg.base.cra_ctxsize = sizeof(struct lrw_tfm_ctx);
 
-       inst->alg.init = init_tfm;
-       inst->alg.exit = exit_tfm;
+       inst->alg.init = lrw_init_tfm;
+       inst->alg.exit = lrw_exit_tfm;
 
-       inst->alg.setkey = setkey;
-       inst->alg.encrypt = encrypt;
-       inst->alg.decrypt = decrypt;
+       inst->alg.setkey = lrw_setkey;
+       inst->alg.encrypt = lrw_encrypt;
+       inst->alg.decrypt = lrw_decrypt;
 
-       inst->free = crypto_lrw_free;
+       inst->free = lrw_free_instance;
 
        err = skcipher_register_instance(tmpl, inst);
        if (err) {
 err_free_inst:
-               crypto_lrw_free(inst);
+               lrw_free_instance(inst);
        }
        return err;
 }
 
-static struct crypto_template crypto_tmpl = {
+static struct crypto_template lrw_tmpl = {
        .name = "lrw",
-       .create = create,
+       .create = lrw_create,
        .module = THIS_MODULE,
 };
 
-static int __init crypto_module_init(void)
+static int __init lrw_module_init(void)
 {
-       return crypto_register_template(&crypto_tmpl);
+       return crypto_register_template(&lrw_tmpl);
 }
 
-static void __exit crypto_module_exit(void)
+static void __exit lrw_module_exit(void)
 {
-       crypto_unregister_template(&crypto_tmpl);
+       crypto_unregister_template(&lrw_tmpl);
 }
 
-subsys_initcall(crypto_module_init);
-module_exit(crypto_module_exit);
+subsys_initcall(lrw_module_init);
+module_exit(lrw_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LRW block cipher mode");
index 8bddc65..d569c7e 100644 (file)
@@ -226,18 +226,14 @@ static int pcrypt_init_instance(struct crypto_instance *inst,
 }
 
 static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
-                             u32 type, u32 mask)
+                             struct crypto_attr_type *algt)
 {
        struct pcrypt_instance_ctx *ctx;
-       struct crypto_attr_type *algt;
        struct aead_instance *inst;
        struct aead_alg *alg;
+       u32 mask = crypto_algt_inherited_mask(algt);
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
                return -ENOMEM;
@@ -254,7 +250,7 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
                goto err_free_inst;
 
        err = crypto_grab_aead(&ctx->spawn, aead_crypto_instance(inst),
-                              crypto_attr_alg_name(tb[1]), 0, 0);
+                              crypto_attr_alg_name(tb[1]), 0, mask);
        if (err)
                goto err_free_inst;
 
@@ -263,7 +259,7 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
        if (err)
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags |= CRYPTO_ALG_ASYNC;
 
        inst->alg.ivsize = crypto_aead_alg_ivsize(alg);
        inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg);
@@ -298,7 +294,7 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb)
 
        switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
        case CRYPTO_ALG_TYPE_AEAD:
-               return pcrypt_create_aead(tmpl, tb, algt->type, algt->mask);
+               return pcrypt_create_aead(tmpl, tb, algt);
        }
 
        return -EINVAL;
@@ -320,7 +316,7 @@ static int pcrypt_init_padata(struct padata_instance **pinst, const char *name)
 {
        int ret = -ENOMEM;
 
-       *pinst = padata_alloc_possible(name);
+       *pinst = padata_alloc(name);
        if (!*pinst)
                return ret;
 
@@ -331,12 +327,6 @@ static int pcrypt_init_padata(struct padata_instance **pinst, const char *name)
        return ret;
 }
 
-static void pcrypt_fini_padata(struct padata_instance *pinst)
-{
-       padata_stop(pinst);
-       padata_free(pinst);
-}
-
 static struct crypto_template pcrypt_tmpl = {
        .name = "pcrypt",
        .create = pcrypt_create,
@@ -359,13 +349,10 @@ static int __init pcrypt_init(void)
        if (err)
                goto err_deinit_pencrypt;
 
-       padata_start(pencrypt);
-       padata_start(pdecrypt);
-
        return crypto_register_template(&pcrypt_tmpl);
 
 err_deinit_pencrypt:
-       pcrypt_fini_padata(pencrypt);
+       padata_free(pencrypt);
 err_unreg_kset:
        kset_unregister(pcrypt_kset);
 err:
@@ -376,8 +363,8 @@ static void __exit pcrypt_exit(void)
 {
        crypto_unregister_template(&pcrypt_tmpl);
 
-       pcrypt_fini_padata(pencrypt);
-       pcrypt_fini_padata(pdecrypt);
+       padata_free(pencrypt);
+       padata_free(pdecrypt);
 
        kset_unregister(pcrypt_kset);
 }
index d31031d..4983b2b 100644 (file)
@@ -596,7 +596,6 @@ static void pkcs1pad_free(struct akcipher_instance *inst)
 
 static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct akcipher_instance *inst;
        struct pkcs1pad_inst_ctx *ctx;
@@ -604,14 +603,9 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
        const char *hash_name;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AKCIPHER) & algt->mask)
-               return -EINVAL;
-
-       mask = crypto_requires_sync(algt->type, algt->mask);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AKCIPHER, &mask);
+       if (err)
+               return err;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -658,7 +652,6 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
                        goto err_free_inst;
        }
 
-       inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = rsa_alg->base.cra_priority;
        inst->alg.base.cra_ctxsize = sizeof(struct pkcs1pad_ctx);
 
index c81a444..3418869 100644 (file)
@@ -9,8 +9,8 @@
  * Salsa20 is a stream cipher candidate in eSTREAM, the ECRYPT Stream
  * Cipher Project. It is designed by Daniel J. Bernstein <djb@cr.yp.to>.
  * More information about eSTREAM and Salsa20 can be found here:
- *   http://www.ecrypt.eu.org/stream/
- *   http://cr.yp.to/snuffle.html
+ *   https://www.ecrypt.eu.org/stream/
+ *   https://cr.yp.to/snuffle.html
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
index f124b9b..23e22d8 100644 (file)
@@ -138,7 +138,7 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
        struct aead_instance *inst;
        int err;
 
-       inst = aead_geniv_alloc(tmpl, tb, 0, 0);
+       inst = aead_geniv_alloc(tmpl, tb);
 
        if (IS_ERR(inst))
                return PTR_ERR(inst);
@@ -164,23 +164,9 @@ free_inst:
        return err;
 }
 
-static int seqiv_create(struct crypto_template *tmpl, struct rtattr **tb)
-{
-       struct crypto_attr_type *algt;
-
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK)
-               return -EINVAL;
-
-       return seqiv_aead_create(tmpl, tb);
-}
-
 static struct crypto_template seqiv_tmpl = {
        .name = "seqiv",
-       .create = seqiv_create,
+       .create = seqiv_aead_create,
        .module = THIS_MODULE,
 };
 
index 44e263e..3e40699 100644 (file)
@@ -3,7 +3,7 @@
  * Cryptographic API.
  *
  * SHA-3, as specified in
- * http://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
+ * https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
  *
  * SHA-3 code by Jeff Garzik <jeff@garzik.org>
  *               Ard Biesheuvel <ard.biesheuvel@linaro.org>
index 56885af..edaa479 100644 (file)
@@ -171,7 +171,8 @@ struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname,
                     drvname) >= CRYPTO_MAX_ALG_NAME)
                goto out_free_salg;
 
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC |
+               (ialg->base.cra_flags & CRYPTO_ALG_INHERITED_FLAGS);
        alg->base.cra_priority = ialg->base.cra_priority;
        alg->base.cra_blocksize = ialg->base.cra_blocksize;
        alg->base.cra_alignmask = ialg->base.cra_alignmask;
@@ -417,7 +418,8 @@ struct simd_aead_alg *simd_aead_create_compat(const char *algname,
                     drvname) >= CRYPTO_MAX_ALG_NAME)
                goto out_free_salg;
 
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC |
+               (ialg->base.cra_flags & CRYPTO_ALG_INHERITED_FLAGS);
        alg->base.cra_priority = ialg->base.cra_priority;
        alg->base.cra_blocksize = ialg->base.cra_blocksize;
        alg->base.cra_alignmask = ialg->base.cra_alignmask;
index 7221def..467af52 100644 (file)
@@ -934,22 +934,15 @@ static void skcipher_free_instance_simple(struct skcipher_instance *inst)
 struct skcipher_instance *skcipher_alloc_instance_simple(
        struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct crypto_attr_type *algt;
        u32 mask;
        struct skcipher_instance *inst;
        struct crypto_cipher_spawn *spawn;
        struct crypto_alg *cipher_alg;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return ERR_CAST(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
-               return ERR_PTR(-EINVAL);
-
-       mask = crypto_requires_off(algt->type, algt->mask,
-                                  CRYPTO_ALG_NEED_FALLBACK);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
+       if (err)
+               return ERR_PTR(err);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
index d299839..b9a2d73 100644 (file)
@@ -3916,7 +3916,7 @@ static const struct hash_testvec hmac_sm3_tv_template[] = {
 };
 
 /*
- * SHA1 test vectors  from from FIPS PUB 180-1
+ * SHA1 test vectors from FIPS PUB 180-1
  * Long vector from CAVS 5.0
  */
 static const struct hash_testvec sha1_tv_template[] = {
@@ -4103,7 +4103,7 @@ static const struct hash_testvec sha1_tv_template[] = {
 
 
 /*
- * SHA224 test vectors from from FIPS PUB 180-2
+ * SHA224 test vectors from FIPS PUB 180-2
  */
 static const struct hash_testvec sha224_tv_template[] = {
        {
@@ -4273,7 +4273,7 @@ static const struct hash_testvec sha224_tv_template[] = {
 };
 
 /*
- * SHA256 test vectors from from NIST
+ * SHA256 test vectors from NIST
  */
 static const struct hash_testvec sha256_tv_template[] = {
        {
@@ -4442,7 +4442,7 @@ static const struct hash_testvec sha256_tv_template[] = {
 };
 
 /*
- * SHA384 test vectors from from NIST and kerneli
+ * SHA384 test vectors from NIST and kerneli
  */
 static const struct hash_testvec sha384_tv_template[] = {
        {
@@ -4632,7 +4632,7 @@ static const struct hash_testvec sha384_tv_template[] = {
 };
 
 /*
- * SHA512 test vectors from from NIST and kerneli
+ * SHA512 test vectors from NIST and kerneli
  */
 static const struct hash_testvec sha512_tv_template[] = {
        {
index 2d90683..9b565d1 100644 (file)
@@ -620,9 +620,10 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        struct shash_instance *inst;
        struct crypto_cipher_spawn *spawn;
        struct crypto_alg *alg;
+       u32 mask;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask);
        if (err)
                return err;
 
@@ -632,7 +633,7 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        spawn = shash_instance_ctx(inst);
 
        err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
-                                crypto_attr_alg_name(tb[1]), 0, 0);
+                                crypto_attr_alg_name(tb[1]), 0, mask);
        if (err)
                goto err_free_inst;
        alg = crypto_spawn_cipher_alg(spawn);
index 598ec88..af3b7eb 100644 (file)
@@ -191,9 +191,10 @@ static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
        struct crypto_cipher_spawn *spawn;
        struct crypto_alg *alg;
        unsigned long alignmask;
+       u32 mask;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask);
        if (err)
                return err;
 
@@ -203,7 +204,7 @@ static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
        spawn = shash_instance_ctx(inst);
 
        err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
-                                crypto_attr_alg_name(tb[1]), 0, 0);
+                                crypto_attr_alg_name(tb[1]), 0, mask);
        if (err)
                goto err_free_inst;
        alg = crypto_spawn_cipher_alg(spawn);
index 3565f3b..ad45b00 100644 (file)
@@ -20,7 +20,7 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 
-struct priv {
+struct xts_tfm_ctx {
        struct crypto_skcipher *child;
        struct crypto_cipher *tweak;
 };
@@ -30,17 +30,17 @@ struct xts_instance_ctx {
        char name[CRYPTO_MAX_ALG_NAME];
 };
 
-struct rctx {
+struct xts_request_ctx {
        le128 t;
        struct scatterlist *tail;
        struct scatterlist sg[2];
        struct skcipher_request subreq;
 };
 
-static int setkey(struct crypto_skcipher *parent, const u8 *key,
-                 unsigned int keylen)
+static int xts_setkey(struct crypto_skcipher *parent, const u8 *key,
+                     unsigned int keylen)
 {
-       struct priv *ctx = crypto_skcipher_ctx(parent);
+       struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(parent);
        struct crypto_skcipher *child;
        struct crypto_cipher *tweak;
        int err;
@@ -78,9 +78,10 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
  * multiple calls to the 'ecb(..)' instance, which usually would be slower than
  * just doing the gf128mul_x_ble() calls again.
  */
-static int xor_tweak(struct skcipher_request *req, bool second_pass, bool enc)
+static int xts_xor_tweak(struct skcipher_request *req, bool second_pass,
+                        bool enc)
 {
-       struct rctx *rctx = skcipher_request_ctx(req);
+       struct xts_request_ctx *rctx = skcipher_request_ctx(req);
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
        const int bs = XTS_BLOCK_SIZE;
@@ -128,23 +129,23 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass, bool enc)
        return err;
 }
 
-static int xor_tweak_pre(struct skcipher_request *req, bool enc)
+static int xts_xor_tweak_pre(struct skcipher_request *req, bool enc)
 {
-       return xor_tweak(req, false, enc);
+       return xts_xor_tweak(req, false, enc);
 }
 
-static int xor_tweak_post(struct skcipher_request *req, bool enc)
+static int xts_xor_tweak_post(struct skcipher_request *req, bool enc)
 {
-       return xor_tweak(req, true, enc);
+       return xts_xor_tweak(req, true, enc);
 }
 
-static void cts_done(struct crypto_async_request *areq, int err)
+static void xts_cts_done(struct crypto_async_request *areq, int err)
 {
        struct skcipher_request *req = areq->data;
        le128 b;
 
        if (!err) {
-               struct rctx *rctx = skcipher_request_ctx(req);
+               struct xts_request_ctx *rctx = skcipher_request_ctx(req);
 
                scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 0);
                le128_xor(&b, &rctx->t, &b);
@@ -154,12 +155,13 @@ static void cts_done(struct crypto_async_request *areq, int err)
        skcipher_request_complete(req, err);
 }
 
-static int cts_final(struct skcipher_request *req,
-                    int (*crypt)(struct skcipher_request *req))
+static int xts_cts_final(struct skcipher_request *req,
+                        int (*crypt)(struct skcipher_request *req))
 {
-       struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+       const struct xts_tfm_ctx *ctx =
+               crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
        int offset = req->cryptlen & ~(XTS_BLOCK_SIZE - 1);
-       struct rctx *rctx = skcipher_request_ctx(req);
+       struct xts_request_ctx *rctx = skcipher_request_ctx(req);
        struct skcipher_request *subreq = &rctx->subreq;
        int tail = req->cryptlen % XTS_BLOCK_SIZE;
        le128 b[2];
@@ -169,7 +171,7 @@ static int cts_final(struct skcipher_request *req,
                                      offset - XTS_BLOCK_SIZE);
 
        scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0);
-       memcpy(b + 1, b, tail);
+       b[1] = b[0];
        scatterwalk_map_and_copy(b, req->src, offset, tail, 0);
 
        le128_xor(b, &rctx->t, b);
@@ -177,7 +179,8 @@ static int cts_final(struct skcipher_request *req,
        scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE + tail, 1);
 
        skcipher_request_set_tfm(subreq, ctx->child);
-       skcipher_request_set_callback(subreq, req->base.flags, cts_done, req);
+       skcipher_request_set_callback(subreq, req->base.flags, xts_cts_done,
+                                     req);
        skcipher_request_set_crypt(subreq, rctx->tail, rctx->tail,
                                   XTS_BLOCK_SIZE, NULL);
 
@@ -192,18 +195,18 @@ static int cts_final(struct skcipher_request *req,
        return 0;
 }
 
-static void encrypt_done(struct crypto_async_request *areq, int err)
+static void xts_encrypt_done(struct crypto_async_request *areq, int err)
 {
        struct skcipher_request *req = areq->data;
 
        if (!err) {
-               struct rctx *rctx = skcipher_request_ctx(req);
+               struct xts_request_ctx *rctx = skcipher_request_ctx(req);
 
                rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-               err = xor_tweak_post(req, true);
+               err = xts_xor_tweak_post(req, true);
 
                if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
-                       err = cts_final(req, crypto_skcipher_encrypt);
+                       err = xts_cts_final(req, crypto_skcipher_encrypt);
                        if (err == -EINPROGRESS)
                                return;
                }
@@ -212,18 +215,18 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
        skcipher_request_complete(req, err);
 }
 
-static void decrypt_done(struct crypto_async_request *areq, int err)
+static void xts_decrypt_done(struct crypto_async_request *areq, int err)
 {
        struct skcipher_request *req = areq->data;
 
        if (!err) {
-               struct rctx *rctx = skcipher_request_ctx(req);
+               struct xts_request_ctx *rctx = skcipher_request_ctx(req);
 
                rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-               err = xor_tweak_post(req, false);
+               err = xts_xor_tweak_post(req, false);
 
                if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
-                       err = cts_final(req, crypto_skcipher_decrypt);
+                       err = xts_cts_final(req, crypto_skcipher_decrypt);
                        if (err == -EINPROGRESS)
                                return;
                }
@@ -232,10 +235,12 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
        skcipher_request_complete(req, err);
 }
 
-static int init_crypt(struct skcipher_request *req, crypto_completion_t compl)
+static int xts_init_crypt(struct skcipher_request *req,
+                         crypto_completion_t compl)
 {
-       struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
-       struct rctx *rctx = skcipher_request_ctx(req);
+       const struct xts_tfm_ctx *ctx =
+               crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+       struct xts_request_ctx *rctx = skcipher_request_ctx(req);
        struct skcipher_request *subreq = &rctx->subreq;
 
        if (req->cryptlen < XTS_BLOCK_SIZE)
@@ -252,45 +257,45 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t compl)
        return 0;
 }
 
-static int encrypt(struct skcipher_request *req)
+static int xts_encrypt(struct skcipher_request *req)
 {
-       struct rctx *rctx = skcipher_request_ctx(req);
+       struct xts_request_ctx *rctx = skcipher_request_ctx(req);
        struct skcipher_request *subreq = &rctx->subreq;
        int err;
 
-       err = init_crypt(req, encrypt_done) ?:
-             xor_tweak_pre(req, true) ?:
+       err = xts_init_crypt(req, xts_encrypt_done) ?:
+             xts_xor_tweak_pre(req, true) ?:
              crypto_skcipher_encrypt(subreq) ?:
-             xor_tweak_post(req, true);
+             xts_xor_tweak_post(req, true);
 
        if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
                return err;
 
-       return cts_final(req, crypto_skcipher_encrypt);
+       return xts_cts_final(req, crypto_skcipher_encrypt);
 }
 
-static int decrypt(struct skcipher_request *req)
+static int xts_decrypt(struct skcipher_request *req)
 {
-       struct rctx *rctx = skcipher_request_ctx(req);
+       struct xts_request_ctx *rctx = skcipher_request_ctx(req);
        struct skcipher_request *subreq = &rctx->subreq;
        int err;
 
-       err = init_crypt(req, decrypt_done) ?:
-             xor_tweak_pre(req, false) ?:
+       err = xts_init_crypt(req, xts_decrypt_done) ?:
+             xts_xor_tweak_pre(req, false) ?:
              crypto_skcipher_decrypt(subreq) ?:
-             xor_tweak_post(req, false);
+             xts_xor_tweak_post(req, false);
 
        if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
                return err;
 
-       return cts_final(req, crypto_skcipher_decrypt);
+       return xts_cts_final(req, crypto_skcipher_decrypt);
 }
 
-static int init_tfm(struct crypto_skcipher *tfm)
+static int xts_init_tfm(struct crypto_skcipher *tfm)
 {
        struct skcipher_instance *inst = skcipher_alg_instance(tfm);
        struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst);
-       struct priv *ctx = crypto_skcipher_ctx(tfm);
+       struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_skcipher *child;
        struct crypto_cipher *tweak;
 
@@ -309,41 +314,39 @@ static int init_tfm(struct crypto_skcipher *tfm)
        ctx->tweak = tweak;
 
        crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) +
-                                        sizeof(struct rctx));
+                                        sizeof(struct xts_request_ctx));
 
        return 0;
 }
 
-static void exit_tfm(struct crypto_skcipher *tfm)
+static void xts_exit_tfm(struct crypto_skcipher *tfm)
 {
-       struct priv *ctx = crypto_skcipher_ctx(tfm);
+       struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
 
        crypto_free_skcipher(ctx->child);
        crypto_free_cipher(ctx->tweak);
 }
 
-static void crypto_xts_free(struct skcipher_instance *inst)
+static void xts_free_instance(struct skcipher_instance *inst)
 {
-       crypto_drop_skcipher(skcipher_instance_ctx(inst));
+       struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst);
+
+       crypto_drop_skcipher(&ictx->spawn);
        kfree(inst);
 }
 
-static int create(struct crypto_template *tmpl, struct rtattr **tb)
+static int xts_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
        struct skcipher_instance *inst;
-       struct crypto_attr_type *algt;
        struct xts_instance_ctx *ctx;
        struct skcipher_alg *alg;
        const char *cipher_name;
        u32 mask;
        int err;
 
-       algt = crypto_get_attr_type(tb);
-       if (IS_ERR(algt))
-               return PTR_ERR(algt);
-
-       if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
-               return -EINVAL;
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
+       if (err)
+               return err;
 
        cipher_name = crypto_attr_alg_name(tb[1]);
        if (IS_ERR(cipher_name))
@@ -355,10 +358,6 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 
        ctx = skcipher_instance_ctx(inst);
 
-       mask = crypto_requires_off(algt->type, algt->mask,
-                                  CRYPTO_ALG_NEED_FALLBACK |
-                                  CRYPTO_ALG_ASYNC);
-
        err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst),
                                   cipher_name, 0, mask);
        if (err == -ENOENT) {
@@ -415,7 +414,6 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
        } else
                goto err_free_inst;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
@@ -425,43 +423,43 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
        inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) * 2;
        inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) * 2;
 
-       inst->alg.base.cra_ctxsize = sizeof(struct priv);
+       inst->alg.base.cra_ctxsize = sizeof(struct xts_tfm_ctx);
 
-       inst->alg.init = init_tfm;
-       inst->alg.exit = exit_tfm;
+       inst->alg.init = xts_init_tfm;
+       inst->alg.exit = xts_exit_tfm;
 
-       inst->alg.setkey = setkey;
-       inst->alg.encrypt = encrypt;
-       inst->alg.decrypt = decrypt;
+       inst->alg.setkey = xts_setkey;
+       inst->alg.encrypt = xts_encrypt;
+       inst->alg.decrypt = xts_decrypt;
 
-       inst->free = crypto_xts_free;
+       inst->free = xts_free_instance;
 
        err = skcipher_register_instance(tmpl, inst);
        if (err) {
 err_free_inst:
-               crypto_xts_free(inst);
+               xts_free_instance(inst);
        }
        return err;
 }
 
-static struct crypto_template crypto_tmpl = {
+static struct crypto_template xts_tmpl = {
        .name = "xts",
-       .create = create,
+       .create = xts_create,
        .module = THIS_MODULE,
 };
 
-static int __init crypto_module_init(void)
+static int __init xts_module_init(void)
 {
-       return crypto_register_template(&crypto_tmpl);
+       return crypto_register_template(&xts_tmpl);
 }
 
-static void __exit crypto_module_exit(void)
+static void __exit xts_module_exit(void)
 {
-       crypto_unregister_template(&crypto_tmpl);
+       crypto_unregister_template(&xts_tmpl);
 }
 
-subsys_initcall(crypto_module_init);
-module_exit(crypto_module_exit);
+subsys_initcall(xts_module_init);
+module_exit(xts_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("XTS block cipher mode");
index 28a6b38..ec782e4 100644 (file)
@@ -264,15 +264,31 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
 
        if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT) {
                struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
-               struct acpi_device *adev = to_acpi_device_node(dev->fwnode);
+               struct acpi_device *adev;
                struct acpi_iort_named_component *ncomp;
+               struct device *nc_dev = dev;
+
+               /*
+                * Walk the device tree to find a device with an
+                * ACPI companion; there is no point in scanning
+                * IORT for a device matching a named component if
+                * the device does not have an ACPI companion to
+                * start with.
+                */
+               do {
+                       adev = ACPI_COMPANION(nc_dev);
+                       if (adev)
+                               break;
+
+                       nc_dev = nc_dev->parent;
+               } while (nc_dev);
 
                if (!adev)
                        goto out;
 
                status = acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &buf);
                if (ACPI_FAILURE(status)) {
-                       dev_warn(dev, "Can't get device full path name\n");
+                       dev_warn(nc_dev, "Can't get device full path name\n");
                        goto out;
                }
 
@@ -534,7 +550,6 @@ static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
                node = iort_get_iort_node(dev->fwnode);
                if (node)
                        return node;
-
                /*
                 * if not, then it should be a platform device defined in
                 * DSDT/SSDT (with Named Component node in IORT)
@@ -543,32 +558,29 @@ static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
                                      iort_match_node_callback, dev);
        }
 
-       /* Find a PCI root bus */
        pbus = to_pci_dev(dev)->bus;
-       while (!pci_is_root_bus(pbus))
-               pbus = pbus->parent;
 
        return iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
                              iort_match_node_callback, &pbus->dev);
 }
 
 /**
- * iort_msi_map_rid() - Map a MSI requester ID for a device
+ * iort_msi_map_id() - Map a MSI input ID for a device
  * @dev: The device for which the mapping is to be done.
- * @req_id: The device requester ID.
+ * @input_id: The device input ID.
  *
- * Returns: mapped MSI RID on success, input requester ID otherwise
+ * Returns: mapped MSI ID on success, input ID otherwise
  */
-u32 iort_msi_map_rid(struct device *dev, u32 req_id)
+u32 iort_msi_map_id(struct device *dev, u32 input_id)
 {
        struct acpi_iort_node *node;
        u32 dev_id;
 
        node = iort_find_dev_node(dev);
        if (!node)
-               return req_id;
+               return input_id;
 
-       iort_node_map_id(node, req_id, &dev_id, IORT_MSI_TYPE);
+       iort_node_map_id(node, input_id, &dev_id, IORT_MSI_TYPE);
        return dev_id;
 }
 
@@ -625,13 +637,13 @@ static int __maybe_unused iort_find_its_base(u32 its_id, phys_addr_t *base)
 /**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
- * @req_id: Device's requester ID
+ * @id: Device's ID
  * @idx: Index of the ITS identifier list.
  * @its_id: ITS identifier.
  *
  * Returns: 0 on success, appropriate error value otherwise
  */
-static int iort_dev_find_its_id(struct device *dev, u32 req_id,
+static int iort_dev_find_its_id(struct device *dev, u32 id,
                                unsigned int idx, int *its_id)
 {
        struct acpi_iort_its_group *its;
@@ -641,7 +653,7 @@ static int iort_dev_find_its_id(struct device *dev, u32 req_id,
        if (!node)
                return -ENXIO;
 
-       node = iort_node_map_id(node, req_id, NULL, IORT_MSI_TYPE);
+       node = iort_node_map_id(node, id, NULL, IORT_MSI_TYPE);
        if (!node)
                return -ENXIO;
 
@@ -664,19 +676,20 @@ static int iort_dev_find_its_id(struct device *dev, u32 req_id,
  *
  * Returns: the MSI domain for this device, NULL otherwise
  */
-struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id)
+struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
+                                         enum irq_domain_bus_token bus_token)
 {
        struct fwnode_handle *handle;
        int its_id;
 
-       if (iort_dev_find_its_id(dev, req_id, 0, &its_id))
+       if (iort_dev_find_its_id(dev, id, 0, &its_id))
                return NULL;
 
        handle = iort_find_domain_token(its_id);
        if (!handle)
                return NULL;
 
-       return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
+       return irq_find_matching_fwnode(handle, bus_token);
 }
 
 static void iort_set_device_domain(struct device *dev,
@@ -965,19 +978,54 @@ static void iort_named_component_init(struct device *dev,
                                           nc->node_flags);
 }
 
+static int iort_nc_iommu_map(struct device *dev, struct acpi_iort_node *node)
+{
+       struct acpi_iort_node *parent;
+       int err = -ENODEV, i = 0;
+       u32 streamid = 0;
+
+       do {
+
+               parent = iort_node_map_platform_id(node, &streamid,
+                                                  IORT_IOMMU_TYPE,
+                                                  i++);
+
+               if (parent)
+                       err = iort_iommu_xlate(dev, parent, streamid);
+       } while (parent && !err);
+
+       return err;
+}
+
+static int iort_nc_iommu_map_id(struct device *dev,
+                               struct acpi_iort_node *node,
+                               const u32 *in_id)
+{
+       struct acpi_iort_node *parent;
+       u32 streamid;
+
+       parent = iort_node_map_id(node, *in_id, &streamid, IORT_IOMMU_TYPE);
+       if (parent)
+               return iort_iommu_xlate(dev, parent, streamid);
+
+       return -ENODEV;
+}
+
+
 /**
- * iort_iommu_configure - Set-up IOMMU configuration for a device.
+ * iort_iommu_configure_id - Set-up IOMMU configuration for a device.
  *
  * @dev: device to configure
+ * @id_in: optional pointer to a constant input ID value
  *
  * Returns: iommu_ops pointer on configuration success
  *          NULL on configuration failure
  */
-const struct iommu_ops *iort_iommu_configure(struct device *dev)
+const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
+                                               const u32 *id_in)
 {
-       struct acpi_iort_node *node, *parent;
+       struct acpi_iort_node *node;
        const struct iommu_ops *ops;
-       u32 streamid = 0;
        int err = -ENODEV;
 
        /*
@@ -1006,21 +1054,13 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
                if (fwspec && iort_pci_rc_supports_ats(node))
                        fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS;
        } else {
-               int i = 0;
-
                node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
                                      iort_match_node_callback, dev);
                if (!node)
                        return NULL;
 
-               do {
-                       parent = iort_node_map_platform_id(node, &streamid,
-                                                          IORT_IOMMU_TYPE,
-                                                          i++);
-
-                       if (parent)
-                               err = iort_iommu_xlate(dev, parent, streamid);
-               } while (parent && !err);
+               err = id_in ? iort_nc_iommu_map_id(dev, node, id_in) :
+                             iort_nc_iommu_map(dev, node);
 
                if (!err)
                        iort_named_component_init(dev, node);
@@ -1045,6 +1085,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 
        return ops;
 }
+
 #else
 static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
 { return NULL; }
@@ -1053,7 +1094,8 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops,
 { return 0; }
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }
-const struct iommu_ops *iort_iommu_configure(struct device *dev)
+const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
+                                               const u32 *input_id)
 { return NULL; }
 #endif
 
index 8777fac..2142f15 100644 (file)
@@ -1457,8 +1457,10 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
  * acpi_dma_configure - Set-up DMA configuration for the device.
  * @dev: The pointer to the device
  * @attr: device dma attributes
+ * @input_id: pointer to a constant input device ID value
  */
-int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
+int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
+                         const u32 *input_id)
 {
        const struct iommu_ops *iommu;
        u64 dma_addr = 0, size = 0;
@@ -1470,7 +1472,7 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 
        iort_dma_setup(dev, &dma_addr, &size);
 
-       iommu = iort_iommu_configure(dev);
+       iommu = iort_iommu_configure_id(dev, input_id);
        if (PTR_ERR(iommu) == -EPROBE_DEFER)
                return -EPROBE_DEFER;
 
@@ -1479,7 +1481,7 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(acpi_dma_configure);
+EXPORT_SYMBOL_GPL(acpi_dma_configure_id);
 
 static void acpi_init_coherency(struct acpi_device *adev)
 {
index d9fd702..7f814da 100644 (file)
@@ -433,9 +433,15 @@ static int atmtcp_remove_persistent(int itf)
                return -EMEDIUMTYPE;
        }
        dev_data = PRIV(dev);
-       if (!dev_data->persist) return 0;
+       if (!dev_data->persist) {
+               atm_dev_put(dev);
+               return 0;
+       }
        dev_data->persist = 0;
-       if (PRIV(dev)->vcc) return 0;
+       if (PRIV(dev)->vcc) {
+               atm_dev_put(dev);
+               return 0;
+       }
        kfree(dev_data);
        atm_dev_put(dev);
        atm_dev_deregister(dev);
index 4d0a003..75f72d6 100644 (file)
@@ -54,6 +54,17 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
        per_cpu(cpu_scale, cpu) = capacity;
 }
 
+DEFINE_PER_CPU(unsigned long, thermal_pressure);
+
+void topology_set_thermal_pressure(const struct cpumask *cpus,
+                              unsigned long th_pressure)
+{
+       int cpu;
+
+       for_each_cpu(cpu, cpus)
+               WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
+}
+
 static ssize_t cpu_capacity_show(struct device *dev,
                                 struct device_attribute *attr,
                                 char *buf)
index 2fb25c3..2723a70 100644 (file)
@@ -282,7 +282,7 @@ out:
        return err;
 }
 
-static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t brd_submit_bio(struct bio *bio)
 {
        struct brd_device *brd = bio->bi_disk->private_data;
        struct bio_vec bvec;
@@ -330,6 +330,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 
 static const struct block_device_operations brd_fops = {
        .owner =                THIS_MODULE,
+       .submit_bio =           brd_submit_bio,
        .rw_page =              brd_rw_page,
 };
 
@@ -381,7 +382,7 @@ static struct brd_device *brd_alloc(int i)
        spin_lock_init(&brd->brd_lock);
        INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
 
-       brd->brd_queue = blk_alloc_queue(brd_make_request, NUMA_NO_NODE);
+       brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!brd->brd_queue)
                goto out_free_dev;
 
index 33d0831..fe6cb99 100644 (file)
@@ -1451,7 +1451,7 @@ extern void conn_free_crypto(struct drbd_connection *connection);
 /* drbd_req */
 extern void do_submit(struct work_struct *ws);
 extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
-extern blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio);
+extern blk_qc_t drbd_submit_bio(struct bio *bio);
 extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req);
 extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 
@@ -1576,12 +1576,12 @@ void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
 /*
  * used to submit our private bio
  */
-static inline void drbd_generic_make_request(struct drbd_device *device,
+static inline void drbd_submit_bio_noacct(struct drbd_device *device,
                                             int fault_type, struct bio *bio)
 {
        __release(local);
        if (!bio->bi_disk) {
-               drbd_err(device, "drbd_generic_make_request: bio->bi_disk == NULL\n");
+               drbd_err(device, "drbd_submit_bio_noacct: bio->bi_disk == NULL\n");
                bio->bi_status = BLK_STS_IOERR;
                bio_endio(bio);
                return;
@@ -1590,7 +1590,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
        if (drbd_insert_fault(device, fault_type))
                bio_io_error(bio);
        else
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
 }
 
 void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
index 45fbd52..cb687cc 100644 (file)
@@ -132,9 +132,10 @@ wait_queue_head_t drbd_pp_wait;
 DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
 
 static const struct block_device_operations drbd_ops = {
-       .owner =   THIS_MODULE,
-       .open =    drbd_open,
-       .release = drbd_release,
+       .owner          = THIS_MODULE,
+       .submit_bio     = drbd_submit_bio,
+       .open           = drbd_open,
+       .release        = drbd_release,
 };
 
 struct bio *bio_alloc_drbd(gfp_t gfp_mask)
@@ -2324,7 +2325,7 @@ static void do_retry(struct work_struct *ws)
                 * workqueues instead.
                 */
 
-               /* We are not just doing generic_make_request(),
+               /* We are not just doing submit_bio_noacct(),
                 * as we want to keep the start_time information. */
                inc_ap_bio(device);
                __drbd_make_request(device, bio, start_jif);
@@ -2414,62 +2415,6 @@ static void drbd_cleanup(void)
        pr_info("module cleanup done.\n");
 }
 
-/**
- * drbd_congested() - Callback for the flusher thread
- * @congested_data:    User data
- * @bdi_bits:          Bits the BDI flusher thread is currently interested in
- *
- * Returns 1<<WB_async_congested and/or 1<<WB_sync_congested if we are congested.
- */
-static int drbd_congested(void *congested_data, int bdi_bits)
-{
-       struct drbd_device *device = congested_data;
-       struct request_queue *q;
-       char reason = '-';
-       int r = 0;
-
-       if (!may_inc_ap_bio(device)) {
-               /* DRBD has frozen IO */
-               r = bdi_bits;
-               reason = 'd';
-               goto out;
-       }
-
-       if (test_bit(CALLBACK_PENDING, &first_peer_device(device)->connection->flags)) {
-               r |= (1 << WB_async_congested);
-               /* Without good local data, we would need to read from remote,
-                * and that would need the worker thread as well, which is
-                * currently blocked waiting for that usermode helper to
-                * finish.
-                */
-               if (!get_ldev_if_state(device, D_UP_TO_DATE))
-                       r |= (1 << WB_sync_congested);
-               else
-                       put_ldev(device);
-               r &= bdi_bits;
-               reason = 'c';
-               goto out;
-       }
-
-       if (get_ldev(device)) {
-               q = bdev_get_queue(device->ldev->backing_bdev);
-               r = bdi_congested(q->backing_dev_info, bdi_bits);
-               put_ldev(device);
-               if (r)
-                       reason = 'b';
-       }
-
-       if (bdi_bits & (1 << WB_async_congested) &&
-           test_bit(NET_CONGESTED, &first_peer_device(device)->connection->flags)) {
-               r |= (1 << WB_async_congested);
-               reason = reason == 'b' ? 'a' : 'n';
-       }
-
-out:
-       device->congestion_reason = reason;
-       return r;
-}
-
 static void drbd_init_workqueue(struct drbd_work_queue* wq)
 {
        spin_lock_init(&wq->q_lock);
@@ -2801,11 +2746,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 
        drbd_init_set_defaults(device);
 
-       q = blk_alloc_queue(drbd_make_request, NUMA_NO_NODE);
+       q = blk_alloc_queue(NUMA_NO_NODE);
        if (!q)
                goto out_no_q;
        device->rq_queue = q;
-       q->queuedata   = device;
 
        disk = alloc_disk(1);
        if (!disk)
@@ -2825,9 +2769,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
        /* we have no partitions. we contain only ourselves. */
        device->this_bdev->bd_contains = device->this_bdev;
 
-       q->backing_dev_info->congested_fn = drbd_congested;
-       q->backing_dev_info->congested_data = device;
-
        blk_queue_write_cache(q, true, true);
        /* Setting the max_hw_sectors to an odd value of 8kibyte here
           This triggers a max_bio_size message upon first attach or connect */
index 1c41cd9..3c0193d 100644 (file)
@@ -265,7 +265,6 @@ int drbd_seq_show(struct seq_file *seq, void *v)
                        seq_printf(seq, "%2d: cs:Unconfigured\n", i);
                } else {
                        /* reset device->congestion_reason */
-                       bdi_rw_congested(device->rq_queue->backing_dev_info);
 
                        nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
                        wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
index 3a3f2b6..c74f561 100644 (file)
@@ -1723,7 +1723,7 @@ next_bio:
                bios = bios->bi_next;
                bio->bi_next = NULL;
 
-               drbd_generic_make_request(device, fault_type, bio);
+               drbd_submit_bio_noacct(device, fault_type, bio);
        } while (bios);
        return 0;
 
index c80a2f1..674be09 100644 (file)
@@ -1164,7 +1164,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
                else if (bio_op(bio) == REQ_OP_DISCARD)
                        drbd_process_discard_or_zeroes_req(req, EE_TRIM);
                else
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                put_ldev(device);
        } else
                bio_io_error(bio);
@@ -1593,12 +1593,12 @@ void do_submit(struct work_struct *ws)
        }
 }
 
-blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
+blk_qc_t drbd_submit_bio(struct bio *bio)
 {
-       struct drbd_device *device = (struct drbd_device *) q->queuedata;
+       struct drbd_device *device = bio->bi_disk->private_data;
        unsigned long start_jif;
 
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
        start_jif = jiffies;
 
index 2b89c9f..7c903de 100644 (file)
@@ -1525,7 +1525,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
 
        drbd_req_make_private_bio(req, req->master_bio);
        bio_set_dev(req->private_bio, device->ldev->backing_bdev);
-       generic_make_request(req->private_bio);
+       submit_bio_noacct(req->private_bio);
 
        return 0;
 }
index 3e9db22..09079ae 100644 (file)
@@ -4205,7 +4205,6 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
        struct bio_vec bio_vec;
        struct page *page;
        struct rb0_cbdata cbdata;
-       size_t size;
 
        page = alloc_page(GFP_NOIO);
        if (!page) {
@@ -4213,15 +4212,11 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
                return -ENOMEM;
        }
 
-       size = bdev->bd_block_size;
-       if (!size)
-               size = 1024;
-
        cbdata.drive = drive;
 
        bio_init(&bio, &bio_vec, 1);
        bio_set_dev(&bio, bdev);
-       bio_add_page(&bio, page, size, 0);
+       bio_add_page(&bio, page, block_size(bdev), 0);
 
        bio.bi_iter.bi_sector = 0;
        bio.bi_flags |= (1 << BIO_QUIET);
index 475e1a7..d181601 100644 (file)
@@ -509,7 +509,8 @@ static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
                return;
        kfree(cmd->bvec);
        cmd->bvec = NULL;
-       blk_mq_complete_request(rq);
+       if (likely(!blk_should_fake_timeout(rq->q)))
+               blk_mq_complete_request(rq);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -1089,11 +1090,10 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
         * here to avoid changing device under exclusive owner.
         */
        if (!(mode & FMODE_EXCL)) {
-               claimed_bdev = bd_start_claiming(bdev, loop_configure);
-               if (IS_ERR(claimed_bdev)) {
-                       error = PTR_ERR(claimed_bdev);
+               claimed_bdev = bdev->bd_contains;
+               error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure);
+               if (error)
                        goto out_putf;
-               }
        }
 
        error = mutex_lock_killable(&loop_ctl_mutex);
@@ -2048,7 +2048,8 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
                        cmd->ret = ret;
                else
                        cmd->ret = ret ? -EIO : 0;
-               blk_mq_complete_request(rq);
+               if (likely(!blk_should_fake_timeout(rq->q)))
+                       blk_mq_complete_request(rq);
        }
 }
 
@@ -2402,6 +2403,8 @@ static void __exit loop_exit(void)
 
        range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
 
+       mutex_lock(&loop_ctl_mutex);
+
        idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
        idr_destroy(&loop_index_idr);
 
@@ -2409,6 +2412,8 @@ static void __exit loop_exit(void)
        unregister_blkdev(LOOP_MAJOR, "loop");
 
        misc_deregister(&loop_misc);
+
+       mutex_unlock(&loop_ctl_mutex);
 }
 
 module_init(loop_init);
index f6bafa9..153e2cd 100644 (file)
@@ -492,7 +492,8 @@ static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
        struct request *req = blk_mq_rq_from_pdu(cmd);
 
        cmd->status = status;
-       blk_mq_complete_request(req);
+       if (likely(!blk_should_fake_timeout(req->q)))
+               blk_mq_complete_request(req);
 }
 
 /*
index ce7e9f2..3ff4054 100644 (file)
@@ -784,6 +784,7 @@ static void recv_work(struct work_struct *work)
        struct nbd_device *nbd = args->nbd;
        struct nbd_config *config = nbd->config;
        struct nbd_cmd *cmd;
+       struct request *rq;
 
        while (1) {
                cmd = nbd_read_stat(nbd, args->index);
@@ -796,7 +797,9 @@ static void recv_work(struct work_struct *work)
                        break;
                }
 
-               blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
+               rq = blk_mq_rq_from_pdu(cmd);
+               if (likely(!blk_should_fake_timeout(rq->q)))
+                       blk_mq_complete_request(rq);
        }
        atomic_dec(&config->recv_threads);
        wake_up(&config->recv_wq);
index 87b31f9..907c685 100644 (file)
@@ -1283,7 +1283,8 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
        case NULL_IRQ_SOFTIRQ:
                switch (cmd->nq->dev->queue_mode) {
                case NULL_Q_MQ:
-                       blk_mq_complete_request(cmd->rq);
+                       if (likely(!blk_should_fake_timeout(cmd->rq->q)))
+                               blk_mq_complete_request(cmd->rq);
                        break;
                case NULL_Q_BIO:
                        /*
@@ -1387,11 +1388,11 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
        return &nullb->queues[index];
 }
 
-static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
+static blk_qc_t null_submit_bio(struct bio *bio)
 {
        sector_t sector = bio->bi_iter.bi_sector;
        sector_t nr_sectors = bio_sectors(bio);
-       struct nullb *nullb = q->queuedata;
+       struct nullb *nullb = bio->bi_disk->private_data;
        struct nullb_queue *nq = nullb_to_queue(nullb);
        struct nullb_cmd *cmd;
 
@@ -1423,7 +1424,7 @@ static bool should_requeue_request(struct request *rq)
 static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
 {
        pr_info("rq %p timed out\n", rq);
-       blk_mq_force_complete_rq(rq);
+       blk_mq_complete_request(rq);
        return BLK_EH_DONE;
 }
 
@@ -1574,7 +1575,13 @@ static void null_config_discard(struct nullb *nullb)
        blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
 }
 
-static const struct block_device_operations null_ops = {
+static const struct block_device_operations null_bio_ops = {
+       .owner          = THIS_MODULE,
+       .submit_bio     = null_submit_bio,
+       .report_zones   = null_report_zones,
+};
+
+static const struct block_device_operations null_rq_ops = {
        .owner          = THIS_MODULE,
        .report_zones   = null_report_zones,
 };
@@ -1646,7 +1653,10 @@ static int null_gendisk_register(struct nullb *nullb)
        disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
        disk->major             = null_major;
        disk->first_minor       = nullb->index;
-       disk->fops              = &null_ops;
+       if (queue_is_mq(nullb->q))
+               disk->fops              = &null_rq_ops;
+       else
+               disk->fops              = &null_bio_ops;
        disk->private_data      = nullb;
        disk->queue             = nullb->q;
        strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
@@ -1791,7 +1801,7 @@ static int null_add_dev(struct nullb_device *dev)
                        goto out_cleanup_tags;
                }
        } else if (dev->queue_mode == NULL_Q_BIO) {
-               nullb->q = blk_alloc_queue(null_queue_bio, dev->home_node);
+               nullb->q = blk_alloc_queue(dev->home_node);
                if (!nullb->q) {
                        rv = -ENOMEM;
                        goto out_cleanup_queues;
index 27a33ad..4becc1e 100644 (file)
@@ -36,7 +36,7 @@
  * block device, assembling the pieces to full packets and queuing them to the
  * packet I/O scheduler.
  *
- * At the top layer there is a custom make_request_fn function that forwards
+ * At the top layer there is a custom ->submit_bio function that forwards
  * read requests directly to the iosched queue and puts write requests in the
  * unaligned write queue. A kernel thread performs the necessary read
  * gathering to convert the unaligned writes to aligned writes and then feeds
@@ -913,7 +913,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
                }
 
                atomic_inc(&pd->cdrw.pending_bios);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
        }
 }
 
@@ -2428,15 +2428,15 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
        }
 }
 
-static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t pkt_submit_bio(struct bio *bio)
 {
        struct pktcdvd_device *pd;
        char b[BDEVNAME_SIZE];
        struct bio *split;
 
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
-       pd = q->queuedata;
+       pd = bio->bi_disk->queue->queuedata;
        if (!pd) {
                pr_err("%s incorrect request queue\n", bio_devname(bio, b));
                goto end_io;
@@ -2480,7 +2480,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
                        split = bio;
                }
 
-               pkt_make_request_write(q, split);
+               pkt_make_request_write(bio->bi_disk->queue, split);
        } while (split != bio);
 
        return BLK_QC_T_NONE;
@@ -2685,6 +2685,7 @@ static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
 
 static const struct block_device_operations pktcdvd_ops = {
        .owner =                THIS_MODULE,
+       .submit_bio =           pkt_submit_bio,
        .open =                 pkt_open,
        .release =              pkt_close,
        .ioctl =                pkt_ioctl,
@@ -2749,7 +2750,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
        disk->flags = GENHD_FL_REMOVABLE;
        strcpy(disk->disk_name, pd->name);
        disk->private_data = pd;
-       disk->queue = blk_alloc_queue(pkt_make_request, NUMA_NO_NODE);
+       disk->queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!disk->queue)
                goto out_mem2;
 
index 821d4d8..1088798 100644 (file)
@@ -90,12 +90,6 @@ struct ps3vram_priv {
 
 static int ps3vram_major;
 
-
-static const struct block_device_operations ps3vram_fops = {
-       .owner          = THIS_MODULE,
-};
-
-
 #define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
 #define DMA_NOTIFIER_OFFSET_BASE 0x1000     /* first DMA notifier offset */
 #define DMA_NOTIFIER_SIZE        0x40
@@ -585,15 +579,15 @@ out:
        return next;
 }
 
-static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t ps3vram_submit_bio(struct bio *bio)
 {
-       struct ps3_system_bus_device *dev = q->queuedata;
+       struct ps3_system_bus_device *dev = bio->bi_disk->private_data;
        struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
        int busy;
 
        dev_dbg(&dev->core, "%s\n", __func__);
 
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
        spin_lock_irq(&priv->lock);
        busy = !bio_list_empty(&priv->list);
@@ -610,6 +604,11 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
        return BLK_QC_T_NONE;
 }
 
+static const struct block_device_operations ps3vram_fops = {
+       .owner          = THIS_MODULE,
+       .submit_bio     = ps3vram_submit_bio,
+};
+
 static int ps3vram_probe(struct ps3_system_bus_device *dev)
 {
        struct ps3vram_priv *priv;
@@ -737,7 +736,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
 
        ps3vram_proc_init(dev);
 
-       queue = blk_alloc_queue(ps3vram_make_request, NUMA_NO_NODE);
+       queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!queue) {
                dev_err(&dev->core, "blk_alloc_queue failed\n");
                error = -ENOMEM;
@@ -745,7 +744,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
        }
 
        priv->queue = queue;
-       queue->queuedata = dev;
        blk_queue_max_segments(queue, BLK_MAX_SEGMENTS);
        blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
        blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
index 3ba07ab..edaceff 100644 (file)
@@ -50,6 +50,8 @@ struct rsxx_bio_meta {
 
 static struct kmem_cache *bio_meta_pool;
 
+static blk_qc_t rsxx_submit_bio(struct bio *bio);
+
 /*----------------- Block Device Operations -----------------*/
 static int rsxx_blkdev_ioctl(struct block_device *bdev,
                                 fmode_t mode,
@@ -92,6 +94,7 @@ static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 
 static const struct block_device_operations rsxx_fops = {
        .owner          = THIS_MODULE,
+       .submit_bio     = rsxx_submit_bio,
        .getgeo         = rsxx_getgeo,
        .ioctl          = rsxx_blkdev_ioctl,
 };
@@ -117,13 +120,13 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
        }
 }
 
-static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t rsxx_submit_bio(struct bio *bio)
 {
-       struct rsxx_cardinfo *card = q->queuedata;
+       struct rsxx_cardinfo *card = bio->bi_disk->private_data;
        struct rsxx_bio_meta *bio_meta;
        blk_status_t st = BLK_STS_IOERR;
 
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
        might_sleep();
 
@@ -233,7 +236,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
                return -ENOMEM;
        }
 
-       card->queue = blk_alloc_queue(rsxx_make_request, NUMA_NO_NODE);
+       card->queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!card->queue) {
                dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
                unregister_blkdev(card->major, DRIVER_NAME);
@@ -267,8 +270,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
                card->queue->limits.discard_alignment   = RSXX_HW_BLK_SIZE;
        }
 
-       card->queue->queuedata = card;
-
        snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
                 "rsxx%d", card->disk_id);
        card->gendisk->major = card->major;
@@ -289,7 +290,6 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
        card->gendisk = NULL;
 
        blk_cleanup_queue(card->queue);
-       card->queue->queuedata = NULL;
        unregister_blkdev(card->major, DRIVER_NAME);
 }
 
index 51569c1..3a476dc 100644 (file)
@@ -1417,7 +1417,8 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
        case SKD_CHECK_STATUS_REPORT_GOOD:
        case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
                skreq->status = BLK_STS_OK;
-               blk_mq_complete_request(req);
+               if (likely(!blk_should_fake_timeout(req->q)))
+                       blk_mq_complete_request(req);
                break;
 
        case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -1440,7 +1441,8 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
        case SKD_CHECK_STATUS_REPORT_ERROR:
        default:
                skreq->status = BLK_STS_IOERR;
-               blk_mq_complete_request(req);
+               if (likely(!blk_should_fake_timeout(req->q)))
+                       blk_mq_complete_request(req);
                break;
        }
 }
@@ -1560,7 +1562,8 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
                 */
                if (likely(cmp_status == SAM_STAT_GOOD)) {
                        skreq->status = BLK_STS_OK;
-                       blk_mq_complete_request(rq);
+                       if (likely(!blk_should_fake_timeout(rq->q)))
+                               blk_mq_complete_request(rq);
                } else {
                        skd_resolve_req_exception(skdev, skreq, rq);
                }
index 1e2aa5a..2b95d7b 100644 (file)
@@ -519,14 +519,15 @@ static int mm_check_plugged(struct cardinfo *card)
        return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb));
 }
 
-static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t mm_submit_bio(struct bio *bio)
 {
-       struct cardinfo *card = q->queuedata;
+       struct cardinfo *card = bio->bi_disk->private_data;
+
        pr_debug("mm_make_request %llu %u\n",
                 (unsigned long long)bio->bi_iter.bi_sector,
                 bio->bi_iter.bi_size);
 
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
        spin_lock_irq(&card->lock);
        *card->biotail = bio;
@@ -778,6 +779,7 @@ static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 
 static const struct block_device_operations mm_fops = {
        .owner          = THIS_MODULE,
+       .submit_bio     = mm_submit_bio,
        .getgeo         = mm_getgeo,
        .revalidate_disk = mm_revalidate,
 };
@@ -885,10 +887,9 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
        card->biotail = &card->bio;
        spin_lock_init(&card->lock);
 
-       card->queue = blk_alloc_queue(mm_make_request, NUMA_NO_NODE);
+       card->queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!card->queue)
                goto failed_alloc;
-       card->queue->queuedata = card;
 
        tasklet_init(&card->tasklet, process_page, (unsigned long)card);
 
index 980df85..63b213e 100644 (file)
@@ -171,7 +171,8 @@ static void virtblk_done(struct virtqueue *vq)
                while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
                        struct request *req = blk_mq_rq_from_pdu(vbr);
 
-                       blk_mq_complete_request(req);
+                       if (likely(!blk_should_fake_timeout(req->q)))
+                               blk_mq_complete_request(req);
                        req_done = true;
                }
                if (unlikely(virtqueue_is_broken(vq)))
index 3b889ea..3bb3dd8 100644 (file)
@@ -1655,7 +1655,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                        BUG();
                }
 
-               blk_mq_complete_request(req);
+               if (likely(!blk_should_fake_timeout(req->q)))
+                       blk_mq_complete_request(req);
        }
 
        rinfo->ring.rsp_cons = i;
index 270dd81..9100ac3 100644 (file)
@@ -793,9 +793,9 @@ static void zram_sync_read(struct work_struct *work)
 }
 
 /*
- * Block layer want one ->make_request_fn to be active at a time
- * so if we use chained IO with parent IO in same context,
- * it's a deadlock. To avoid, it, it uses worker thread context.
+ * Block layer wants one ->submit_bio to be active at a time, so if we use
+ * chained IO with parent IO in same context, it's a deadlock. To avoid that,
+ * use a worker thread context.
  */
 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
                                unsigned long entry, struct bio *bio)
@@ -1584,9 +1584,9 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
 /*
  * Handler function for all zram I/O requests.
  */
-static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
+static blk_qc_t zram_submit_bio(struct bio *bio)
 {
-       struct zram *zram = queue->queuedata;
+       struct zram *zram = bio->bi_disk->private_data;
 
        if (!valid_io_request(zram, bio->bi_iter.bi_sector,
                                        bio->bi_iter.bi_size)) {
@@ -1813,6 +1813,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
 
 static const struct block_device_operations zram_devops = {
        .open = zram_open,
+       .submit_bio = zram_submit_bio,
        .swap_slot_free_notify = zram_slot_free_notify,
        .rw_page = zram_rw_page,
        .owner = THIS_MODULE
@@ -1891,7 +1892,7 @@ static int zram_add(void)
 #ifdef CONFIG_ZRAM_WRITEBACK
        spin_lock_init(&zram->wb_limit_lock);
 #endif
-       queue = blk_alloc_queue(zram_make_request, NUMA_NO_NODE);
+       queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!queue) {
                pr_err("Error allocating disk queue for device %d\n",
                        device_id);
@@ -1912,7 +1913,6 @@ static int zram_add(void)
        zram->disk->first_minor = device_id;
        zram->disk->fops = &zram_devops;
        zram->disk->queue = queue;
-       zram->disk->queue->queuedata = zram;
        zram->disk->private_data = zram;
        snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
 
index c8b1c38..189bff2 100644 (file)
@@ -592,6 +592,7 @@ static int dprc_probe(struct fsl_mc_device *mc_dev)
        bool mc_io_created = false;
        bool msi_domain_set = false;
        u16 major_ver, minor_ver;
+       struct irq_domain *mc_msi_domain;
 
        if (!is_fsl_mc_bus_dprc(mc_dev))
                return -EINVAL;
@@ -621,31 +622,15 @@ static int dprc_probe(struct fsl_mc_device *mc_dev)
                        return error;
 
                mc_io_created = true;
+       }
 
-               /*
-                * Inherit parent MSI domain:
-                */
-               dev_set_msi_domain(&mc_dev->dev,
-                                  dev_get_msi_domain(parent_dev));
-               msi_domain_set = true;
+       mc_msi_domain = fsl_mc_find_msi_domain(&mc_dev->dev);
+       if (!mc_msi_domain) {
+               dev_warn(&mc_dev->dev,
+                        "WARNING: MC bus without interrupt support\n");
        } else {
-               /*
-                * This is a root DPRC
-                */
-               struct irq_domain *mc_msi_domain;
-
-               if (dev_is_fsl_mc(parent_dev))
-                       return -EINVAL;
-
-               error = fsl_mc_find_msi_domain(parent_dev,
-                                              &mc_msi_domain);
-               if (error < 0) {
-                       dev_warn(&mc_dev->dev,
-                                "WARNING: MC bus without interrupt support\n");
-               } else {
-                       dev_set_msi_domain(&mc_dev->dev, mc_msi_domain);
-                       msi_domain_set = true;
-               }
+               dev_set_msi_domain(&mc_dev->dev, mc_msi_domain);
+               msi_domain_set = true;
        }
 
        error = dprc_open(mc_dev->mc_io, 0, mc_dev->obj_desc.id,
index 40526da..324d49d 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/bitops.h>
 #include <linux/msi.h>
 #include <linux/dma-mapping.h>
+#include <linux/acpi.h>
+#include <linux/iommu.h>
 
 #include "fsl-mc-private.h"
 
@@ -38,6 +40,7 @@ struct fsl_mc {
        struct fsl_mc_device *root_mc_bus_dev;
        u8 num_translation_ranges;
        struct fsl_mc_addr_translation_range *translation_ranges;
+       void *fsl_mc_regs;
 };
 
 /**
@@ -56,6 +59,10 @@ struct fsl_mc_addr_translation_range {
        phys_addr_t start_phys_addr;
 };
 
+#define FSL_MC_FAPR    0x28
+#define MC_FAPR_PL     BIT(18)
+#define MC_FAPR_BMT    BIT(17)
+
 /**
  * fsl_mc_bus_match - device to driver matching callback
  * @dev: the fsl-mc device to match against
@@ -118,11 +125,16 @@ static int fsl_mc_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 static int fsl_mc_dma_configure(struct device *dev)
 {
        struct device *dma_dev = dev;
+       struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
+       u32 input_id = mc_dev->icid;
 
        while (dev_is_fsl_mc(dma_dev))
                dma_dev = dma_dev->parent;
 
-       return of_dma_configure(dev, dma_dev->of_node, 0);
+       if (dev_of_node(dma_dev))
+               return of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id);
+
+       return acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id);
 }
 
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
@@ -368,8 +380,8 @@ EXPORT_SYMBOL_GPL(fsl_mc_get_version);
 /**
  * fsl_mc_get_root_dprc - function to traverse to the root dprc
  */
-static void fsl_mc_get_root_dprc(struct device *dev,
-                                struct device **root_dprc_dev)
+void fsl_mc_get_root_dprc(struct device *dev,
+                        struct device **root_dprc_dev)
 {
        if (!dev) {
                *root_dprc_dev = NULL;
@@ -863,8 +875,11 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
        struct fsl_mc_io *mc_io = NULL;
        int container_id;
        phys_addr_t mc_portal_phys_addr;
-       u32 mc_portal_size;
-       struct resource res;
+       u32 mc_portal_size, mc_stream_id;
+       struct resource *plat_res;
+
+       if (!iommu_present(&fsl_mc_bus_type))
+               return -EPROBE_DEFER;
 
        mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL);
        if (!mc)
@@ -872,19 +887,33 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, mc);
 
+       plat_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       mc->fsl_mc_regs = devm_ioremap_resource(&pdev->dev, plat_res);
+       if (IS_ERR(mc->fsl_mc_regs))
+               return PTR_ERR(mc->fsl_mc_regs);
+
+       if (IS_ENABLED(CONFIG_ACPI) && !dev_of_node(&pdev->dev)) {
+               mc_stream_id = readl(mc->fsl_mc_regs + FSL_MC_FAPR);
+               /*
+                * HW ORs the PL and BMT bit, places the result in bit 14 of
+                * the StreamID and ORs in the ICID. Calculate it accordingly.
+                */
+               mc_stream_id = (mc_stream_id & 0xffff) |
+                               ((mc_stream_id & (MC_FAPR_PL | MC_FAPR_BMT)) ?
+                                       0x4000 : 0);
+               error = acpi_dma_configure_id(&pdev->dev, DEV_DMA_COHERENT,
+                                             &mc_stream_id);
+               if (error)
+                       dev_warn(&pdev->dev, "failed to configure dma: %d.\n",
+                                error);
+       }
+
        /*
         * Get physical address of MC portal for the root DPRC:
         */
-       error = of_address_to_resource(pdev->dev.of_node, 0, &res);
-       if (error < 0) {
-               dev_err(&pdev->dev,
-                       "of_address_to_resource() failed for %pOF\n",
-                       pdev->dev.of_node);
-               return error;
-       }
-
-       mc_portal_phys_addr = res.start;
-       mc_portal_size = resource_size(&res);
+       plat_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       mc_portal_phys_addr = plat_res->start;
+       mc_portal_size = resource_size(plat_res);
        error = fsl_create_mc_io(&pdev->dev, mc_portal_phys_addr,
                                 mc_portal_size, NULL,
                                 FSL_MC_IO_ATOMIC_CONTEXT_PORTAL, &mc_io);
@@ -901,11 +930,13 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
        dev_info(&pdev->dev, "MC firmware version: %u.%u.%u\n",
                 mc_version.major, mc_version.minor, mc_version.revision);
 
-       error = get_mc_addr_translation_ranges(&pdev->dev,
-                                              &mc->translation_ranges,
-                                              &mc->num_translation_ranges);
-       if (error < 0)
-               goto error_cleanup_mc_io;
+       if (dev_of_node(&pdev->dev)) {
+               error = get_mc_addr_translation_ranges(&pdev->dev,
+                                               &mc->translation_ranges,
+                                               &mc->num_translation_ranges);
+               if (error < 0)
+                       goto error_cleanup_mc_io;
+       }
 
        error = dprc_get_container_id(mc_io, 0, &container_id);
        if (error < 0) {
@@ -932,6 +963,7 @@ static int fsl_mc_bus_probe(struct platform_device *pdev)
                goto error_cleanup_mc_io;
 
        mc->root_mc_bus_dev = mc_bus_dev;
+       mc_bus_dev->dev.fwnode = pdev->dev.fwnode;
        return 0;
 
 error_cleanup_mc_io:
@@ -965,11 +997,18 @@ static const struct of_device_id fsl_mc_bus_match_table[] = {
 
 MODULE_DEVICE_TABLE(of, fsl_mc_bus_match_table);
 
+static const struct acpi_device_id fsl_mc_bus_acpi_match_table[] = {
+       {"NXP0008", 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, fsl_mc_bus_acpi_match_table);
+
 static struct platform_driver fsl_mc_bus_driver = {
        .driver = {
                   .name = "fsl_mc_bus",
                   .pm = NULL,
                   .of_match_table = fsl_mc_bus_match_table,
+                  .acpi_match_table = fsl_mc_bus_acpi_match_table,
                   },
        .probe = fsl_mc_bus_probe,
        .remove = fsl_mc_bus_remove,
index 8b9c66d..8edadf0 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
 #include <linux/msi.h>
+#include <linux/acpi_iort.h>
 
 #include "fsl-mc-private.h"
 
@@ -177,23 +178,36 @@ struct irq_domain *fsl_mc_msi_create_irq_domain(struct fwnode_handle *fwnode,
        return domain;
 }
 
-int fsl_mc_find_msi_domain(struct device *mc_platform_dev,
-                          struct irq_domain **mc_msi_domain)
+struct irq_domain *fsl_mc_find_msi_domain(struct device *dev)
 {
+       struct device *root_dprc_dev;
+       struct device *bus_dev;
        struct irq_domain *msi_domain;
-       struct device_node *mc_of_node = mc_platform_dev->of_node;
+       struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
 
-       msi_domain = of_msi_get_domain(mc_platform_dev, mc_of_node,
-                                      DOMAIN_BUS_FSL_MC_MSI);
-       if (!msi_domain) {
-               pr_err("Unable to find fsl-mc MSI domain for %pOF\n",
-                      mc_of_node);
+       fsl_mc_get_root_dprc(dev, &root_dprc_dev);
+       bus_dev = root_dprc_dev->parent;
+
+       if (bus_dev->of_node) {
+               msi_domain = of_msi_map_get_device_domain(dev,
+                                                 mc_dev->icid,
+                                                 DOMAIN_BUS_FSL_MC_MSI);
 
-               return -ENOENT;
+               /*
+                * if the msi-map property is missing assume that all the
+                * child containers inherit the domain from the parent
+                */
+               if (!msi_domain)
+
+                       msi_domain = of_msi_get_domain(bus_dev,
+                                               bus_dev->of_node,
+                                               DOMAIN_BUS_FSL_MC_MSI);
+       } else {
+               msi_domain = iort_get_device_domain(dev, mc_dev->icid,
+                                                   DOMAIN_BUS_FSL_MC_MSI);
        }
 
-       *mc_msi_domain = msi_domain;
-       return 0;
+       return msi_domain;
 }
 
 static void fsl_mc_msi_free_descs(struct device *dev)
index 21ca8c7..7a46a12 100644 (file)
@@ -595,8 +595,7 @@ int fsl_mc_msi_domain_alloc_irqs(struct device *dev,
 
 void fsl_mc_msi_domain_free_irqs(struct device *dev);
 
-int fsl_mc_find_msi_domain(struct device *mc_platform_dev,
-                          struct irq_domain **mc_msi_domain);
+struct irq_domain *fsl_mc_find_msi_domain(struct device *dev);
 
 int fsl_mc_populate_irq_pool(struct fsl_mc_bus *mc_bus,
                             unsigned int irq_count);
@@ -613,6 +612,9 @@ void fsl_destroy_mc_io(struct fsl_mc_io *mc_io);
 
 bool fsl_mc_is_root_dprc(struct device *dev);
 
+void fsl_mc_get_root_dprc(struct device *dev,
+                        struct device **root_dprc_dev);
+
 struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc *obj_desc,
                                           struct fsl_mc_device *mc_bus_dev);
 
index d82b3b7..0c271b9 100644 (file)
@@ -605,7 +605,7 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
        disk->cdi = cdi;
 
        ENSURE(cdo, drive_status, CDC_DRIVE_STATUS);
-       if (cdo->check_events == NULL && cdo->media_changed == NULL)
+       if (cdo->check_events == NULL)
                WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC));
        ENSURE(cdo, tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY);
        ENSURE(cdo, lock_door, CDC_LOCK);
@@ -1419,8 +1419,6 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
 
        if (cdi->ops->check_events)
                cdi->ops->check_events(cdi, 0, slot);
-       else
-               cdi->ops->media_changed(cdi, slot);
 
        if (slot == CDSL_NONE) {
                /* set media changed bits, on both queues */
@@ -1517,13 +1515,10 @@ int media_changed(struct cdrom_device_info *cdi, int queue)
                return ret;
 
        /* changed since last call? */
-       if (cdi->ops->check_events) {
-               BUG_ON(!queue); /* shouldn't be called from VFS path */
-               cdrom_update_events(cdi, DISK_EVENT_MEDIA_CHANGE);
-               changed = cdi->ioctl_events & DISK_EVENT_MEDIA_CHANGE;
-               cdi->ioctl_events = 0;
-       } else
-               changed = cdi->ops->media_changed(cdi, CDSL_CURRENT);
+       BUG_ON(!queue); /* shouldn't be called from VFS path */
+       cdrom_update_events(cdi, DISK_EVENT_MEDIA_CHANGE);
+       changed = cdi->ioctl_events & DISK_EVENT_MEDIA_CHANGE;
+       cdi->ioctl_events = 0;
 
        if (changed) {
                cdi->mc_flags = 0x3;    /* set bit on both queues */
@@ -1535,18 +1530,6 @@ int media_changed(struct cdrom_device_info *cdi, int queue)
        return ret;
 }
 
-int cdrom_media_changed(struct cdrom_device_info *cdi)
-{
-       /* This talks to the VFS, which doesn't like errors - just 1 or 0.  
-        * Returning "0" is always safe (media hasn't been changed). Do that 
-        * if the low-level cdrom driver dosn't support media changed. */ 
-       if (cdi == NULL || cdi->ops->media_changed == NULL)
-               return 0;
-       if (!CDROM_CAN(CDC_MEDIA_CHANGED))
-               return 0;
-       return media_changed(cdi, 0);
-}
-
 /* Requests to the low-level drivers will /always/ be done in the
    following format convention:
 
@@ -3464,7 +3447,6 @@ EXPORT_SYMBOL(unregister_cdrom);
 EXPORT_SYMBOL(cdrom_open);
 EXPORT_SYMBOL(cdrom_release);
 EXPORT_SYMBOL(cdrom_ioctl);
-EXPORT_SYMBOL(cdrom_media_changed);
 EXPORT_SYMBOL(cdrom_number_of_slots);
 EXPORT_SYMBOL(cdrom_mode_select);
 EXPORT_SYMBOL(cdrom_mode_sense);
index 0ad17ef..f976a49 100644 (file)
@@ -74,6 +74,16 @@ config HW_RANDOM_ATMEL
 
          If unsure, say Y.
 
+config HW_RANDOM_BA431
+       tristate "Silex Insight BA431 Random Number Generator support"
+       depends on HAS_IOMEM
+       help
+         This driver provides kernel-side support for the Random Number
+         Generator hardware based on Silex Insight BA431 IP.
+
+         To compile this driver as a module, choose M here: the
+         module will be called ba431-rng.
+
 config HW_RANDOM_BCM2835
        tristate "Broadcom BCM2835/BCM63xx Random Number Generator support"
        depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X || \
@@ -245,7 +255,7 @@ config HW_RANDOM_MXC_RNGA
 config HW_RANDOM_IMX_RNGC
        tristate "Freescale i.MX RNGC Random Number Generator"
        depends on HAS_IOMEM && HAVE_CLK
-       depends on SOC_IMX25 || COMPILE_TEST
+       depends on SOC_IMX25 || SOC_IMX6SL || SOC_IMX6SLL || SOC_IMX6UL || COMPILE_TEST
        default HW_RANDOM
        help
          This driver provides kernel-side support for the Random Number
@@ -257,6 +267,21 @@ config HW_RANDOM_IMX_RNGC
 
          If unsure, say Y.
 
+config HW_RANDOM_INGENIC_RNG
+       tristate "Ingenic Random Number Generator support"
+       depends on HW_RANDOM
+       depends on MACH_JZ4780 || MACH_X1000
+       default HW_RANDOM
+       help
+         This driver provides kernel-side support for the Random Number Generator
+         hardware found in Ingenic JZ4780 and X1000 SoCs. The MIPS Creator CI20 uses
+         the JZ4780 SoC; the YSH & ATIL CU1000-Neo uses the X1000 SoC.
+
+         To compile this driver as a module, choose M here: the
+         module will be called ingenic-rng.
+
+         If unsure, say Y.
+
 config HW_RANDOM_NOMADIK
        tristate "ST-Ericsson Nomadik Random Number Generator support"
        depends on ARCH_NOMADIK
index 2c67247..26ae068 100644 (file)
@@ -9,6 +9,7 @@ obj-$(CONFIG_HW_RANDOM_TIMERIOMEM) += timeriomem-rng.o
 obj-$(CONFIG_HW_RANDOM_INTEL) += intel-rng.o
 obj-$(CONFIG_HW_RANDOM_AMD) += amd-rng.o
 obj-$(CONFIG_HW_RANDOM_ATMEL) += atmel-rng.o
+obj-$(CONFIG_HW_RANDOM_BA431) += ba431-rng.o
 obj-$(CONFIG_HW_RANDOM_GEODE) += geode-rng.o
 obj-$(CONFIG_HW_RANDOM_N2RNG) += n2-rng.o
 n2-rng-y := n2-drv.o n2-asm.o
@@ -22,6 +23,7 @@ obj-$(CONFIG_HW_RANDOM_VIRTIO) += virtio-rng.o
 obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o
 obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
 obj-$(CONFIG_HW_RANDOM_IMX_RNGC) += imx-rngc.o
+obj-$(CONFIG_HW_RANDOM_INGENIC_RNG) += ingenic-rng.o
 obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o
 obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
 obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
diff --git a/drivers/char/hw_random/ba431-rng.c b/drivers/char/hw_random/ba431-rng.c
new file mode 100644 (file)
index 0000000..410b50b
--- /dev/null
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Silex Insight
+
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#define BA431_RESET_DELAY                      1 /* usec */
+#define BA431_RESET_READ_STATUS_TIMEOUT                1000 /* usec */
+#define BA431_RESET_READ_STATUS_INTERVAL       10 /* usec */
+#define BA431_READ_RETRY_INTERVAL              1 /* usec */
+
+#define BA431_REG_CTRL                         0x00
+#define BA431_REG_FIFO_LEVEL                   0x04
+#define BA431_REG_STATUS                       0x30
+#define BA431_REG_FIFODATA                     0x80
+
+#define BA431_CTRL_ENABLE                      BIT(0)
+#define BA431_CTRL_SOFTRESET                   BIT(8)
+
+#define BA431_STATUS_STATE_MASK                        (BIT(1) | BIT(2) | BIT(3))
+#define BA431_STATUS_STATE_OFFSET              1
+
+/*
+ * Values of the state field of the STATUS register, as returned by
+ * ba431_trng_get_state() (status masked with BA431_STATUS_STATE_MASK and
+ * shifted by BA431_STATUS_STATE_OFFSET).
+ *
+ * ba431_trng_is_in_error() reports an error for BA431_STATE_RESET and for
+ * BA431_STATE_ERROR or any higher value.
+ */
+enum ba431_state {
+       BA431_STATE_RESET,
+       BA431_STATE_STARTUP,
+       BA431_STATE_FIFOFULLON,
+       BA431_STATE_FIFOFULLOFF,
+       BA431_STATE_RUNNING,
+       BA431_STATE_ERROR
+};
+
+/* Per-device driver state, allocated in ba431_trng_probe(). */
+struct ba431_trng {
+       /* Platform device's struct device, used for log messages. */
+       struct device *dev;
+       /* Mapped base address of the BA431 register block. */
+       void __iomem *base;
+       /* hwrng instance registered with the hw_random core. */
+       struct hwrng rng;
+       /* Set to 1 while a reset work item is outstanding, 0 otherwise. */
+       atomic_t reset_pending;
+       /* Work item that runs ba431_trng_reset_work(). */
+       struct work_struct reset_work;
+};
+
+/* Read the 32-bit register at byte offset @reg from the mapped base. */
+static inline u32 ba431_trng_read_reg(struct ba431_trng *ba431, u32 reg)
+{
+       void __iomem *addr = ba431->base + reg;
+
+       return ioread32(addr);
+}
+
+/* Write the 32-bit value @val to the register at byte offset @reg. */
+static inline void ba431_trng_write_reg(struct ba431_trng *ba431, u32 reg,
+                                       u32 val)
+{
+       void __iomem *addr = ba431->base + reg;
+
+       iowrite32(val, addr);
+}
+
+/* Return the state field extracted from the STATUS register. */
+static inline enum ba431_state ba431_trng_get_state(struct ba431_trng *ba431)
+{
+       u32 state_bits;
+
+       state_bits = ba431_trng_read_reg(ba431, BA431_REG_STATUS) &
+                    BA431_STATUS_STATE_MASK;
+
+       return state_bits >> BA431_STATUS_STATE_OFFSET;
+}
+
+/*
+ * Return 1 when the current state is below BA431_STATE_STARTUP or at/above
+ * BA431_STATE_ERROR, 0 otherwise.
+ */
+static int ba431_trng_is_in_error(struct ba431_trng *ba431)
+{
+       enum ba431_state cur = ba431_trng_get_state(ba431);
+
+       return cur < BA431_STATE_STARTUP || cur >= BA431_STATE_ERROR;
+}
+
+/*
+ * Soft-reset the generator and re-enable it.
+ *
+ * Writes BA431_CTRL_SOFTRESET to the control register, waits
+ * BA431_RESET_DELAY usec, writes BA431_CTRL_ENABLE, then polls
+ * ba431_trng_is_in_error() every BA431_RESET_READ_STATUS_INTERVAL usec
+ * until it reports no error.
+ *
+ * Returns 0 on success, or -ETIMEDOUT if the error condition has not
+ * cleared within BA431_RESET_READ_STATUS_TIMEOUT usec.
+ */
+static int ba431_trng_reset(struct ba431_trng *ba431)
+{
+       int ret;
+
+       /* Disable interrupts, random generation and enable the softreset */
+       ba431_trng_write_reg(ba431, BA431_REG_CTRL, BA431_CTRL_SOFTRESET);
+       udelay(BA431_RESET_DELAY);
+       ba431_trng_write_reg(ba431, BA431_REG_CTRL, BA431_CTRL_ENABLE);
+
+       /* Wait until the state changed */
+       if (readx_poll_timeout(ba431_trng_is_in_error, ba431, ret, !ret,
+                              BA431_RESET_READ_STATUS_INTERVAL,
+                              BA431_RESET_READ_STATUS_TIMEOUT)) {
+               dev_err(ba431->dev, "reset failed (state: %d)\n",
+                       ba431_trng_get_state(ba431));
+               return -ETIMEDOUT;
+       }
+
+       dev_info(ba431->dev, "reset done\n");
+
+       return 0;
+}
+
+/*
+ * Work handler: reset the device, then clear reset_pending so that a new
+ * reset can be scheduled. The result of the reset is not checked.
+ */
+static void ba431_trng_reset_work(struct work_struct *work)
+{
+       struct ba431_trng *trng;
+
+       trng = container_of(work, struct ba431_trng, reset_work);
+
+       ba431_trng_reset(trng);
+       atomic_set(&trng->reset_pending, 0);
+}
+
+/* Queue the reset work unless a reset is already pending. */
+static void ba431_trng_schedule_reset(struct ba431_trng *ba431)
+{
+       /* Only the caller that flips reset_pending from 0 to 1 queues the work. */
+       if (atomic_cmpxchg(&ba431->reset_pending, 0, 1) == 0)
+               schedule_work(&ba431->reset_work);
+}
+
+/*
+ * hwrng read callback: drain 32-bit words from the data FIFO into @buf.
+ *
+ * At most @max bytes are stored, always in whole u32 words; a trailing
+ * remainder of @max smaller than one word is left untouched. When the FIFO
+ * is empty the function either retries after BA431_READ_RETRY_INTERVAL usec
+ * (@wait set) or stops (@wait clear). If ba431_trng_is_in_error() reports
+ * an error, a reset is scheduled, the words fetched in the current batch
+ * are discarded and the loop ends.
+ *
+ * Returns the number of bytes stored in @buf; if that is zero and @wait is
+ * set, returns -EIO instead.
+ */
+static int ba431_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+       struct ba431_trng *ba431 = container_of(rng, struct ba431_trng, rng);
+       u32 *data = buf;
+       unsigned int level, i;
+       int n = 0;
+
+       /*
+        * max is unsigned: compare against the word size instead of zero so
+        * that a length which is not a multiple of 4 cannot wrap around and
+        * overrun the buffer.
+        */
+       while (max >= sizeof(*data)) {
+               level = ba431_trng_read_reg(ba431, BA431_REG_FIFO_LEVEL);
+               if (!level) {
+                       if (ba431_trng_is_in_error(ba431)) {
+                               ba431_trng_schedule_reset(ba431);
+                               break;
+                       }
+
+                       if (!wait)
+                               break;
+
+                       udelay(BA431_READ_RETRY_INTERVAL);
+                       continue;
+               }
+
+               i = level;
+               do {
+                       data[n++] = ba431_trng_read_reg(ba431,
+                                                       BA431_REG_FIFODATA);
+                       max -= sizeof(*data);
+               } while (--i && (max >= sizeof(*data)));
+
+               if (ba431_trng_is_in_error(ba431)) {
+                       /* Drop the words read during this batch. */
+                       n -= (level - i);
+                       ba431_trng_schedule_reset(ba431);
+                       break;
+               }
+       }
+
+       /* n counts u32 words; convert to bytes using the element size. */
+       n *= sizeof(*data);
+       return (n || !wait) ? n : -EIO;
+}
+
+/*
+ * hwrng cleanup callback: stop any outstanding reset work and disable the
+ * generator by clearing the control register.
+ */
+static void ba431_trng_cleanup(struct hwrng *rng)
+{
+       struct ba431_trng *ba431 = container_of(rng, struct ba431_trng, rng);
+
+       /*
+        * Flush the reset work first: the work handler writes
+        * BA431_CTRL_ENABLE, so letting it finish after the disable below
+        * would leave the device running.
+        */
+       cancel_work_sync(&ba431->reset_work);
+
+       /*
+        * If the work was cancelled while still queued, its handler never
+        * cleared the flag; clear it here so later resets can be scheduled.
+        */
+       atomic_set(&ba431->reset_pending, 0);
+
+       ba431_trng_write_reg(ba431, BA431_REG_CTRL, 0);
+}
+
+/* hwrng init callback: reset the device and return the reset result. */
+static int ba431_trng_init(struct hwrng *rng)
+{
+       return ba431_trng_reset(container_of(rng, struct ba431_trng, rng));
+}
+
+/*
+ * Platform probe: allocate the driver state, map the first memory
+ * resource, set up the reset work and the hwrng callbacks, then register
+ * with the hw_random core.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ba431_trng_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct ba431_trng *trng;
+       struct resource *mem;
+       int err;
+
+       trng = devm_kzalloc(dev, sizeof(*trng), GFP_KERNEL);
+       if (!trng)
+               return -ENOMEM;
+
+       trng->dev = dev;
+
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       trng->base = devm_ioremap_resource(dev, mem);
+       if (IS_ERR(trng->base))
+               return PTR_ERR(trng->base);
+
+       atomic_set(&trng->reset_pending, 0);
+       INIT_WORK(&trng->reset_work, ba431_trng_reset_work);
+
+       trng->rng.name = pdev->name;
+       trng->rng.init = ba431_trng_init;
+       trng->rng.cleanup = ba431_trng_cleanup;
+       trng->rng.read = ba431_trng_read;
+
+       platform_set_drvdata(pdev, trng);
+
+       err = hwrng_register(&trng->rng);
+       if (err) {
+               dev_err(dev, "BA431 registration failed (%d)\n", err);
+               return err;
+       }
+
+       dev_info(dev, "BA431 TRNG registered\n");
+       return 0;
+}
+
+/* Platform remove: unregister the hwrng that probe registered. */
+static int ba431_trng_remove(struct platform_device *pdev)
+{
+       struct ba431_trng *trng = platform_get_drvdata(pdev);
+
+       hwrng_unregister(&trng->rng);
+       return 0;
+}
+
+static const struct of_device_id ba431_trng_dt_ids[] = {
+       { .compatible = "silex-insight,ba431-rng", .data = NULL },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ba431_trng_dt_ids);
+
+static struct platform_driver ba431_trng_driver = {
+       .driver = {
+               .name = "ba431-rng",
+               .of_match_table = ba431_trng_dt_ids,
+       },
+       .probe = ba431_trng_probe,
+       .remove = ba431_trng_remove,
+};
+
+module_platform_driver(ba431_trng_driver);
+
+MODULE_AUTHOR("Olivier Sobrie <olivier@sobrie.be>");
+MODULE_DESCRIPTION("TRNG driver for Silex Insight BA431");
+MODULE_LICENSE("GPL");
index cbf5eae..1a7c43b 100644 (file)
@@ -139,7 +139,6 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
 {
        const struct bcm2835_rng_of_data *of_data;
        struct device *dev = &pdev->dev;
-       struct device_node *np = dev->of_node;
        const struct of_device_id *rng_id;
        struct bcm2835_rng_priv *priv;
        int err;
@@ -166,7 +165,7 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
        priv->rng.cleanup = bcm2835_rng_cleanup;
 
        if (dev_of_node(dev)) {
-               rng_id = of_match_node(bcm2835_rng_of_match, np);
+               rng_id = of_match_node(bcm2835_rng_of_match, dev->of_node);
                if (!rng_id)
                        return -EINVAL;
 
@@ -188,7 +187,7 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
 
 MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match);
 
-static struct platform_device_id bcm2835_rng_devtype[] = {
+static const struct platform_device_id bcm2835_rng_devtype[] = {
        { .name = "bcm2835-rng" },
        { .name = "bcm63xx-rng" },
        { /* sentinel */ }
index d2d7a42..8c1c47d 100644 (file)
@@ -611,7 +611,7 @@ EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
 
 static int __init hwrng_modinit(void)
 {
-       int ret = -ENOMEM;
+       int ret;
 
        /* kmalloc makes this safe for virt_to_page() in virtio_rng.c */
        rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL);
index 6815e17..96438f8 100644 (file)
@@ -99,7 +99,7 @@ static int hisi_rng_probe(struct platform_device *pdev)
        return 0;
 }
 
-static const struct of_device_id hisi_rng_dt_ids[] = {
+static const struct of_device_id hisi_rng_dt_ids[] __maybe_unused = {
        { .compatible = "hisilicon,hip04-rng" },
        { .compatible = "hisilicon,hip05-rng" },
        { }
diff --git a/drivers/char/hw_random/ingenic-rng.c b/drivers/char/hw_random/ingenic-rng.c
new file mode 100644 (file)
index 0000000..d704cef
--- /dev/null
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ingenic Random Number Generator driver
+ * Copyright (c) 2017 PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
+ * Copyright (c) 2020 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+ */
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* RNG register offsets */
+#define RNG_REG_ERNG_OFFSET            0x0
+#define RNG_REG_RNG_OFFSET             0x4
+
+/* bits within the ERNG register */
+#define ERNG_READY                             BIT(31)
+#define ERNG_ENABLE                            BIT(0)
+
+/*
+ * Hardware variant, taken from the .data field of the matching
+ * ingenic_rng_of_match entry. ingenic_rng_read() polls ERNG_READY for
+ * versions >= ID_X1000 and uses a fixed delay for earlier ones.
+ */
+enum ingenic_rng_version {
+       ID_JZ4780,
+       ID_X1000,
+};
+
+/* Device associated memory */
+struct ingenic_rng {
+       /* Hardware variant selected from the OF match data. */
+       enum ingenic_rng_version version;
+
+       /* Mapped base address of the RNG registers. */
+       void __iomem *base;
+       /* hwrng instance registered with the hw_random core. */
+       struct hwrng rng;
+};
+
+/* hwrng init callback: write ERNG_ENABLE to the ERNG register. Returns 0. */
+static int ingenic_rng_init(struct hwrng *rng)
+{
+       struct ingenic_rng *ingenic = container_of(rng, struct ingenic_rng, rng);
+       void __iomem *erng = ingenic->base + RNG_REG_ERNG_OFFSET;
+
+       writel(ERNG_ENABLE, erng);
+       return 0;
+}
+
+/* hwrng cleanup callback: clear the ERNG register. */
+static void ingenic_rng_cleanup(struct hwrng *rng)
+{
+       struct ingenic_rng *ingenic = container_of(rng, struct ingenic_rng, rng);
+       void __iomem *erng = ingenic->base + RNG_REG_ERNG_OFFSET;
+
+       writel(0, erng);
+}
+
+/*
+ * hwrng read callback: store one 32-bit word from the RNG register in @buf.
+ *
+ * @max and @wait are not consulted. Returns 4 on success, or -ETIMEDOUT if
+ * the ready bit does not appear while polling (versions >= ID_X1000 only).
+ */
+static int ingenic_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+       struct ingenic_rng *ingenic = container_of(rng, struct ingenic_rng, rng);
+       u32 *out = buf;
+       u32 erng;
+       int err;
+
+       if (ingenic->version < ID_X1000) {
+               /*
+                * A delay is required so that the current RNG data is not bit shifted
+                * version of previous RNG data which could happen if random data is
+                * read continuously from this device.
+                */
+               udelay(20);
+       } else {
+               err = readl_poll_timeout(ingenic->base + RNG_REG_ERNG_OFFSET, erng,
+                                        erng & ERNG_READY, 10, 1000);
+               if (err == -ETIMEDOUT) {
+                       pr_err("%s: Wait for RNG data ready timeout\n", __func__);
+                       return err;
+               }
+       }
+
+       *out = readl(ingenic->base + RNG_REG_RNG_OFFSET);
+
+       return 4;
+}
+
+/*
+ * Platform probe: allocate the driver state, map the registers, record the
+ * hardware variant from the OF match data and register the hwrng.
+ *
+ * Returns 0 on success or a negative errno. The private data comes from
+ * devm_kzalloc(), so it is released by the driver core on failure and on
+ * unbind; it must not be passed to kfree() here.
+ */
+static int ingenic_rng_probe(struct platform_device *pdev)
+{
+       struct ingenic_rng *priv;
+       int ret;
+
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(priv->base)) {
+               pr_err("%s: Failed to map RNG registers\n", __func__);
+               return PTR_ERR(priv->base);
+       }
+
+       /* Go through uintptr_t: the match data is a pointer-sized integer. */
+       priv->version = (enum ingenic_rng_version)(uintptr_t)
+                       of_device_get_match_data(&pdev->dev);
+
+       priv->rng.name = pdev->name;
+       priv->rng.init = ingenic_rng_init;
+       priv->rng.cleanup = ingenic_rng_cleanup;
+       priv->rng.read = ingenic_rng_read;
+
+       ret = hwrng_register(&priv->rng);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to register hwrng\n");
+               return ret;
+       }
+
+       platform_set_drvdata(pdev, priv);
+
+       dev_info(&pdev->dev, "Ingenic RNG driver registered\n");
+       return 0;
+}
+
+/* Platform remove: unregister the hwrng, then clear the ERNG register. */
+static int ingenic_rng_remove(struct platform_device *pdev)
+{
+       struct ingenic_rng *ingenic = platform_get_drvdata(pdev);
+       void __iomem *erng = ingenic->base + RNG_REG_ERNG_OFFSET;
+
+       hwrng_unregister(&ingenic->rng);
+
+       writel(0, erng);
+
+       return 0;
+}
+
+static const struct of_device_id ingenic_rng_of_match[] = {
+       { .compatible = "ingenic,jz4780-rng", .data = (void *) ID_JZ4780 },
+       { .compatible = "ingenic,x1000-rng", .data = (void *) ID_X1000 },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ingenic_rng_of_match);
+
+static struct platform_driver ingenic_rng_driver = {
+       .probe          = ingenic_rng_probe,
+       .remove         = ingenic_rng_remove,
+       .driver         = {
+               .name   = "ingenic-rng",
+               .of_match_table = ingenic_rng_of_match,
+       },
+};
+
+module_platform_driver(ingenic_rng_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>");
+MODULE_AUTHOR("周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>");
+MODULE_DESCRIPTION("Ingenic Random Number Generator driver");
index 0016170..8f1d47f 100644 (file)
@@ -2,7 +2,7 @@
 /*
  * Random Number Generator driver for the Keystone SOC
  *
- * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com
+ * Copyright (C) 2016 Texas Instruments Incorporated - https://www.ti.com
  *
  * Authors:    Sandeep Nair
  *             Vitaly Andrianov
index 74ed29f..b0ded41 100644 (file)
@@ -76,7 +76,7 @@ static int nmk_rng_remove(struct amba_device *dev)
        return 0;
 }
 
-static struct amba_id nmk_rng_ids[] = {
+static const struct amba_id nmk_rng_ids[] = {
        {
                .id     = 0x000805e1,
                .mask   = 0x000fffff, /* top bits are rev and cfg: accept all */
index 01d0440..5d0d13f 100644 (file)
@@ -161,7 +161,7 @@ static const struct dev_pm_ops npcm_rng_pm_ops = {
                                pm_runtime_force_resume)
 };
 
-static const struct of_device_id rng_dt_id[] = {
+static const struct of_device_id rng_dt_id[] __maybe_unused = {
        { .compatible = "nuvoton,npcm750-rng",  },
        {},
 };
index 7be8067..8561a09 100644 (file)
@@ -33,7 +33,7 @@ static int octeon_rng_init(struct hwrng *rng)
        ctl.u64 = 0;
        ctl.s.ent_en = 1; /* Enable the entropy source.  */
        ctl.s.rng_en = 1; /* Enable the RNG hardware.  */
-       cvmx_write_csr((u64)p->control_status, ctl.u64);
+       cvmx_write_csr((__force u64)p->control_status, ctl.u64);
        return 0;
 }
 
@@ -44,14 +44,14 @@ static void octeon_rng_cleanup(struct hwrng *rng)
 
        ctl.u64 = 0;
        /* Disable everything.  */
-       cvmx_write_csr((u64)p->control_status, ctl.u64);
+       cvmx_write_csr((__force u64)p->control_status, ctl.u64);
 }
 
 static int octeon_rng_data_read(struct hwrng *rng, u32 *data)
 {
        struct octeon_rng *p = container_of(rng, struct octeon_rng, ops);
 
-       *data = cvmx_read64_uint32((u64)p->result);
+       *data = cvmx_read64_uint32((__force u64)p->result);
        return sizeof(u32);
 }
 
index 7290c60..5cc5fc5 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/platform_device.h>
 #include <linux/hw_random.h>
 #include <linux/delay.h>
+#include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
@@ -243,7 +244,6 @@ static struct omap_rng_pdata omap2_rng_pdata = {
        .cleanup        = omap2_rng_cleanup,
 };
 
-#if defined(CONFIG_OF)
 static inline u32 omap4_rng_data_present(struct omap_rng_dev *priv)
 {
        return omap_rng_read(priv, RNG_STATUS_REG) & RNG_REG_STATUS_RDY;
@@ -358,7 +358,7 @@ static struct omap_rng_pdata eip76_rng_pdata = {
        .cleanup        = omap4_rng_cleanup,
 };
 
-static const struct of_device_id omap_rng_of_match[] = {
+static const struct of_device_id omap_rng_of_match[] __maybe_unused = {
                {
                        .compatible     = "ti,omap2-rng",
                        .data           = &omap2_rng_pdata,
@@ -418,13 +418,6 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
        }
        return 0;
 }
-#else
-static int of_get_omap_rng_device_details(struct omap_rng_dev *omap_rng,
-                                         struct platform_device *pdev)
-{
-       return -EINVAL;
-}
-#endif
 
 static int get_omap_rng_device_details(struct omap_rng_dev *omap_rng)
 {
index 81080cb..e8210c1 100644 (file)
@@ -119,7 +119,7 @@ static int pic32_rng_remove(struct platform_device *pdev)
        return 0;
 }
 
-static const struct of_device_id pic32_rng_of_match[] = {
+static const struct of_device_id pic32_rng_of_match[] __maybe_unused = {
        { .compatible   = "microchip,pic32mzda-rng", },
        { /* sentinel */ }
 };
index 783c24e..15ba1e6 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/delay.h>
 #include <linux/hw_random.h>
 #include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
@@ -121,7 +122,7 @@ static int st_rng_remove(struct platform_device *pdev)
        return 0;
 }
 
-static const struct of_device_id st_rng_match[] = {
+static const struct of_device_id st_rng_match[] __maybe_unused = {
        { .compatible = "st,rng" },
        {},
 };
index 79a6e47..a90001e 100644 (file)
@@ -195,7 +195,7 @@ static int virtrng_restore(struct virtio_device *vdev)
 }
 #endif
 
-static struct virtio_device_id id_table[] = {
+static const struct virtio_device_id id_table[] = {
        { VIRTIO_ID_RNG, VIRTIO_DEV_ANY_ID },
        { 0 },
 };
index 2a41b21..d20ba1b 100644 (file)
@@ -1277,6 +1277,7 @@ void add_interrupt_randomness(int irq, int irq_flags)
 
        fast_mix(fast_pool);
        add_interrupt_bench(cycles);
+       this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]);
 
        if (unlikely(crng_init == 0)) {
                if ((fast_pool->count >= 64) &&
index 63ada5e..3633ed7 100644 (file)
@@ -49,9 +49,9 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
        void __iomem *virt;
        u64 len, start;
        struct tpm_bios_log *log;
-
-       if (chip->flags & TPM_CHIP_FLAG_TPM2)
-               return -ENODEV;
+       struct acpi_table_tpm2 *tbl;
+       struct acpi_tpm2_phy *tpm2_phy;
+       int format;
 
        log = &chip->log;
 
@@ -61,23 +61,44 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
        if (!chip->acpi_dev_handle)
                return -ENODEV;
 
-       /* Find TCPA entry in RSDT (ACPI_LOGICAL_ADDRESSING) */
-       status = acpi_get_table(ACPI_SIG_TCPA, 1,
-                               (struct acpi_table_header **)&buff);
-
-       if (ACPI_FAILURE(status))
-               return -ENODEV;
-
-       switch(buff->platform_class) {
-       case BIOS_SERVER:
-               len = buff->server.log_max_len;
-               start = buff->server.log_start_addr;
-               break;
-       case BIOS_CLIENT:
-       default:
-               len = buff->client.log_max_len;
-               start = buff->client.log_start_addr;
-               break;
+       if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+               status = acpi_get_table("TPM2", 1,
+                                       (struct acpi_table_header **)&tbl);
+               if (ACPI_FAILURE(status))
+                       return -ENODEV;
+
+               if (tbl->header.length <
+                               sizeof(*tbl) + sizeof(struct acpi_tpm2_phy))
+                       return -ENODEV;
+
+               tpm2_phy = (void *)tbl + sizeof(*tbl);
+               len = tpm2_phy->log_area_minimum_length;
+
+               start = tpm2_phy->log_area_start_address;
+               if (!start || !len)
+                       return -ENODEV;
+
+               format = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2;
+       } else {
+               /* Find TCPA entry in RSDT (ACPI_LOGICAL_ADDRESSING) */
+               status = acpi_get_table(ACPI_SIG_TCPA, 1,
+                                       (struct acpi_table_header **)&buff);
+               if (ACPI_FAILURE(status))
+                       return -ENODEV;
+
+               switch (buff->platform_class) {
+               case BIOS_SERVER:
+                       len = buff->server.log_max_len;
+                       start = buff->server.log_start_addr;
+                       break;
+               case BIOS_CLIENT:
+               default:
+                       len = buff->client.log_max_len;
+                       start = buff->client.log_start_addr;
+                       break;
+               }
+
+               format = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
        }
        if (!len) {
                dev_warn(&chip->dev, "%s: TCPA log area empty\n", __func__);
@@ -98,7 +119,7 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
        memcpy_fromio(log->bios_event_log, virt, len);
 
        acpi_os_unmap_iomem(virt, len);
-       return EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
+       return format;
 
 err:
        kfree(log->bios_event_log);
index 8c77e88..ddaeceb 100644 (file)
@@ -386,13 +386,8 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
        chip->cdev.owner = THIS_MODULE;
        chip->cdevs.owner = THIS_MODULE;
 
-       chip->work_space.context_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!chip->work_space.context_buf) {
-               rc = -ENOMEM;
-               goto out;
-       }
-       chip->work_space.session_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!chip->work_space.session_buf) {
+       rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE);
+       if (rc) {
                rc = -ENOMEM;
                goto out;
        }
index 0fbcede..947d1db 100644 (file)
@@ -59,6 +59,9 @@ enum tpm_addr {
 
 #define TPM_TAG_RQU_COMMAND 193
 
+/* TPM2 specific constants. */
+#define TPM2_SPACE_BUFFER_SIZE         16384 /* 16 kB */
+
 struct stclear_flags_t {
        __be16  tag;
        u8      deactivated;
@@ -228,7 +231,7 @@ unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal);
 int tpm2_probe(struct tpm_chip *chip);
 int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip);
 int tpm2_find_cc(struct tpm_chip *chip, u32 cc);
-int tpm2_init_space(struct tpm_space *space);
+int tpm2_init_space(struct tpm_space *space, unsigned int buf_size);
 void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space);
 void tpm2_flush_space(struct tpm_chip *chip);
 int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd,
index 982d341..784b8b3 100644 (file)
@@ -38,18 +38,21 @@ static void tpm2_flush_sessions(struct tpm_chip *chip, struct tpm_space *space)
        }
 }
 
-int tpm2_init_space(struct tpm_space *space)
+int tpm2_init_space(struct tpm_space *space, unsigned int buf_size)
 {
-       space->context_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       space->context_buf = kzalloc(buf_size, GFP_KERNEL);
        if (!space->context_buf)
                return -ENOMEM;
 
-       space->session_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       space->session_buf = kzalloc(buf_size, GFP_KERNEL);
        if (space->session_buf == NULL) {
                kfree(space->context_buf);
+               /* Prevent caller getting a dangling pointer. */
+               space->context_buf = NULL;
                return -ENOMEM;
        }
 
+       space->buf_size = buf_size;
        return 0;
 }
 
@@ -311,8 +314,10 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd,
               sizeof(space->context_tbl));
        memcpy(&chip->work_space.session_tbl, &space->session_tbl,
               sizeof(space->session_tbl));
-       memcpy(chip->work_space.context_buf, space->context_buf, PAGE_SIZE);
-       memcpy(chip->work_space.session_buf, space->session_buf, PAGE_SIZE);
+       memcpy(chip->work_space.context_buf, space->context_buf,
+              space->buf_size);
+       memcpy(chip->work_space.session_buf, space->session_buf,
+              space->buf_size);
 
        rc = tpm2_load_space(chip);
        if (rc) {
@@ -492,7 +497,7 @@ static int tpm2_save_space(struct tpm_chip *chip)
                        continue;
 
                rc = tpm2_save_context(chip, space->context_tbl[i],
-                                      space->context_buf, PAGE_SIZE,
+                                      space->context_buf, space->buf_size,
                                       &offset);
                if (rc == -ENOENT) {
                        space->context_tbl[i] = 0;
@@ -509,9 +514,8 @@ static int tpm2_save_space(struct tpm_chip *chip)
                        continue;
 
                rc = tpm2_save_context(chip, space->session_tbl[i],
-                                      space->session_buf, PAGE_SIZE,
+                                      space->session_buf, space->buf_size,
                                       &offset);
-
                if (rc == -ENOENT) {
                        /* handle error saving session, just forget it */
                        space->session_tbl[i] = 0;
@@ -557,8 +561,10 @@ int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space,
               sizeof(space->context_tbl));
        memcpy(&space->session_tbl, &chip->work_space.session_tbl,
               sizeof(space->session_tbl));
-       memcpy(space->context_buf, chip->work_space.context_buf, PAGE_SIZE);
-       memcpy(space->session_buf, chip->work_space.session_buf, PAGE_SIZE);
+       memcpy(space->context_buf, chip->work_space.context_buf,
+              space->buf_size);
+       memcpy(space->session_buf, chip->work_space.session_buf,
+              space->buf_size);
 
        return 0;
 out:
index 7a0a705..eef0fb0 100644 (file)
@@ -21,7 +21,7 @@ static int tpmrm_open(struct inode *inode, struct file *file)
        if (priv == NULL)
                return -ENOMEM;
 
-       rc = tpm2_init_space(&priv->space);
+       rc = tpm2_init_space(&priv->space, TPM2_SPACE_BUFFER_SIZE);
        if (rc) {
                kfree(priv);
                return -ENOMEM;
index f6670c4..089938e 100644 (file)
@@ -108,4 +108,3 @@ obj-$(CONFIG_LOONGSON1_CPUFREQ)             += loongson1-cpufreq.o
 obj-$(CONFIG_SH_CPU_FREQ)              += sh-cpufreq.o
 obj-$(CONFIG_SPARC_US2E_CPUFREQ)       += sparc-us2e-cpufreq.o
 obj-$(CONFIG_SPARC_US3_CPUFREQ)                += sparc-us3-cpufreq.o
-obj-$(CONFIG_UNICORE32)                        += unicore2-cpufreq.o
diff --git a/drivers/cpufreq/unicore2-cpufreq.c b/drivers/cpufreq/unicore2-cpufreq.c
deleted file mode 100644 (file)
index 98d3921..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * clock scaling for the UniCore-II
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/err.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/clk.h>
-#include <linux/cpufreq.h>
-
-#include <mach/hardware.h>
-
-static struct cpufreq_driver ucv2_driver;
-
-/* make sure that only the "userspace" governor is run
- * -- anything else wouldn't make sense on this platform, anyway.
- */
-static int ucv2_verify_speed(struct cpufreq_policy_data *policy)
-{
-       if (policy->cpu)
-               return -EINVAL;
-
-       cpufreq_verify_within_cpu_limits(policy);
-       return 0;
-}
-
-static int ucv2_target(struct cpufreq_policy *policy,
-                        unsigned int target_freq,
-                        unsigned int relation)
-{
-       struct cpufreq_freqs freqs;
-       int ret;
-
-       freqs.old = policy->cur;
-       freqs.new = target_freq;
-
-       cpufreq_freq_transition_begin(policy, &freqs);
-       ret = clk_set_rate(policy->clk, target_freq * 1000);
-       cpufreq_freq_transition_end(policy, &freqs, ret);
-
-       return ret;
-}
-
-static int __init ucv2_cpu_init(struct cpufreq_policy *policy)
-{
-       if (policy->cpu != 0)
-               return -EINVAL;
-
-       policy->min = policy->cpuinfo.min_freq = 250000;
-       policy->max = policy->cpuinfo.max_freq = 1000000;
-       policy->clk = clk_get(NULL, "MAIN_CLK");
-       return PTR_ERR_OR_ZERO(policy->clk);
-}
-
-static struct cpufreq_driver ucv2_driver = {
-       .flags          = CPUFREQ_STICKY | CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING,
-       .verify         = ucv2_verify_speed,
-       .target         = ucv2_target,
-       .get            = cpufreq_generic_get,
-       .init           = ucv2_cpu_init,
-       .name           = "UniCore-II",
-};
-
-static int __init ucv2_cpufreq_init(void)
-{
-       return cpufreq_register_driver(&ucv2_driver);
-}
-
-arch_initcall(ucv2_cpufreq_init);
index 802b9ad..aa3a4ed 100644 (file)
@@ -624,6 +624,8 @@ config CRYPTO_DEV_QCE_SKCIPHER
 config CRYPTO_DEV_QCE_SHA
        bool
        depends on CRYPTO_DEV_QCE
+       select CRYPTO_SHA1
+       select CRYPTO_SHA256
 
 choice
        prompt "Algorithms enabled for QCE acceleration"
@@ -756,10 +758,9 @@ config CRYPTO_DEV_ZYNQMP_AES
 config CRYPTO_DEV_MEDIATEK
        tristate "MediaTek's EIP97 Cryptographic Engine driver"
        depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST
-       select CRYPTO_AES
+       select CRYPTO_LIB_AES
        select CRYPTO_AEAD
        select CRYPTO_SKCIPHER
-       select CRYPTO_CTR
        select CRYPTO_SHA1
        select CRYPTO_SHA256
        select CRYPTO_SHA512
@@ -865,4 +866,18 @@ source "drivers/crypto/hisilicon/Kconfig"
 
 source "drivers/crypto/amlogic/Kconfig"
 
+config CRYPTO_DEV_SA2UL
+       tristate "Support for TI security accelerator"
+       depends on ARCH_K3 || COMPILE_TEST
+       select ARM64_CRYPTO
+       select CRYPTO_AES
+       select CRYPTO_AES_ARM64
+       select CRYPTO_ALGAPI
+       select HW_RANDOM
+       select SG_SPLIT
+       help
+         K3 devices include a security accelerator engine that may be
+         used for crypto offload.  Select this if you want to use hardware
+         acceleration for cryptographic algorithms on these devices.
+
 endif # CRYPTO_HW
index 944ed72..53fc115 100644 (file)
@@ -38,6 +38,7 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
 obj-$(CONFIG_CRYPTO_DEV_QCOM_RNG) += qcom-rng.o
 obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
+obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o
 obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
 obj-$(CONFIG_ARCH_STM32) += stm32/
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
index 7f22d30..b72de89 100644 (file)
@@ -122,19 +122,17 @@ static int noinline_for_stack sun4i_ss_cipher_poll_fallback(struct skcipher_requ
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
        struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
        struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq);
-       SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, op->fallback_tfm);
        int err;
 
-       skcipher_request_set_sync_tfm(subreq, op->fallback_tfm);
-       skcipher_request_set_callback(subreq, areq->base.flags, NULL,
-                                     NULL);
-       skcipher_request_set_crypt(subreq, areq->src, areq->dst,
+       skcipher_request_set_tfm(&ctx->fallback_req, op->fallback_tfm);
+       skcipher_request_set_callback(&ctx->fallback_req, areq->base.flags,
+                                     areq->base.complete, areq->base.data);
+       skcipher_request_set_crypt(&ctx->fallback_req, areq->src, areq->dst,
                                   areq->cryptlen, areq->iv);
        if (ctx->mode & SS_DECRYPTION)
-               err = crypto_skcipher_decrypt(subreq);
+               err = crypto_skcipher_decrypt(&ctx->fallback_req);
        else
-               err = crypto_skcipher_encrypt(subreq);
-       skcipher_request_zero(subreq);
+               err = crypto_skcipher_encrypt(&ctx->fallback_req);
 
        return err;
 }
@@ -494,23 +492,25 @@ int sun4i_ss_cipher_init(struct crypto_tfm *tfm)
                            alg.crypto.base);
        op->ss = algt->ss;
 
-       crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
-                                   sizeof(struct sun4i_cipher_req_ctx));
-
-       op->fallback_tfm = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+       op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(op->fallback_tfm)) {
                dev_err(op->ss->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
                        name, PTR_ERR(op->fallback_tfm));
                return PTR_ERR(op->fallback_tfm);
        }
 
+       crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
+                                   sizeof(struct sun4i_cipher_req_ctx) +
+                                   crypto_skcipher_reqsize(op->fallback_tfm));
+
+
        err = pm_runtime_get_sync(op->ss->dev);
        if (err < 0)
                goto error_pm;
 
        return 0;
 error_pm:
-       crypto_free_sync_skcipher(op->fallback_tfm);
+       crypto_free_skcipher(op->fallback_tfm);
        return err;
 }
 
@@ -518,7 +518,7 @@ void sun4i_ss_cipher_exit(struct crypto_tfm *tfm)
 {
        struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
 
-       crypto_free_sync_skcipher(op->fallback_tfm);
+       crypto_free_skcipher(op->fallback_tfm);
        pm_runtime_put(op->ss->dev);
 }
 
@@ -546,10 +546,10 @@ int sun4i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
        op->keylen = keylen;
        memcpy(op->key, key, keylen);
 
-       crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
 
-       return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+       return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
 }
 
 /* check and set the DES key, prepare the mode to be used */
@@ -566,10 +566,10 @@ int sun4i_ss_des_setkey(struct crypto_skcipher *tfm, const u8 *key,
        op->keylen = keylen;
        memcpy(op->key, key, keylen);
 
-       crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
 
-       return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+       return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
 }
 
 /* check and set the 3DES key, prepare the mode to be used */
@@ -586,9 +586,9 @@ int sun4i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
        op->keylen = keylen;
        memcpy(op->key, key, keylen);
 
-       crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
 
-       return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+       return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
 
 }
index 2b4c633..163962f 100644 (file)
@@ -170,11 +170,12 @@ struct sun4i_tfm_ctx {
        u32 keylen;
        u32 keymode;
        struct sun4i_ss_ctx *ss;
-       struct crypto_sync_skcipher *fallback_tfm;
+       struct crypto_skcipher *fallback_tfm;
 };
 
 struct sun4i_cipher_req_ctx {
        u32 mode;
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 struct sun4i_req_ctx {
index a6abb70..1e4f9a5 100644 (file)
@@ -58,23 +58,20 @@ static int sun8i_ce_cipher_fallback(struct skcipher_request *areq)
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
        struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
        struct sun8i_ce_alg_template *algt;
-#endif
-       SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, op->fallback_tfm);
 
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
        algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
        algt->stat_fb++;
 #endif
 
-       skcipher_request_set_sync_tfm(subreq, op->fallback_tfm);
-       skcipher_request_set_callback(subreq, areq->base.flags, NULL, NULL);
-       skcipher_request_set_crypt(subreq, areq->src, areq->dst,
+       skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
+       skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
+                                     areq->base.complete, areq->base.data);
+       skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst,
                                   areq->cryptlen, areq->iv);
        if (rctx->op_dir & CE_DECRYPTION)
-               err = crypto_skcipher_decrypt(subreq);
+               err = crypto_skcipher_decrypt(&rctx->fallback_req);
        else
-               err = crypto_skcipher_encrypt(subreq);
-       skcipher_request_zero(subreq);
+               err = crypto_skcipher_encrypt(&rctx->fallback_req);
        return err;
 }
 
@@ -335,18 +332,20 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
        algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
        op->ce = algt->ce;
 
-       sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx);
-
-       op->fallback_tfm = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+       op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(op->fallback_tfm)) {
                dev_err(op->ce->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
                        name, PTR_ERR(op->fallback_tfm));
                return PTR_ERR(op->fallback_tfm);
        }
 
+       sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx) +
+                        crypto_skcipher_reqsize(op->fallback_tfm);
+
+
        dev_info(op->ce->dev, "Fallback for %s is %s\n",
                 crypto_tfm_alg_driver_name(&sktfm->base),
-                crypto_tfm_alg_driver_name(crypto_skcipher_tfm(&op->fallback_tfm->base)));
+                crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
 
        op->enginectx.op.do_one_request = sun8i_ce_handle_cipher_request;
        op->enginectx.op.prepare_request = NULL;
@@ -358,7 +357,8 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
 
        return 0;
 error_pm:
-       crypto_free_sync_skcipher(op->fallback_tfm);
+       pm_runtime_put_noidle(op->ce->dev);
+       crypto_free_skcipher(op->fallback_tfm);
        return err;
 }
 
@@ -370,7 +370,7 @@ void sun8i_ce_cipher_exit(struct crypto_tfm *tfm)
                memzero_explicit(op->key, op->keylen);
                kfree(op->key);
        }
-       crypto_free_sync_skcipher(op->fallback_tfm);
+       crypto_free_skcipher(op->fallback_tfm);
        pm_runtime_put_sync_suspend(op->ce->dev);
 }
 
@@ -400,10 +400,10 @@ int sun8i_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
        if (!op->key)
                return -ENOMEM;
 
-       crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
 
-       return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+       return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
 }
 
 int sun8i_ce_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
@@ -425,8 +425,8 @@ int sun8i_ce_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
        if (!op->key)
                return -ENOMEM;
 
-       crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
 
-       return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+       return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
 }
index b957061..138759d 100644 (file)
@@ -185,7 +185,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
@@ -211,7 +212,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
@@ -236,7 +238,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
@@ -262,7 +265,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
index 0e9eac3..963645f 100644 (file)
@@ -181,12 +181,14 @@ struct sun8i_ce_dev {
 
 /*
  * struct sun8i_cipher_req_ctx - context for a skcipher request
- * @op_dir:    direction (encrypt vs decrypt) for this request
- * @flow:      the flow to use for this request
+ * @op_dir:            direction (encrypt vs decrypt) for this request
+ * @flow:              the flow to use for this request
+ * @fallback_req:      request struct for invoking the fallback skcipher TFM
  */
 struct sun8i_cipher_req_ctx {
        u32 op_dir;
        int flow;
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 /*
@@ -202,7 +204,7 @@ struct sun8i_cipher_tfm_ctx {
        u32 *key;
        u32 keylen;
        struct sun8i_ce_dev *ce;
-       struct crypto_sync_skcipher *fallback_tfm;
+       struct crypto_skcipher *fallback_tfm;
 };
 
 /*
index c89cb2e..7a13167 100644 (file)
@@ -73,7 +73,6 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq)
        struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
        int err;
 
-       SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, op->fallback_tfm);
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
        struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
        struct sun8i_ss_alg_template *algt;
@@ -81,15 +80,15 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq)
        algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
        algt->stat_fb++;
 #endif
-       skcipher_request_set_sync_tfm(subreq, op->fallback_tfm);
-       skcipher_request_set_callback(subreq, areq->base.flags, NULL, NULL);
-       skcipher_request_set_crypt(subreq, areq->src, areq->dst,
+       skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
+       skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
+                                     areq->base.complete, areq->base.data);
+       skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst,
                                   areq->cryptlen, areq->iv);
        if (rctx->op_dir & SS_DECRYPTION)
-               err = crypto_skcipher_decrypt(subreq);
+               err = crypto_skcipher_decrypt(&rctx->fallback_req);
        else
-               err = crypto_skcipher_encrypt(subreq);
-       skcipher_request_zero(subreq);
+               err = crypto_skcipher_encrypt(&rctx->fallback_req);
        return err;
 }
 
@@ -334,18 +333,20 @@ int sun8i_ss_cipher_init(struct crypto_tfm *tfm)
        algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
        op->ss = algt->ss;
 
-       sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx);
-
-       op->fallback_tfm = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+       op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(op->fallback_tfm)) {
                dev_err(op->ss->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
                        name, PTR_ERR(op->fallback_tfm));
                return PTR_ERR(op->fallback_tfm);
        }
 
+       sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx) +
+                        crypto_skcipher_reqsize(op->fallback_tfm);
+
+
        dev_info(op->ss->dev, "Fallback for %s is %s\n",
                 crypto_tfm_alg_driver_name(&sktfm->base),
-                crypto_tfm_alg_driver_name(crypto_skcipher_tfm(&op->fallback_tfm->base)));
+                crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
 
        op->enginectx.op.do_one_request = sun8i_ss_handle_cipher_request;
        op->enginectx.op.prepare_request = NULL;
@@ -359,7 +360,7 @@ int sun8i_ss_cipher_init(struct crypto_tfm *tfm)
 
        return 0;
 error_pm:
-       crypto_free_sync_skcipher(op->fallback_tfm);
+       crypto_free_skcipher(op->fallback_tfm);
        return err;
 }
 
@@ -371,7 +372,7 @@ void sun8i_ss_cipher_exit(struct crypto_tfm *tfm)
                memzero_explicit(op->key, op->keylen);
                kfree(op->key);
        }
-       crypto_free_sync_skcipher(op->fallback_tfm);
+       crypto_free_skcipher(op->fallback_tfm);
        pm_runtime_put_sync(op->ss->dev);
 }
 
@@ -401,10 +402,10 @@ int sun8i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
        if (!op->key)
                return -ENOMEM;
 
-       crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
 
-       return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+       return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
 }
 
 int sun8i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
@@ -427,8 +428,8 @@ int sun8i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
        if (!op->key)
                return -ENOMEM;
 
-       crypto_sync_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
 
-       return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+       return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
 }
index 5d9d0fe..9a23515 100644 (file)
@@ -169,7 +169,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
@@ -195,7 +196,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
@@ -220,7 +222,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
@@ -246,7 +249,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
index 29c44f2..0405767 100644 (file)
@@ -135,17 +135,18 @@ struct sun8i_ss_dev {
 
 /*
  * struct sun8i_cipher_req_ctx - context for a skcipher request
- * @t_src:     list of mapped SGs with their size
- * @t_dst:     list of mapped SGs with their size
- * @p_key:     DMA address of the key
- * @p_iv:      DMA address of the IV
- * @method:    current algorithm for this request
- * @op_mode:   op_mode for this request
- * @op_dir:    direction (encrypt vs decrypt) for this request
- * @flow:      the flow to use for this request
- * @ivlen:     size of biv
- * @keylen:    keylen for this request
- * @biv:       buffer which contain the IV
+ * @t_src:             list of mapped SGs with their size
+ * @t_dst:             list of mapped SGs with their size
+ * @p_key:             DMA address of the key
+ * @p_iv:              DMA address of the IV
+ * @method:            current algorithm for this request
+ * @op_mode:           op_mode for this request
+ * @op_dir:            direction (encrypt vs decrypt) for this request
+ * @flow:              the flow to use for this request
+ * @ivlen:             size of biv
+ * @keylen:            keylen for this request
+ * @biv:               buffer which contain the IV
+ * @fallback_req:      request struct for invoking the fallback skcipher TFM
  */
 struct sun8i_cipher_req_ctx {
        struct sginfo t_src[MAX_SG];
@@ -159,6 +160,7 @@ struct sun8i_cipher_req_ctx {
        unsigned int ivlen;
        unsigned int keylen;
        void *biv;
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 /*
@@ -174,7 +176,7 @@ struct sun8i_cipher_tfm_ctx {
        u32 *key;
        u32 keylen;
        struct sun8i_ss_dev *ss;
-       struct crypto_sync_skcipher *fallback_tfm;
+       struct crypto_skcipher *fallback_tfm;
 };
 
 /*
index cf95476..cf2c676 100644 (file)
@@ -1,7 +1,7 @@
 config CRYPTO_DEV_AMLOGIC_GXL
        tristate "Support for amlogic cryptographic offloader"
        depends on HAS_IOMEM
-       default y if ARCH_MESON
+       default m if ARCH_MESON
        select CRYPTO_SKCIPHER
        select CRYPTO_ENGINE
        select CRYPTO_ECB
index 9819dd5..5880b94 100644 (file)
@@ -64,22 +64,20 @@ static int meson_cipher_do_fallback(struct skcipher_request *areq)
 #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
        struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
        struct meson_alg_template *algt;
-#endif
-       SYNC_SKCIPHER_REQUEST_ON_STACK(req, op->fallback_tfm);
 
-#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
        algt = container_of(alg, struct meson_alg_template, alg.skcipher);
        algt->stat_fb++;
 #endif
-       skcipher_request_set_sync_tfm(req, op->fallback_tfm);
-       skcipher_request_set_callback(req, areq->base.flags, NULL, NULL);
-       skcipher_request_set_crypt(req, areq->src, areq->dst,
+       skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
+       skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
+                                     areq->base.complete, areq->base.data);
+       skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst,
                                   areq->cryptlen, areq->iv);
+
        if (rctx->op_dir == MESON_DECRYPT)
-               err = crypto_skcipher_decrypt(req);
+               err = crypto_skcipher_decrypt(&rctx->fallback_req);
        else
-               err = crypto_skcipher_encrypt(req);
-       skcipher_request_zero(req);
+               err = crypto_skcipher_encrypt(&rctx->fallback_req);
        return err;
 }
 
@@ -321,15 +319,16 @@ int meson_cipher_init(struct crypto_tfm *tfm)
        algt = container_of(alg, struct meson_alg_template, alg.skcipher);
        op->mc = algt->mc;
 
-       sktfm->reqsize = sizeof(struct meson_cipher_req_ctx);
-
-       op->fallback_tfm = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+       op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(op->fallback_tfm)) {
                dev_err(op->mc->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
                        name, PTR_ERR(op->fallback_tfm));
                return PTR_ERR(op->fallback_tfm);
        }
 
+       sktfm->reqsize = sizeof(struct meson_cipher_req_ctx) +
+                        crypto_skcipher_reqsize(op->fallback_tfm);
+
        op->enginectx.op.do_one_request = meson_handle_cipher_request;
        op->enginectx.op.prepare_request = NULL;
        op->enginectx.op.unprepare_request = NULL;
@@ -345,7 +344,7 @@ void meson_cipher_exit(struct crypto_tfm *tfm)
                memzero_explicit(op->key, op->keylen);
                kfree(op->key);
        }
-       crypto_free_sync_skcipher(op->fallback_tfm);
+       crypto_free_skcipher(op->fallback_tfm);
 }
 
 int meson_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
@@ -377,5 +376,5 @@ int meson_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
        if (!op->key)
                return -ENOMEM;
 
-       return crypto_sync_skcipher_setkey(op->fallback_tfm, key, keylen);
+       return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
 }
index 411857f..466552a 100644 (file)
@@ -54,7 +54,8 @@ static struct meson_alg_template mc_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct meson_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
@@ -79,7 +80,8 @@ static struct meson_alg_template mc_algs[] = {
                        .cra_priority = 400,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                               CRYPTO_ALG_NEED_FALLBACK,
                        .cra_ctxsize = sizeof(struct meson_cipher_tfm_ctx),
                        .cra_module = THIS_MODULE,
                        .cra_alignmask = 0xf,
index b7f2de9..dc0f142 100644 (file)
@@ -109,6 +109,7 @@ struct meson_dev {
 struct meson_cipher_req_ctx {
        u32 op_dir;
        int flow;
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 /*
@@ -126,7 +127,7 @@ struct meson_cipher_tfm_ctx {
        u32 keylen;
        u32 keymode;
        struct meson_dev *mc;
-       struct crypto_sync_skcipher *fallback_tfm;
+       struct crypto_skcipher *fallback_tfm;
 };
 
 /*
index 62ba032..1a46eed 100644 (file)
@@ -2630,7 +2630,8 @@ static struct ahash_alg hash_algos[] = {
                        .cra_name = "sha1",
                        .cra_driver_name = "artpec-sha1",
                        .cra_priority = 300,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = SHA1_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct artpec6_hashalg_context),
                        .cra_alignmask = 3,
@@ -2653,7 +2654,8 @@ static struct ahash_alg hash_algos[] = {
                        .cra_name = "sha256",
                        .cra_driver_name = "artpec-sha256",
                        .cra_priority = 300,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = SHA256_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct artpec6_hashalg_context),
                        .cra_alignmask = 3,
@@ -2677,7 +2679,8 @@ static struct ahash_alg hash_algos[] = {
                        .cra_name = "hmac(sha256)",
                        .cra_driver_name = "artpec-hmac-sha256",
                        .cra_priority = 300,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = SHA256_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct artpec6_hashalg_context),
                        .cra_alignmask = 3,
@@ -2696,7 +2699,8 @@ static struct skcipher_alg crypto_algos[] = {
                        .cra_name = "ecb(aes)",
                        .cra_driver_name = "artpec6-ecb-aes",
                        .cra_priority = 300,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
                        .cra_alignmask = 3,
@@ -2717,6 +2721,7 @@ static struct skcipher_alg crypto_algos[] = {
                        .cra_driver_name = "artpec6-ctr-aes",
                        .cra_priority = 300,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_NEED_FALLBACK,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
@@ -2738,7 +2743,8 @@ static struct skcipher_alg crypto_algos[] = {
                        .cra_name = "cbc(aes)",
                        .cra_driver_name = "artpec6-cbc-aes",
                        .cra_priority = 300,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
                        .cra_alignmask = 3,
@@ -2759,7 +2765,8 @@ static struct skcipher_alg crypto_algos[] = {
                        .cra_name = "xts(aes)",
                        .cra_driver_name = "artpec6-xts-aes",
                        .cra_priority = 300,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
                        .cra_alignmask = 3,
@@ -2790,6 +2797,7 @@ static struct aead_alg aead_algos[] = {
                        .cra_driver_name = "artpec-gcm-aes",
                        .cra_priority = 300,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
index a353217..8a7fa1a 100644 (file)
@@ -3233,7 +3233,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(md5),cbc(aes))",
                        .cra_driver_name = "authenc-hmac-md5-cbc-aes-iproc",
                        .cra_blocksize = AES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                .ivsize = AES_BLOCK_SIZE,
@@ -3256,7 +3258,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha1),cbc(aes))",
                        .cra_driver_name = "authenc-hmac-sha1-cbc-aes-iproc",
                        .cra_blocksize = AES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = AES_BLOCK_SIZE,
@@ -3279,7 +3283,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha256),cbc(aes))",
                        .cra_driver_name = "authenc-hmac-sha256-cbc-aes-iproc",
                        .cra_blocksize = AES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = AES_BLOCK_SIZE,
@@ -3302,7 +3308,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(md5),cbc(des))",
                        .cra_driver_name = "authenc-hmac-md5-cbc-des-iproc",
                        .cra_blocksize = DES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES_BLOCK_SIZE,
@@ -3325,7 +3333,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha1),cbc(des))",
                        .cra_driver_name = "authenc-hmac-sha1-cbc-des-iproc",
                        .cra_blocksize = DES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES_BLOCK_SIZE,
@@ -3348,7 +3358,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha224),cbc(des))",
                        .cra_driver_name = "authenc-hmac-sha224-cbc-des-iproc",
                        .cra_blocksize = DES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES_BLOCK_SIZE,
@@ -3371,7 +3383,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha256),cbc(des))",
                        .cra_driver_name = "authenc-hmac-sha256-cbc-des-iproc",
                        .cra_blocksize = DES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES_BLOCK_SIZE,
@@ -3394,7 +3408,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha384),cbc(des))",
                        .cra_driver_name = "authenc-hmac-sha384-cbc-des-iproc",
                        .cra_blocksize = DES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES_BLOCK_SIZE,
@@ -3417,7 +3433,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha512),cbc(des))",
                        .cra_driver_name = "authenc-hmac-sha512-cbc-des-iproc",
                        .cra_blocksize = DES_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES_BLOCK_SIZE,
@@ -3440,7 +3458,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(md5),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-md5-cbc-des3-iproc",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES3_EDE_BLOCK_SIZE,
@@ -3463,7 +3483,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha1),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-sha1-cbc-des3-iproc",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES3_EDE_BLOCK_SIZE,
@@ -3486,7 +3508,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha224),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-sha224-cbc-des3-iproc",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES3_EDE_BLOCK_SIZE,
@@ -3509,7 +3533,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha256),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-sha256-cbc-des3-iproc",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES3_EDE_BLOCK_SIZE,
@@ -3532,7 +3558,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha384),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-sha384-cbc-des3-iproc",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES3_EDE_BLOCK_SIZE,
@@ -3555,7 +3583,9 @@ static struct iproc_alg_s driver_algs[] = {
                        .cra_name = "authenc(hmac(sha512),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-sha512-cbc-des3-iproc",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC
+                       .cra_flags = CRYPTO_ALG_NEED_FALLBACK |
+                                    CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY
                 },
                 .setkey = aead_authenc_setkey,
                 .ivsize = DES3_EDE_BLOCK_SIZE,
@@ -3811,7 +3841,8 @@ static struct iproc_alg_s driver_algs[] = {
                                    .cra_name = "md5",
                                    .cra_driver_name = "md5-iproc",
                                    .cra_blocksize = MD5_BLOCK_WORDS * 4,
-                                   .cra_flags = CRYPTO_ALG_ASYNC,
+                                   .cra_flags = CRYPTO_ALG_ASYNC |
+                                                CRYPTO_ALG_ALLOCATES_MEMORY,
                                }
                      },
         .cipher_info = {
@@ -4508,7 +4539,9 @@ static int spu_register_skcipher(struct iproc_alg_s *driver_alg)
        crypto->base.cra_priority = cipher_pri;
        crypto->base.cra_alignmask = 0;
        crypto->base.cra_ctxsize = sizeof(struct iproc_ctx_s);
-       crypto->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+       crypto->base.cra_flags = CRYPTO_ALG_ASYNC |
+                                CRYPTO_ALG_ALLOCATES_MEMORY |
+                                CRYPTO_ALG_KERN_DRIVER_ONLY;
 
        crypto->init = skcipher_init_tfm;
        crypto->exit = skcipher_exit_tfm;
@@ -4547,7 +4580,8 @@ static int spu_register_ahash(struct iproc_alg_s *driver_alg)
        hash->halg.base.cra_ctxsize = sizeof(struct iproc_ctx_s);
        hash->halg.base.cra_init = ahash_cra_init;
        hash->halg.base.cra_exit = generic_cra_exit;
-       hash->halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+       hash->halg.base.cra_flags = CRYPTO_ALG_ASYNC |
+                                   CRYPTO_ALG_ALLOCATES_MEMORY;
        hash->halg.statesize = sizeof(struct spu_hash_export_s);
 
        if (driver_alg->auth_info.mode != HASH_MODE_HMAC) {
@@ -4591,7 +4625,7 @@ static int spu_register_aead(struct iproc_alg_s *driver_alg)
        aead->base.cra_alignmask = 0;
        aead->base.cra_ctxsize = sizeof(struct iproc_ctx_s);
 
-       aead->base.cra_flags |= CRYPTO_ALG_ASYNC;
+       aead->base.cra_flags |= CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY;
        /* setkey set in alg initialization */
        aead->setauthsize = aead_setauthsize;
        aead->encrypt = aead_encrypt;
index b2f9882..91feda5 100644 (file)
@@ -810,12 +810,6 @@ static int ctr_skcipher_setkey(struct crypto_skcipher *skcipher,
        return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
 }
 
-static int arc4_skcipher_setkey(struct crypto_skcipher *skcipher,
-                               const u8 *key, unsigned int keylen)
-{
-       return skcipher_setkey(skcipher, key, keylen, 0);
-}
-
 static int des_skcipher_setkey(struct crypto_skcipher *skcipher,
                               const u8 *key, unsigned int keylen)
 {
@@ -838,7 +832,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
        u32 *desc;
 
        if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
-               dev_err(jrdev, "key size mismatch\n");
+               dev_dbg(jrdev, "key size mismatch\n");
                return -EINVAL;
        }
 
@@ -1967,21 +1961,6 @@ static struct caam_skcipher_alg driver_algs[] = {
                },
                .caam.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_ECB,
        },
-       {
-               .skcipher = {
-                       .base = {
-                               .cra_name = "ecb(arc4)",
-                               .cra_driver_name = "ecb-arc4-caam",
-                               .cra_blocksize = ARC4_BLOCK_SIZE,
-                       },
-                       .setkey = arc4_skcipher_setkey,
-                       .encrypt = skcipher_encrypt,
-                       .decrypt = skcipher_decrypt,
-                       .min_keysize = ARC4_MIN_KEY_SIZE,
-                       .max_keysize = ARC4_MAX_KEY_SIZE,
-               },
-               .caam.class1_alg_type = OP_ALG_ALGSEL_ARC4 | OP_ALG_AAI_ECB,
-       },
 };
 
 static struct caam_aead_alg driver_aeads[] = {
@@ -3433,7 +3412,8 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
        alg->base.cra_module = THIS_MODULE;
        alg->base.cra_priority = CAAM_CRA_PRIORITY;
        alg->base.cra_ctxsize = sizeof(struct caam_ctx);
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                             CRYPTO_ALG_KERN_DRIVER_ONLY;
 
        alg->init = caam_cra_init;
        alg->exit = caam_cra_exit;
@@ -3446,7 +3426,8 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
        alg->base.cra_module = THIS_MODULE;
        alg->base.cra_priority = CAAM_CRA_PRIORITY;
        alg->base.cra_ctxsize = sizeof(struct caam_ctx);
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                             CRYPTO_ALG_KERN_DRIVER_ONLY;
 
        alg->init = caam_aead_init;
        alg->exit = caam_aead_exit;
@@ -3457,7 +3438,6 @@ int caam_algapi_init(struct device *ctrldev)
        struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
        int i = 0, err = 0;
        u32 aes_vid, aes_inst, des_inst, md_vid, md_inst, ccha_inst, ptha_inst;
-       u32 arc4_inst;
        unsigned int md_limit = SHA512_DIGEST_SIZE;
        bool registered = false, gcm_support;
 
@@ -3477,8 +3457,6 @@ int caam_algapi_init(struct device *ctrldev)
                           CHA_ID_LS_DES_SHIFT;
                aes_inst = cha_inst & CHA_ID_LS_AES_MASK;
                md_inst = (cha_inst & CHA_ID_LS_MD_MASK) >> CHA_ID_LS_MD_SHIFT;
-               arc4_inst = (cha_inst & CHA_ID_LS_ARC4_MASK) >>
-                           CHA_ID_LS_ARC4_SHIFT;
                ccha_inst = 0;
                ptha_inst = 0;
 
@@ -3499,7 +3477,6 @@ int caam_algapi_init(struct device *ctrldev)
                md_inst = mdha & CHA_VER_NUM_MASK;
                ccha_inst = rd_reg32(&priv->ctrl->vreg.ccha) & CHA_VER_NUM_MASK;
                ptha_inst = rd_reg32(&priv->ctrl->vreg.ptha) & CHA_VER_NUM_MASK;
-               arc4_inst = rd_reg32(&priv->ctrl->vreg.afha) & CHA_VER_NUM_MASK;
 
                gcm_support = aesa & CHA_VER_MISC_AES_GCM;
        }
@@ -3522,10 +3499,6 @@ int caam_algapi_init(struct device *ctrldev)
                if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES))
                                continue;
 
-               /* Skip ARC4 algorithms if not supported by device */
-               if (!arc4_inst && alg_sel == OP_ALG_ALGSEL_ARC4)
-                       continue;
-
                /*
                 * Check support for AES modes not available
                 * on LP devices.
index 27e36bd..bb1c010 100644 (file)
@@ -728,7 +728,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
        int ret = 0;
 
        if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
-               dev_err(jrdev, "key size mismatch\n");
+               dev_dbg(jrdev, "key size mismatch\n");
                return -EINVAL;
        }
 
@@ -2502,7 +2502,8 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
        alg->base.cra_module = THIS_MODULE;
        alg->base.cra_priority = CAAM_CRA_PRIORITY;
        alg->base.cra_ctxsize = sizeof(struct caam_ctx);
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                             CRYPTO_ALG_KERN_DRIVER_ONLY;
 
        alg->init = caam_cra_init;
        alg->exit = caam_cra_exit;
@@ -2515,7 +2516,8 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
        alg->base.cra_module = THIS_MODULE;
        alg->base.cra_priority = CAAM_CRA_PRIORITY;
        alg->base.cra_ctxsize = sizeof(struct caam_ctx);
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                             CRYPTO_ALG_KERN_DRIVER_ONLY;
 
        alg->init = caam_aead_init;
        alg->exit = caam_aead_exit;
index 28669cb..66ae1d5 100644 (file)
@@ -1058,7 +1058,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
        u32 *desc;
 
        if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
-               dev_err(dev, "key size mismatch\n");
+               dev_dbg(dev, "key size mismatch\n");
                return -EINVAL;
        }
 
@@ -2912,7 +2912,8 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
        alg->base.cra_module = THIS_MODULE;
        alg->base.cra_priority = CAAM_CRA_PRIORITY;
        alg->base.cra_ctxsize = sizeof(struct caam_ctx);
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                             CRYPTO_ALG_KERN_DRIVER_ONLY;
 
        alg->init = caam_cra_init_skcipher;
        alg->exit = caam_cra_exit;
@@ -2925,7 +2926,8 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
        alg->base.cra_module = THIS_MODULE;
        alg->base.cra_priority = CAAM_CRA_PRIORITY;
        alg->base.cra_ctxsize = sizeof(struct caam_ctx);
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+                             CRYPTO_ALG_KERN_DRIVER_ONLY;
 
        alg->init = caam_cra_init_aead;
        alg->exit = caam_cra_exit_aead;
@@ -4004,7 +4006,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
        struct dpaa2_sg_entry *sg_table;
-       int ret;
+       int ret = -ENOMEM;
 
        src_nents = sg_nents_for_len(req->src, req->nbytes);
        if (src_nents < 0) {
@@ -4017,7 +4019,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
                                          DMA_TO_DEVICE);
                if (!mapped_nents) {
                        dev_err(ctx->dev, "unable to DMA map source\n");
-                       return -ENOMEM;
+                       return ret;
                }
        } else {
                mapped_nents = 0;
@@ -4027,7 +4029,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
        edesc = qi_cache_zalloc(GFP_DMA | flags);
        if (!edesc) {
                dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE);
-               return -ENOMEM;
+               return ret;
        }
 
        edesc->src_nents = src_nents;
@@ -4082,7 +4084,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 unmap:
        ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE);
        qi_cache_free(edesc);
-       return -ENOMEM;
+       return ret;
 }
 
 static int ahash_update_first(struct ahash_request *req)
@@ -4498,7 +4500,11 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
                                 sizeof(struct caam_hash_state));
 
-       return ahash_set_sh_desc(ahash);
+       /*
+        * For keyed hash algorithms shared descriptors
+        * will be created later in setkey() callback
+        */
+       return alg->setkey ? 0 : ahash_set_sh_desc(ahash);
 }
 
 static void caam_hash_cra_exit(struct crypto_tfm *tfm)
@@ -4547,7 +4553,7 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev,
        alg->cra_priority = CAAM_CRA_PRIORITY;
        alg->cra_blocksize = template->blocksize;
        alg->cra_alignmask = 0;
-       alg->cra_flags = CRYPTO_ALG_ASYNC;
+       alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY;
 
        t_alg->alg_type = template->alg_type;
        t_alg->dev = dev;
@@ -4697,6 +4703,13 @@ static void dpaa2_dpseci_free(struct dpaa2_caam_priv *priv)
 {
        struct device *dev = priv->dev;
        struct fsl_mc_device *ls_dev = to_fsl_mc_device(dev);
+       int err;
+
+       if (DPSECI_VER(priv->major_ver, priv->minor_ver) > DPSECI_VER(5, 3)) {
+               err = dpseci_reset(priv->mc_io, 0, ls_dev->mc_handle);
+               if (err)
+                       dev_err(dev, "dpseci_reset() failed\n");
+       }
 
        dpaa2_dpseci_congestion_free(priv);
        dpseci_close(priv->mc_io, 0, ls_dev->mc_handle);
@@ -4894,6 +4907,14 @@ static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
 
        dev_info(dev, "dpseci v%d.%d\n", priv->major_ver, priv->minor_ver);
 
+       if (DPSECI_VER(priv->major_ver, priv->minor_ver) > DPSECI_VER(5, 3)) {
+               err = dpseci_reset(priv->mc_io, 0, ls_dev->mc_handle);
+               if (err) {
+                       dev_err(dev, "dpseci_reset() failed\n");
+                       goto err_get_vers;
+               }
+       }
+
        err = dpseci_get_attributes(priv->mc_io, 0, ls_dev->mc_handle,
                                    &priv->dpseci_attr);
        if (err) {
@@ -5221,7 +5242,7 @@ static int dpaa2_caam_probe(struct fsl_mc_device *dpseci_dev)
                if (IS_ERR(t_alg)) {
                        err = PTR_ERR(t_alg);
                        dev_warn(dev, "%s hash alg allocation failed: %d\n",
-                                alg->driver_name, err);
+                                alg->hmac_driver_name, err);
                        continue;
                }
 
@@ -5384,6 +5405,7 @@ static const struct fsl_mc_device_id dpaa2_caam_match_id_table[] = {
        },
        { .vendor = 0x0 }
 };
+MODULE_DEVICE_TABLE(fslmc, dpaa2_caam_match_id_table);
 
 static struct fsl_mc_driver dpaa2_caam_driver = {
        .driver = {
index 27ff4a3..e8a6d8b 100644 (file)
@@ -1927,7 +1927,7 @@ caam_hash_alloc(struct caam_hash_template *template,
        alg->cra_priority = CAAM_CRA_PRIORITY;
        alg->cra_blocksize = template->blocksize;
        alg->cra_alignmask = 0;
-       alg->cra_flags = CRYPTO_ALG_ASYNC;
+       alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY;
 
        t_alg->alg_type = template->alg_type;
 
index 60e2a54..c3c22a8 100644 (file)
@@ -43,7 +43,6 @@
 #include <crypto/akcipher.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/skcipher.h>
-#include <crypto/arc4.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/rsa.h>
index f3d20b7..94502f1 100644 (file)
@@ -469,7 +469,7 @@ static int caam_get_era(struct caam_ctrl __iomem *ctrl)
  * pipeline to a depth of 1 (from its default of 4) to preclude this situation
  * from occurring.
  */
-static void handle_imx6_err005766(u32 *mcr)
+static void handle_imx6_err005766(u32 __iomem *mcr)
 {
        if (of_machine_is_compatible("fsl,imx6q") ||
            of_machine_is_compatible("fsl,imx6dl") ||
@@ -527,11 +527,21 @@ static const struct caam_imx_data caam_imx6ul_data = {
        .num_clks = ARRAY_SIZE(caam_imx6ul_clks),
 };
 
+static const struct clk_bulk_data caam_vf610_clks[] = {
+       { .id = "ipg" },
+};
+
+static const struct caam_imx_data caam_vf610_data = {
+       .clks = caam_vf610_clks,
+       .num_clks = ARRAY_SIZE(caam_vf610_clks),
+};
+
 static const struct soc_device_attribute caam_imx_soc_table[] = {
        { .soc_id = "i.MX6UL", .data = &caam_imx6ul_data },
        { .soc_id = "i.MX6*",  .data = &caam_imx6_data },
        { .soc_id = "i.MX7*",  .data = &caam_imx7_data },
        { .soc_id = "i.MX8M*", .data = &caam_imx7_data },
+       { .soc_id = "VF*",     .data = &caam_vf610_data },
        { .family = "Freescale i.MX" },
        { /* sentinel */ }
 };
index 8a68531..039df6c 100644 (file)
@@ -103,6 +103,24 @@ int dpseci_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
        return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpseci_reset() - Reset the DPSECI, returns the object to initial state
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPSECI object
+ *
+ * Return:     '0' on success, error code otherwise
+ */
+int dpseci_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+       struct fsl_mc_command cmd = { 0 };
+
+       cmd.header = mc_encode_cmd_header(DPSECI_CMDID_RESET,
+                                         cmd_flags,
+                                         token);
+       return mc_send_command(mc_io, &cmd);
+}
+
 /**
  * dpseci_is_enabled() - Check if the DPSECI is enabled.
  * @mc_io:     Pointer to MC portal's I/O object
index 4550e13..6dcd9be 100644 (file)
@@ -59,6 +59,8 @@ int dpseci_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
 
 int dpseci_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
 
+int dpseci_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
 int dpseci_is_enabled(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
                      int *en);
 
index 6ab77ea..71a007c 100644 (file)
@@ -33,6 +33,7 @@
 #define DPSECI_CMDID_ENABLE                            DPSECI_CMD_V1(0x002)
 #define DPSECI_CMDID_DISABLE                           DPSECI_CMD_V1(0x003)
 #define DPSECI_CMDID_GET_ATTR                          DPSECI_CMD_V1(0x004)
+#define DPSECI_CMDID_RESET                             DPSECI_CMD_V1(0x005)
 #define DPSECI_CMDID_IS_ENABLED                                DPSECI_CMD_V1(0x006)
 
 #define DPSECI_CMDID_SET_RX_QUEUE                      DPSECI_CMD_V1(0x194)
index 17c6108..72db901 100644 (file)
@@ -212,6 +212,9 @@ static const char * const rng_err_id_list[] = {
        "Prediction resistance and test request",
        "Uninstantiate",
        "Secure key generation",
+       "",
+       "Hardware error",
+       "Continuous check"
 };
 
 static int report_ccb_status(struct device *jrdev, const u32 status,
index 4af22e7..bf6b03b 100644 (file)
@@ -339,8 +339,7 @@ EXPORT_SYMBOL(caam_jr_free);
  * caam_jr_enqueue() - Enqueue a job descriptor head. Returns -EINPROGRESS
  * if OK, -ENOSPC if the queue is full, -EIO if it cannot map the caller's
  * descriptor.
- * @dev:  device of the job ring to be used. This device should have
- *        been assigned prior by caam_jr_register().
+ * @dev:  struct device of the job ring to be used
  * @desc: points to a job descriptor that executes our request. All
  *        descriptors (and all referenced data) must be in a DMAable
  *        region, and all data references must be physical addresses
index 0f810bc..af61f3a 100644 (file)
@@ -173,9 +173,14 @@ static inline u64 rd_reg64(void __iomem *reg)
 
 static inline u64 cpu_to_caam_dma64(dma_addr_t value)
 {
-       if (caam_imx)
-               return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
-                        (u64)cpu_to_caam32(upper_32_bits(value)));
+       if (caam_imx) {
+               u64 ret_val = (u64)cpu_to_caam32(lower_32_bits(value)) << 32;
+
+               if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+                       ret_val |= (u64)cpu_to_caam32(upper_32_bits(value));
+
+               return ret_val;
+       }
 
        return cpu_to_caam64(value);
 }
index 1be1adf..5af0dc2 100644 (file)
@@ -99,10 +99,10 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
        struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
        struct fc_context *fctx = &rctx->fctx;
-       u64 *offset_control = &rctx->control_word;
        u32 enc_iv_len = crypto_skcipher_ivsize(tfm);
        struct cpt_request_info *req_info = &rctx->cpt_req;
-       u64 *ctrl_flags = NULL;
+       __be64 *ctrl_flags = NULL;
+       __be64 *offset_control;
 
        req_info->ctrl.s.grp = 0;
        req_info->ctrl.s.dma_mode = DMA_GATHER_SCATTER;
@@ -126,9 +126,10 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
                memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2);
        else
                memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len);
-       ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags;
-       *ctrl_flags = cpu_to_be64(*ctrl_flags);
+       ctrl_flags = (__be64 *)&fctx->enc.enc_ctrl.flags;
+       *ctrl_flags = cpu_to_be64(fctx->enc.enc_ctrl.flags);
 
+       offset_control = (__be64 *)&rctx->control_word;
        *offset_control = cpu_to_be64(((u64)(enc_iv_len) << 16));
        /* Storing  Packet Data Information in offset
         * Control Word First 8 bytes
@@ -200,6 +201,7 @@ static inline int cvm_enc_dec(struct skcipher_request *req, u32 enc)
        int status;
 
        memset(req_info, 0, sizeof(struct cpt_request_info));
+       req_info->may_sleep = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) != 0;
        memset(fctx, 0, sizeof(struct fc_context));
        create_input_list(req, enc, enc_iv_len);
        create_output_list(req, enc_iv_len);
@@ -339,7 +341,8 @@ static int cvm_enc_dec_init(struct crypto_skcipher *tfm)
 }
 
 static struct skcipher_alg algs[] = { {
-       .base.cra_flags         = CRYPTO_ALG_ASYNC,
+       .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize     = AES_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct cvm_enc_ctx),
        .base.cra_alignmask     = 7,
@@ -356,7 +359,8 @@ static struct skcipher_alg algs[] = { {
        .decrypt                = cvm_decrypt,
        .init                   = cvm_enc_dec_init,
 }, {
-       .base.cra_flags         = CRYPTO_ALG_ASYNC,
+       .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize     = AES_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct cvm_enc_ctx),
        .base.cra_alignmask     = 7,
@@ -373,7 +377,8 @@ static struct skcipher_alg algs[] = { {
        .decrypt                = cvm_decrypt,
        .init                   = cvm_enc_dec_init,
 }, {
-       .base.cra_flags         = CRYPTO_ALG_ASYNC,
+       .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize     = AES_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct cvm_enc_ctx),
        .base.cra_alignmask     = 7,
@@ -389,7 +394,8 @@ static struct skcipher_alg algs[] = { {
        .decrypt                = cvm_decrypt,
        .init                   = cvm_enc_dec_init,
 }, {
-       .base.cra_flags         = CRYPTO_ALG_ASYNC,
+       .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize     = AES_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct cvm_enc_ctx),
        .base.cra_alignmask     = 7,
@@ -406,7 +412,8 @@ static struct skcipher_alg algs[] = { {
        .decrypt                = cvm_decrypt,
        .init                   = cvm_enc_dec_init,
 }, {
-       .base.cra_flags         = CRYPTO_ALG_ASYNC,
+       .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize     = DES3_EDE_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct cvm_des3_ctx),
        .base.cra_alignmask     = 7,
@@ -423,7 +430,8 @@ static struct skcipher_alg algs[] = { {
        .decrypt                = cvm_decrypt,
        .init                   = cvm_enc_dec_init,
 }, {
-       .base.cra_flags         = CRYPTO_ALG_ASYNC,
+       .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize     = DES3_EDE_BLOCK_SIZE,
        .base.cra_ctxsize       = sizeof(struct cvm_des3_ctx),
        .base.cra_alignmask     = 7,
index 7a24019..3878b01 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include "cptvf.h"
+#include "cptvf_algs.h"
 #include "request_manager.h"
 
 /**
@@ -133,7 +134,7 @@ static inline int setup_sgio_list(struct cpt_vf *cptvf,
 
        /* Setup gather (input) components */
        g_sz_bytes = ((req->incnt + 3) / 4) * sizeof(struct sglist_component);
-       info->gather_components = kzalloc(g_sz_bytes, GFP_KERNEL);
+       info->gather_components = kzalloc(g_sz_bytes, req->may_sleep ? GFP_KERNEL : GFP_ATOMIC);
        if (!info->gather_components) {
                ret = -ENOMEM;
                goto  scatter_gather_clean;
@@ -150,7 +151,7 @@ static inline int setup_sgio_list(struct cpt_vf *cptvf,
 
        /* Setup scatter (output) components */
        s_sz_bytes = ((req->outcnt + 3) / 4) * sizeof(struct sglist_component);
-       info->scatter_components = kzalloc(s_sz_bytes, GFP_KERNEL);
+       info->scatter_components = kzalloc(s_sz_bytes, req->may_sleep ? GFP_KERNEL : GFP_ATOMIC);
        if (!info->scatter_components) {
                ret = -ENOMEM;
                goto  scatter_gather_clean;
@@ -167,17 +168,16 @@ static inline int setup_sgio_list(struct cpt_vf *cptvf,
 
        /* Create and initialize DPTR */
        info->dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
-       info->in_buffer = kzalloc(info->dlen, GFP_KERNEL);
+       info->in_buffer = kzalloc(info->dlen, req->may_sleep ? GFP_KERNEL : GFP_ATOMIC);
        if (!info->in_buffer) {
                ret = -ENOMEM;
                goto  scatter_gather_clean;
        }
 
-       ((u16 *)info->in_buffer)[0] = req->outcnt;
-       ((u16 *)info->in_buffer)[1] = req->incnt;
-       ((u16 *)info->in_buffer)[2] = 0;
-       ((u16 *)info->in_buffer)[3] = 0;
-       *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer);
+       ((__be16 *)info->in_buffer)[0] = cpu_to_be16(req->outcnt);
+       ((__be16 *)info->in_buffer)[1] = cpu_to_be16(req->incnt);
+       ((__be16 *)info->in_buffer)[2] = 0;
+       ((__be16 *)info->in_buffer)[3] = 0;
 
        memcpy(&info->in_buffer[8], info->gather_components,
               g_sz_bytes);
@@ -195,7 +195,7 @@ static inline int setup_sgio_list(struct cpt_vf *cptvf,
        }
 
        /* Create and initialize RPTR */
-       info->out_buffer = kzalloc(COMPLETION_CODE_SIZE, GFP_KERNEL);
+       info->out_buffer = kzalloc(COMPLETION_CODE_SIZE, req->may_sleep ? GFP_KERNEL : GFP_ATOMIC);
        if (!info->out_buffer) {
                ret = -ENOMEM;
                goto scatter_gather_clean;
@@ -421,7 +421,7 @@ int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req)
        struct cpt_vq_command vq_cmd;
        union cpt_inst_s cptinst;
 
-       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       info = kzalloc(sizeof(*info), req->may_sleep ? GFP_KERNEL : GFP_ATOMIC);
        if (unlikely(!info)) {
                dev_err(&pdev->dev, "Unable to allocate memory for info_buffer\n");
                return -ENOMEM;
@@ -443,7 +443,7 @@ int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req)
         * Get buffer for union cpt_res_s response
         * structure and its physical address
         */
-       info->completion_addr = kzalloc(sizeof(union cpt_res_s), GFP_KERNEL);
+       info->completion_addr = kzalloc(sizeof(union cpt_res_s), req->may_sleep ? GFP_KERNEL : GFP_ATOMIC);
        if (unlikely(!info->completion_addr)) {
                dev_err(&pdev->dev, "Unable to allocate memory for completion_addr\n");
                ret = -ENOMEM;
@@ -470,8 +470,6 @@ int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req)
        vq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2);
        vq_cmd.cmd.s.dlen   = cpu_to_be16(cpt_req->dlen);
 
-       /* 64-bit swap for microcode data reads, not needed for addresses*/
-       vq_cmd.cmd.u64 = cpu_to_be64(vq_cmd.cmd.u64);
        vq_cmd.dptr = info->dptr_baddr;
        vq_cmd.rptr = info->rptr_baddr;
        vq_cmd.cptr.u64 = 0;
index 3514b08..8d40e4b 100644 (file)
@@ -62,6 +62,8 @@ struct cpt_request_info {
        union ctrl_info ctrl; /* User control information */
        struct cptvf_request req; /* Request Information (Core specific) */
 
+       bool may_sleep;
+
        struct buf_ptr in[MAX_BUF_CNT];
        struct buf_ptr out[MAX_BUF_CNT];
 
@@ -73,16 +75,16 @@ struct sglist_component {
        union {
                u64 len;
                struct {
-                       u16 len0;
-                       u16 len1;
-                       u16 len2;
-                       u16 len3;
+                       __be16 len0;
+                       __be16 len1;
+                       __be16 len2;
+                       __be16 len3;
                } s;
        } u;
-       u64 ptr0;
-       u64 ptr1;
-       u64 ptr2;
-       u64 ptr3;
+       __be64 ptr0;
+       __be64 ptr1;
+       __be64 ptr2;
+       __be64 ptr3;
 };
 
 struct cpt_info_buffer {
@@ -112,10 +114,10 @@ struct cpt_info_buffer {
 union vq_cmd_word0 {
        u64 u64;
        struct {
-               u16 opcode;
-               u16 param1;
-               u16 param2;
-               u16 dlen;
+               __be16 opcode;
+               __be16 param1;
+               __be16 param2;
+               __be16 dlen;
        } s;
 };
 
index dce5423..1be2571 100644 (file)
@@ -522,7 +522,7 @@ static struct aead_alg nitrox_aeads[] = { {
                .cra_name = "gcm(aes)",
                .cra_driver_name = "n5_aes_gcm",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = 1,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
@@ -541,7 +541,7 @@ static struct aead_alg nitrox_aeads[] = { {
                .cra_name = "rfc4106(gcm(aes))",
                .cra_driver_name = "n5_rfc4106",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = 1,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
index 18088b0..a553ac6 100644 (file)
@@ -388,7 +388,7 @@ static struct skcipher_alg nitrox_skciphers[] = { {
                .cra_name = "cbc(aes)",
                .cra_driver_name = "n5_cbc(aes)",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
@@ -407,7 +407,7 @@ static struct skcipher_alg nitrox_skciphers[] = { {
                .cra_name = "ecb(aes)",
                .cra_driver_name = "n5_ecb(aes)",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
@@ -426,7 +426,7 @@ static struct skcipher_alg nitrox_skciphers[] = { {
                .cra_name = "cfb(aes)",
                .cra_driver_name = "n5_cfb(aes)",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
@@ -445,7 +445,7 @@ static struct skcipher_alg nitrox_skciphers[] = { {
                .cra_name = "xts(aes)",
                .cra_driver_name = "n5_xts(aes)",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
@@ -464,7 +464,7 @@ static struct skcipher_alg nitrox_skciphers[] = { {
                .cra_name = "rfc3686(ctr(aes))",
                .cra_driver_name = "n5_rfc3686(ctr(aes))",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = 1,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
@@ -483,7 +483,7 @@ static struct skcipher_alg nitrox_skciphers[] = { {
                .cra_name = "cts(cbc(aes))",
                .cra_driver_name = "n5_cts(cbc(aes))",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
@@ -502,7 +502,7 @@ static struct skcipher_alg nitrox_skciphers[] = { {
                .cra_name = "cbc(des3_ede)",
                .cra_driver_name = "n5_cbc(des3_ede)",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
@@ -521,7 +521,7 @@ static struct skcipher_alg nitrox_skciphers[] = { {
                .cra_name = "ecb(des3_ede)",
                .cra_driver_name = "n5_ecb(des3_ede)",
                .cra_priority = PRIO,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
                .cra_alignmask = 0,
index 5eba7ee..11a305f 100644 (file)
@@ -378,6 +378,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head)
        snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)");
        snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "cmac-aes-ccp");
        base->cra_flags = CRYPTO_ALG_ASYNC |
+                         CRYPTO_ALG_ALLOCATES_MEMORY |
                          CRYPTO_ALG_KERN_DRIVER_ONLY |
                          CRYPTO_ALG_NEED_FALLBACK;
        base->cra_blocksize = AES_BLOCK_SIZE;
index 9e8f07c..1c1c939 100644 (file)
@@ -172,6 +172,7 @@ static struct aead_alg ccp_aes_gcm_defaults = {
        .maxauthsize = AES_BLOCK_SIZE,
        .base = {
                .cra_flags      = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
                                  CRYPTO_ALG_NEED_FALLBACK,
                .cra_blocksize  = AES_BLOCK_SIZE,
index 04b2517..6849261 100644 (file)
@@ -98,7 +98,7 @@ static int ccp_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
        ctx->u.aes.key_len = key_len / 2;
        sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
 
-       return crypto_sync_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len);
+       return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len);
 }
 
 static int ccp_aes_xts_crypt(struct skcipher_request *req,
@@ -145,20 +145,19 @@ static int ccp_aes_xts_crypt(struct skcipher_request *req,
            (ctx->u.aes.key_len != AES_KEYSIZE_256))
                fallback = 1;
        if (fallback) {
-               SYNC_SKCIPHER_REQUEST_ON_STACK(subreq,
-                                              ctx->u.aes.tfm_skcipher);
-
                /* Use the fallback to process the request for any
                 * unsupported unit sizes or key sizes
                 */
-               skcipher_request_set_sync_tfm(subreq, ctx->u.aes.tfm_skcipher);
-               skcipher_request_set_callback(subreq, req->base.flags,
-                                             NULL, NULL);
-               skcipher_request_set_crypt(subreq, req->src, req->dst,
-                                          req->cryptlen, req->iv);
-               ret = encrypt ? crypto_skcipher_encrypt(subreq) :
-                               crypto_skcipher_decrypt(subreq);
-               skcipher_request_zero(subreq);
+               skcipher_request_set_tfm(&rctx->fallback_req,
+                                        ctx->u.aes.tfm_skcipher);
+               skcipher_request_set_callback(&rctx->fallback_req,
+                                             req->base.flags,
+                                             req->base.complete,
+                                             req->base.data);
+               skcipher_request_set_crypt(&rctx->fallback_req, req->src,
+                                          req->dst, req->cryptlen, req->iv);
+               ret = encrypt ? crypto_skcipher_encrypt(&rctx->fallback_req) :
+                               crypto_skcipher_decrypt(&rctx->fallback_req);
                return ret;
        }
 
@@ -198,13 +197,12 @@ static int ccp_aes_xts_decrypt(struct skcipher_request *req)
 static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm)
 {
        struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
-       struct crypto_sync_skcipher *fallback_tfm;
+       struct crypto_skcipher *fallback_tfm;
 
        ctx->complete = ccp_aes_xts_complete;
        ctx->u.aes.key_len = 0;
 
-       fallback_tfm = crypto_alloc_sync_skcipher("xts(aes)", 0,
-                                            CRYPTO_ALG_ASYNC |
+       fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0,
                                             CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(fallback_tfm)) {
                pr_warn("could not load fallback driver xts(aes)\n");
@@ -212,7 +210,8 @@ static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm)
        }
        ctx->u.aes.tfm_skcipher = fallback_tfm;
 
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx));
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx) +
+                                        crypto_skcipher_reqsize(fallback_tfm));
 
        return 0;
 }
@@ -221,7 +220,7 @@ static void ccp_aes_xts_exit_tfm(struct crypto_skcipher *tfm)
 {
        struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       crypto_free_sync_skcipher(ctx->u.aes.tfm_skcipher);
+       crypto_free_skcipher(ctx->u.aes.tfm_skcipher);
 }
 
 static int ccp_register_aes_xts_alg(struct list_head *head,
@@ -243,6 +242,7 @@ static int ccp_register_aes_xts_alg(struct list_head *head,
        snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
                 def->drv_name);
        alg->base.cra_flags     = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
                                  CRYPTO_ALG_NEED_FALLBACK;
        alg->base.cra_blocksize = AES_BLOCK_SIZE;
index 51e12fb..e6dcd8c 100644 (file)
@@ -212,6 +212,7 @@ static const struct skcipher_alg ccp_aes_defaults = {
        .init                   = ccp_aes_init_tfm,
 
        .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
                                  CRYPTO_ALG_NEED_FALLBACK,
        .base.cra_blocksize     = AES_BLOCK_SIZE,
@@ -229,6 +230,7 @@ static const struct skcipher_alg ccp_aes_rfc3686_defaults = {
        .init                   = ccp_aes_rfc3686_init_tfm,
 
        .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
                                  CRYPTO_ALG_NEED_FALLBACK,
        .base.cra_blocksize     = CTR_RFC3686_BLOCK_SIZE,
index 9c129de..ec97daf 100644 (file)
@@ -136,6 +136,7 @@ static const struct skcipher_alg ccp_des3_defaults = {
        .init                   = ccp_des3_init_tfm,
 
        .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
                                  CRYPTO_ALG_NEED_FALLBACK,
        .base.cra_blocksize     = DES3_EDE_BLOCK_SIZE,
index b0cc2bd..8fbfdb9 100644 (file)
@@ -19,6 +19,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/scatterwalk.h>
+#include <linux/string.h>
 
 #include "ccp-crypto.h"
 
@@ -424,7 +425,7 @@ static int ccp_register_hmac_alg(struct list_head *head,
        *ccp_alg = *base_alg;
        INIT_LIST_HEAD(&ccp_alg->entry);
 
-       strncpy(ccp_alg->child_alg, def->name, CRYPTO_MAX_ALG_NAME);
+       strscpy(ccp_alg->child_alg, def->name, CRYPTO_MAX_ALG_NAME);
 
        alg = &ccp_alg->alg;
        alg->setkey = ccp_sha_setkey;
@@ -486,6 +487,7 @@ static int ccp_register_sha_alg(struct list_head *head,
        snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
                 def->drv_name);
        base->cra_flags = CRYPTO_ALG_ASYNC |
+                         CRYPTO_ALG_ALLOCATES_MEMORY |
                          CRYPTO_ALG_KERN_DRIVER_ONLY |
                          CRYPTO_ALG_NEED_FALLBACK;
        base->cra_blocksize = def->block_size;
index 90a009e..aed3d21 100644 (file)
@@ -89,7 +89,7 @@ static inline struct ccp_crypto_ahash_alg *
 /***** AES related defines *****/
 struct ccp_aes_ctx {
        /* Fallback cipher for XTS with unsupported unit sizes */
-       struct crypto_sync_skcipher *tfm_skcipher;
+       struct crypto_skcipher *tfm_skcipher;
 
        enum ccp_engine engine;
        enum ccp_aes_type type;
@@ -121,6 +121,8 @@ struct ccp_aes_req_ctx {
        u8 rfc3686_iv[AES_BLOCK_SIZE];
 
        struct ccp_cmd cmd;
+
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 struct ccp_aes_cmac_req_ctx {
index 82ac4c1..7838f63 100644 (file)
@@ -221,8 +221,8 @@ static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
 static int ccp5_do_cmd(struct ccp5_desc *desc,
                       struct ccp_cmd_queue *cmd_q)
 {
-       u32 *mP;
-       __le32 *dP;
+       __le32 *mP;
+       u32 *dP;
        u32 tail;
        int     i;
        int ret = 0;
@@ -235,8 +235,8 @@ static int ccp5_do_cmd(struct ccp5_desc *desc,
        }
        mutex_lock(&cmd_q->q_mutex);
 
-       mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
-       dP = (__le32 *) desc;
+       mP = (__le32 *)&cmd_q->qbase[cmd_q->qidx];
+       dP = (u32 *)desc;
        for (i = 0; i < 8; i++)
                mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
 
index 19ac509..0971ee6 100644 (file)
@@ -531,7 +531,6 @@ int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
        return len;
 }
 
-#ifdef CONFIG_PM
 bool ccp_queues_suspended(struct ccp_device *ccp)
 {
        unsigned int suspended = 0;
@@ -549,7 +548,7 @@ bool ccp_queues_suspended(struct ccp_device *ccp)
        return ccp->cmd_q_count == suspended;
 }
 
-int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
+int ccp_dev_suspend(struct sp_device *sp)
 {
        struct ccp_device *ccp = sp->ccp_data;
        unsigned long flags;
@@ -601,7 +600,6 @@ int ccp_dev_resume(struct sp_device *sp)
 
        return 0;
 }
-#endif
 
 int ccp_dev_init(struct sp_device *sp)
 {
index 3f68262..a5d9123 100644 (file)
@@ -469,6 +469,7 @@ struct ccp_sg_workarea {
        unsigned int sg_used;
 
        struct scatterlist *dma_sg;
+       struct scatterlist *dma_sg_head;
        struct device *dma_dev;
        unsigned int dma_count;
        enum dma_data_direction dma_dir;
@@ -596,8 +597,8 @@ struct dword3 {
 };
 
 union dword4 {
-       __le32 dst_lo;          /* NON-SHA      */
-       __le32 sha_len_lo;      /* SHA          */
+       u32 dst_lo;             /* NON-SHA      */
+       u32 sha_len_lo;         /* SHA          */
 };
 
 union dword5 {
@@ -607,7 +608,7 @@ union dword5 {
                unsigned int  rsvd1:13;
                unsigned int  fixed:1;
        } fields;
-       __le32 sha_len_hi;
+       u32 sha_len_hi;
 };
 
 struct dword7 {
@@ -618,12 +619,12 @@ struct dword7 {
 
 struct ccp5_desc {
        struct dword0 dw0;
-       __le32 length;
-       __le32 src_lo;
+       u32 length;
+       u32 src_lo;
        struct dword3 dw3;
        union dword4 dw4;
        union dword5 dw5;
-       __le32 key_lo;
+       u32 key_lo;
        struct dword7 dw7;
 };
 
index 4221936..bd270e6 100644 (file)
@@ -63,7 +63,7 @@ static u32 ccp_gen_jobid(struct ccp_device *ccp)
 static void ccp_sg_free(struct ccp_sg_workarea *wa)
 {
        if (wa->dma_count)
-               dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
+               dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir);
 
        wa->dma_count = 0;
 }
@@ -92,6 +92,7 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
                return 0;
 
        wa->dma_sg = sg;
+       wa->dma_sg_head = sg;
        wa->dma_dev = dev;
        wa->dma_dir = dma_dir;
        wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
@@ -104,14 +105,28 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
 {
        unsigned int nbytes = min_t(u64, len, wa->bytes_left);
+       unsigned int sg_combined_len = 0;
 
        if (!wa->sg)
                return;
 
        wa->sg_used += nbytes;
        wa->bytes_left -= nbytes;
-       if (wa->sg_used == wa->sg->length) {
-               wa->sg = sg_next(wa->sg);
+       if (wa->sg_used == sg_dma_len(wa->dma_sg)) {
+               /* Advance to the next DMA scatterlist entry */
+               wa->dma_sg = sg_next(wa->dma_sg);
+
+               /* In the case that the DMA mapped scatterlist has entries
+                * that have been merged, the non-DMA mapped scatterlist
+                * must be advanced multiple times for each merged entry.
+                * This ensures that the current non-DMA mapped entry
+                * corresponds to the current DMA mapped entry.
+                */
+               do {
+                       sg_combined_len += wa->sg->length;
+                       wa->sg = sg_next(wa->sg);
+               } while (wa->sg_used > sg_combined_len);
+
                wa->sg_used = 0;
        }
 }
@@ -299,7 +314,7 @@ static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
        /* Update the structures and generate the count */
        buf_count = 0;
        while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
-               nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
+               nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used,
                             dm_wa->length - buf_count);
                nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
 
@@ -331,11 +346,11 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
         * and destination. The resulting len values will always be <= UINT_MAX
         * because the dma length is an unsigned int.
         */
-       sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
+       sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used;
        sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
 
        if (dst) {
-               sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
+               sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used;
                sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
                op_len = min(sg_src_len, sg_dst_len);
        } else {
@@ -365,7 +380,7 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
                /* Enough data in the sg element, but we need to
                 * adjust for any previously copied data
                 */
-               op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
+               op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg);
                op->src.u.dma.offset = src->sg_wa.sg_used;
                op->src.u.dma.length = op_len & ~(block_size - 1);
 
@@ -386,7 +401,7 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
                        /* Enough room in the sg element, but we need to
                         * adjust for any previously used area
                         */
-                       op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
+                       op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg);
                        op->dst.u.dma.offset = dst->sg_wa.sg_used;
                        op->dst.u.dma.length = op->src.u.dma.length;
                }
@@ -617,13 +632,12 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        struct ccp_data src, dst;
        struct ccp_data aad;
        struct ccp_op op;
-
-       unsigned long long *final;
        unsigned int dm_offset;
        unsigned int authsize;
        unsigned int jobid;
        unsigned int ilen;
        bool in_place = true; /* Default value */
+       __be64 *final;
        int ret;
 
        struct scatterlist *p_inp, sg_inp[2];
@@ -825,7 +839,7 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_dst;
-       final = (unsigned long long *) final_wa.address;
+       final = (__be64 *)final_wa.address;
        final[0] = cpu_to_be64(aes->aad_len * 8);
        final[1] = cpu_to_be64(ilen * 8);
 
@@ -1308,7 +1322,6 @@ ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                        return -EINVAL;
        }
 
-       ret = -EIO;
        /* Zero out all the fields of the command desc */
        memset(&op, 0, sizeof(op));
 
@@ -2028,7 +2041,7 @@ ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        dst.sg_wa.sg_used = 0;
        for (i = 1; i <= src.sg_wa.dma_count; i++) {
                if (!dst.sg_wa.sg ||
-                   (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
+                   (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) {
                        ret = -EINVAL;
                        goto e_dst;
                }
@@ -2054,8 +2067,8 @@ ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                        goto e_dst;
                }
 
-               dst.sg_wa.sg_used += src.sg_wa.sg->length;
-               if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
+               dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg);
+               if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) {
                        dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
                        dst.sg_wa.sg_used = 0;
                }
index ce42675..6284a15 100644 (file)
@@ -211,13 +211,12 @@ void sp_destroy(struct sp_device *sp)
        sp_del_device(sp);
 }
 
-#ifdef CONFIG_PM
-int sp_suspend(struct sp_device *sp, pm_message_t state)
+int sp_suspend(struct sp_device *sp)
 {
        int ret;
 
        if (sp->dev_vdata->ccp_vdata) {
-               ret = ccp_dev_suspend(sp, state);
+               ret = ccp_dev_suspend(sp);
                if (ret)
                        return ret;
        }
@@ -237,7 +236,6 @@ int sp_resume(struct sp_device *sp)
 
        return 0;
 }
-#endif
 
 struct sp_device *sp_get_psp_master_device(void)
 {
index f913f14..0218d06 100644 (file)
@@ -119,7 +119,7 @@ int sp_init(struct sp_device *sp);
 void sp_destroy(struct sp_device *sp);
 struct sp_device *sp_get_master(void);
 
-int sp_suspend(struct sp_device *sp, pm_message_t state);
+int sp_suspend(struct sp_device *sp);
 int sp_resume(struct sp_device *sp);
 int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
                       const char *name, void *data);
@@ -134,7 +134,7 @@ struct sp_device *sp_get_psp_master_device(void);
 int ccp_dev_init(struct sp_device *sp);
 void ccp_dev_destroy(struct sp_device *sp);
 
-int ccp_dev_suspend(struct sp_device *sp, pm_message_t state);
+int ccp_dev_suspend(struct sp_device *sp);
 int ccp_dev_resume(struct sp_device *sp);
 
 #else  /* !CONFIG_CRYPTO_DEV_SP_CCP */
@@ -145,7 +145,7 @@ static inline int ccp_dev_init(struct sp_device *sp)
 }
 static inline void ccp_dev_destroy(struct sp_device *sp) { }
 
-static inline int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
+static inline int ccp_dev_suspend(struct sp_device *sp)
 {
        return 0;
 }
index cb6cb47..f471dba 100644 (file)
@@ -252,23 +252,19 @@ static void sp_pci_remove(struct pci_dev *pdev)
        sp_free_irqs(sp);
 }
 
-#ifdef CONFIG_PM
-static int sp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused sp_pci_suspend(struct device *dev)
 {
-       struct device *dev = &pdev->dev;
        struct sp_device *sp = dev_get_drvdata(dev);
 
-       return sp_suspend(sp, state);
+       return sp_suspend(sp);
 }
 
-static int sp_pci_resume(struct pci_dev *pdev)
+static int __maybe_unused sp_pci_resume(struct device *dev)
 {
-       struct device *dev = &pdev->dev;
        struct sp_device *sp = dev_get_drvdata(dev);
 
        return sp_resume(sp);
 }
-#endif
 
 #ifdef CONFIG_CRYPTO_DEV_SP_PSP
 static const struct sev_vdata sevv1 = {
@@ -365,15 +361,14 @@ static const struct pci_device_id sp_pci_table[] = {
 };
 MODULE_DEVICE_TABLE(pci, sp_pci_table);
 
+static SIMPLE_DEV_PM_OPS(sp_pci_pm_ops, sp_pci_suspend, sp_pci_resume);
+
 static struct pci_driver sp_pci_driver = {
        .name = "ccp",
        .id_table = sp_pci_table,
        .probe = sp_pci_probe,
        .remove = sp_pci_remove,
-#ifdef CONFIG_PM
-       .suspend = sp_pci_suspend,
-       .resume = sp_pci_resume,
-#endif
+       .driver.pm = &sp_pci_pm_ops,
 };
 
 int sp_pci_init(void)
index 831aac1..9dba52f 100644 (file)
@@ -207,7 +207,7 @@ static int sp_platform_suspend(struct platform_device *pdev,
        struct device *dev = &pdev->dev;
        struct sp_device *sp = dev_get_drvdata(dev);
 
-       return sp_suspend(sp, state);
+       return sp_suspend(sp);
 }
 
 static int sp_platform_resume(struct platform_device *pdev)
index 872ea3f..076669d 100644 (file)
@@ -45,7 +45,6 @@ enum cc_key_type {
 struct cc_cipher_ctx {
        struct cc_drvdata *drvdata;
        int keylen;
-       int key_round_number;
        int cipher_mode;
        int flow_mode;
        unsigned int flags;
@@ -56,6 +55,8 @@ struct cc_cipher_ctx {
                struct cc_cpp_key_info cpp;
        };
        struct crypto_shash *shash_tfm;
+       struct crypto_skcipher *fallback_tfm;
+       bool fallback_on;
 };
 
 static void cc_cipher_complete(struct device *dev, void *cc_req, int err);
@@ -75,7 +76,6 @@ static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size)
                case CC_AES_128_BIT_KEY_SIZE:
                case CC_AES_192_BIT_KEY_SIZE:
                        if (ctx_p->cipher_mode != DRV_CIPHER_XTS &&
-                           ctx_p->cipher_mode != DRV_CIPHER_ESSIV &&
                            ctx_p->cipher_mode != DRV_CIPHER_BITLOCKER)
                                return 0;
                        break;
@@ -159,22 +159,49 @@ static int cc_cipher_init(struct crypto_tfm *tfm)
                                     skcipher_alg.base);
        struct device *dev = drvdata_to_dev(cc_alg->drvdata);
        unsigned int max_key_buf_size = cc_alg->skcipher_alg.max_keysize;
-       int rc = 0;
+       unsigned int fallback_req_size = 0;
 
        dev_dbg(dev, "Initializing context @%p for %s\n", ctx_p,
                crypto_tfm_alg_name(tfm));
 
-       crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
-                                   sizeof(struct cipher_req_ctx));
-
        ctx_p->cipher_mode = cc_alg->cipher_mode;
        ctx_p->flow_mode = cc_alg->flow_mode;
        ctx_p->drvdata = cc_alg->drvdata;
 
+       if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
+               const char *name = crypto_tfm_alg_name(tfm);
+
+               /* Alloc hash tfm for essiv */
+               ctx_p->shash_tfm = crypto_alloc_shash("sha256", 0, 0);
+               if (IS_ERR(ctx_p->shash_tfm)) {
+                       dev_err(dev, "Error allocating hash tfm for ESSIV.\n");
+                       return PTR_ERR(ctx_p->shash_tfm);
+               }
+               max_key_buf_size <<= 1;
+
+               /* Alloc fallback tfm for essiv when key size != 256 bit */
+               ctx_p->fallback_tfm =
+                       crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
+
+               if (IS_ERR(ctx_p->fallback_tfm)) {
+                       /* Note we're still allowing registration with no fallback since it's
+                        * better to have most modes supported than none at all.
+                        */
+                       dev_warn(dev, "Error allocating fallback algo %s. Some modes may be available.\n",
+                              name);
+                       ctx_p->fallback_tfm = NULL;
+               } else {
+                       fallback_req_size = crypto_skcipher_reqsize(ctx_p->fallback_tfm);
+               }
+       }
+
+       crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
+                                   sizeof(struct cipher_req_ctx) + fallback_req_size);
+
        /* Allocate key buffer, cache line aligned */
-       ctx_p->user.key = kmalloc(max_key_buf_size, GFP_KERNEL);
+       ctx_p->user.key = kzalloc(max_key_buf_size, GFP_KERNEL);
        if (!ctx_p->user.key)
-               return -ENOMEM;
+               goto free_fallback;
 
        dev_dbg(dev, "Allocated key buffer in context. key=@%p\n",
                ctx_p->user.key);
@@ -186,21 +213,20 @@ static int cc_cipher_init(struct crypto_tfm *tfm)
        if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) {
                dev_err(dev, "Mapping Key %u B at va=%pK for DMA failed\n",
                        max_key_buf_size, ctx_p->user.key);
-               return -ENOMEM;
+               goto free_key;
        }
        dev_dbg(dev, "Mapped key %u B at va=%pK to dma=%pad\n",
                max_key_buf_size, ctx_p->user.key, &ctx_p->user.key_dma_addr);
 
-       if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
-               /* Alloc hash tfm for essiv */
-               ctx_p->shash_tfm = crypto_alloc_shash("sha256-generic", 0, 0);
-               if (IS_ERR(ctx_p->shash_tfm)) {
-                       dev_err(dev, "Error allocating hash tfm for ESSIV.\n");
-                       return PTR_ERR(ctx_p->shash_tfm);
-               }
-       }
+       return 0;
 
-       return rc;
+free_key:
+       kfree(ctx_p->user.key);
+free_fallback:
+       crypto_free_skcipher(ctx_p->fallback_tfm);
+       crypto_free_shash(ctx_p->shash_tfm);
+
+       return -ENOMEM;
 }
 
 static void cc_cipher_exit(struct crypto_tfm *tfm)
@@ -220,6 +246,8 @@ static void cc_cipher_exit(struct crypto_tfm *tfm)
                /* Free hash tfm for essiv */
                crypto_free_shash(ctx_p->shash_tfm);
                ctx_p->shash_tfm = NULL;
+               crypto_free_skcipher(ctx_p->fallback_tfm);
+               ctx_p->fallback_tfm = NULL;
        }
 
        /* Unmap key buffer */
@@ -303,6 +331,7 @@ static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key,
        }
 
        ctx_p->keylen = keylen;
+       ctx_p->fallback_on = false;
 
        switch (cc_slot_to_key_type(hki.hw_key1)) {
        case CC_HW_PROTECTED_KEY:
@@ -388,10 +417,33 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
        /* STAT_PHASE_0: Init and sanity checks */
 
        if (validate_keys_sizes(ctx_p, keylen)) {
-               dev_dbg(dev, "Unsupported key size %d.\n", keylen);
+               dev_dbg(dev, "Invalid key size %d.\n", keylen);
                return -EINVAL;
        }
 
+       if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
+
+               /* We only support 256 bit ESSIV-CBC-AES keys */
+               if (keylen != AES_KEYSIZE_256)  {
+                       unsigned int flags = crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_MASK;
+
+                       if (likely(ctx_p->fallback_tfm)) {
+                               ctx_p->fallback_on = true;
+                               crypto_skcipher_clear_flags(ctx_p->fallback_tfm,
+                                                           CRYPTO_TFM_REQ_MASK);
+                               crypto_skcipher_clear_flags(ctx_p->fallback_tfm, flags);
+                               return crypto_skcipher_setkey(ctx_p->fallback_tfm, key, keylen);
+                       }
+
+                       dev_dbg(dev, "Unsupported key size %d and no fallback.\n", keylen);
+                       return -EINVAL;
+               }
+
+               /* Internal ESSIV key buffer is double sized */
+               max_key_buf_size <<= 1;
+       }
+
+       ctx_p->fallback_on = false;
        ctx_p->key_type = CC_UNPROTECTED_KEY;
 
        /*
@@ -419,21 +471,20 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
                                max_key_buf_size, DMA_TO_DEVICE);
 
        memcpy(ctx_p->user.key, key, keylen);
-       if (keylen == 24)
-               memset(ctx_p->user.key + 24, 0, CC_AES_KEY_SIZE_MAX - 24);
 
        if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
                /* sha256 for key2 - use sw implementation */
-               int key_len = keylen >> 1;
                int err;
 
                err = crypto_shash_tfm_digest(ctx_p->shash_tfm,
-                                             ctx_p->user.key, key_len,
-                                             ctx_p->user.key + key_len);
+                                             ctx_p->user.key, keylen,
+                                             ctx_p->user.key + keylen);
                if (err) {
                        dev_err(dev, "Failed to hash ESSIV key.\n");
                        return err;
                }
+
+               keylen <<= 1;
        }
        dma_sync_single_for_device(dev, ctx_p->user.key_dma_addr,
                                   max_key_buf_size, DMA_TO_DEVICE);
@@ -571,9 +622,10 @@ static void cc_setup_xex_state_desc(struct crypto_tfm *tfm,
        int flow_mode = ctx_p->flow_mode;
        int direction = req_ctx->gen_ctx.op_type;
        dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr;
-       unsigned int key_len = ctx_p->keylen;
+       unsigned int key_len = (ctx_p->keylen / 2);
        dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr;
        unsigned int du_size = nbytes;
+       unsigned int key_offset = key_len;
 
        struct cc_crypto_alg *cc_alg =
                container_of(tfm->__crt_alg, struct cc_crypto_alg,
@@ -593,6 +645,10 @@ static void cc_setup_xex_state_desc(struct crypto_tfm *tfm,
        case DRV_CIPHER_XTS:
        case DRV_CIPHER_ESSIV:
        case DRV_CIPHER_BITLOCKER:
+
+               if (cipher_mode == DRV_CIPHER_ESSIV)
+                       key_len = SHA256_DIGEST_SIZE;
+
                /* load XEX key */
                hw_desc_init(&desc[*seq_size]);
                set_cipher_mode(&desc[*seq_size], cipher_mode);
@@ -602,12 +658,12 @@ static void cc_setup_xex_state_desc(struct crypto_tfm *tfm,
                                          ctx_p->hw.key2_slot);
                } else {
                        set_din_type(&desc[*seq_size], DMA_DLLI,
-                                    (key_dma_addr + (key_len / 2)),
-                                    (key_len / 2), NS_BIT);
+                                    (key_dma_addr + key_offset),
+                                    key_len, NS_BIT);
                }
                set_xex_data_unit_size(&desc[*seq_size], du_size);
                set_flow_mode(&desc[*seq_size], S_DIN_to_AES2);
-               set_key_size_aes(&desc[*seq_size], (key_len / 2));
+               set_key_size_aes(&desc[*seq_size], key_len);
                set_setup_mode(&desc[*seq_size], SETUP_LOAD_XEX_KEY);
                (*seq_size)++;
 
@@ -616,7 +672,7 @@ static void cc_setup_xex_state_desc(struct crypto_tfm *tfm,
                set_setup_mode(&desc[*seq_size], SETUP_LOAD_STATE1);
                set_cipher_mode(&desc[*seq_size], cipher_mode);
                set_cipher_config0(&desc[*seq_size], direction);
-               set_key_size_aes(&desc[*seq_size], (key_len / 2));
+               set_key_size_aes(&desc[*seq_size], key_len);
                set_flow_mode(&desc[*seq_size], flow_mode);
                set_din_type(&desc[*seq_size], DMA_DLLI, iv_dma_addr,
                             CC_AES_BLOCK_SIZE, NS_BIT);
@@ -867,6 +923,17 @@ static int cc_cipher_process(struct skcipher_request *req,
                goto exit_process;
        }
 
+       if (ctx_p->fallback_on) {
+               struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+               *subreq = *req;
+               skcipher_request_set_tfm(subreq, ctx_p->fallback_tfm);
+               if (direction == DRV_CRYPTO_DIRECTION_ENCRYPT)
+                       return crypto_skcipher_encrypt(subreq);
+               else
+                       return crypto_skcipher_decrypt(subreq);
+       }
+
        /* The IV we are handed may be allocated from the stack so
         * we must copy it to a DMAable buffer before use.
         */
@@ -1010,7 +1077,7 @@ static const struct cc_alg_template skcipher_algs[] = {
                .sec_func = true,
        },
        {
-               .name = "essiv(paes)",
+               .name = "essiv(cbc(paes),sha256)",
                .driver_name = "essiv-paes-ccree",
                .blocksize = AES_BLOCK_SIZE,
                .template_skcipher = {
@@ -1028,7 +1095,7 @@ static const struct cc_alg_template skcipher_algs[] = {
                .sec_func = true,
        },
        {
-               .name = "essiv512(paes)",
+               .name = "essiv512(cbc(paes),sha256)",
                .driver_name = "essiv-paes-du512-ccree",
                .blocksize = AES_BLOCK_SIZE,
                .template_skcipher = {
@@ -1047,7 +1114,7 @@ static const struct cc_alg_template skcipher_algs[] = {
                .sec_func = true,
        },
        {
-               .name = "essiv4096(paes)",
+               .name = "essiv4096(cbc(paes),sha256)",
                .driver_name = "essiv-paes-du4096-ccree",
                .blocksize = AES_BLOCK_SIZE,
                .template_skcipher = {
@@ -1269,15 +1336,15 @@ static const struct cc_alg_template skcipher_algs[] = {
                .std_body = CC_STD_NIST,
        },
        {
-               .name = "essiv(aes)",
+               .name = "essiv(cbc(aes),sha256)",
                .driver_name = "essiv-aes-ccree",
                .blocksize = AES_BLOCK_SIZE,
                .template_skcipher = {
                        .setkey = cc_cipher_setkey,
                        .encrypt = cc_cipher_encrypt,
                        .decrypt = cc_cipher_decrypt,
-                       .min_keysize = AES_MIN_KEY_SIZE * 2,
-                       .max_keysize = AES_MAX_KEY_SIZE * 2,
+                       .min_keysize = AES_MIN_KEY_SIZE,
+                       .max_keysize = AES_MAX_KEY_SIZE,
                        .ivsize = AES_BLOCK_SIZE,
                        },
                .cipher_mode = DRV_CIPHER_ESSIV,
@@ -1286,15 +1353,15 @@ static const struct cc_alg_template skcipher_algs[] = {
                .std_body = CC_STD_NIST,
        },
        {
-               .name = "essiv512(aes)",
+               .name = "essiv512(cbc(aes),sha256)",
                .driver_name = "essiv-aes-du512-ccree",
                .blocksize = AES_BLOCK_SIZE,
                .template_skcipher = {
                        .setkey = cc_cipher_setkey,
                        .encrypt = cc_cipher_encrypt,
                        .decrypt = cc_cipher_decrypt,
-                       .min_keysize = AES_MIN_KEY_SIZE * 2,
-                       .max_keysize = AES_MAX_KEY_SIZE * 2,
+                       .min_keysize = AES_MIN_KEY_SIZE,
+                       .max_keysize = AES_MAX_KEY_SIZE,
                        .ivsize = AES_BLOCK_SIZE,
                        },
                .cipher_mode = DRV_CIPHER_ESSIV,
@@ -1304,15 +1371,15 @@ static const struct cc_alg_template skcipher_algs[] = {
                .std_body = CC_STD_NIST,
        },
        {
-               .name = "essiv4096(aes)",
+               .name = "essiv4096(cbc(aes),sha256)",
                .driver_name = "essiv-aes-du4096-ccree",
                .blocksize = AES_BLOCK_SIZE,
                .template_skcipher = {
                        .setkey = cc_cipher_setkey,
                        .encrypt = cc_cipher_encrypt,
                        .decrypt = cc_cipher_decrypt,
-                       .min_keysize = AES_MIN_KEY_SIZE * 2,
-                       .max_keysize = AES_MAX_KEY_SIZE * 2,
+                       .min_keysize = AES_MIN_KEY_SIZE,
+                       .max_keysize = AES_MAX_KEY_SIZE,
                        .ivsize = AES_BLOCK_SIZE,
                        },
                .cipher_mode = DRV_CIPHER_ESSIV,
index 4c25536..13b908e 100644 (file)
@@ -690,26 +690,22 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
        return min(srclen, dstlen);
 }
 
-static int chcr_cipher_fallback(struct crypto_sync_skcipher *cipher,
-                               u32 flags,
-                               struct scatterlist *src,
-                               struct scatterlist *dst,
-                               unsigned int nbytes,
+static int chcr_cipher_fallback(struct crypto_skcipher *cipher,
+                               struct skcipher_request *req,
                                u8 *iv,
                                unsigned short op_type)
 {
+       struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
        int err;
 
-       SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, cipher);
-
-       skcipher_request_set_sync_tfm(subreq, cipher);
-       skcipher_request_set_callback(subreq, flags, NULL, NULL);
-       skcipher_request_set_crypt(subreq, src, dst,
-                                  nbytes, iv);
+       skcipher_request_set_tfm(&reqctx->fallback_req, cipher);
+       skcipher_request_set_callback(&reqctx->fallback_req, req->base.flags,
+                                     req->base.complete, req->base.data);
+       skcipher_request_set_crypt(&reqctx->fallback_req, req->src, req->dst,
+                                  req->cryptlen, iv);
 
-       err = op_type ? crypto_skcipher_decrypt(subreq) :
-               crypto_skcipher_encrypt(subreq);
-       skcipher_request_zero(subreq);
+       err = op_type ? crypto_skcipher_decrypt(&reqctx->fallback_req) :
+                       crypto_skcipher_encrypt(&reqctx->fallback_req);
 
        return err;
 
@@ -924,11 +920,11 @@ static int chcr_cipher_fallback_setkey(struct crypto_skcipher *cipher,
 {
        struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(cipher));
 
-       crypto_sync_skcipher_clear_flags(ablkctx->sw_cipher,
+       crypto_skcipher_clear_flags(ablkctx->sw_cipher,
                                CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(ablkctx->sw_cipher,
+       crypto_skcipher_set_flags(ablkctx->sw_cipher,
                                cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK);
-       return crypto_sync_skcipher_setkey(ablkctx->sw_cipher, key, keylen);
+       return crypto_skcipher_setkey(ablkctx->sw_cipher, key, keylen);
 }
 
 static int chcr_aes_cbc_setkey(struct crypto_skcipher *cipher,
@@ -1206,13 +1202,8 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req,
                                      req);
                memcpy(req->iv, reqctx->init_iv, IV);
                atomic_inc(&adap->chcr_stats.fallback);
-               err = chcr_cipher_fallback(ablkctx->sw_cipher,
-                                    req->base.flags,
-                                    req->src,
-                                    req->dst,
-                                    req->cryptlen,
-                                    req->iv,
-                                    reqctx->op);
+               err = chcr_cipher_fallback(ablkctx->sw_cipher, req, req->iv,
+                                          reqctx->op);
                goto complete;
        }
 
@@ -1224,7 +1215,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req,
        wrparam.bytes = bytes;
        skb = create_cipher_wr(&wrparam);
        if (IS_ERR(skb)) {
-               pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
+               pr_err("%s : Failed to form WR. No memory\n", __func__);
                err = PTR_ERR(skb);
                goto unmap;
        }
@@ -1341,11 +1332,7 @@ static int process_cipher(struct skcipher_request *req,
                chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev,
                                      req);
 fallback:       atomic_inc(&adap->chcr_stats.fallback);
-               err = chcr_cipher_fallback(ablkctx->sw_cipher,
-                                          req->base.flags,
-                                          req->src,
-                                          req->dst,
-                                          req->cryptlen,
+               err = chcr_cipher_fallback(ablkctx->sw_cipher, req,
                                           subtype ==
                                           CRYPTO_ALG_SUB_TYPE_CTR_RFC3686 ?
                                           reqctx->iv : req->iv,
@@ -1486,14 +1473,15 @@ static int chcr_init_tfm(struct crypto_skcipher *tfm)
        struct chcr_context *ctx = crypto_skcipher_ctx(tfm);
        struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
-       ablkctx->sw_cipher = crypto_alloc_sync_skcipher(alg->base.cra_name, 0,
+       ablkctx->sw_cipher = crypto_alloc_skcipher(alg->base.cra_name, 0,
                                CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(ablkctx->sw_cipher)) {
                pr_err("failed to allocate fallback for %s\n", alg->base.cra_name);
                return PTR_ERR(ablkctx->sw_cipher);
        }
        init_completion(&ctx->cbc_aes_aio_done);
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx));
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx) +
+                                        crypto_skcipher_reqsize(ablkctx->sw_cipher));
 
        return chcr_device_init(ctx);
 }
@@ -1507,13 +1495,14 @@ static int chcr_rfc3686_init(struct crypto_skcipher *tfm)
        /*RFC3686 initialises IV counter value to 1, rfc3686(ctr(aes))
         * cannot be used as fallback in chcr_handle_cipher_response
         */
-       ablkctx->sw_cipher = crypto_alloc_sync_skcipher("ctr(aes)", 0,
+       ablkctx->sw_cipher = crypto_alloc_skcipher("ctr(aes)", 0,
                                CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(ablkctx->sw_cipher)) {
                pr_err("failed to allocate fallback for %s\n", alg->base.cra_name);
                return PTR_ERR(ablkctx->sw_cipher);
        }
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx));
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx) +
+                                   crypto_skcipher_reqsize(ablkctx->sw_cipher));
        return chcr_device_init(ctx);
 }
 
@@ -1523,7 +1512,7 @@ static void chcr_exit_tfm(struct crypto_skcipher *tfm)
        struct chcr_context *ctx = crypto_skcipher_ctx(tfm);
        struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
-       crypto_free_sync_skcipher(ablkctx->sw_cipher);
+       crypto_free_skcipher(ablkctx->sw_cipher);
 }
 
 static int get_alg_config(struct algo_param *params,
@@ -1556,7 +1545,7 @@ static int get_alg_config(struct algo_param *params,
                params->result_size = SHA512_DIGEST_SIZE;
                break;
        default:
-               pr_err("chcr : ERROR, unsupported digest size\n");
+               pr_err("ERROR, unsupported digest size\n");
                return -EINVAL;
        }
        return 0;
@@ -3571,7 +3560,7 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
                goto out;
 
        if (get_alg_config(&param, max_authsize)) {
-               pr_err("chcr : Unsupported digest size\n");
+               pr_err("Unsupported digest size\n");
                goto out;
        }
        subtype = get_aead_subtype(authenc);
@@ -3590,7 +3579,7 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
        } else if (keys.enckeylen == AES_KEYSIZE_256) {
                ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
        } else {
-               pr_err("chcr : Unsupported cipher key\n");
+               pr_err("Unsupported cipher key\n");
                goto out;
        }
 
@@ -3608,10 +3597,8 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
        }
        base_hash  = chcr_alloc_shash(max_authsize);
        if (IS_ERR(base_hash)) {
-               pr_err("chcr : Base driver cannot be loaded\n");
-               aeadctx->enckey_len = 0;
-               memzero_explicit(&keys, sizeof(keys));
-               return -EINVAL;
+               pr_err("Base driver cannot be loaded\n");
+               goto out;
        }
        {
                SHASH_DESC_ON_STACK(shash, base_hash);
@@ -3626,7 +3613,7 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
                                                  keys.authkeylen,
                                                  o_ptr);
                        if (err) {
-                               pr_err("chcr : Base driver cannot be loaded\n");
+                               pr_err("Base driver cannot be loaded\n");
                                goto out;
                        }
                        keys.authkeylen = max_authsize;
@@ -3711,7 +3698,7 @@ static int chcr_aead_digest_null_setkey(struct crypto_aead *authenc,
        } else if (keys.enckeylen == AES_KEYSIZE_256) {
                ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
        } else {
-               pr_err("chcr : Unsupported cipher key %d\n", keys.enckeylen);
+               pr_err("Unsupported cipher key %d\n", keys.enckeylen);
                goto out;
        }
        memcpy(aeadctx->key, keys.enckey, keys.enckeylen);
@@ -3747,7 +3734,7 @@ static int chcr_aead_op(struct aead_request *req,
 
        cdev = a_ctx(tfm)->dev;
        if (!cdev) {
-               pr_err("chcr : %s : No crypto device.\n", __func__);
+               pr_err("%s : No crypto device.\n", __func__);
                return -ENXIO;
        }
 
@@ -4445,6 +4432,7 @@ static int chcr_register_alg(void)
                        driver_algs[i].alg.skcipher.base.cra_module = THIS_MODULE;
                        driver_algs[i].alg.skcipher.base.cra_flags =
                                CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC |
+                               CRYPTO_ALG_ALLOCATES_MEMORY |
                                CRYPTO_ALG_NEED_FALLBACK;
                        driver_algs[i].alg.skcipher.base.cra_ctxsize =
                                sizeof(struct chcr_context) +
@@ -4456,7 +4444,8 @@ static int chcr_register_alg(void)
                        break;
                case CRYPTO_ALG_TYPE_AEAD:
                        driver_algs[i].alg.aead.base.cra_flags =
-                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK;
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK |
+                               CRYPTO_ALG_ALLOCATES_MEMORY;
                        driver_algs[i].alg.aead.encrypt = chcr_aead_encrypt;
                        driver_algs[i].alg.aead.decrypt = chcr_aead_decrypt;
                        driver_algs[i].alg.aead.init = chcr_aead_cra_init;
@@ -4476,7 +4465,8 @@ static int chcr_register_alg(void)
                        a_hash->halg.statesize = SZ_AHASH_REQ_CTX;
                        a_hash->halg.base.cra_priority = CHCR_CRA_PRIORITY;
                        a_hash->halg.base.cra_module = THIS_MODULE;
-                       a_hash->halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+                       a_hash->halg.base.cra_flags =
+                               CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY;
                        a_hash->halg.base.cra_alignmask = 0;
                        a_hash->halg.base.cra_exit = NULL;
 
@@ -4497,8 +4487,7 @@ static int chcr_register_alg(void)
                        break;
                }
                if (err) {
-                       pr_err("chcr : %s : Algorithm registration failed\n",
-                              name);
+                       pr_err("%s : Algorithm registration failed\n", name);
                        goto register_err;
                } else {
                        driver_algs[i].is_registered = 1;
index 31e427e..e89f9e0 100644 (file)
@@ -171,7 +171,7 @@ static inline struct chcr_context *h_ctx(struct crypto_ahash *tfm)
 }
 
 struct ablk_ctx {
-       struct crypto_sync_skcipher *sw_cipher;
+       struct crypto_skcipher *sw_cipher;
        __be32 key_ctx_hdr;
        unsigned int enckey_len;
        unsigned char ciph_mode;
@@ -305,6 +305,7 @@ struct chcr_skcipher_req_ctx {
        u8 init_iv[CHCR_MAX_CRYPTO_IV_LEN];
        u16 txqidx;
        u16 rxqidx;
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 struct chcr_alg_template {
index a3ee127..b135c74 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/topology.h>
 #include "hpre.h"
 
-#define HPRE_VF_NUM                    63
 #define HPRE_QUEUE_NUM_V2              1024
 #define HPRE_QM_ABNML_INT_MASK         0x100004
 #define HPRE_CTRL_CNT_CLR_CE_BIT       BIT(0)
@@ -46,9 +45,9 @@
 #define HPRE_CORE_IS_SCHD_OFFSET       0x90
 
 #define HPRE_RAS_CE_ENB                        0x301410
-#define HPRE_HAC_RAS_CE_ENABLE         0x3f
+#define HPRE_HAC_RAS_CE_ENABLE         0x1
 #define HPRE_RAS_NFE_ENB               0x301414
-#define HPRE_HAC_RAS_NFE_ENABLE                0x3fffc0
+#define HPRE_HAC_RAS_NFE_ENABLE                0x3ffffe
 #define HPRE_RAS_FE_ENB                        0x301418
 #define HPRE_HAC_RAS_FE_ENABLE         0
 
 #define HPRE_CORE_ECC_2BIT_ERR         BIT(1)
 #define HPRE_OOO_ECC_2BIT_ERR          BIT(5)
 
+#define HPRE_QM_BME_FLR                        BIT(7)
+#define HPRE_QM_PM_FLR                 BIT(11)
+#define HPRE_QM_SRIOV_FLR              BIT(12)
+
 #define HPRE_VIA_MSI_DSM               1
 #define HPRE_SQE_MASK_OFFSET           8
 #define HPRE_SQE_MASK_LEN              24
@@ -231,6 +234,22 @@ static int hpre_cfg_by_dsm(struct hisi_qm *qm)
        return 0;
 }
 
+/*
+ * For Hi1620, we should disable FLR triggered by hardware (BME/PM/SRIOV).
+ * Or it may stay in D3 state when we bind and unbind hpre quickly,
+ * as it does FLR triggered by hardware.
+ */
+static void disable_flr_of_bme(struct hisi_qm *qm)
+{
+       u32 val;
+
+       val = readl(HPRE_ADDR(qm, QM_PEH_AXUSER_CFG));
+       val &= ~(HPRE_QM_BME_FLR | HPRE_QM_SRIOV_FLR);
+       val |= HPRE_QM_PM_FLR;
+       writel(val, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG));
+       writel(PEH_AXUSER_CFG_ENABLE, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG_ENABLE));
+}
+
 static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
 {
        struct device *dev = &qm->pdev->dev;
@@ -242,10 +261,6 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
        writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_AWUSER_M_CFG_ENABLE));
        writel_relaxed(HPRE_QM_AXI_CFG_MASK, HPRE_ADDR(qm, QM_AXI_M_CFG));
 
-       /* disable FLR triggered by BME(bus master enable) */
-       writel(PEH_AXUSER_CFG, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG));
-       writel(PEH_AXUSER_CFG_ENABLE, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG_ENABLE));
-
        /* HPRE need more time, we close this interrupt */
        val = readl_relaxed(HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK));
        val |= BIT(HPRE_TIMEOUT_ABNML_BIT);
@@ -264,7 +279,7 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
        writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_AWUSR_CFG));
        writel(0x1, HPRE_ADDR(qm, HPRE_RDCHN_INI_CFG));
        ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, HPRE_RDCHN_INI_ST), val,
-                       val & BIT(0),
+                                        val & BIT(0),
                        HPRE_REG_RD_INTVRL_US,
                        HPRE_REG_RD_TMOUT_US);
        if (ret) {
@@ -296,6 +311,8 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
        if (ret)
                dev_err(dev, "acpi_evaluate_dsm err.\n");
 
+       disable_flr_of_bme(qm);
+
        return ret;
 }
 
@@ -372,7 +389,6 @@ static int hpre_current_qm_write(struct hpre_debugfs_file *file, u32 val)
        u32 num_vfs = qm->vfs_num;
        u32 vfq_num, tmp;
 
-
        if (val > num_vfs)
                return -EINVAL;
 
@@ -449,7 +465,7 @@ static int hpre_cluster_inqry_write(struct hpre_debugfs_file *file, u32 val)
 }
 
 static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
-                              size_t count, loff_t *pos)
+                                   size_t count, loff_t *pos)
 {
        struct hpre_debugfs_file *file = filp->private_data;
        char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
@@ -477,7 +493,7 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
 }
 
 static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
-                               size_t count, loff_t *pos)
+                                    size_t count, loff_t *pos)
 {
        struct hpre_debugfs_file *file = filp->private_data;
        char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
@@ -548,13 +564,15 @@ static int hpre_debugfs_atomic64_get(void *data, u64 *val)
 static int hpre_debugfs_atomic64_set(void *data, u64 val)
 {
        struct hpre_dfx *dfx_item = data;
-       struct hpre_dfx *hpre_dfx = dfx_item - HPRE_OVERTIME_THRHLD;
+       struct hpre_dfx *hpre_dfx = NULL;
 
-       if (val)
+       if (dfx_item->type == HPRE_OVERTIME_THRHLD) {
+               hpre_dfx = dfx_item - HPRE_OVERTIME_THRHLD;
+               atomic64_set(&hpre_dfx[HPRE_OVER_THRHLD_CNT].value, 0);
+       } else if (val) {
                return -EINVAL;
+       }
 
-       if (dfx_item->type == HPRE_OVERTIME_THRHLD)
-               atomic64_set(&hpre_dfx[HPRE_OVER_THRHLD_CNT].value, 0);
        atomic64_set(&dfx_item->value, val);
 
        return 0;
@@ -563,15 +581,17 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val)
 DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
                         hpre_debugfs_atomic64_set, "%llu\n");
 
-static int hpre_create_debugfs_file(struct hpre_debug *dbg, struct dentry *dir,
+static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
                                    enum hpre_ctrl_dbgfs_file type, int indx)
 {
+       struct hpre *hpre = container_of(qm, struct hpre, qm);
+       struct hpre_debug *dbg = &hpre->debug;
        struct dentry *file_dir;
 
        if (dir)
                file_dir = dir;
        else
-               file_dir = dbg->debug_root;
+               file_dir = qm->debug.debug_root;
 
        if (type >= HPRE_DEBUG_FILE_NUM)
                return -EINVAL;
@@ -586,10 +606,8 @@ static int hpre_create_debugfs_file(struct hpre_debug *dbg, struct dentry *dir,
        return 0;
 }
 
-static int hpre_pf_comm_regs_debugfs_init(struct hpre_debug *debug)
+static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm)
 {
-       struct hpre *hpre = container_of(debug, struct hpre, debug);
-       struct hisi_qm *qm = &hpre->qm;
        struct device *dev = &qm->pdev->dev;
        struct debugfs_regset32 *regset;
 
@@ -601,14 +619,12 @@ static int hpre_pf_comm_regs_debugfs_init(struct hpre_debug *debug)
        regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs);
        regset->base = qm->io_base;
 
-       debugfs_create_regset32("regs", 0444,  debug->debug_root, regset);
+       debugfs_create_regset32("regs", 0444,  qm->debug.debug_root, regset);
        return 0;
 }
 
-static int hpre_cluster_debugfs_init(struct hpre_debug *debug)
+static int hpre_cluster_debugfs_init(struct hisi_qm *qm)
 {
-       struct hpre *hpre = container_of(debug, struct hpre, debug);
-       struct hisi_qm *qm = &hpre->qm;
        struct device *dev = &qm->pdev->dev;
        char buf[HPRE_DBGFS_VAL_MAX_LEN];
        struct debugfs_regset32 *regset;
@@ -619,7 +635,7 @@ static int hpre_cluster_debugfs_init(struct hpre_debug *debug)
                ret = snprintf(buf, HPRE_DBGFS_VAL_MAX_LEN, "cluster%d", i);
                if (ret < 0)
                        return -EINVAL;
-               tmp_d = debugfs_create_dir(buf, debug->debug_root);
+               tmp_d = debugfs_create_dir(buf, qm->debug.debug_root);
 
                regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
                if (!regset)
@@ -630,7 +646,7 @@ static int hpre_cluster_debugfs_init(struct hpre_debug *debug)
                regset->base = qm->io_base + hpre_cluster_offsets[i];
 
                debugfs_create_regset32("regs", 0444, tmp_d, regset);
-               ret = hpre_create_debugfs_file(debug, tmp_d, HPRE_CLUSTER_CTRL,
+               ret = hpre_create_debugfs_file(qm, tmp_d, HPRE_CLUSTER_CTRL,
                                               i + HPRE_CLUSTER_CTRL);
                if (ret)
                        return ret;
@@ -639,32 +655,31 @@ static int hpre_cluster_debugfs_init(struct hpre_debug *debug)
        return 0;
 }
 
-static int hpre_ctrl_debug_init(struct hpre_debug *debug)
+static int hpre_ctrl_debug_init(struct hisi_qm *qm)
 {
        int ret;
 
-       ret = hpre_create_debugfs_file(debug, NULL, HPRE_CURRENT_QM,
+       ret = hpre_create_debugfs_file(qm, NULL, HPRE_CURRENT_QM,
                                       HPRE_CURRENT_QM);
        if (ret)
                return ret;
 
-       ret = hpre_create_debugfs_file(debug, NULL, HPRE_CLEAR_ENABLE,
+       ret = hpre_create_debugfs_file(qm, NULL, HPRE_CLEAR_ENABLE,
                                       HPRE_CLEAR_ENABLE);
        if (ret)
                return ret;
 
-       ret = hpre_pf_comm_regs_debugfs_init(debug);
+       ret = hpre_pf_comm_regs_debugfs_init(qm);
        if (ret)
                return ret;
 
-       return hpre_cluster_debugfs_init(debug);
+       return hpre_cluster_debugfs_init(qm);
 }
 
-static void hpre_dfx_debug_init(struct hpre_debug *debug)
+static void hpre_dfx_debug_init(struct hisi_qm *qm)
 {
-       struct hpre *hpre = container_of(debug, struct hpre, debug);
+       struct hpre *hpre = container_of(qm, struct hpre, qm);
        struct hpre_dfx *dfx = hpre->debug.dfx;
-       struct hisi_qm *qm = &hpre->qm;
        struct dentry *parent;
        int i;
 
@@ -676,30 +691,27 @@ static void hpre_dfx_debug_init(struct hpre_debug *debug)
        }
 }
 
-static int hpre_debugfs_init(struct hpre *hpre)
+static int hpre_debugfs_init(struct hisi_qm *qm)
 {
-       struct hisi_qm *qm = &hpre->qm;
        struct device *dev = &qm->pdev->dev;
-       struct dentry *dir;
        int ret;
 
-       dir = debugfs_create_dir(dev_name(dev), hpre_debugfs_root);
-       qm->debug.debug_root = dir;
+       qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
+                                                 hpre_debugfs_root);
+
        qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
        qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
-
        ret = hisi_qm_debug_init(qm);
        if (ret)
                goto failed_to_create;
 
        if (qm->pdev->device == HPRE_PCI_DEVICE_ID) {
-               hpre->debug.debug_root = dir;
-               ret = hpre_ctrl_debug_init(&hpre->debug);
+               ret = hpre_ctrl_debug_init(qm);
                if (ret)
                        goto failed_to_create;
        }
 
-       hpre_dfx_debug_init(&hpre->debug);
+       hpre_dfx_debug_init(qm);
 
        return 0;
 
@@ -708,10 +720,8 @@ failed_to_create:
        return ret;
 }
 
-static void hpre_debugfs_exit(struct hpre *hpre)
+static void hpre_debugfs_exit(struct hisi_qm *qm)
 {
-       struct hisi_qm *qm = &hpre->qm;
-
        debugfs_remove_recursive(qm->debug.debug_root);
 }
 
@@ -732,6 +742,7 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
        if (qm->fun_type == QM_HW_PF) {
                qm->qp_base = HPRE_PF_DEF_Q_BASE;
                qm->qp_num = pf_q_num;
+               qm->debug.curr_qm_qp_num = pf_q_num;
                qm->qm_list = &hpre_devices;
        }
 
@@ -849,7 +860,7 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (ret)
                goto err_with_err_init;
 
-       ret = hpre_debugfs_init(hpre);
+       ret = hpre_debugfs_init(qm);
        if (ret)
                dev_warn(&pdev->dev, "init debugfs fail!\n");
 
@@ -874,6 +885,7 @@ err_with_crypto_register:
 
 err_with_qm_start:
        hisi_qm_del_from_list(qm, &hpre_devices);
+       hpre_debugfs_exit(qm);
        hisi_qm_stop(qm);
 
 err_with_err_init:
@@ -905,7 +917,7 @@ static void hpre_remove(struct pci_dev *pdev)
                qm->debug.curr_qm_qp_num = 0;
        }
 
-       hpre_debugfs_exit(hpre);
+       hpre_debugfs_exit(qm);
        hisi_qm_stop(qm);
        hisi_qm_dev_err_uninit(qm);
        hisi_qm_uninit(qm);
@@ -924,7 +936,8 @@ static struct pci_driver hpre_pci_driver = {
        .id_table               = hpre_dev_ids,
        .probe                  = hpre_probe,
        .remove                 = hpre_remove,
-       .sriov_configure        = hisi_qm_sriov_configure,
+       .sriov_configure        = IS_ENABLED(CONFIG_PCI_IOV) ?
+                                 hisi_qm_sriov_configure : NULL,
        .err_handler            = &hpre_err_handler,
 };
 
index 9bb263c..6527c53 100644 (file)
@@ -1064,19 +1064,10 @@ static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
        char buf[QM_DBG_READ_LEN];
        int len;
 
-       if (*pos)
-               return 0;
-
-       if (count < QM_DBG_READ_LEN)
-               return -ENOSPC;
-
-       len = snprintf(buf, QM_DBG_READ_LEN, "%s\n",
-                      "Please echo help to cmd to get help information");
+       len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n",
+                       "Please echo help to cmd to get help information");
 
-       if (copy_to_user(buffer, buf, len))
-               return -EFAULT;
-
-       return (*pos = len);
+       return simple_read_from_buffer(buffer, count, pos, buf, len);
 }
 
 static void *qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
@@ -1741,7 +1732,7 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
 }
 EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
 
-static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
+static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
 {
        struct hisi_qm *qm = qp->qm;
        struct device *dev = &qm->pdev->dev;
@@ -1813,7 +1804,7 @@ static int qm_start_qp_nolock(struct hisi_qp *qp, unsigned long arg)
        struct hisi_qm *qm = qp->qm;
        struct device *dev = &qm->pdev->dev;
        int qp_id = qp->qp_id;
-       int pasid = arg;
+       u32 pasid = arg;
        int ret;
 
        if (!qm_qp_avail_state(qm, qp, QP_START))
@@ -2179,8 +2170,12 @@ static int qm_alloc_uacce(struct hisi_qm *qm)
                .flags = UACCE_DEV_SVA,
                .ops = &uacce_qm_ops,
        };
+       int ret;
 
-       strncpy(interface.name, pdev->driver->name, sizeof(interface.name));
+       ret = strscpy(interface.name, pdev->driver->name,
+                     sizeof(interface.name));
+       if (ret < 0)
+               return -ENAMETOOLONG;
 
        uacce = uacce_alloc(&pdev->dev, &interface);
        if (IS_ERR(uacce))
@@ -2691,24 +2686,12 @@ static ssize_t qm_status_read(struct file *filp, char __user *buffer,
 {
        struct hisi_qm *qm = filp->private_data;
        char buf[QM_DBG_READ_LEN];
-       int val, cp_len, len;
-
-       if (*pos)
-               return 0;
-
-       if (count < QM_DBG_READ_LEN)
-               return -ENOSPC;
+       int val, len;
 
        val = atomic_read(&qm->status.flags);
-       len = snprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]);
-       if (!len)
-               return -EFAULT;
-
-       cp_len = copy_to_user(buffer, buf, len);
-       if (cp_len)
-               return -EFAULT;
+       len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]);
 
-       return (*pos = len);
+       return simple_read_from_buffer(buffer, count, pos, buf, len);
 }
 
 static const struct file_operations qm_status_fops = {
index 0a351de..6c1d3c7 100644 (file)
@@ -44,6 +44,7 @@
 #define QM_AXI_M_CFG                   0x1000ac
 #define AXI_M_CFG                      0xffff
 #define QM_AXI_M_CFG_ENABLE            0x1000b0
+#define AM_CFG_SINGLE_PORT_MAX_TRANS   0x300014
 #define AXI_M_CFG_ENABLE               0xffffffff
 #define QM_PEH_AXUSER_CFG              0x1000cc
 #define QM_PEH_AXUSER_CFG_ENABLE       0x1000d0
index c27e716..8ca945a 100644 (file)
@@ -175,7 +175,8 @@ static int sec_alloc_and_fill_hw_sgl(struct sec_hw_sgl **sec_sgl,
                                     dma_addr_t *psec_sgl,
                                     struct scatterlist *sgl,
                                     int count,
-                                    struct sec_dev_info *info)
+                                    struct sec_dev_info *info,
+                                    gfp_t gfp)
 {
        struct sec_hw_sgl *sgl_current = NULL;
        struct sec_hw_sgl *sgl_next;
@@ -190,7 +191,7 @@ static int sec_alloc_and_fill_hw_sgl(struct sec_hw_sgl **sec_sgl,
                sge_index = i % SEC_MAX_SGE_NUM;
                if (sge_index == 0) {
                        sgl_next = dma_pool_zalloc(info->hw_sgl_pool,
-                                                  GFP_KERNEL, &sgl_next_dma);
+                                                  gfp, &sgl_next_dma);
                        if (!sgl_next) {
                                ret = -ENOMEM;
                                goto err_free_hw_sgls;
@@ -545,14 +546,14 @@ void sec_alg_callback(struct sec_bd_info *resp, void *shadow)
 }
 
 static int sec_alg_alloc_and_calc_split_sizes(int length, size_t **split_sizes,
-                                             int *steps)
+                                             int *steps, gfp_t gfp)
 {
        size_t *sizes;
        int i;
 
        /* Split into suitable sized blocks */
        *steps = roundup(length, SEC_REQ_LIMIT) / SEC_REQ_LIMIT;
-       sizes = kcalloc(*steps, sizeof(*sizes), GFP_KERNEL);
+       sizes = kcalloc(*steps, sizeof(*sizes), gfp);
        if (!sizes)
                return -ENOMEM;
 
@@ -568,7 +569,7 @@ static int sec_map_and_split_sg(struct scatterlist *sgl, size_t *split_sizes,
                                int steps, struct scatterlist ***splits,
                                int **splits_nents,
                                int sgl_len_in,
-                               struct device *dev)
+                               struct device *dev, gfp_t gfp)
 {
        int ret, count;
 
@@ -576,12 +577,12 @@ static int sec_map_and_split_sg(struct scatterlist *sgl, size_t *split_sizes,
        if (!count)
                return -EINVAL;
 
-       *splits = kcalloc(steps, sizeof(struct scatterlist *), GFP_KERNEL);
+       *splits = kcalloc(steps, sizeof(struct scatterlist *), gfp);
        if (!*splits) {
                ret = -ENOMEM;
                goto err_unmap_sg;
        }
-       *splits_nents = kcalloc(steps, sizeof(int), GFP_KERNEL);
+       *splits_nents = kcalloc(steps, sizeof(int), gfp);
        if (!*splits_nents) {
                ret = -ENOMEM;
                goto err_free_splits;
@@ -589,7 +590,7 @@ static int sec_map_and_split_sg(struct scatterlist *sgl, size_t *split_sizes,
 
        /* output the scatter list before and after this */
        ret = sg_split(sgl, count, 0, steps, split_sizes,
-                      *splits, *splits_nents, GFP_KERNEL);
+                      *splits, *splits_nents, gfp);
        if (ret) {
                ret = -ENOMEM;
                goto err_free_splits_nents;
@@ -630,13 +631,13 @@ static struct sec_request_el
                           int el_size, bool different_dest,
                           struct scatterlist *sgl_in, int n_ents_in,
                           struct scatterlist *sgl_out, int n_ents_out,
-                          struct sec_dev_info *info)
+                          struct sec_dev_info *info, gfp_t gfp)
 {
        struct sec_request_el *el;
        struct sec_bd_info *req;
        int ret;
 
-       el = kzalloc(sizeof(*el), GFP_KERNEL);
+       el = kzalloc(sizeof(*el), gfp);
        if (!el)
                return ERR_PTR(-ENOMEM);
        el->el_length = el_size;
@@ -668,7 +669,7 @@ static struct sec_request_el
        el->sgl_in = sgl_in;
 
        ret = sec_alloc_and_fill_hw_sgl(&el->in, &el->dma_in, el->sgl_in,
-                                       n_ents_in, info);
+                                       n_ents_in, info, gfp);
        if (ret)
                goto err_free_el;
 
@@ -679,7 +680,7 @@ static struct sec_request_el
                el->sgl_out = sgl_out;
                ret = sec_alloc_and_fill_hw_sgl(&el->out, &el->dma_out,
                                                el->sgl_out,
-                                               n_ents_out, info);
+                                               n_ents_out, info, gfp);
                if (ret)
                        goto err_free_hw_sgl_in;
 
@@ -720,6 +721,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
        int *splits_out_nents = NULL;
        struct sec_request_el *el, *temp;
        bool split = skreq->src != skreq->dst;
+       gfp_t gfp = skreq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
 
        mutex_init(&sec_req->lock);
        sec_req->req_base = &skreq->base;
@@ -728,13 +730,13 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
        sec_req->len_in = sg_nents(skreq->src);
 
        ret = sec_alg_alloc_and_calc_split_sizes(skreq->cryptlen, &split_sizes,
-                                                &steps);
+                                                &steps, gfp);
        if (ret)
                return ret;
        sec_req->num_elements = steps;
        ret = sec_map_and_split_sg(skreq->src, split_sizes, steps, &splits_in,
                                   &splits_in_nents, sec_req->len_in,
-                                  info->dev);
+                                  info->dev, gfp);
        if (ret)
                goto err_free_split_sizes;
 
@@ -742,7 +744,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
                sec_req->len_out = sg_nents(skreq->dst);
                ret = sec_map_and_split_sg(skreq->dst, split_sizes, steps,
                                           &splits_out, &splits_out_nents,
-                                          sec_req->len_out, info->dev);
+                                          sec_req->len_out, info->dev, gfp);
                if (ret)
                        goto err_unmap_in_sg;
        }
@@ -775,7 +777,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
                                               splits_in[i], splits_in_nents[i],
                                               split ? splits_out[i] : NULL,
                                               split ? splits_out_nents[i] : 0,
-                                              info);
+                                              info, gfp);
                if (IS_ERR(el)) {
                        ret = PTR_ERR(el);
                        goto err_free_elements;
@@ -932,7 +934,8 @@ static struct skcipher_alg sec_algs[] = {
                        .cra_name = "ecb(aes)",
                        .cra_driver_name = "hisi_sec_aes_ecb",
                        .cra_priority = 4001,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct sec_alg_tfm_ctx),
                        .cra_alignmask = 0,
@@ -951,7 +954,8 @@ static struct skcipher_alg sec_algs[] = {
                        .cra_name = "cbc(aes)",
                        .cra_driver_name = "hisi_sec_aes_cbc",
                        .cra_priority = 4001,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct sec_alg_tfm_ctx),
                        .cra_alignmask = 0,
@@ -970,7 +974,8 @@ static struct skcipher_alg sec_algs[] = {
                        .cra_name = "ctr(aes)",
                        .cra_driver_name = "hisi_sec_aes_ctr",
                        .cra_priority = 4001,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct sec_alg_tfm_ctx),
                        .cra_alignmask = 0,
@@ -989,7 +994,8 @@ static struct skcipher_alg sec_algs[] = {
                        .cra_name = "xts(aes)",
                        .cra_driver_name = "hisi_sec_aes_xts",
                        .cra_priority = 4001,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct sec_alg_tfm_ctx),
                        .cra_alignmask = 0,
@@ -1009,7 +1015,8 @@ static struct skcipher_alg sec_algs[] = {
                        .cra_name = "ecb(des)",
                        .cra_driver_name = "hisi_sec_des_ecb",
                        .cra_priority = 4001,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct sec_alg_tfm_ctx),
                        .cra_alignmask = 0,
@@ -1028,7 +1035,8 @@ static struct skcipher_alg sec_algs[] = {
                        .cra_name = "cbc(des)",
                        .cra_driver_name = "hisi_sec_des_cbc",
                        .cra_priority = 4001,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct sec_alg_tfm_ctx),
                        .cra_alignmask = 0,
@@ -1047,7 +1055,8 @@ static struct skcipher_alg sec_algs[] = {
                        .cra_name = "cbc(des3_ede)",
                        .cra_driver_name = "hisi_sec_3des_cbc",
                        .cra_priority = 4001,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct sec_alg_tfm_ctx),
                        .cra_alignmask = 0,
@@ -1066,7 +1075,8 @@ static struct skcipher_alg sec_algs[] = {
                        .cra_name = "ecb(des3_ede)",
                        .cra_driver_name = "hisi_sec_3des_ecb",
                        .cra_priority = 4001,
-                       .cra_flags = CRYPTO_ALG_ASYNC,
+                       .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct sec_alg_tfm_ctx),
                        .cra_alignmask = 0,
index 7b64aca..037762b 100644 (file)
@@ -46,9 +46,11 @@ struct sec_req {
 
        struct sec_cipher_req c_req;
        struct sec_aead_req aead_req;
+       struct list_head backlog_head;
 
        int err_type;
        int req_id;
+       int flag;
 
        /* Status of the SEC request */
        bool fake_busy;
@@ -104,6 +106,7 @@ struct sec_qp_ctx {
        struct sec_alg_res res[QM_Q_DEPTH];
        struct sec_ctx *ctx;
        struct mutex req_lock;
+       struct list_head backlog;
        struct hisi_acc_sgl_pool *c_in_pool;
        struct hisi_acc_sgl_pool *c_out_pool;
        atomic_t pending_reqs;
@@ -161,6 +164,7 @@ struct sec_dfx {
        atomic64_t send_cnt;
        atomic64_t recv_cnt;
        atomic64_t send_busy_cnt;
+       atomic64_t recv_busy_cnt;
        atomic64_t err_bd_cnt;
        atomic64_t invalid_req_cnt;
        atomic64_t done_flag_cnt;
index 64614a9..497969a 100644 (file)
@@ -166,6 +166,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
        req = qp_ctx->req_list[le16_to_cpu(bd->type2.tag)];
        if (unlikely(!req)) {
                atomic64_inc(&dfx->invalid_req_cnt);
+               atomic_inc(&qp->qp_status.used);
                return;
        }
        req->err_type = bd->type2.error_type;
@@ -198,21 +199,30 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
        struct sec_qp_ctx *qp_ctx = req->qp_ctx;
        int ret;
 
+       if (ctx->fake_req_limit <=
+           atomic_read(&qp_ctx->qp->qp_status.used) &&
+           !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG))
+               return -EBUSY;
+
        mutex_lock(&qp_ctx->req_lock);
        ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);
+
+       if (ctx->fake_req_limit <=
+           atomic_read(&qp_ctx->qp->qp_status.used) && !ret) {
+               list_add_tail(&req->backlog_head, &qp_ctx->backlog);
+               atomic64_inc(&ctx->sec->debug.dfx.send_cnt);
+               atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt);
+               mutex_unlock(&qp_ctx->req_lock);
+               return -EBUSY;
+       }
        mutex_unlock(&qp_ctx->req_lock);
-       atomic64_inc(&ctx->sec->debug.dfx.send_cnt);
 
        if (unlikely(ret == -EBUSY))
                return -ENOBUFS;
 
-       if (!ret) {
-               if (req->fake_busy) {
-                       atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt);
-                       ret = -EBUSY;
-               } else {
-                       ret = -EINPROGRESS;
-               }
+       if (likely(!ret)) {
+               ret = -EINPROGRESS;
+               atomic64_inc(&ctx->sec->debug.dfx.send_cnt);
        }
 
        return ret;
@@ -373,8 +383,8 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
        qp_ctx->ctx = ctx;
 
        mutex_init(&qp_ctx->req_lock);
-       atomic_set(&qp_ctx->pending_reqs, 0);
        idr_init(&qp_ctx->req_idr);
+       INIT_LIST_HEAD(&qp_ctx->backlog);
 
        qp_ctx->c_in_pool = hisi_acc_create_sgl_pool(dev, QM_Q_DEPTH,
                                                     SEC_SGL_SGE_NR);
@@ -1048,21 +1058,49 @@ static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type)
                dev_err(SEC_CTX_DEV(req->ctx), "copy output iv error!\n");
 }
 
+static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
+                               struct sec_qp_ctx *qp_ctx)
+{
+       struct sec_req *backlog_req = NULL;
+
+       mutex_lock(&qp_ctx->req_lock);
+       if (ctx->fake_req_limit >=
+           atomic_read(&qp_ctx->qp->qp_status.used) &&
+           !list_empty(&qp_ctx->backlog)) {
+               backlog_req = list_first_entry(&qp_ctx->backlog,
+                               typeof(*backlog_req), backlog_head);
+               list_del(&backlog_req->backlog_head);
+       }
+       mutex_unlock(&qp_ctx->req_lock);
+
+       return backlog_req;
+}
+
 static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
                                  int err)
 {
        struct skcipher_request *sk_req = req->c_req.sk_req;
        struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+       struct skcipher_request *backlog_sk_req;
+       struct sec_req *backlog_req;
 
-       atomic_dec(&qp_ctx->pending_reqs);
        sec_free_req_id(req);
 
        /* IV output at encrypto of CBC mode */
        if (!err && ctx->c_ctx.c_mode == SEC_CMODE_CBC && req->c_req.encrypt)
                sec_update_iv(req, SEC_SKCIPHER);
 
-       if (req->fake_busy)
-               sk_req->base.complete(&sk_req->base, -EINPROGRESS);
+       while (1) {
+               backlog_req = sec_back_req_clear(ctx, qp_ctx);
+               if (!backlog_req)
+                       break;
+
+               backlog_sk_req = backlog_req->c_req.sk_req;
+               backlog_sk_req->base.complete(&backlog_sk_req->base,
+                                               -EINPROGRESS);
+               atomic64_inc(&ctx->sec->debug.dfx.recv_busy_cnt);
+       }
+
 
        sk_req->base.complete(&sk_req->base, err);
 }
@@ -1133,10 +1171,10 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
        struct sec_cipher_req *c_req = &req->c_req;
        size_t authsize = crypto_aead_authsize(tfm);
        struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+       struct aead_request *backlog_aead_req;
+       struct sec_req *backlog_req;
        size_t sz;
 
-       atomic_dec(&qp_ctx->pending_reqs);
-
        if (!err && c->c_ctx.c_mode == SEC_CMODE_CBC && c_req->encrypt)
                sec_update_iv(req, SEC_AEAD);
 
@@ -1157,17 +1195,22 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
 
        sec_free_req_id(req);
 
-       if (req->fake_busy)
-               a_req->base.complete(&a_req->base, -EINPROGRESS);
+       while (1) {
+               backlog_req = sec_back_req_clear(c, qp_ctx);
+               if (!backlog_req)
+                       break;
+
+               backlog_aead_req = backlog_req->aead_req.aead_req;
+               backlog_aead_req->base.complete(&backlog_aead_req->base,
+                                               -EINPROGRESS);
+               atomic64_inc(&c->sec->debug.dfx.recv_busy_cnt);
+       }
 
        a_req->base.complete(&a_req->base, err);
 }
 
 static void sec_request_uninit(struct sec_ctx *ctx, struct sec_req *req)
 {
-       struct sec_qp_ctx *qp_ctx = req->qp_ctx;
-
-       atomic_dec(&qp_ctx->pending_reqs);
        sec_free_req_id(req);
        sec_free_queue_id(ctx, req);
 }
@@ -1187,11 +1230,6 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req)
                return req->req_id;
        }
 
-       if (ctx->fake_req_limit <= atomic_inc_return(&qp_ctx->pending_reqs))
-               req->fake_busy = true;
-       else
-               req->fake_busy = false;
-
        return 0;
 }
 
@@ -1213,7 +1251,8 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
                sec_update_iv(req, ctx->alg_type);
 
        ret = ctx->req_op->bd_send(ctx, req);
-       if (unlikely(ret != -EBUSY && ret != -EINPROGRESS)) {
+       if (unlikely((ret != -EBUSY && ret != -EINPROGRESS) ||
+               (ret == -EBUSY && !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG)))) {
                dev_err_ratelimited(SEC_CTX_DEV(ctx), "send sec request failed!\n");
                goto err_send_req;
        }
@@ -1407,6 +1446,7 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
        if (!sk_req->cryptlen)
                return 0;
 
+       req->flag = sk_req->base.flags;
        req->c_req.sk_req = sk_req;
        req->c_req.encrypt = encrypt;
        req->ctx = ctx;
@@ -1435,7 +1475,7 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
                .cra_name = sec_cra_name,\
                .cra_driver_name = "hisi_sec_"sec_cra_name,\
                .cra_priority = SEC_PRIORITY,\
-               .cra_flags = CRYPTO_ALG_ASYNC,\
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,\
                .cra_blocksize = blk_size,\
                .cra_ctxsize = sizeof(struct sec_ctx),\
                .cra_module = THIS_MODULE,\
@@ -1530,6 +1570,7 @@ static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
        struct sec_ctx *ctx = crypto_aead_ctx(tfm);
        int ret;
 
+       req->flag = a_req->base.flags;
        req->aead_req.aead_req = a_req;
        req->c_req.encrypt = encrypt;
        req->ctx = ctx;
@@ -1558,7 +1599,7 @@ static int sec_aead_decrypt(struct aead_request *a_req)
                .cra_name = sec_cra_name,\
                .cra_driver_name = "hisi_sec_"sec_cra_name,\
                .cra_priority = SEC_PRIORITY,\
-               .cra_flags = CRYPTO_ALG_ASYNC,\
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,\
                .cra_blocksize = blk_size,\
                .cra_ctxsize = sizeof(struct sec_ctx),\
                .cra_module = THIS_MODULE,\
index a4cb58b..2297425 100644 (file)
 #define SEC_PF_PCI_DEVICE_ID           0xa255
 #define SEC_VF_PCI_DEVICE_ID           0xa256
 
-#define SEC_XTS_MIV_ENABLE_REG         0x301384
-#define SEC_XTS_MIV_ENABLE_MSK         0x7FFFFFFF
-#define SEC_XTS_MIV_DISABLE_MSK                0xFFFFFFFF
-#define SEC_BD_ERR_CHK_EN1             0xfffff7fd
-#define SEC_BD_ERR_CHK_EN2             0xffffbfff
+#define SEC_BD_ERR_CHK_EN0             0xEFFFFFFF
+#define SEC_BD_ERR_CHK_EN1             0x7ffff7fd
+#define SEC_BD_ERR_CHK_EN3             0xffffbfff
 
 #define SEC_SQE_SIZE                   128
 #define SEC_SQ_SIZE                    (SEC_SQE_SIZE * QM_Q_DEPTH)
-#define SEC_PF_DEF_Q_NUM               64
+#define SEC_PF_DEF_Q_NUM               256
 #define SEC_PF_DEF_Q_BASE              0
-#define SEC_CTX_Q_NUM_DEF              24
+#define SEC_CTX_Q_NUM_DEF              2
 #define SEC_CTX_Q_NUM_MAX              32
 
 #define SEC_CTRL_CNT_CLR_CE            0x301120
 #define SEC_ECC_ADDR(err)                      ((err) >> 0)
 #define SEC_CORE_INT_DISABLE           0x0
 #define SEC_CORE_INT_ENABLE            0x1ff
+#define SEC_CORE_INT_CLEAR             0x1ff
+#define SEC_SAA_ENABLE                 0x17f
 
-#define SEC_RAS_CE_REG                 0x50
-#define SEC_RAS_FE_REG                 0x54
-#define SEC_RAS_NFE_REG                        0x58
+#define SEC_RAS_CE_REG                 0x301050
+#define SEC_RAS_FE_REG                 0x301054
+#define SEC_RAS_NFE_REG                        0x301058
 #define SEC_RAS_CE_ENB_MSK             0x88
 #define SEC_RAS_FE_ENB_MSK             0x0
 #define SEC_RAS_NFE_ENB_MSK            0x177
 #define SEC_RAS_DISABLE                        0x0
 #define SEC_MEM_START_INIT_REG         0x0100
 #define SEC_MEM_INIT_DONE_REG          0x0104
-#define SEC_QM_ABNORMAL_INT_MASK       0x100004
 
 #define SEC_CONTROL_REG                        0x0200
 #define SEC_TRNG_EN_SHIFT              8
 
 #define SEC_INTERFACE_USER_CTRL0_REG   0x0220
 #define SEC_INTERFACE_USER_CTRL1_REG   0x0224
+#define SEC_SAA_EN_REG                                 0x0270
+#define SEC_BD_ERR_CHK_EN_REG0         0x0380
 #define SEC_BD_ERR_CHK_EN_REG1         0x0384
-#define SEC_BD_ERR_CHK_EN_REG2         0x038c
+#define SEC_BD_ERR_CHK_EN_REG3         0x038c
 
 #define SEC_USER0_SMMU_NORMAL          (BIT(23) | BIT(15))
 #define SEC_USER1_SMMU_NORMAL          (BIT(31) | BIT(23) | BIT(15) | BIT(7))
@@ -77,8 +78,8 @@
 
 #define SEC_DELAY_10_US                        10
 #define SEC_POLL_TIMEOUT_US            1000
-#define SEC_VF_CNT_MASK                        0xffffffc0
 #define SEC_DBGFS_VAL_MAX_LEN          20
+#define SEC_SINGLE_PORT_MAX_TRANS      0x2060
 
 #define SEC_SQE_MASK_OFFSET            64
 #define SEC_SQE_MASK_LEN               48
@@ -122,6 +123,7 @@ static struct sec_dfx_item sec_dfx_labels[] = {
        {"send_cnt", offsetof(struct sec_dfx, send_cnt)},
        {"recv_cnt", offsetof(struct sec_dfx, recv_cnt)},
        {"send_busy_cnt", offsetof(struct sec_dfx, send_busy_cnt)},
+       {"recv_busy_cnt", offsetof(struct sec_dfx, recv_busy_cnt)},
        {"err_bd_cnt", offsetof(struct sec_dfx, err_bd_cnt)},
        {"invalid_req_cnt", offsetof(struct sec_dfx, invalid_req_cnt)},
        {"done_flag_cnt", offsetof(struct sec_dfx, done_flag_cnt)},
@@ -191,7 +193,7 @@ static const struct kernel_param_ops sec_ctx_q_num_ops = {
 };
 static u32 ctx_q_num = SEC_CTX_Q_NUM_DEF;
 module_param_cb(ctx_q_num, &sec_ctx_q_num_ops, &ctx_q_num, 0444);
-MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (24 default, 2, 4, ..., 32)");
+MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (2 default, 2, 4, ..., 32)");
 
 static const struct kernel_param_ops vfs_num_ops = {
        .set = vfs_num_set,
@@ -280,7 +282,7 @@ static int sec_engine_init(struct hisi_qm *qm)
                                         reg, reg & 0x1, SEC_DELAY_10_US,
                                         SEC_POLL_TIMEOUT_US);
        if (ret) {
-               dev_err(&qm->pdev->dev, "fail to init sec mem\n");
+               pci_err(qm->pdev, "fail to init sec mem\n");
                return ret;
        }
 
@@ -296,25 +298,25 @@ static int sec_engine_init(struct hisi_qm *qm)
        reg |= SEC_USER1_SMMU_NORMAL;
        writel_relaxed(reg, SEC_ADDR(qm, SEC_INTERFACE_USER_CTRL1_REG));
 
+       writel(SEC_SINGLE_PORT_MAX_TRANS,
+              qm->io_base + AM_CFG_SINGLE_PORT_MAX_TRANS);
+
+       writel(SEC_SAA_ENABLE, SEC_ADDR(qm, SEC_SAA_EN_REG));
+
+       /* Enable sm4 extra mode, as ctr/ecb */
+       writel_relaxed(SEC_BD_ERR_CHK_EN0,
+                      SEC_ADDR(qm, SEC_BD_ERR_CHK_EN_REG0));
+       /* Enable sm4 xts mode multiple iv */
        writel_relaxed(SEC_BD_ERR_CHK_EN1,
                       SEC_ADDR(qm, SEC_BD_ERR_CHK_EN_REG1));
-       writel_relaxed(SEC_BD_ERR_CHK_EN2,
-                      SEC_ADDR(qm, SEC_BD_ERR_CHK_EN_REG2));
-
-       /* enable clock gate control */
-       reg = readl_relaxed(SEC_ADDR(qm, SEC_CONTROL_REG));
-       reg |= SEC_CLK_GATE_ENABLE;
-       writel_relaxed(reg, SEC_ADDR(qm, SEC_CONTROL_REG));
+       writel_relaxed(SEC_BD_ERR_CHK_EN3,
+                      SEC_ADDR(qm, SEC_BD_ERR_CHK_EN_REG3));
 
        /* config endian */
        reg = readl_relaxed(SEC_ADDR(qm, SEC_CONTROL_REG));
        reg |= sec_get_endian(qm);
        writel_relaxed(reg, SEC_ADDR(qm, SEC_CONTROL_REG));
 
-       /* Enable sm4 xts mode multiple iv */
-       writel_relaxed(SEC_XTS_MIV_ENABLE_MSK,
-                      qm->io_base + SEC_XTS_MIV_ENABLE_REG);
-
        return 0;
 }
 
@@ -346,10 +348,17 @@ static int sec_set_user_domain_and_cache(struct hisi_qm *qm)
 /* sec_debug_regs_clear() - clear the sec debug regs */
 static void sec_debug_regs_clear(struct hisi_qm *qm)
 {
+       int i;
+
        /* clear current_qm */
        writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
        writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
 
+       /* clear sec dfx regs */
+       writel(0x1, qm->io_base + SEC_CTRL_CNT_CLR_CE);
+       for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++)
+               readl(qm->io_base + sec_dfx_regs[i].offset);
+
        /* clear rdclr_en */
        writel(0x0, qm->io_base + SEC_CTRL_CNT_CLR_CE);
 
@@ -362,14 +371,14 @@ static void sec_hw_error_enable(struct hisi_qm *qm)
 
        if (qm->ver == QM_HW_V1) {
                writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
-               dev_info(&qm->pdev->dev, "V1 not support hw error handle\n");
+               pci_info(qm->pdev, "V1 not support hw error handle\n");
                return;
        }
 
-       val = readl(qm->io_base + SEC_CONTROL_REG);
+       val = readl(SEC_ADDR(qm, SEC_CONTROL_REG));
 
        /* clear SEC hw error source if having */
-       writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_SOURCE);
+       writel(SEC_CORE_INT_CLEAR, qm->io_base + SEC_CORE_INT_SOURCE);
 
        /* enable SEC hw error interrupts */
        writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK);
@@ -382,14 +391,14 @@ static void sec_hw_error_enable(struct hisi_qm *qm)
        /* enable SEC block master OOO when m-bit error occur */
        val = val | SEC_AXI_SHUTDOWN_ENABLE;
 
-       writel(val, qm->io_base + SEC_CONTROL_REG);
+       writel(val, SEC_ADDR(qm, SEC_CONTROL_REG));
 }
 
 static void sec_hw_error_disable(struct hisi_qm *qm)
 {
        u32 val;
 
-       val = readl(qm->io_base + SEC_CONTROL_REG);
+       val = readl(SEC_ADDR(qm, SEC_CONTROL_REG));
 
        /* disable RAS int */
        writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG);
@@ -402,7 +411,7 @@ static void sec_hw_error_disable(struct hisi_qm *qm)
        /* disable SEC block master OOO when m-bit error occur */
        val = val & SEC_AXI_SHUTDOWN_DISABLE;
 
-       writel(val, qm->io_base + SEC_CONTROL_REG);
+       writel(val, SEC_ADDR(qm, SEC_CONTROL_REG));
 }
 
 static u32 sec_current_qm_read(struct sec_debug_file *file)
@@ -577,20 +586,20 @@ static int sec_debugfs_atomic64_set(void *data, u64 val)
 DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get,
                         sec_debugfs_atomic64_set, "%lld\n");
 
-static int sec_core_debug_init(struct sec_dev *sec)
+static int sec_core_debug_init(struct hisi_qm *qm)
 {
-       struct hisi_qm *qm = &sec->qm;
+       struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
        struct device *dev = &qm->pdev->dev;
        struct sec_dfx *dfx = &sec->debug.dfx;
        struct debugfs_regset32 *regset;
        struct dentry *tmp_d;
        int i;
 
-       tmp_d = debugfs_create_dir("sec_dfx", sec->qm.debug.debug_root);
+       tmp_d = debugfs_create_dir("sec_dfx", qm->debug.debug_root);
 
        regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
        if (!regset)
-               return -ENOENT;
+               return -ENOMEM;
 
        regset->regs = sec_dfx_regs;
        regset->nregs = ARRAY_SIZE(sec_dfx_regs);
@@ -609,44 +618,44 @@ static int sec_core_debug_init(struct sec_dev *sec)
        return 0;
 }
 
-static int sec_debug_init(struct sec_dev *sec)
+static int sec_debug_init(struct hisi_qm *qm)
 {
+       struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
        int i;
 
-       for (i = SEC_CURRENT_QM; i < SEC_DEBUG_FILE_NUM; i++) {
-               spin_lock_init(&sec->debug.files[i].lock);
-               sec->debug.files[i].index = i;
-               sec->debug.files[i].qm = &sec->qm;
-
-               debugfs_create_file(sec_dbg_file_name[i], 0600,
-                                   sec->qm.debug.debug_root,
-                                   sec->debug.files + i,
-                                   &sec_dbg_fops);
+       if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) {
+               for (i = SEC_CURRENT_QM; i < SEC_DEBUG_FILE_NUM; i++) {
+                       spin_lock_init(&sec->debug.files[i].lock);
+                       sec->debug.files[i].index = i;
+                       sec->debug.files[i].qm = qm;
+
+                       debugfs_create_file(sec_dbg_file_name[i], 0600,
+                                                 qm->debug.debug_root,
+                                                 sec->debug.files + i,
+                                                 &sec_dbg_fops);
+               }
        }
 
-       return sec_core_debug_init(sec);
+       return sec_core_debug_init(qm);
 }
 
-static int sec_debugfs_init(struct sec_dev *sec)
+static int sec_debugfs_init(struct hisi_qm *qm)
 {
-       struct hisi_qm *qm = &sec->qm;
        struct device *dev = &qm->pdev->dev;
        int ret;
 
        qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
                                                  sec_debugfs_root);
-
        qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
        qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
        ret = hisi_qm_debug_init(qm);
        if (ret)
                goto failed_to_create;
 
-       if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) {
-               ret = sec_debug_init(sec);
-               if (ret)
-                       goto failed_to_create;
-       }
+       ret = sec_debug_init(qm);
+       if (ret)
+               goto failed_to_create;
+
 
        return 0;
 
@@ -656,9 +665,9 @@ failed_to_create:
        return ret;
 }
 
-static void sec_debugfs_exit(struct sec_dev *sec)
+static void sec_debugfs_exit(struct hisi_qm *qm)
 {
-       debugfs_remove_recursive(sec->qm.debug.debug_root);
+       debugfs_remove_recursive(qm->debug.debug_root);
 }
 
 static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts)
@@ -677,8 +686,6 @@ static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts)
                                                SEC_CORE_SRAM_ECC_ERR_INFO);
                                dev_err(dev, "multi ecc sram num=0x%x\n",
                                        SEC_ECC_NUM(err_val));
-                               dev_err(dev, "multi ecc sram addr=0x%x\n",
-                                       SEC_ECC_ADDR(err_val));
                        }
                }
                errs++;
@@ -868,7 +875,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_probe_uninit;
        }
 
-       ret = sec_debugfs_init(sec);
+       ret = sec_debugfs_init(qm);
        if (ret)
                pci_warn(pdev, "Failed to init debugfs!\n");
 
@@ -893,7 +900,7 @@ err_crypto_unregister:
 
 err_remove_from_list:
        hisi_qm_del_from_list(qm, &sec_devices);
-       sec_debugfs_exit(sec);
+       sec_debugfs_exit(qm);
        hisi_qm_stop(qm);
 
 err_probe_uninit:
@@ -917,7 +924,7 @@ static void sec_remove(struct pci_dev *pdev)
        if (qm->fun_type == QM_HW_PF && qm->vfs_num)
                hisi_qm_sriov_disable(pdev);
 
-       sec_debugfs_exit(sec);
+       sec_debugfs_exit(qm);
 
        (void)hisi_qm_stop(qm);
 
@@ -987,5 +994,6 @@ module_exit(sec_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Zaibo Xu <xuzaibo@huawei.com>");
 MODULE_AUTHOR("Longfang Liu <liulongfang@huawei.com>");
+MODULE_AUTHOR("Kai Ye <yekai13@huawei.com>");
 MODULE_AUTHOR("Wei Zhang <zhangwei375@huawei.com>");
 MODULE_DESCRIPTION("Driver for HiSilicon SEC accelerator");
index f3ed4c0..4484be1 100644 (file)
@@ -76,7 +76,7 @@ struct hisi_zip_sqe {
        u32 rsvd1[4];
 };
 
-int zip_create_qps(struct hisi_qp **qps, int ctx_num);
+int zip_create_qps(struct hisi_qp **qps, int ctx_num, int node);
 int hisi_zip_register_to_crypto(void);
 void hisi_zip_unregister_from_crypto(void);
 #endif
index c73707c..01fd6a7 100644 (file)
@@ -158,13 +158,13 @@ static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx)
        hisi_qm_release_qp(ctx->qp);
 }
 
-static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type)
+static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int node)
 {
        struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL };
        struct hisi_zip *hisi_zip;
        int ret, i, j;
 
-       ret = zip_create_qps(qps, HZIP_CTX_Q_NUM);
+       ret = zip_create_qps(qps, HZIP_CTX_Q_NUM, node);
        if (ret) {
                pr_err("Can not create zip qps!\n");
                return -ENODEV;
@@ -379,7 +379,7 @@ static int hisi_zip_acomp_init(struct crypto_acomp *tfm)
        struct hisi_zip_ctx *ctx = crypto_tfm_ctx(&tfm->base);
        int ret;
 
-       ret = hisi_zip_ctx_init(ctx, COMP_NAME_TO_TYPE(alg_name));
+       ret = hisi_zip_ctx_init(ctx, COMP_NAME_TO_TYPE(alg_name), tfm->base.node);
        if (ret)
                return ret;
 
index 2229a21..e2845b2 100644 (file)
@@ -234,9 +234,10 @@ static const struct pci_device_id hisi_zip_dev_ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
 
-int zip_create_qps(struct hisi_qp **qps, int qp_num)
+int zip_create_qps(struct hisi_qp **qps, int qp_num, int node)
 {
-       int node = cpu_to_node(smp_processor_id());
+       if (node == NUMA_NO_NODE)
+               node = cpu_to_node(smp_processor_id());
 
        return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
 }
index 0e25fc3..87226b7 100644 (file)
@@ -330,7 +330,7 @@ static int img_hash_write_via_dma(struct img_hash_dev *hdev)
 static int img_hash_dma_init(struct img_hash_dev *hdev)
 {
        struct dma_slave_config dma_conf;
-       int err = -EINVAL;
+       int err;
 
        hdev->dma_lch = dma_request_chan(hdev->dev, "tx");
        if (IS_ERR(hdev->dma_lch)) {
index 2cb53fb..fa7398e 100644 (file)
@@ -1135,11 +1135,12 @@ static irqreturn_t safexcel_irq_ring_thread(int irq, void *data)
 
 static int safexcel_request_ring_irq(void *pdev, int irqid,
                                     int is_pci_dev,
+                                    int ring_id,
                                     irq_handler_t handler,
                                     irq_handler_t threaded_handler,
                                     struct safexcel_ring_irq_data *ring_irq_priv)
 {
-       int ret, irq;
+       int ret, irq, cpu;
        struct device *dev;
 
        if (IS_ENABLED(CONFIG_PCI) && is_pci_dev) {
@@ -1177,6 +1178,10 @@ static int safexcel_request_ring_irq(void *pdev, int irqid,
                return ret;
        }
 
+       /* Set affinity */
+       cpu = cpumask_local_spread(ring_id, NUMA_NO_NODE);
+       irq_set_affinity_hint(irq, get_cpu_mask(cpu));
+
        return irq;
 }
 
@@ -1611,6 +1616,7 @@ static int safexcel_probe_generic(void *pdev,
                irq = safexcel_request_ring_irq(pdev,
                                                EIP197_IRQ_NUMBER(i, is_pci_dev),
                                                is_pci_dev,
+                                               i,
                                                safexcel_irq_ring,
                                                safexcel_irq_ring_thread,
                                                ring_irq);
@@ -1619,6 +1625,7 @@ static int safexcel_probe_generic(void *pdev,
                        return irq;
                }
 
+               priv->ring[i].irq = irq;
                priv->ring[i].work_data.priv = priv;
                priv->ring[i].work_data.ring = i;
                INIT_WORK(&priv->ring[i].work_data.work,
@@ -1756,8 +1763,10 @@ static int safexcel_remove(struct platform_device *pdev)
        clk_disable_unprepare(priv->reg_clk);
        clk_disable_unprepare(priv->clk);
 
-       for (i = 0; i < priv->config.rings; i++)
+       for (i = 0; i < priv->config.rings; i++) {
+               irq_set_affinity_hint(priv->ring[i].irq, NULL);
                destroy_workqueue(priv->ring[i].workqueue);
+       }
 
        return 0;
 }
index 94016c5..7c5fe38 100644 (file)
@@ -707,6 +707,9 @@ struct safexcel_ring {
         */
        struct crypto_async_request *req;
        struct crypto_async_request *backlog;
+
+       /* irq of this ring */
+       int irq;
 };
 
 /* EIP integration context flags */
index 0c5e80c..1ac3253 100644 (file)
@@ -1300,6 +1300,7 @@ struct safexcel_alg_template safexcel_alg_ecb_aes = {
                        .cra_driver_name = "safexcel-ecb-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1337,6 +1338,7 @@ struct safexcel_alg_template safexcel_alg_cbc_aes = {
                        .cra_driver_name = "safexcel-cbc-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1374,6 +1376,7 @@ struct safexcel_alg_template safexcel_alg_cfb_aes = {
                        .cra_driver_name = "safexcel-cfb-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1411,6 +1414,7 @@ struct safexcel_alg_template safexcel_alg_ofb_aes = {
                        .cra_driver_name = "safexcel-ofb-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1485,6 +1489,7 @@ struct safexcel_alg_template safexcel_alg_ctr_aes = {
                        .cra_driver_name = "safexcel-ctr-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1545,6 +1550,7 @@ struct safexcel_alg_template safexcel_alg_cbc_des = {
                        .cra_driver_name = "safexcel-cbc-des",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1582,6 +1588,7 @@ struct safexcel_alg_template safexcel_alg_ecb_des = {
                        .cra_driver_name = "safexcel-ecb-des",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1642,6 +1649,7 @@ struct safexcel_alg_template safexcel_alg_cbc_des3_ede = {
                        .cra_driver_name = "safexcel-cbc-des3_ede",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1679,6 +1687,7 @@ struct safexcel_alg_template safexcel_alg_ecb_des3_ede = {
                        .cra_driver_name = "safexcel-ecb-des3_ede",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1751,6 +1760,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1786,6 +1796,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha256-cbc-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1821,6 +1832,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha224-cbc-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1856,6 +1868,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha512-cbc-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1891,6 +1904,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha384-cbc-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = AES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1927,6 +1941,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_des3_ede = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-des3_ede",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1963,6 +1978,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_des3_ede = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha256-cbc-des3_ede",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -1999,6 +2015,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_des3_ede = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha224-cbc-des3_ede",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2035,6 +2052,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_des3_ede = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha512-cbc-des3_ede",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2071,6 +2089,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_des3_ede = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha384-cbc-des3_ede",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2107,6 +2126,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_des = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-des",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2143,6 +2163,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_des = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha256-cbc-des",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2179,6 +2200,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_des = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha224-cbc-des",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2215,6 +2237,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_des = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha512-cbc-des",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2251,6 +2274,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_des = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha384-cbc-des",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = DES_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2285,6 +2309,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_ctr_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha1-ctr-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2319,6 +2344,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_ctr_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha256-ctr-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2353,6 +2379,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_ctr_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha224-ctr-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2387,6 +2414,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_ctr_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha512-ctr-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2421,6 +2449,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_ctr_aes = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha384-ctr-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2534,6 +2563,7 @@ struct safexcel_alg_template safexcel_alg_xts_aes = {
                        .cra_driver_name = "safexcel-xts-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = XTS_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2646,6 +2676,7 @@ struct safexcel_alg_template safexcel_alg_gcm = {
                        .cra_driver_name = "safexcel-gcm-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2769,6 +2800,7 @@ struct safexcel_alg_template safexcel_alg_ccm = {
                        .cra_driver_name = "safexcel-ccm-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2832,6 +2864,7 @@ struct safexcel_alg_template safexcel_alg_chacha20 = {
                        .cra_driver_name = "safexcel-chacha20",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -2993,6 +3026,7 @@ struct safexcel_alg_template safexcel_alg_chachapoly = {
                        /* +1 to put it above HW chacha + SW poly */
                        .cra_priority = SAFEXCEL_CRA_PRIORITY + 1,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY |
                                     CRYPTO_ALG_NEED_FALLBACK,
                        .cra_blocksize = 1,
@@ -3032,6 +3066,7 @@ struct safexcel_alg_template safexcel_alg_chachapoly_esp = {
                        /* +1 to put it above HW chacha + SW poly */
                        .cra_priority = SAFEXCEL_CRA_PRIORITY + 1,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY |
                                     CRYPTO_ALG_NEED_FALLBACK,
                        .cra_blocksize = 1,
@@ -3110,6 +3145,7 @@ struct safexcel_alg_template safexcel_alg_ecb_sm4 = {
                        .cra_driver_name = "safexcel-ecb-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = SM4_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3147,6 +3183,7 @@ struct safexcel_alg_template safexcel_alg_cbc_sm4 = {
                        .cra_driver_name = "safexcel-cbc-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = SM4_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3184,6 +3221,7 @@ struct safexcel_alg_template safexcel_alg_ofb_sm4 = {
                        .cra_driver_name = "safexcel-ofb-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3221,6 +3259,7 @@ struct safexcel_alg_template safexcel_alg_cfb_sm4 = {
                        .cra_driver_name = "safexcel-cfb-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3273,6 +3312,7 @@ struct safexcel_alg_template safexcel_alg_ctr_sm4 = {
                        .cra_driver_name = "safexcel-ctr-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3332,6 +3372,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_sm4 = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = SM4_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3441,6 +3482,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sm3_cbc_sm4 = {
                        .cra_driver_name = "safexcel-authenc-hmac-sm3-cbc-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY |
                                     CRYPTO_ALG_NEED_FALLBACK,
                        .cra_blocksize = SM4_BLOCK_SIZE,
@@ -3476,6 +3518,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_ctr_sm4 = {
                        .cra_driver_name = "safexcel-authenc-hmac-sha1-ctr-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3510,6 +3553,7 @@ struct safexcel_alg_template safexcel_alg_authenc_hmac_sm3_ctr_sm4 = {
                        .cra_driver_name = "safexcel-authenc-hmac-sm3-ctr-sm4",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3578,6 +3622,7 @@ struct safexcel_alg_template safexcel_alg_rfc4106_gcm = {
                        .cra_driver_name = "safexcel-rfc4106-gcm-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3622,6 +3667,7 @@ struct safexcel_alg_template safexcel_alg_rfc4543_gcm = {
                        .cra_driver_name = "safexcel-rfc4543-gcm-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
@@ -3713,6 +3759,7 @@ struct safexcel_alg_template safexcel_alg_rfc4309_ccm = {
                        .cra_driver_name = "safexcel-rfc4309-ccm-aes",
                        .cra_priority = SAFEXCEL_CRA_PRIORITY,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = 1,
                        .cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
index 43962bc..16a4679 100644 (file)
@@ -992,6 +992,7 @@ struct safexcel_alg_template safexcel_alg_sha1 = {
                                .cra_driver_name = "safexcel-sha1",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA1_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1235,6 +1236,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha1 = {
                                .cra_driver_name = "safexcel-hmac-sha1",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA1_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1291,6 +1293,7 @@ struct safexcel_alg_template safexcel_alg_sha256 = {
                                .cra_driver_name = "safexcel-sha256",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA256_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1347,6 +1350,7 @@ struct safexcel_alg_template safexcel_alg_sha224 = {
                                .cra_driver_name = "safexcel-sha224",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA224_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1418,6 +1422,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha224 = {
                                .cra_driver_name = "safexcel-hmac-sha224",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA224_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1489,6 +1494,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha256 = {
                                .cra_driver_name = "safexcel-hmac-sha256",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA256_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1545,6 +1551,7 @@ struct safexcel_alg_template safexcel_alg_sha512 = {
                                .cra_driver_name = "safexcel-sha512",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA512_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1601,6 +1608,7 @@ struct safexcel_alg_template safexcel_alg_sha384 = {
                                .cra_driver_name = "safexcel-sha384",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA384_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1672,6 +1680,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha512 = {
                                .cra_driver_name = "safexcel-hmac-sha512",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA512_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1743,6 +1752,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha384 = {
                                .cra_driver_name = "safexcel-hmac-sha384",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SHA384_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1799,6 +1809,7 @@ struct safexcel_alg_template safexcel_alg_md5 = {
                                .cra_driver_name = "safexcel-md5",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1871,6 +1882,7 @@ struct safexcel_alg_template safexcel_alg_hmac_md5 = {
                                .cra_driver_name = "safexcel-hmac-md5",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -1952,6 +1964,7 @@ struct safexcel_alg_template safexcel_alg_crc32 = {
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_OPTIONAL_KEY |
                                             CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = 1,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -2041,6 +2054,7 @@ struct safexcel_alg_template safexcel_alg_cbcmac = {
                                .cra_driver_name = "safexcel-cbcmac-aes",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = 1,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -2136,6 +2150,7 @@ struct safexcel_alg_template safexcel_alg_xcbcmac = {
                                .cra_driver_name = "safexcel-xcbc-aes",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = AES_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -2232,6 +2247,7 @@ struct safexcel_alg_template safexcel_alg_cmac = {
                                .cra_driver_name = "safexcel-cmac-aes",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = AES_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -2288,6 +2304,7 @@ struct safexcel_alg_template safexcel_alg_sm3 = {
                                .cra_driver_name = "safexcel-sm3",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SM3_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
@@ -2359,6 +2376,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sm3 = {
                                .cra_driver_name = "safexcel-hmac-sm3",
                                .cra_priority = SAFEXCEL_CRA_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = SM3_BLOCK_SIZE,
                                .cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
index ad73fc9..f478bb0 100644 (file)
@@ -1402,7 +1402,8 @@ static int __init ixp_module_init(void)
 
                /* block ciphers */
                cra->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
-                                     CRYPTO_ALG_ASYNC;
+                                     CRYPTO_ALG_ASYNC |
+                                     CRYPTO_ALG_ALLOCATES_MEMORY;
                if (!cra->setkey)
                        cra->setkey = ablk_setkey;
                if (!cra->encrypt)
@@ -1435,7 +1436,8 @@ static int __init ixp_module_init(void)
 
                /* authenc */
                cra->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
-                                     CRYPTO_ALG_ASYNC;
+                                     CRYPTO_ALG_ASYNC |
+                                     CRYPTO_ALG_ALLOCATES_MEMORY;
                cra->setkey = cra->setkey ?: aead_setkey;
                cra->setauthsize = aead_setauthsize;
                cra->encrypt = aead_encrypt;
index 8a5f0b0..d63bca9 100644 (file)
@@ -438,7 +438,7 @@ static int mv_cesa_probe(struct platform_device *pdev)
        struct mv_cesa_dev *cesa;
        struct mv_cesa_engine *engines;
        struct resource *res;
-       int irq, ret, i;
+       int irq, ret, i, cpu;
        u32 sram_size;
 
        if (cesa_dev) {
@@ -505,6 +505,8 @@ static int mv_cesa_probe(struct platform_device *pdev)
                        goto err_cleanup;
                }
 
+               engine->irq = irq;
+
                /*
                 * Not all platforms can gate the CESA clocks: do not complain
                 * if the clock does not exist.
@@ -548,6 +550,10 @@ static int mv_cesa_probe(struct platform_device *pdev)
                if (ret)
                        goto err_cleanup;
 
+               /* Set affinity */
+               cpu = cpumask_local_spread(engine->id, NUMA_NO_NODE);
+               irq_set_affinity_hint(irq, get_cpu_mask(cpu));
+
                crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN);
                atomic_set(&engine->load, 0);
                INIT_LIST_HEAD(&engine->complete_queue);
@@ -570,6 +576,8 @@ err_cleanup:
                clk_disable_unprepare(cesa->engines[i].zclk);
                clk_disable_unprepare(cesa->engines[i].clk);
                mv_cesa_put_sram(pdev, i);
+               if (cesa->engines[i].irq > 0)
+                       irq_set_affinity_hint(cesa->engines[i].irq, NULL);
        }
 
        return ret;
@@ -586,6 +594,7 @@ static int mv_cesa_remove(struct platform_device *pdev)
                clk_disable_unprepare(cesa->engines[i].zclk);
                clk_disable_unprepare(cesa->engines[i].clk);
                mv_cesa_put_sram(pdev, i);
+               irq_set_affinity_hint(cesa->engines[i].irq, NULL);
        }
 
        return 0;
index e8632d5..0c9cbb6 100644 (file)
@@ -457,6 +457,7 @@ struct mv_cesa_engine {
        atomic_t load;
        struct mv_cesa_tdma_chain chain;
        struct list_head complete_queue;
+       int irq;
 };
 
 /**
index f133c2c..45b4d7a 100644 (file)
@@ -508,7 +508,8 @@ struct skcipher_alg mv_cesa_ecb_des_alg = {
                .cra_name = "ecb(des)",
                .cra_driver_name = "mv-ecb-des",
                .cra_priority = 300,
-               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC |
+                            CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = DES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct mv_cesa_des_ctx),
                .cra_alignmask = 0,
@@ -558,7 +559,8 @@ struct skcipher_alg mv_cesa_cbc_des_alg = {
                .cra_name = "cbc(des)",
                .cra_driver_name = "mv-cbc-des",
                .cra_priority = 300,
-               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC |
+                            CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = DES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct mv_cesa_des_ctx),
                .cra_alignmask = 0,
@@ -616,7 +618,8 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = {
                .cra_name = "ecb(des3_ede)",
                .cra_driver_name = "mv-ecb-des3-ede",
                .cra_priority = 300,
-               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC |
+                            CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct mv_cesa_des3_ctx),
                .cra_alignmask = 0,
@@ -669,7 +672,8 @@ struct skcipher_alg mv_cesa_cbc_des3_ede_alg = {
                .cra_name = "cbc(des3_ede)",
                .cra_driver_name = "mv-cbc-des3-ede",
                .cra_priority = 300,
-               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC |
+                            CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct mv_cesa_des3_ctx),
                .cra_alignmask = 0,
@@ -741,7 +745,8 @@ struct skcipher_alg mv_cesa_ecb_aes_alg = {
                .cra_name = "ecb(aes)",
                .cra_driver_name = "mv-ecb-aes",
                .cra_priority = 300,
-               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC |
+                            CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct mv_cesa_aes_ctx),
                .cra_alignmask = 0,
@@ -790,7 +795,8 @@ struct skcipher_alg mv_cesa_cbc_aes_alg = {
                .cra_name = "cbc(aes)",
                .cra_driver_name = "mv-cbc-aes",
                .cra_priority = 300,
-               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC |
+                            CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct mv_cesa_aes_ctx),
                .cra_alignmask = 0,
index b971284..bd0bd9f 100644 (file)
@@ -921,6 +921,7 @@ struct ahash_alg mv_md5_alg = {
                        .cra_driver_name = "mv-md5",
                        .cra_priority = 300,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
@@ -991,6 +992,7 @@ struct ahash_alg mv_sha1_alg = {
                        .cra_driver_name = "mv-sha1",
                        .cra_priority = 300,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = SHA1_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
@@ -1064,6 +1066,7 @@ struct ahash_alg mv_sha256_alg = {
                        .cra_driver_name = "mv-sha256",
                        .cra_priority = 300,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = SHA256_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
@@ -1298,6 +1301,7 @@ struct ahash_alg mv_ahmac_md5_alg = {
                        .cra_driver_name = "mv-hmac-md5",
                        .cra_priority = 300,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
@@ -1368,6 +1372,7 @@ struct ahash_alg mv_ahmac_sha1_alg = {
                        .cra_driver_name = "mv-hmac-sha1",
                        .cra_priority = 300,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = SHA1_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
@@ -1438,6 +1443,7 @@ struct ahash_alg mv_ahmac_sha256_alg = {
                        .cra_driver_name = "mv-hmac-sha256",
                        .cra_priority = 300,
                        .cra_flags = CRYPTO_ALG_ASYNC |
+                                    CRYPTO_ALG_ALLOCATES_MEMORY |
                                     CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .cra_blocksize = SHA256_BLOCK_SIZE,
                        .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
index fec8f3b..cc103b1 100644 (file)
@@ -878,11 +878,11 @@ static int copy_ucode_to_dma_mem(struct device *dev,
 
        /* Byte swap 64-bit */
        for (i = 0; i < (ucode->size / 8); i++)
-               ((u64 *)ucode->align_va)[i] =
+               ((__be64 *)ucode->align_va)[i] =
                                cpu_to_be64(((u64 *)ucode->align_va)[i]);
        /*  Ucode needs 16-bit swap */
        for (i = 0; i < (ucode->size / 2); i++)
-               ((u16 *)ucode->align_va)[i] =
+               ((__be16 *)ucode->align_va)[i] =
                                cpu_to_be16(((u16 *)ucode->align_va)[i]);
        return 0;
 }
@@ -1463,8 +1463,8 @@ int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev,
                                        struct otx_cpt_eng_grps *eng_grps,
                                        int pf_type)
 {
-       struct tar_ucode_info_t *tar_info[OTX_CPT_MAX_ETYPES_PER_GRP] = { 0 };
-       struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} };
+       struct tar_ucode_info_t *tar_info[OTX_CPT_MAX_ETYPES_PER_GRP] = {};
+       struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = {};
        struct tar_arch_info_t *tar_arch = NULL;
        char *tar_filename;
        int i, ret = 0;
index 14f02b6..8620ac8 100644 (file)
@@ -74,7 +74,7 @@ struct otx_cpt_ucode_ver_num {
 struct otx_cpt_ucode_hdr {
        struct otx_cpt_ucode_ver_num ver_num;
        u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ];
-       u32 code_length;
+       __be32 code_length;
        u32 padding[3];
 };
 
index 1e0a1d7..90bb313 100644 (file)
@@ -239,7 +239,6 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
        struct otx_cpt_fc_ctx *fctx = &rctx->fctx;
        int ivsize = crypto_skcipher_ivsize(stfm);
        u32 start = req->cryptlen - ivsize;
-       u64 *ctrl_flags = NULL;
        gfp_t flags;
 
        flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
@@ -280,8 +279,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
 
        memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm));
 
-       ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags;
-       *ctrl_flags = cpu_to_be64(*ctrl_flags);
+       fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.cflags);
 
        /*
         * Storing  Packet Data Information in offset
@@ -692,20 +690,17 @@ static struct otx_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg)
 
 static inline void swap_data32(void *buf, u32 len)
 {
-       u32 *store = (u32 *) buf;
-       int i = 0;
-
-       for (i = 0 ; i < len/sizeof(u32); i++, store++)
-               *store = cpu_to_be32(*store);
+       cpu_to_be32_array(buf, buf, len / 4);
 }
 
 static inline void swap_data64(void *buf, u32 len)
 {
-       u64 *store = (u64 *) buf;
+       __be64 *dst = buf;
+       u64 *src = buf;
        int i = 0;
 
-       for (i = 0 ; i < len/sizeof(u64); i++, store++)
-               *store = cpu_to_be64(*store);
+       for (i = 0 ; i < len / 8; i++, src++, dst++)
+               *dst = cpu_to_be64p(src);
 }
 
 static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad)
@@ -1012,7 +1007,7 @@ static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc,
                /* Unknown cipher type */
                return -EINVAL;
        }
-       rctx->ctrl_word.flags = cpu_to_be64(rctx->ctrl_word.flags);
+       rctx->ctrl_word.flags = cpu_to_be64(rctx->ctrl_word.cflags);
 
        req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER;
        req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ;
@@ -1032,7 +1027,7 @@ static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc,
        fctx->enc.enc_ctrl.e.aes_key = ctx->key_type;
        fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type;
        fctx->enc.enc_ctrl.e.mac_len = mac_len;
-       fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.flags);
+       fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.cflags);
 
        /*
         * Storing Packet Data Information in offset
@@ -1306,7 +1301,7 @@ static int otx_cpt_aead_null_decrypt(struct aead_request *req)
 static struct skcipher_alg otx_cpt_skciphers[] = { {
        .base.cra_name = "xts(aes)",
        .base.cra_driver_name = "cpt_xts_aes",
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = AES_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx),
        .base.cra_alignmask = 7,
@@ -1323,7 +1318,7 @@ static struct skcipher_alg otx_cpt_skciphers[] = { {
 }, {
        .base.cra_name = "cbc(aes)",
        .base.cra_driver_name = "cpt_cbc_aes",
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = AES_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx),
        .base.cra_alignmask = 7,
@@ -1340,7 +1335,7 @@ static struct skcipher_alg otx_cpt_skciphers[] = { {
 }, {
        .base.cra_name = "ecb(aes)",
        .base.cra_driver_name = "cpt_ecb_aes",
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = AES_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx),
        .base.cra_alignmask = 7,
@@ -1357,7 +1352,7 @@ static struct skcipher_alg otx_cpt_skciphers[] = { {
 }, {
        .base.cra_name = "cfb(aes)",
        .base.cra_driver_name = "cpt_cfb_aes",
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = AES_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx),
        .base.cra_alignmask = 7,
@@ -1374,7 +1369,7 @@ static struct skcipher_alg otx_cpt_skciphers[] = { {
 }, {
        .base.cra_name = "cbc(des3_ede)",
        .base.cra_driver_name = "cpt_cbc_des3_ede",
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx),
        .base.cra_alignmask = 7,
@@ -1391,7 +1386,7 @@ static struct skcipher_alg otx_cpt_skciphers[] = { {
 }, {
        .base.cra_name = "ecb(des3_ede)",
        .base.cra_driver_name = "cpt_ecb_des3_ede",
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx),
        .base.cra_alignmask = 7,
@@ -1412,7 +1407,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "authenc(hmac(sha1),cbc(aes))",
                .cra_driver_name = "cpt_hmac_sha1_cbc_aes",
                .cra_blocksize = AES_BLOCK_SIZE,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
@@ -1431,7 +1426,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "authenc(hmac(sha256),cbc(aes))",
                .cra_driver_name = "cpt_hmac_sha256_cbc_aes",
                .cra_blocksize = AES_BLOCK_SIZE,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
@@ -1450,7 +1445,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "authenc(hmac(sha384),cbc(aes))",
                .cra_driver_name = "cpt_hmac_sha384_cbc_aes",
                .cra_blocksize = AES_BLOCK_SIZE,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
@@ -1469,7 +1464,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "authenc(hmac(sha512),cbc(aes))",
                .cra_driver_name = "cpt_hmac_sha512_cbc_aes",
                .cra_blocksize = AES_BLOCK_SIZE,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
@@ -1488,7 +1483,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "authenc(hmac(sha1),ecb(cipher_null))",
                .cra_driver_name = "cpt_hmac_sha1_ecb_null",
                .cra_blocksize = 1,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
@@ -1507,7 +1502,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "authenc(hmac(sha256),ecb(cipher_null))",
                .cra_driver_name = "cpt_hmac_sha256_ecb_null",
                .cra_blocksize = 1,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
@@ -1526,7 +1521,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "authenc(hmac(sha384),ecb(cipher_null))",
                .cra_driver_name = "cpt_hmac_sha384_ecb_null",
                .cra_blocksize = 1,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
@@ -1545,7 +1540,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "authenc(hmac(sha512),ecb(cipher_null))",
                .cra_driver_name = "cpt_hmac_sha512_ecb_null",
                .cra_blocksize = 1,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
@@ -1564,7 +1559,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
                .cra_name = "rfc4106(gcm(aes))",
                .cra_driver_name = "cpt_rfc4106_gcm_aes",
                .cra_blocksize = 1,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
                .cra_priority = 4001,
                .cra_alignmask = 0,
index 67cc002..4181b5c 100644 (file)
@@ -66,7 +66,8 @@ enum otx_cpt_aes_key_len {
 };
 
 union otx_cpt_encr_ctrl {
-       u64 flags;
+       __be64 flags;
+       u64 cflags;
        struct {
 #if defined(__BIG_ENDIAN_BITFIELD)
                u64 enc_cipher:4;
@@ -138,7 +139,8 @@ struct otx_cpt_des3_ctx {
 };
 
 union otx_cpt_offset_ctrl_word {
-       u64 flags;
+       __be64 flags;
+       u64 cflags;
        struct {
 #if defined(__BIG_ENDIAN_BITFIELD)
                u64 reserved:32;
index 239195c..cbc3d78 100644 (file)
@@ -202,11 +202,10 @@ static inline int setup_sgio_list(struct pci_dev *pdev,
        info->dlen = dlen;
        info->in_buffer = (u8 *)info + info_len;
 
-       ((u16 *)info->in_buffer)[0] = req->outcnt;
-       ((u16 *)info->in_buffer)[1] = req->incnt;
+       ((__be16 *)info->in_buffer)[0] = cpu_to_be16(req->outcnt);
+       ((__be16 *)info->in_buffer)[1] = cpu_to_be16(req->incnt);
        ((u16 *)info->in_buffer)[2] = 0;
        ((u16 *)info->in_buffer)[3] = 0;
-       *(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer);
 
        /* Setup gather (input) components */
        if (setup_sgio_components(pdev, req->in, req->incnt,
@@ -367,8 +366,6 @@ static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req,
        iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2);
        iq_cmd.cmd.s.dlen   = cpu_to_be16(cpt_req->dlen);
 
-       /* 64-bit swap for microcode data reads, not needed for addresses*/
-       iq_cmd.cmd.u64 = cpu_to_be64(iq_cmd.cmd.u64);
        iq_cmd.dptr = info->dptr_baddr;
        iq_cmd.rptr = info->rptr_baddr;
        iq_cmd.cptr.u64 = 0;
@@ -436,7 +433,7 @@ static int cpt_process_ccode(struct pci_dev *pdev,
        u8 ccode = cpt_status->s.compcode;
        union otx_cpt_error_code ecode;
 
-       ecode.u = be64_to_cpu(*((u64 *) cpt_info->out_buffer));
+       ecode.u = be64_to_cpup((__be64 *)cpt_info->out_buffer);
        switch (ccode) {
        case CPT_COMP_E_FAULT:
                dev_err(&pdev->dev,
index a4c9ff7..d912fe0 100644 (file)
@@ -92,10 +92,10 @@ union otx_cpt_ctrl_info {
 union otx_cpt_iq_cmd_word0 {
        u64 u64;
        struct {
-               u16 opcode;
-               u16 param1;
-               u16 param2;
-               u16 dlen;
+               __be16 opcode;
+               __be16 param1;
+               __be16 param2;
+               __be16 dlen;
        } s;
 };
 
@@ -123,16 +123,16 @@ struct otx_cpt_sglist_component {
        union {
                u64 len;
                struct {
-                       u16 len0;
-                       u16 len1;
-                       u16 len2;
-                       u16 len3;
+                       __be16 len0;
+                       __be16 len1;
+                       __be16 len2;
+                       __be16 len3;
                } s;
        } u;
-       u64 ptr0;
-       u64 ptr1;
-       u64 ptr2;
-       u64 ptr3;
+       __be64 ptr0;
+       __be64 ptr1;
+       __be64 ptr2;
+       __be64 ptr3;
 };
 
 struct otx_cpt_pending_entry {
index 78d660d..4ad3571 100644 (file)
@@ -137,8 +137,6 @@ struct mtk_aes_gcm_ctx {
 
        u32 authsize;
        size_t textlen;
-
-       struct crypto_skcipher *ctr;
 };
 
 struct mtk_aes_drv {
@@ -996,17 +994,8 @@ static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
                              u32 keylen)
 {
        struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead);
-       struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx);
-       struct crypto_skcipher *ctr = gctx->ctr;
-       struct {
-               u32 hash[4];
-               u8 iv[8];
-
-               struct crypto_wait wait;
-
-               struct scatterlist sg[1];
-               struct skcipher_request req;
-       } *data;
+       u8 hash[AES_BLOCK_SIZE] __aligned(4) = {};
+       struct crypto_aes_ctx aes_ctx;
        int err;
 
        switch (keylen) {
@@ -1026,39 +1015,18 @@ static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 
        ctx->keylen = SIZE_IN_WORDS(keylen);
 
-       /* Same as crypto_gcm_setkey() from crypto/gcm.c */
-       crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
-       crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) &
-                                 CRYPTO_TFM_REQ_MASK);
-       err = crypto_skcipher_setkey(ctr, key, keylen);
+       err = aes_expandkey(&aes_ctx, key, keylen);
        if (err)
                return err;
 
-       data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(ctr),
-                      GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       crypto_init_wait(&data->wait);
-       sg_init_one(data->sg, &data->hash, AES_BLOCK_SIZE);
-       skcipher_request_set_tfm(&data->req, ctr);
-       skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP |
-                                     CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                     crypto_req_done, &data->wait);
-       skcipher_request_set_crypt(&data->req, data->sg, data->sg,
-                                  AES_BLOCK_SIZE, data->iv);
-
-       err = crypto_wait_req(crypto_skcipher_encrypt(&data->req),
-                             &data->wait);
-       if (err)
-               goto out;
+       aes_encrypt(&aes_ctx, hash, hash);
+       memzero_explicit(&aes_ctx, sizeof(aes_ctx));
 
        mtk_aes_write_state_le(ctx->key, (const u32 *)key, keylen);
-       mtk_aes_write_state_be(ctx->key + ctx->keylen, data->hash,
+       mtk_aes_write_state_be(ctx->key + ctx->keylen, (const u32 *)hash,
                               AES_BLOCK_SIZE);
-out:
-       kzfree(data);
-       return err;
+
+       return 0;
 }
 
 static int mtk_aes_gcm_setauthsize(struct crypto_aead *aead,
@@ -1095,32 +1063,17 @@ static int mtk_aes_gcm_init(struct crypto_aead *aead)
 {
        struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
 
-       ctx->ctr = crypto_alloc_skcipher("ctr(aes)", 0,
-                                        CRYPTO_ALG_ASYNC);
-       if (IS_ERR(ctx->ctr)) {
-               pr_err("Error allocating ctr(aes)\n");
-               return PTR_ERR(ctx->ctr);
-       }
-
        crypto_aead_set_reqsize(aead, sizeof(struct mtk_aes_reqctx));
        ctx->base.start = mtk_aes_gcm_start;
        return 0;
 }
 
-static void mtk_aes_gcm_exit(struct crypto_aead *aead)
-{
-       struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
-
-       crypto_free_skcipher(ctx->ctr);
-}
-
 static struct aead_alg aes_gcm_alg = {
        .setkey         = mtk_aes_gcm_setkey,
        .setauthsize    = mtk_aes_gcm_setauthsize,
        .encrypt        = mtk_aes_gcm_encrypt,
        .decrypt        = mtk_aes_gcm_decrypt,
        .init           = mtk_aes_gcm_init,
-       .exit           = mtk_aes_gcm_exit,
        .ivsize         = GCM_AES_IV_SIZE,
        .maxauthsize    = AES_BLOCK_SIZE,
 
index d845302..909a7eb 100644 (file)
@@ -97,7 +97,7 @@ struct dcp_async_ctx {
        unsigned int                    hot:1;
 
        /* Crypto-specific context */
-       struct crypto_sync_skcipher     *fallback;
+       struct crypto_skcipher          *fallback;
        unsigned int                    key_len;
        uint8_t                         key[AES_KEYSIZE_128];
 };
@@ -105,6 +105,7 @@ struct dcp_async_ctx {
 struct dcp_aes_req_ctx {
        unsigned int    enc:1;
        unsigned int    ecb:1;
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 struct dcp_sha_req_ctx {
@@ -426,21 +427,20 @@ static int dcp_chan_thread_aes(void *data)
 static int mxs_dcp_block_fallback(struct skcipher_request *req, int enc)
 {
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
        struct dcp_async_ctx *ctx = crypto_skcipher_ctx(tfm);
-       SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
        int ret;
 
-       skcipher_request_set_sync_tfm(subreq, ctx->fallback);
-       skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL);
-       skcipher_request_set_crypt(subreq, req->src, req->dst,
+       skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+       skcipher_request_set_callback(&rctx->fallback_req, req->base.flags,
+                                     req->base.complete, req->base.data);
+       skcipher_request_set_crypt(&rctx->fallback_req, req->src, req->dst,
                                   req->cryptlen, req->iv);
 
        if (enc)
-               ret = crypto_skcipher_encrypt(subreq);
+               ret = crypto_skcipher_encrypt(&rctx->fallback_req);
        else
-               ret = crypto_skcipher_decrypt(subreq);
-
-       skcipher_request_zero(subreq);
+               ret = crypto_skcipher_decrypt(&rctx->fallback_req);
 
        return ret;
 }
@@ -510,24 +510,25 @@ static int mxs_dcp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
         * but is supported by in-kernel software implementation, we use
         * software fallback.
         */
-       crypto_sync_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(actx->fallback,
+       crypto_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(actx->fallback,
                                  tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
-       return crypto_sync_skcipher_setkey(actx->fallback, key, len);
+       return crypto_skcipher_setkey(actx->fallback, key, len);
 }
 
 static int mxs_dcp_aes_fallback_init_tfm(struct crypto_skcipher *tfm)
 {
        const char *name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm));
        struct dcp_async_ctx *actx = crypto_skcipher_ctx(tfm);
-       struct crypto_sync_skcipher *blk;
+       struct crypto_skcipher *blk;
 
-       blk = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+       blk = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(blk))
                return PTR_ERR(blk);
 
        actx->fallback = blk;
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct dcp_aes_req_ctx));
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct dcp_aes_req_ctx) +
+                                        crypto_skcipher_reqsize(blk));
        return 0;
 }
 
@@ -535,7 +536,7 @@ static void mxs_dcp_aes_fallback_exit_tfm(struct crypto_skcipher *tfm)
 {
        struct dcp_async_ctx *actx = crypto_skcipher_ctx(tfm);
 
-       crypto_free_sync_skcipher(actx->fallback);
+       crypto_free_skcipher(actx->fallback);
 }
 
 /*
index 6a828bb..d8aec51 100644 (file)
@@ -1382,7 +1382,8 @@ static int __n2_register_one_skcipher(const struct n2_skcipher_tmpl *tmpl)
        snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
        snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->drv_name);
        alg->base.cra_priority = N2_CRA_PRIORITY;
-       alg->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC;
+       alg->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC |
+                             CRYPTO_ALG_ALLOCATES_MEMORY;
        alg->base.cra_blocksize = tmpl->block_size;
        p->enc_type = tmpl->enc_type;
        alg->base.cra_ctxsize = sizeof(struct n2_skcipher_context);
index b5aff20..4fd14d9 100644 (file)
@@ -139,7 +139,7 @@ int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 
        for (i = 0; i < key32; i++) {
                omap_aes_write(dd, AES_REG_KEY(dd, i),
-                       __le32_to_cpu(dd->ctx->key[i]));
+                              (__force u32)cpu_to_le32(dd->ctx->key[i]));
        }
 
        if ((dd->flags & (FLAGS_CBC | FLAGS_CTR)) && dd->req->iv)
@@ -363,7 +363,7 @@ int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 {
        int err;
 
-       pr_debug("total: %d\n", dd->total);
+       pr_debug("total: %zu\n", dd->total);
 
        if (!dd->pio_only) {
                err = dma_map_sg(dd->dev, dd->in_sg, dd->in_sg_len,
@@ -409,7 +409,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 
 int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 {
-       pr_debug("total: %d\n", dd->total);
+       pr_debug("total: %zu\n", dd->total);
 
        omap_aes_dma_stop(dd);
 
@@ -548,20 +548,18 @@ static int omap_aes_crypt(struct skcipher_request *req, unsigned long mode)
                  !!(mode & FLAGS_CBC));
 
        if (req->cryptlen < aes_fallback_sz) {
-               SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
-
-               skcipher_request_set_sync_tfm(subreq, ctx->fallback);
-               skcipher_request_set_callback(subreq, req->base.flags, NULL,
-                                             NULL);
-               skcipher_request_set_crypt(subreq, req->src, req->dst,
-                                          req->cryptlen, req->iv);
+               skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+               skcipher_request_set_callback(&rctx->fallback_req,
+                                             req->base.flags,
+                                             req->base.complete,
+                                             req->base.data);
+               skcipher_request_set_crypt(&rctx->fallback_req, req->src,
+                                          req->dst, req->cryptlen, req->iv);
 
                if (mode & FLAGS_ENCRYPT)
-                       ret = crypto_skcipher_encrypt(subreq);
+                       ret = crypto_skcipher_encrypt(&rctx->fallback_req);
                else
-                       ret = crypto_skcipher_decrypt(subreq);
-
-               skcipher_request_zero(subreq);
+                       ret = crypto_skcipher_decrypt(&rctx->fallback_req);
                return ret;
        }
        dd = omap_aes_find_dev(rctx);
@@ -590,11 +588,11 @@ static int omap_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
        memcpy(ctx->key, key, keylen);
        ctx->keylen = keylen;
 
-       crypto_sync_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
+       crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
                                                 CRYPTO_TFM_REQ_MASK);
 
-       ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
+       ret = crypto_skcipher_setkey(ctx->fallback, key, keylen);
        if (!ret)
                return 0;
 
@@ -640,15 +638,16 @@ static int omap_aes_init_tfm(struct crypto_skcipher *tfm)
 {
        const char *name = crypto_tfm_alg_name(&tfm->base);
        struct omap_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-       struct crypto_sync_skcipher *blk;
+       struct crypto_skcipher *blk;
 
-       blk = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+       blk = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(blk))
                return PTR_ERR(blk);
 
        ctx->fallback = blk;
 
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct omap_aes_reqctx));
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct omap_aes_reqctx) +
+                                        crypto_skcipher_reqsize(blk));
 
        ctx->enginectx.op.prepare_request = omap_aes_prepare_req;
        ctx->enginectx.op.unprepare_request = NULL;
@@ -662,7 +661,7 @@ static void omap_aes_exit_tfm(struct crypto_skcipher *tfm)
        struct omap_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
        if (ctx->fallback)
-               crypto_free_sync_skcipher(ctx->fallback);
+               crypto_free_skcipher(ctx->fallback);
 
        ctx->fallback = NULL;
 }
index 2d111bf..23d073e 100644 (file)
@@ -97,7 +97,7 @@ struct omap_aes_ctx {
        int             keylen;
        u32             key[AES_KEYSIZE_256 / sizeof(u32)];
        u8              nonce[4];
-       struct crypto_sync_skcipher     *fallback;
+       struct crypto_skcipher  *fallback;
 };
 
 struct omap_aes_gcm_ctx {
@@ -110,6 +110,7 @@ struct omap_aes_reqctx {
        unsigned long mode;
        u8 iv[AES_BLOCK_SIZE];
        u32 auth_tag[AES_BLOCK_SIZE / sizeof(u32)];
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 #define OMAP_AES_QUEUE_LENGTH  1
index 8eda433..c9d38bc 100644 (file)
@@ -87,7 +87,7 @@ struct omap_des_ctx {
        struct omap_des_dev *dd;
 
        int             keylen;
-       u32             key[(3 * DES_KEY_SIZE) / sizeof(u32)];
+       __le32          key[(3 * DES_KEY_SIZE) / sizeof(u32)];
        unsigned long   flags;
 };
 
@@ -461,7 +461,7 @@ static int omap_des_crypt_dma_start(struct omap_des_dev *dd)
                                        crypto_skcipher_reqtfm(dd->req));
        int err;
 
-       pr_debug("total: %d\n", dd->total);
+       pr_debug("total: %zd\n", dd->total);
 
        if (!dd->pio_only) {
                err = dma_map_sg(dd->dev, dd->in_sg, dd->in_sg_len,
@@ -504,7 +504,7 @@ static void omap_des_finish_req(struct omap_des_dev *dd, int err)
 
 static int omap_des_crypt_dma_stop(struct omap_des_dev *dd)
 {
-       pr_debug("total: %d\n", dd->total);
+       pr_debug("total: %zd\n", dd->total);
 
        omap_des_dma_stop(dd);
 
index 82691a0..954d703 100644 (file)
@@ -357,10 +357,10 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req)
 
        if (big_endian)
                for (i = 0; i < d; i++)
-                       hash[i] = be32_to_cpu(in[i]);
+                       hash[i] = be32_to_cpup((__be32 *)in + i);
        else
                for (i = 0; i < d; i++)
-                       hash[i] = le32_to_cpu(in[i]);
+                       hash[i] = le32_to_cpup((__le32 *)in + i);
 }
 
 static int omap_sham_hw_init(struct omap_sham_dev *dd)
@@ -522,7 +522,7 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, size_t length,
        int mlen;
        struct sg_mapping_iter mi;
 
-       dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
+       dev_dbg(dd->dev, "xmit_cpu: digcnt: %zd, length: %zd, final: %d\n",
                                                ctx->digcnt, length, final);
 
        dd->pdata->write_ctrl(dd, length, final, 0);
@@ -588,7 +588,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, size_t length,
        struct dma_slave_config cfg;
        int ret;
 
-       dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n",
+       dev_dbg(dd->dev, "xmit_dma: digcnt: %zd, length: %zd, final: %d\n",
                                                ctx->digcnt, length, final);
 
        if (!dma_map_sg(dd->dev, ctx->sg, ctx->sg_len, DMA_TO_DEVICE)) {
@@ -871,7 +871,7 @@ static int omap_sham_prepare_request(struct ahash_request *req, bool update)
                nbytes += req->nbytes - rctx->offset;
 
        dev_dbg(rctx->dd->dev,
-               "%s: nbytes=%d, bs=%d, total=%d, offset=%d, bufcnt=%d\n",
+               "%s: nbytes=%d, bs=%d, total=%d, offset=%d, bufcnt=%zd\n",
                __func__, nbytes, bs, rctx->total, rctx->offset,
                rctx->bufcnt);
 
@@ -932,7 +932,7 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
        return 0;
 }
 
-struct omap_sham_dev *omap_sham_find_dev(struct omap_sham_reqctx *ctx)
+static struct omap_sham_dev *omap_sham_find_dev(struct omap_sham_reqctx *ctx)
 {
        struct omap_sham_dev *dd;
 
@@ -1023,7 +1023,7 @@ static int omap_sham_update_req(struct omap_sham_dev *dd)
        bool final = (ctx->flags & BIT(FLAGS_FINUP)) &&
                        !(dd->flags & BIT(FLAGS_HUGE));
 
-       dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, final: %d",
+       dev_dbg(dd->dev, "update_req: total: %u, digcnt: %zd, final: %d",
                ctx->total, ctx->digcnt, final);
 
        if (ctx->total < get_block_size(ctx) ||
@@ -1036,7 +1036,7 @@ static int omap_sham_update_req(struct omap_sham_dev *dd)
                err = omap_sham_xmit_dma(dd, ctx->total, final);
 
        /* wait for dma completion before can take more data */
-       dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt);
+       dev_dbg(dd->dev, "update: err: %d, digcnt: %zd\n", err, ctx->digcnt);
 
        return err;
 }
@@ -1097,7 +1097,7 @@ static int omap_sham_finish(struct ahash_request *req)
                        err = omap_sham_finish_hmac(req);
        }
 
-       dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt);
+       dev_dbg(dd->dev, "digcnt: %zd, bufcnt: %zd\n", ctx->digcnt, ctx->bufcnt);
 
        return err;
 }
index 7384e91..dac6eb3 100644 (file)
@@ -86,6 +86,7 @@ struct spacc_req {
        dma_addr_t                      src_addr, dst_addr;
        struct spacc_ddt                *src_ddt, *dst_ddt;
        void                            (*complete)(struct spacc_req *req);
+       struct skcipher_request         fallback_req;   // keep at the end
 };
 
 struct spacc_aead {
@@ -158,7 +159,7 @@ struct spacc_ablk_ctx {
         * The fallback cipher. If the operation can't be done in hardware,
         * fallback to a software version.
         */
-       struct crypto_sync_skcipher     *sw_cipher;
+       struct crypto_skcipher          *sw_cipher;
 };
 
 /* AEAD cipher context. */
@@ -792,13 +793,13 @@ static int spacc_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
                 * Set the fallback transform to use the same request flags as
                 * the hardware transform.
                 */
-               crypto_sync_skcipher_clear_flags(ctx->sw_cipher,
+               crypto_skcipher_clear_flags(ctx->sw_cipher,
                                            CRYPTO_TFM_REQ_MASK);
-               crypto_sync_skcipher_set_flags(ctx->sw_cipher,
+               crypto_skcipher_set_flags(ctx->sw_cipher,
                                          cipher->base.crt_flags &
                                          CRYPTO_TFM_REQ_MASK);
 
-               err = crypto_sync_skcipher_setkey(ctx->sw_cipher, key, len);
+               err = crypto_skcipher_setkey(ctx->sw_cipher, key, len);
                if (err)
                        goto sw_setkey_failed;
        }
@@ -900,7 +901,7 @@ static int spacc_ablk_do_fallback(struct skcipher_request *req,
        struct crypto_tfm *old_tfm =
            crypto_skcipher_tfm(crypto_skcipher_reqtfm(req));
        struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm);
-       SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher);
+       struct spacc_req *dev_req = skcipher_request_ctx(req);
        int err;
 
        /*
@@ -908,13 +909,13 @@ static int spacc_ablk_do_fallback(struct skcipher_request *req,
         * the ciphering has completed, put the old transform back into the
         * request.
         */
-       skcipher_request_set_sync_tfm(subreq, ctx->sw_cipher);
-       skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL);
-       skcipher_request_set_crypt(subreq, req->src, req->dst,
+       skcipher_request_set_tfm(&dev_req->fallback_req, ctx->sw_cipher);
+       skcipher_request_set_callback(&dev_req->fallback_req, req->base.flags,
+                                     req->base.complete, req->base.data);
+       skcipher_request_set_crypt(&dev_req->fallback_req, req->src, req->dst,
                                   req->cryptlen, req->iv);
-       err = is_encrypt ? crypto_skcipher_encrypt(subreq) :
-                          crypto_skcipher_decrypt(subreq);
-       skcipher_request_zero(subreq);
+       err = is_encrypt ? crypto_skcipher_encrypt(&dev_req->fallback_req) :
+                          crypto_skcipher_decrypt(&dev_req->fallback_req);
 
        return err;
 }
@@ -1007,19 +1008,24 @@ static int spacc_ablk_init_tfm(struct crypto_skcipher *tfm)
        ctx->generic.flags = spacc_alg->type;
        ctx->generic.engine = engine;
        if (alg->base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
-               ctx->sw_cipher = crypto_alloc_sync_skcipher(
-                       alg->base.cra_name, 0, CRYPTO_ALG_NEED_FALLBACK);
+               ctx->sw_cipher = crypto_alloc_skcipher(alg->base.cra_name, 0,
+                                                      CRYPTO_ALG_NEED_FALLBACK);
                if (IS_ERR(ctx->sw_cipher)) {
                        dev_warn(engine->dev, "failed to allocate fallback for %s\n",
                                 alg->base.cra_name);
                        return PTR_ERR(ctx->sw_cipher);
                }
+               crypto_skcipher_set_reqsize(tfm, sizeof(struct spacc_req) +
+                                                crypto_skcipher_reqsize(ctx->sw_cipher));
+       } else {
+               /* take the size without the fallback skcipher_request at the end */
+               crypto_skcipher_set_reqsize(tfm, offsetof(struct spacc_req,
+                                                         fallback_req));
        }
+
        ctx->generic.key_offs = spacc_alg->key_offs;
        ctx->generic.iv_offs = spacc_alg->iv_offs;
 
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct spacc_req));
-
        return 0;
 }
 
@@ -1027,7 +1033,7 @@ static void spacc_ablk_exit_tfm(struct crypto_skcipher *tfm)
 {
        struct spacc_ablk_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       crypto_free_sync_skcipher(ctx->sw_cipher);
+       crypto_free_skcipher(ctx->sw_cipher);
 }
 
 static int spacc_ablk_encrypt(struct skcipher_request *req)
@@ -1226,6 +1232,7 @@ static struct spacc_alg ipsec_engine_algs[] = {
                        .base.cra_priority      = SPACC_CRYPTO_ALG_PRIORITY,
                        .base.cra_flags         = CRYPTO_ALG_KERN_DRIVER_ONLY |
                                                  CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                                  CRYPTO_ALG_NEED_FALLBACK,
                        .base.cra_blocksize     = AES_BLOCK_SIZE,
                        .base.cra_ctxsize       = sizeof(struct spacc_ablk_ctx),
@@ -1251,6 +1258,7 @@ static struct spacc_alg ipsec_engine_algs[] = {
                        .base.cra_priority      = SPACC_CRYPTO_ALG_PRIORITY,
                        .base.cra_flags         = CRYPTO_ALG_KERN_DRIVER_ONLY |
                                                  CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                                  CRYPTO_ALG_NEED_FALLBACK,
                        .base.cra_blocksize     = AES_BLOCK_SIZE,
                        .base.cra_ctxsize       = sizeof(struct spacc_ablk_ctx),
@@ -1274,7 +1282,8 @@ static struct spacc_alg ipsec_engine_algs[] = {
                        .base.cra_driver_name   = "cbc-des-picoxcell",
                        .base.cra_priority      = SPACC_CRYPTO_ALG_PRIORITY,
                        .base.cra_flags         = CRYPTO_ALG_KERN_DRIVER_ONLY |
-                                                 CRYPTO_ALG_ASYNC,
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_ALLOCATES_MEMORY,
                        .base.cra_blocksize     = DES_BLOCK_SIZE,
                        .base.cra_ctxsize       = sizeof(struct spacc_ablk_ctx),
                        .base.cra_module        = THIS_MODULE,
@@ -1298,7 +1307,8 @@ static struct spacc_alg ipsec_engine_algs[] = {
                        .base.cra_driver_name   = "ecb-des-picoxcell",
                        .base.cra_priority      = SPACC_CRYPTO_ALG_PRIORITY,
                        .base.cra_flags         = CRYPTO_ALG_KERN_DRIVER_ONLY |
-                                                 CRYPTO_ALG_ASYNC,
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_ALLOCATES_MEMORY,
                        .base.cra_blocksize     = DES_BLOCK_SIZE,
                        .base.cra_ctxsize       = sizeof(struct spacc_ablk_ctx),
                        .base.cra_module        = THIS_MODULE,
@@ -1321,6 +1331,7 @@ static struct spacc_alg ipsec_engine_algs[] = {
                        .base.cra_driver_name   = "cbc-des3-ede-picoxcell",
                        .base.cra_priority      = SPACC_CRYPTO_ALG_PRIORITY,
                        .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                                  CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .base.cra_blocksize     = DES3_EDE_BLOCK_SIZE,
                        .base.cra_ctxsize       = sizeof(struct spacc_ablk_ctx),
@@ -1345,6 +1356,7 @@ static struct spacc_alg ipsec_engine_algs[] = {
                        .base.cra_driver_name   = "ecb-des3-ede-picoxcell",
                        .base.cra_priority      = SPACC_CRYPTO_ALG_PRIORITY,
                        .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                                  CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .base.cra_blocksize     = DES3_EDE_BLOCK_SIZE,
                        .base.cra_ctxsize       = sizeof(struct spacc_ablk_ctx),
@@ -1376,6 +1388,7 @@ static struct spacc_aead ipsec_engine_aeads[] = {
                                                   "cbc-aes-picoxcell",
                                .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_NEED_FALLBACK |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = AES_BLOCK_SIZE,
@@ -1406,6 +1419,7 @@ static struct spacc_aead ipsec_engine_aeads[] = {
                                                   "cbc-aes-picoxcell",
                                .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_NEED_FALLBACK |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = AES_BLOCK_SIZE,
@@ -1436,6 +1450,7 @@ static struct spacc_aead ipsec_engine_aeads[] = {
                                                   "cbc-aes-picoxcell",
                                .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_NEED_FALLBACK |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = AES_BLOCK_SIZE,
@@ -1466,6 +1481,7 @@ static struct spacc_aead ipsec_engine_aeads[] = {
                                                   "cbc-3des-picoxcell",
                                .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_NEED_FALLBACK |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
@@ -1497,6 +1513,7 @@ static struct spacc_aead ipsec_engine_aeads[] = {
                                                   "cbc-3des-picoxcell",
                                .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_NEED_FALLBACK |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
@@ -1527,6 +1544,7 @@ static struct spacc_aead ipsec_engine_aeads[] = {
                                                   "cbc-3des-picoxcell",
                                .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
                                .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY |
                                             CRYPTO_ALG_NEED_FALLBACK |
                                             CRYPTO_ALG_KERN_DRIVER_ONLY,
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
@@ -1556,6 +1574,7 @@ static struct spacc_alg l2_engine_algs[] = {
                        .base.cra_driver_name   = "f8-kasumi-picoxcell",
                        .base.cra_priority      = SPACC_CRYPTO_ALG_PRIORITY,
                        .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_ALLOCATES_MEMORY |
                                                  CRYPTO_ALG_KERN_DRIVER_ONLY,
                        .base.cra_blocksize     = 8,
                        .base.cra_ctxsize       = sizeof(struct spacc_ablk_ctx),
index 6bc68bc..aee494d 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in
-         the documentation and/or other materials provided with the
-         distribution.
-       * Neither the name of Intel Corporation nor the names of its
-         contributors may be used to endorse or promote products derived
-         from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
 #include <adf_pf2vf_msg.h>
index afc9a0a..8b5dd2c 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_C3XXX_HW_DATA_H_
 #define ADF_C3XXX_HW_DATA_H_
 
index d937cc7..020d099 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
index d2d0ae4..d2fedbd 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_pf2vf_msg.h>
 #include <adf_common_drv.h>
index 934f216..7945a9c 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #ifndef ADF_C3XXXVF_HW_DATA_H_
 #define ADF_C3XXXVF_HW_DATA_H_
 
index 1dc5ac8..11039fe 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
index 618cec3..844ad5e 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in
-         the documentation and/or other materials provided with the
-         distribution.
-       * Neither the name of Intel Corporation nor the names of its
-         contributors may be used to endorse or promote products derived
-         from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
 #include <adf_pf2vf_msg.h>
index 17a8a32..88504d2 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_C62X_HW_DATA_H_
 #define ADF_C62X_HW_DATA_H_
 
index 2bc06c8..4ba9c14 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
index 38e4bc0..29fd3f1 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_pf2vf_msg.h>
 #include <adf_common_drv.h>
index a28d83e..a6c04cf 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #ifndef ADF_C62XVF_HW_DATA_H_
 #define ADF_C62XVF_HW_DATA_H_
 
index a68358b..b8b021d 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
index 33f0a62..c1db8c2 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_ACCEL_DEVICES_H_
 #define ADF_ACCEL_DEVICES_H_
 #include <linux/interrupt.h>
@@ -103,8 +59,8 @@ struct adf_accel_pci {
        struct pci_dev *pci_dev;
        struct adf_accel_msix msix_entries;
        struct adf_bar pci_bars[ADF_PCI_MAX_BARS];
-       uint8_t revid;
-       uint8_t sku;
+       u8 revid;
+       u8 sku;
 } __packed;
 
 enum dev_state {
@@ -144,7 +100,7 @@ static inline const char *get_sku_info(enum dev_sku_info info)
 struct adf_hw_device_class {
        const char *name;
        const enum adf_device_type type;
-       uint32_t instances;
+       u32 instances;
 } __packed;
 
 struct adf_cfg_device_data;
@@ -154,15 +110,15 @@ struct adf_etr_ring_data;
 
 struct adf_hw_device_data {
        struct adf_hw_device_class *dev_class;
-       uint32_t (*get_accel_mask)(uint32_t fuse);
-       uint32_t (*get_ae_mask)(uint32_t fuse);
-       uint32_t (*get_sram_bar_id)(struct adf_hw_device_data *self);
-       uint32_t (*get_misc_bar_id)(struct adf_hw_device_data *self);
-       uint32_t (*get_etr_bar_id)(struct adf_hw_device_data *self);
-       uint32_t (*get_num_aes)(struct adf_hw_device_data *self);
-       uint32_t (*get_num_accels)(struct adf_hw_device_data *self);
-       uint32_t (*get_pf2vf_offset)(uint32_t i);
-       uint32_t (*get_vintmsk_offset)(uint32_t i);
+       u32 (*get_accel_mask)(u32 fuse);
+       u32 (*get_ae_mask)(u32 fuse);
+       u32 (*get_sram_bar_id)(struct adf_hw_device_data *self);
+       u32 (*get_misc_bar_id)(struct adf_hw_device_data *self);
+       u32 (*get_etr_bar_id)(struct adf_hw_device_data *self);
+       u32 (*get_num_aes)(struct adf_hw_device_data *self);
+       u32 (*get_num_accels)(struct adf_hw_device_data *self);
+       u32 (*get_pf2vf_offset)(u32 i);
+       u32 (*get_vintmsk_offset)(u32 i);
        enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self);
        int (*alloc_irq)(struct adf_accel_dev *accel_dev);
        void (*free_irq)(struct adf_accel_dev *accel_dev);
@@ -173,25 +129,25 @@ struct adf_hw_device_data {
        int (*init_arb)(struct adf_accel_dev *accel_dev);
        void (*exit_arb)(struct adf_accel_dev *accel_dev);
        void (*get_arb_mapping)(struct adf_accel_dev *accel_dev,
-                               const uint32_t **cfg);
+                               const u32 **cfg);
        void (*disable_iov)(struct adf_accel_dev *accel_dev);
        void (*enable_ints)(struct adf_accel_dev *accel_dev);
        int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev);
        void (*reset_device)(struct adf_accel_dev *accel_dev);
        const char *fw_name;
        const char *fw_mmp_name;
-       uint32_t fuses;
-       uint32_t accel_capabilities_mask;
-       uint32_t instance_id;
-       uint16_t accel_mask;
-       uint16_t ae_mask;
-       uint16_t tx_rings_mask;
-       uint8_t tx_rx_gap;
-       uint8_t num_banks;
-       uint8_t num_accel;
-       uint8_t num_logical_accel;
-       uint8_t num_engines;
-       uint8_t min_iov_compat_ver;
+       u32 fuses;
+       u32 accel_capabilities_mask;
+       u32 instance_id;
+       u16 accel_mask;
+       u16 ae_mask;
+       u16 tx_rings_mask;
+       u8 tx_rx_gap;
+       u8 num_banks;
+       u8 num_accel;
+       u8 num_logical_accel;
+       u8 num_engines;
+       u8 min_iov_compat_ver;
 } __packed;
 
 /* CSR write macro */
@@ -248,8 +204,8 @@ struct adf_accel_dev {
                        struct tasklet_struct pf2vf_bh_tasklet;
                        struct mutex vf2pf_lock; /* protect CSR access */
                        struct completion iov_msg_completion;
-                       uint8_t compatible;
-                       uint8_t pf_version;
+                       u8 compatible;
+                       u8 pf_version;
                } vf;
        };
        bool is_vf;
index a42fc42..c8ad85b 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/firmware.h>
 #include <linux/pci.h>
 #include "adf_cfg.h"
@@ -118,7 +74,7 @@ int adf_ae_start(struct adf_accel_dev *accel_dev)
 {
        struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
-       uint32_t ae_ctr, ae, max_aes = GET_MAX_ACCELENGINES(accel_dev);
+       u32 ae_ctr, ae, max_aes = GET_MAX_ACCELENGINES(accel_dev);
 
        if (!hw_data->fw_name)
                return 0;
@@ -139,7 +95,7 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev)
 {
        struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
-       uint32_t ae_ctr, ae, max_aes = GET_MAX_ACCELENGINES(accel_dev);
+       u32 ae_ctr, ae, max_aes = GET_MAX_ACCELENGINES(accel_dev);
 
        if (!hw_data->fw_name)
                return 0;
index d28cba3..1c8ca15 100644 (file)
@@ -1,53 +1,9 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/types.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/delay.h>
+#include <linux/iopoll.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include "adf_accel_devices.h"
@@ -60,6 +16,9 @@
 #define ADF_DH895XCC_MAILBOX_BASE_OFFSET 0x20970
 #define ADF_DH895XCC_MAILBOX_STRIDE 0x1000
 #define ADF_ADMINMSG_LEN 32
+#define ADF_CONST_TABLE_SIZE 1024
+#define ADF_ADMIN_POLL_DELAY_US 20
+#define ADF_ADMIN_POLL_TIMEOUT_US (5 * USEC_PER_SEC)
 
 static const u8 const_tab[1024] __aligned(1024) = {
 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -154,11 +113,13 @@ struct adf_admin_comms {
 static int adf_put_admin_msg_sync(struct adf_accel_dev *accel_dev, u32 ae,
                                  void *in, void *out)
 {
+       int ret;
+       u32 status;
        struct adf_admin_comms *admin = accel_dev->admin;
        int offset = ae * ADF_ADMINMSG_LEN * 2;
        void __iomem *mailbox = admin->mailbox_addr;
        int mb_offset = ae * ADF_DH895XCC_MAILBOX_STRIDE;
-       int times, received;
+       struct icp_qat_fw_init_admin_req *request = in;
 
        mutex_lock(&admin->lock);
 
@@ -169,46 +130,71 @@ static int adf_put_admin_msg_sync(struct adf_accel_dev *accel_dev, u32 ae,
 
        memcpy(admin->virt_addr + offset, in, ADF_ADMINMSG_LEN);
        ADF_CSR_WR(mailbox, mb_offset, 1);
-       received = 0;
-       for (times = 0; times < 50; times++) {
-               msleep(20);
-               if (ADF_CSR_RD(mailbox, mb_offset) == 0) {
-                       received = 1;
-                       break;
-               }
-       }
-       if (received)
+
+       ret = readl_poll_timeout(mailbox + mb_offset, status,
+                                status == 0, ADF_ADMIN_POLL_DELAY_US,
+                                ADF_ADMIN_POLL_TIMEOUT_US);
+       if (ret < 0) {
+               /* Response timeout */
+               dev_err(&GET_DEV(accel_dev),
+                       "Failed to send admin msg %d to accelerator %d\n",
+                       request->cmd_id, ae);
+       } else {
+               /* Response received from admin message, we can now
+                * make response data available in "out" parameter.
+                */
                memcpy(out, admin->virt_addr + offset +
                       ADF_ADMINMSG_LEN, ADF_ADMINMSG_LEN);
-       else
-               dev_err(&GET_DEV(accel_dev),
-                       "Failed to send admin msg to accelerator\n");
+       }
 
        mutex_unlock(&admin->lock);
-       return received ? 0 : -EFAULT;
+       return ret;
+}
+
+static int adf_send_admin(struct adf_accel_dev *accel_dev,
+                         struct icp_qat_fw_init_admin_req *req,
+                         struct icp_qat_fw_init_admin_resp *resp,
+                         const unsigned long ae_mask)
+{
+       u32 ae;
+
+       for_each_set_bit(ae, &ae_mask, ICP_QAT_HW_AE_DELIMITER)
+               if (adf_put_admin_msg_sync(accel_dev, ae, req, resp) ||
+                   resp->status)
+                       return -EFAULT;
+
+       return 0;
 }
 
-static int adf_send_admin_cmd(struct adf_accel_dev *accel_dev, int cmd)
+static int adf_init_me(struct adf_accel_dev *accel_dev)
 {
+       struct icp_qat_fw_init_admin_req req;
+       struct icp_qat_fw_init_admin_resp resp;
        struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+       u32 ae_mask = hw_device->ae_mask;
+
+       memset(&req, 0, sizeof(req));
+       memset(&resp, 0, sizeof(resp));
+       req.cmd_id = ICP_QAT_FW_INIT_ME;
+
+       return adf_send_admin(accel_dev, &req, &resp, ae_mask);
+}
+
+static int adf_set_fw_constants(struct adf_accel_dev *accel_dev)
+{
        struct icp_qat_fw_init_admin_req req;
        struct icp_qat_fw_init_admin_resp resp;
-       int i;
+       struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+       u32 ae_mask = hw_device->ae_mask;
 
-       memset(&req, 0, sizeof(struct icp_qat_fw_init_admin_req));
-       req.init_admin_cmd_id = cmd;
+       memset(&req, 0, sizeof(req));
+       memset(&resp, 0, sizeof(resp));
+       req.cmd_id = ICP_QAT_FW_CONSTANTS_CFG;
 
-       if (cmd == ICP_QAT_FW_CONSTANTS_CFG) {
-               req.init_cfg_sz = 1024;
-               req.init_cfg_ptr = accel_dev->admin->const_tbl_addr;
-       }
-       for (i = 0; i < hw_device->get_num_aes(hw_device); i++) {
-               memset(&resp, 0, sizeof(struct icp_qat_fw_init_admin_resp));
-               if (adf_put_admin_msg_sync(accel_dev, i, &req, &resp) ||
-                   resp.init_resp_hdr.status)
-                       return -EFAULT;
-       }
-       return 0;
+       req.init_cfg_sz = ADF_CONST_TABLE_SIZE;
+       req.init_cfg_ptr = accel_dev->admin->const_tbl_addr;
+
+       return adf_send_admin(accel_dev, &req, &resp, ae_mask);
 }
 
 /**
@@ -221,11 +207,13 @@ static int adf_send_admin_cmd(struct adf_accel_dev *accel_dev, int cmd)
  */
 int adf_send_admin_init(struct adf_accel_dev *accel_dev)
 {
-       int ret = adf_send_admin_cmd(accel_dev, ICP_QAT_FW_INIT_ME);
+       int ret;
 
+       ret = adf_init_me(accel_dev);
        if (ret)
                return ret;
-       return adf_send_admin_cmd(accel_dev, ICP_QAT_FW_CONSTANTS_CFG);
+
+       return adf_set_fw_constants(accel_dev);
 }
 EXPORT_SYMBOL_GPL(adf_send_admin_init);
 
index f5e960d..32102e2 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
@@ -86,7 +42,7 @@ void adf_reset_sbr(struct adf_accel_dev *accel_dev)
 {
        struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
        struct pci_dev *parent = pdev->bus->self;
-       uint16_t bridge_ctl = 0;
+       u16 bridge_ctl = 0;
 
        if (!parent)
                parent = pdev;
index 5c7fdb0..ac46279 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/list.h>
index 6a9c6f6..376cde6 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_CFG_H_
 #define ADF_CFG_H_
 
index 1211261..1ef46cc 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_CFG_COMMON_H_
 #define ADF_CFG_COMMON_H_
 
@@ -81,16 +37,16 @@ enum adf_device_type {
 
 struct adf_dev_status_info {
        enum adf_device_type type;
-       u32 accel_id;
-       u32 instance_id;
-       uint8_t num_ae;
-       uint8_t num_accel;
-       uint8_t num_logical_accel;
-       uint8_t banks_per_accel;
-       uint8_t state;
-       uint8_t bus;
-       uint8_t dev;
-       uint8_t fun;
+       __u32 accel_id;
+       __u32 instance_id;
+       __u8 num_ae;
+       __u8 num_accel;
+       __u8 num_logical_accel;
+       __u8 banks_per_accel;
+       __u8 state;
+       __u8 bus;
+       __u8 dev;
+       __u8 fun;
        char name[MAX_DEVICE_NAME_SIZE];
 };
 
@@ -101,6 +57,6 @@ struct adf_dev_status_info {
                struct adf_user_cfg_ctl_data)
 #define IOCTL_START_ACCEL_DEV _IOW(ADF_CTL_IOC_MAGIC, 2, \
                struct adf_user_cfg_ctl_data)
-#define IOCTL_STATUS_ACCEL_DEV _IOW(ADF_CTL_IOC_MAGIC, 3, uint32_t)
-#define IOCTL_GET_NUM_DEVICES _IOW(ADF_CTL_IOC_MAGIC, 4, int32_t)
+#define IOCTL_STATUS_ACCEL_DEV _IOW(ADF_CTL_IOC_MAGIC, 3, __u32)
+#define IOCTL_GET_NUM_DEVICES _IOW(ADF_CTL_IOC_MAGIC, 4, __s32)
 #endif
index 7632ed0..314790f 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_CFG_STRINGS_H_
 #define ADF_CFG_STRINGS_H_
 
index b5484bf..421f4fb 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_CFG_USER_H_
 #define ADF_CFG_USER_H_
 
@@ -55,7 +11,7 @@ struct adf_user_cfg_key_val {
        char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
        union {
                struct adf_user_cfg_key_val *next;
-               uint64_t padding3;
+               __u64 padding3;
        };
        enum adf_cfg_val_type type;
 } __packed;
@@ -64,19 +20,19 @@ struct adf_user_cfg_section {
        char name[ADF_CFG_MAX_SECTION_LEN_IN_BYTES];
        union {
                struct adf_user_cfg_key_val *params;
-               uint64_t padding1;
+               __u64 padding1;
        };
        union {
                struct adf_user_cfg_section *next;
-               uint64_t padding3;
+               __u64 padding3;
        };
 } __packed;
 
 struct adf_user_cfg_ctl_data {
        union {
                struct adf_user_cfg_section *config_section;
-               uint64_t padding;
+               __u64 padding;
        };
-       uint8_t device_id;
+       __u8 device_id;
 } __packed;
 #endif
index d78f8d5..ebfcb4e 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_DRV_H
 #define ADF_DRV_H
 
@@ -123,11 +79,11 @@ int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev,
 void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev,
                       struct adf_accel_dev *pf);
 struct list_head *adf_devmgr_get_head(void);
-struct adf_accel_dev *adf_devmgr_get_dev_by_id(uint32_t id);
+struct adf_accel_dev *adf_devmgr_get_dev_by_id(u32 id);
 struct adf_accel_dev *adf_devmgr_get_first(void);
 struct adf_accel_dev *adf_devmgr_pci_to_accel_dev(struct pci_dev *pci_dev);
-int adf_devmgr_verify_id(uint32_t id);
-void adf_devmgr_get_num_dev(uint32_t *num);
+int adf_devmgr_verify_id(u32 id);
+void adf_devmgr_get_num_dev(u32 *num);
 int adf_devmgr_in_reset(struct adf_accel_dev *accel_dev);
 int adf_dev_started(struct adf_accel_dev *accel_dev);
 int adf_dev_restarting_notify(struct adf_accel_dev *accel_dev);
@@ -198,7 +154,7 @@ void qat_hal_set_pc(struct icp_qat_fw_loader_handle *handle,
                    unsigned char ae, unsigned int ctx_mask, unsigned int upc);
 void qat_hal_wr_uwords(struct icp_qat_fw_loader_handle *handle,
                       unsigned char ae, unsigned int uaddr,
-                      unsigned int words_num, uint64_t *uword);
+                      unsigned int words_num, u64 *uword);
 void qat_hal_wr_umem(struct icp_qat_fw_loader_handle *handle, unsigned char ae,
                     unsigned int uword_addr, unsigned int words_num,
                     unsigned int *data);
@@ -233,9 +189,9 @@ int qat_uclo_map_obj(struct icp_qat_fw_loader_handle *handle,
 int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
 void adf_disable_sriov(struct adf_accel_dev *accel_dev);
 void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
-                                 uint32_t vf_mask);
+                                 u32 vf_mask);
 void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
-                                uint32_t vf_mask);
+                                u32 vf_mask);
 void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
 void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
 
index ef0e482..71d0c44 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -270,7 +226,7 @@ static int adf_ctl_is_device_in_use(int id)
        return 0;
 }
 
-static void adf_ctl_stop_devices(uint32_t id)
+static void adf_ctl_stop_devices(u32 id)
 {
        struct adf_accel_dev *accel_dev;
 
@@ -374,7 +330,7 @@ out:
 static int adf_ctl_ioctl_get_num_devices(struct file *fp, unsigned int cmd,
                                         unsigned long arg)
 {
-       uint32_t num_devices = 0;
+       u32 num_devices = 0;
 
        adf_devmgr_get_num_dev(&num_devices);
        if (copy_to_user((void __user *)arg, &num_devices, sizeof(num_devices)))
index 2d06409..72753af 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include "adf_cfg.h"
@@ -52,7 +8,7 @@
 static LIST_HEAD(accel_table);
 static LIST_HEAD(vfs_table);
 static DEFINE_MUTEX(table_lock);
-static uint32_t num_devices;
+static u32 num_devices;
 static u8 id_map[ADF_MAX_DEVICES];
 
 struct vf_id_map {
@@ -355,7 +311,7 @@ struct adf_accel_dev *adf_devmgr_pci_to_accel_dev(struct pci_dev *pci_dev)
 }
 EXPORT_SYMBOL_GPL(adf_devmgr_pci_to_accel_dev);
 
-struct adf_accel_dev *adf_devmgr_get_dev_by_id(uint32_t id)
+struct adf_accel_dev *adf_devmgr_get_dev_by_id(u32 id)
 {
        struct list_head *itr;
        int real_id;
@@ -380,7 +336,7 @@ unlock:
        return NULL;
 }
 
-int adf_devmgr_verify_id(uint32_t id)
+int adf_devmgr_verify_id(u32 id)
 {
        if (id == ADF_CFG_ALL_DEVICES)
                return 0;
@@ -407,7 +363,7 @@ static int adf_get_num_dettached_vfs(void)
        return vfs;
 }
 
-void adf_devmgr_get_num_dev(uint32_t *num)
+void adf_devmgr_get_num_dev(u32 *num)
 {
        *num = num_devices - adf_get_num_dettached_vfs();
 }
index d7dd18d..d416278 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
 #include "adf_transport_internal.h"
index 26556c7..4202915 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/bitops.h>
index cd1cdf5..36136f7 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
index b3875fd..519fd5a 100644 (file)
@@ -1,50 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #include <linux/delay.h>
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
index 5acd531..0690c03 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #ifndef ADF_PF2VF_MSG_H
 #define ADF_PF2VF_MSG_H
 
index b36d865..8827aa1 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #include <linux/workqueue.h>
 #include <linux/pci.h>
 #include <linux/device.h>
index 2136cbe..2ad7740 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/delay.h>
 #include "adf_accel_devices.h"
 #include "adf_transport_internal.h"
 #include "adf_cfg.h"
 #include "adf_common_drv.h"
 
-static inline uint32_t adf_modulo(uint32_t data, uint32_t shift)
+static inline u32 adf_modulo(u32 data, u32 shift)
 {
-       uint32_t div = data >> shift;
-       uint32_t mult = div << shift;
+       u32 div = data >> shift;
+       u32 mult = div << shift;
 
        return data - mult;
 }
 
-static inline int adf_check_ring_alignment(uint64_t addr, uint64_t size)
+static inline int adf_check_ring_alignment(u64 addr, u64 size)
 {
        if (((size - 1) & addr) != 0)
                return -EFAULT;
        return 0;
 }
 
-static int adf_verify_ring_size(uint32_t msg_size, uint32_t msg_num)
+static int adf_verify_ring_size(u32 msg_size, u32 msg_num)
 {
        int i = ADF_MIN_RING_SIZE;
 
@@ -77,7 +33,7 @@ static int adf_verify_ring_size(uint32_t msg_size, uint32_t msg_num)
        return ADF_DEFAULT_RING_SIZE;
 }
 
-static int adf_reserve_ring(struct adf_etr_bank_data *bank, uint32_t ring)
+static int adf_reserve_ring(struct adf_etr_bank_data *bank, u32 ring)
 {
        spin_lock(&bank->lock);
        if (bank->ring_mask & (1 << ring)) {
@@ -89,14 +45,14 @@ static int adf_reserve_ring(struct adf_etr_bank_data *bank, uint32_t ring)
        return 0;
 }
 
-static void adf_unreserve_ring(struct adf_etr_bank_data *bank, uint32_t ring)
+static void adf_unreserve_ring(struct adf_etr_bank_data *bank, u32 ring)
 {
        spin_lock(&bank->lock);
        bank->ring_mask &= ~(1 << ring);
        spin_unlock(&bank->lock);
 }
 
-static void adf_enable_ring_irq(struct adf_etr_bank_data *bank, uint32_t ring)
+static void adf_enable_ring_irq(struct adf_etr_bank_data *bank, u32 ring)
 {
        spin_lock_bh(&bank->lock);
        bank->irq_mask |= (1 << ring);
@@ -106,7 +62,7 @@ static void adf_enable_ring_irq(struct adf_etr_bank_data *bank, uint32_t ring)
                              bank->irq_coalesc_timer);
 }
 
-static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, uint32_t ring)
+static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, u32 ring)
 {
        spin_lock_bh(&bank->lock);
        bank->irq_mask &= ~(1 << ring);
@@ -114,7 +70,7 @@ static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, uint32_t ring)
        WRITE_CSR_INT_COL_EN(bank->csr_addr, bank->bank_number, bank->irq_mask);
 }
 
-int adf_send_message(struct adf_etr_ring_data *ring, uint32_t *msg)
+int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg)
 {
        if (atomic_add_return(1, ring->inflights) >
            ADF_MAX_INFLIGHTS(ring->ring_size, ring->msg_size)) {
@@ -136,18 +92,18 @@ int adf_send_message(struct adf_etr_ring_data *ring, uint32_t *msg)
 
 static int adf_handle_response(struct adf_etr_ring_data *ring)
 {
-       uint32_t msg_counter = 0;
-       uint32_t *msg = (uint32_t *)((uintptr_t)ring->base_addr + ring->head);
+       u32 msg_counter = 0;
+       u32 *msg = (u32 *)((uintptr_t)ring->base_addr + ring->head);
 
        while (*msg != ADF_RING_EMPTY_SIG) {
-               ring->callback((uint32_t *)msg);
+               ring->callback((u32 *)msg);
                atomic_dec(ring->inflights);
                *msg = ADF_RING_EMPTY_SIG;
                ring->head = adf_modulo(ring->head +
                                        ADF_MSG_SIZE_TO_BYTES(ring->msg_size),
                                        ADF_RING_SIZE_MODULO(ring->ring_size));
                msg_counter++;
-               msg = (uint32_t *)((uintptr_t)ring->base_addr + ring->head);
+               msg = (u32 *)((uintptr_t)ring->base_addr + ring->head);
        }
        if (msg_counter > 0)
                WRITE_CSR_RING_HEAD(ring->bank->csr_addr,
@@ -158,7 +114,7 @@ static int adf_handle_response(struct adf_etr_ring_data *ring)
 
 static void adf_configure_tx_ring(struct adf_etr_ring_data *ring)
 {
-       uint32_t ring_config = BUILD_RING_CONFIG(ring->ring_size);
+       u32 ring_config = BUILD_RING_CONFIG(ring->ring_size);
 
        WRITE_CSR_RING_CONFIG(ring->bank->csr_addr, ring->bank->bank_number,
                              ring->ring_number, ring_config);
@@ -166,7 +122,7 @@ static void adf_configure_tx_ring(struct adf_etr_ring_data *ring)
 
 static void adf_configure_rx_ring(struct adf_etr_ring_data *ring)
 {
-       uint32_t ring_config =
+       u32 ring_config =
                        BUILD_RESP_RING_CONFIG(ring->ring_size,
                                               ADF_RING_NEAR_WATERMARK_512,
                                               ADF_RING_NEAR_WATERMARK_0);
@@ -180,8 +136,8 @@ static int adf_init_ring(struct adf_etr_ring_data *ring)
        struct adf_etr_bank_data *bank = ring->bank;
        struct adf_accel_dev *accel_dev = bank->accel_dev;
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
-       uint64_t ring_base;
-       uint32_t ring_size_bytes =
+       u64 ring_base;
+       u32 ring_size_bytes =
                        ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size);
 
        ring_size_bytes = ADF_RING_SIZE_BYTES_MIN(ring_size_bytes);
@@ -215,7 +171,7 @@ static int adf_init_ring(struct adf_etr_ring_data *ring)
 
 static void adf_cleanup_ring(struct adf_etr_ring_data *ring)
 {
-       uint32_t ring_size_bytes =
+       u32 ring_size_bytes =
                        ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size);
        ring_size_bytes = ADF_RING_SIZE_BYTES_MIN(ring_size_bytes);
 
@@ -228,8 +184,8 @@ static void adf_cleanup_ring(struct adf_etr_ring_data *ring)
 }
 
 int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
-                   uint32_t bank_num, uint32_t num_msgs,
-                   uint32_t msg_size, const char *ring_name,
+                   u32 bank_num, u32 num_msgs,
+                   u32 msg_size, const char *ring_name,
                    adf_callback_fn callback, int poll_mode,
                    struct adf_etr_ring_data **ring_ptr)
 {
@@ -237,7 +193,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
        struct adf_etr_bank_data *bank;
        struct adf_etr_ring_data *ring;
        char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
-       uint32_t ring_num;
+       u32 ring_num;
        int ret;
 
        if (bank_num >= GET_MAX_BANKS(accel_dev)) {
@@ -330,7 +286,7 @@ void adf_remove_ring(struct adf_etr_ring_data *ring)
 
 static void adf_ring_response_handler(struct adf_etr_bank_data *bank)
 {
-       uint32_t empty_rings, i;
+       u32 empty_rings, i;
 
        empty_rings = READ_CSR_E_STAT(bank->csr_addr, bank->bank_number);
        empty_rings = ~empty_rings & bank->irq_mask;
@@ -353,7 +309,7 @@ void adf_response_handler(uintptr_t bank_addr)
 
 static inline int adf_get_cfg_int(struct adf_accel_dev *accel_dev,
                                  const char *section, const char *format,
-                                 uint32_t key, uint32_t *value)
+                                 u32 key, u32 *value)
 {
        char key_buf[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
        char val_buf[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
@@ -370,7 +326,7 @@ static inline int adf_get_cfg_int(struct adf_accel_dev *accel_dev,
 
 static void adf_get_coalesc_timer(struct adf_etr_bank_data *bank,
                                  const char *section,
-                                 uint32_t bank_num_in_accel)
+                                 u32 bank_num_in_accel)
 {
        if (adf_get_cfg_int(bank->accel_dev, section,
                            ADF_ETRMGR_COALESCE_TIMER_FORMAT,
@@ -384,12 +340,12 @@ static void adf_get_coalesc_timer(struct adf_etr_bank_data *bank,
 
 static int adf_init_bank(struct adf_accel_dev *accel_dev,
                         struct adf_etr_bank_data *bank,
-                        uint32_t bank_num, void __iomem *csr_addr)
+                        u32 bank_num, void __iomem *csr_addr)
 {
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
        struct adf_etr_ring_data *ring;
        struct adf_etr_ring_data *tx_ring;
-       uint32_t i, coalesc_enabled = 0;
+       u32 i, coalesc_enabled = 0;
 
        memset(bank, 0, sizeof(*bank));
        bank->bank_number = bank_num;
@@ -461,8 +417,8 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev)
        struct adf_etr_data *etr_data;
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
        void __iomem *csr_addr;
-       uint32_t size;
-       uint32_t num_banks = 0;
+       u32 size;
+       u32 num_banks = 0;
        int i, ret;
 
        etr_data = kzalloc_node(sizeof(*etr_data), GFP_KERNEL,
@@ -508,7 +464,7 @@ EXPORT_SYMBOL_GPL(adf_init_etr_data);
 
 static void cleanup_bank(struct adf_etr_bank_data *bank)
 {
-       uint32_t i;
+       u32 i;
 
        for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; i++) {
                struct adf_accel_dev *accel_dev = bank->accel_dev;
@@ -528,7 +484,7 @@ static void cleanup_bank(struct adf_etr_bank_data *bank)
 static void adf_cleanup_etr_handles(struct adf_accel_dev *accel_dev)
 {
        struct adf_etr_data *etr_data = accel_dev->transport;
-       uint32_t i, num_banks = GET_MAX_BANKS(accel_dev);
+       u32 i, num_banks = GET_MAX_BANKS(accel_dev);
 
        for (i = 0; i < num_banks; i++)
                cleanup_bank(&etr_data->banks[i]);
index 386485b..2c95f16 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_TRANSPORT_H
 #define ADF_TRANSPORT_H
 
@@ -54,10 +10,10 @@ struct adf_etr_ring_data;
 typedef void (*adf_callback_fn)(void *resp_msg);
 
 int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
-                   uint32_t bank_num, uint32_t num_mgs, uint32_t msg_size,
+                   u32 bank_num, u32 num_mgs, u32 msg_size,
                    const char *ring_name, adf_callback_fn callback,
                    int poll_mode, struct adf_etr_ring_data **ring_ptr);
 
-int adf_send_message(struct adf_etr_ring_data *ring, uint32_t *msg);
+int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg);
 void adf_remove_ring(struct adf_etr_ring_data *ring);
 #endif
index 80e02a2..950d198 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_TRANSPORT_ACCESS_MACROS_H
 #define ADF_TRANSPORT_ACCESS_MACROS_H
 
                ADF_RING_CSR_RING_CONFIG + (ring << 2), value)
 #define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \
 do { \
-       uint32_t l_base = 0, u_base = 0; \
-       l_base = (uint32_t)(value & 0xFFFFFFFF); \
-       u_base = (uint32_t)((value & 0xFFFFFFFF00000000ULL) >> 32); \
+       u32 l_base = 0, u_base = 0; \
+       l_base = (u32)(value & 0xFFFFFFFF); \
+       u_base = (u32)((value & 0xFFFFFFFF00000000ULL) >> 32); \
        ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \
                ADF_RING_CSR_RING_LBASE + (ring << 2), l_base); \
        ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + \
index e794e9d..2a2eccb 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
index bb88336..c7faf4e 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_TRANSPORT_INTRN_H
 #define ADF_TRANSPORT_INTRN_H
 
@@ -59,32 +15,31 @@ struct adf_etr_ring_debug_entry {
 struct adf_etr_ring_data {
        void *base_addr;
        atomic_t *inflights;
-       spinlock_t lock;        /* protects ring data struct */
        adf_callback_fn callback;
        struct adf_etr_bank_data *bank;
        dma_addr_t dma_addr;
-       uint16_t head;
-       uint16_t tail;
-       uint8_t ring_number;
-       uint8_t ring_size;
-       uint8_t msg_size;
-       uint8_t reserved;
        struct adf_etr_ring_debug_entry *ring_debug;
-} __packed;
+       spinlock_t lock;        /* protects ring data struct */
+       u16 head;
+       u16 tail;
+       u8 ring_number;
+       u8 ring_size;
+       u8 msg_size;
+};
 
 struct adf_etr_bank_data {
        struct adf_etr_ring_data rings[ADF_ETR_MAX_RINGS_PER_BANK];
        struct tasklet_struct resp_handler;
        void __iomem *csr_addr;
-       struct adf_accel_dev *accel_dev;
-       uint32_t irq_coalesc_timer;
-       uint16_t ring_mask;
-       uint16_t irq_mask;
+       u32 irq_coalesc_timer;
+       u32 bank_number;
+       u16 ring_mask;
+       u16 irq_mask;
        spinlock_t lock;        /* protects bank data struct */
+       struct adf_accel_dev *accel_dev;
        struct dentry *bank_debug_dir;
        struct dentry *bank_debug_cfg;
-       uint32_t bank_number;
-} __packed;
+};
 
 struct adf_etr_data {
        struct adf_etr_bank_data *banks;
index cd5f37d..2c98fb6 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
 #include "adf_pf2vf_msg.h"
index 4a73fc7..c4a44dc 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
index 46747f0..6dc09d2 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef _ICP_QAT_FW_H_
 #define _ICP_QAT_FW_H_
 #include <linux/types.h>
@@ -89,41 +45,41 @@ enum icp_qat_fw_comn_request_id {
 struct icp_qat_fw_comn_req_hdr_cd_pars {
        union {
                struct {
-                       uint64_t content_desc_addr;
-                       uint16_t content_desc_resrvd1;
-                       uint8_t content_desc_params_sz;
-                       uint8_t content_desc_hdr_resrvd2;
-                       uint32_t content_desc_resrvd3;
+                       __u64 content_desc_addr;
+                       __u16 content_desc_resrvd1;
+                       __u8 content_desc_params_sz;
+                       __u8 content_desc_hdr_resrvd2;
+                       __u32 content_desc_resrvd3;
                } s;
                struct {
-                       uint32_t serv_specif_fields[4];
+                       __u32 serv_specif_fields[4];
                } s1;
        } u;
 };
 
 struct icp_qat_fw_comn_req_mid {
-       uint64_t opaque_data;
-       uint64_t src_data_addr;
-       uint64_t dest_data_addr;
-       uint32_t src_length;
-       uint32_t dst_length;
+       __u64 opaque_data;
+       __u64 src_data_addr;
+       __u64 dest_data_addr;
+       __u32 src_length;
+       __u32 dst_length;
 };
 
 struct icp_qat_fw_comn_req_cd_ctrl {
-       uint32_t content_desc_ctrl_lw[ICP_QAT_FW_NUM_LONGWORDS_5];
+       __u32 content_desc_ctrl_lw[ICP_QAT_FW_NUM_LONGWORDS_5];
 };
 
 struct icp_qat_fw_comn_req_hdr {
-       uint8_t resrvd1;
-       uint8_t service_cmd_id;
-       uint8_t service_type;
-       uint8_t hdr_flags;
-       uint16_t serv_specif_flags;
-       uint16_t comn_req_flags;
+       __u8 resrvd1;
+       __u8 service_cmd_id;
+       __u8 service_type;
+       __u8 hdr_flags;
+       __u16 serv_specif_flags;
+       __u16 comn_req_flags;
 };
 
 struct icp_qat_fw_comn_req_rqpars {
-       uint32_t serv_specif_rqpars_lw[ICP_QAT_FW_NUM_LONGWORDS_13];
+       __u32 serv_specif_rqpars_lw[ICP_QAT_FW_NUM_LONGWORDS_13];
 };
 
 struct icp_qat_fw_comn_req {
@@ -135,24 +91,24 @@ struct icp_qat_fw_comn_req {
 };
 
 struct icp_qat_fw_comn_error {
-       uint8_t xlat_err_code;
-       uint8_t cmp_err_code;
+       __u8 xlat_err_code;
+       __u8 cmp_err_code;
 };
 
 struct icp_qat_fw_comn_resp_hdr {
-       uint8_t resrvd1;
-       uint8_t service_id;
-       uint8_t response_type;
-       uint8_t hdr_flags;
+       __u8 resrvd1;
+       __u8 service_id;
+       __u8 response_type;
+       __u8 hdr_flags;
        struct icp_qat_fw_comn_error comn_error;
-       uint8_t comn_status;
-       uint8_t cmd_id;
+       __u8 comn_status;
+       __u8 cmd_id;
 };
 
 struct icp_qat_fw_comn_resp {
        struct icp_qat_fw_comn_resp_hdr comn_hdr;
-       uint64_t opaque_data;
-       uint32_t resrvd[ICP_QAT_FW_NUM_LONGWORDS_4];
+       __u64 opaque_data;
+       __u32 resrvd[ICP_QAT_FW_NUM_LONGWORDS_4];
 };
 
 #define ICP_QAT_FW_COMN_REQ_FLAG_SET 1
index 72a59fa..d4d188c 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef _ICP_QAT_FW_INIT_ADMIN_H_
 #define _ICP_QAT_FW_INIT_ADMIN_H_
 
@@ -67,50 +23,75 @@ enum icp_qat_fw_init_admin_resp_status {
 };
 
 struct icp_qat_fw_init_admin_req {
-       uint16_t init_cfg_sz;
-       uint8_t resrvd1;
-       uint8_t init_admin_cmd_id;
-       uint32_t resrvd2;
-       uint64_t opaque_data;
-       uint64_t init_cfg_ptr;
-       uint64_t resrvd3;
-};
-
-struct icp_qat_fw_init_admin_resp_hdr {
-       uint8_t flags;
-       uint8_t resrvd1;
-       uint8_t status;
-       uint8_t init_admin_cmd_id;
-};
+       __u16 init_cfg_sz;
+       __u8 resrvd1;
+       __u8 cmd_id;
+       __u32 resrvd2;
+       __u64 opaque_data;
+       __u64 init_cfg_ptr;
 
-struct icp_qat_fw_init_admin_resp_pars {
        union {
-               uint32_t resrvd1[ICP_QAT_FW_NUM_LONGWORDS_4];
                struct {
-                       uint32_t version_patch_num;
-                       uint8_t context_id;
-                       uint8_t ae_id;
-                       uint16_t resrvd1;
-                       uint64_t resrvd2;
-               } s1;
-               struct {
-                       uint64_t req_rec_count;
-                       uint64_t resp_sent_count;
-               } s2;
-       } u;
+                       __u16 ibuf_size_in_kb;
+                       __u16 resrvd3;
+               };
+               __u32 idle_filter;
+       };
+
+       __u32 resrvd4;
 };
 
 struct icp_qat_fw_init_admin_resp {
-       struct icp_qat_fw_init_admin_resp_hdr init_resp_hdr;
+       __u8 flags;
+       __u8 resrvd1;
+       __u8 status;
+       __u8 cmd_id;
        union {
-               uint32_t resrvd2;
+               __u32 resrvd2;
+               struct {
+                       __u16 version_minor_num;
+                       __u16 version_major_num;
+               };
+       };
+       __u64 opaque_data;
+       union {
+               __u32 resrvd3[ICP_QAT_FW_NUM_LONGWORDS_4];
+               struct {
+                       __u32 version_patch_num;
+                       __u8 context_id;
+                       __u8 ae_id;
+                       __u16 resrvd4;
+                       __u64 resrvd5;
+               };
+               struct {
+                       __u64 req_rec_count;
+                       __u64 resp_sent_count;
+               };
+               struct {
+                       __u16 compression_algos;
+                       __u16 checksum_algos;
+                       __u32 deflate_capabilities;
+                       __u32 resrvd6;
+                       __u32 lzs_capabilities;
+               };
+               struct {
+                       __u32 cipher_algos;
+                       __u32 hash_algos;
+                       __u16 keygen_algos;
+                       __u16 other;
+                       __u16 public_key_algos;
+                       __u16 prime_algos;
+               };
+               struct {
+                       __u64 timestamp;
+                       __u64 resrvd7;
+               };
                struct {
-                       uint16_t version_minor_num;
-                       uint16_t version_major_num;
-               } s;
-       } u;
-       uint64_t opaque_data;
-       struct icp_qat_fw_init_admin_resp_pars init_resp_pars;
+                       __u32 successful_count;
+                       __u32 unsuccessful_count;
+                       __u64 resrvd8;
+               };
+       };
 };
 
 #define ICP_QAT_FW_COMN_HEARTBEAT_OK 0
index c8d2669..6757ec0 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef _ICP_QAT_FW_LA_H_
 #define _ICP_QAT_FW_LA_H_
 #include "icp_qat_fw.h"
@@ -226,14 +182,14 @@ struct icp_qat_fw_la_bulk_req {
 struct icp_qat_fw_cipher_req_hdr_cd_pars {
        union {
                struct {
-                       uint64_t content_desc_addr;
-                       uint16_t content_desc_resrvd1;
-                       uint8_t content_desc_params_sz;
-                       uint8_t content_desc_hdr_resrvd2;
-                       uint32_t content_desc_resrvd3;
+                       __u64 content_desc_addr;
+                       __u16 content_desc_resrvd1;
+                       __u8 content_desc_params_sz;
+                       __u8 content_desc_hdr_resrvd2;
+                       __u32 content_desc_resrvd3;
                } s;
                struct {
-                       uint32_t cipher_key_array[ICP_QAT_FW_NUM_LONGWORDS_4];
+                       __u32 cipher_key_array[ICP_QAT_FW_NUM_LONGWORDS_4];
                } s1;
        } u;
 };
@@ -241,70 +197,70 @@ struct icp_qat_fw_cipher_req_hdr_cd_pars {
 struct icp_qat_fw_cipher_auth_req_hdr_cd_pars {
        union {
                struct {
-                       uint64_t content_desc_addr;
-                       uint16_t content_desc_resrvd1;
-                       uint8_t content_desc_params_sz;
-                       uint8_t content_desc_hdr_resrvd2;
-                       uint32_t content_desc_resrvd3;
+                       __u64 content_desc_addr;
+                       __u16 content_desc_resrvd1;
+                       __u8 content_desc_params_sz;
+                       __u8 content_desc_hdr_resrvd2;
+                       __u32 content_desc_resrvd3;
                } s;
                struct {
-                       uint32_t cipher_key_array[ICP_QAT_FW_NUM_LONGWORDS_4];
+                       __u32 cipher_key_array[ICP_QAT_FW_NUM_LONGWORDS_4];
                } sl;
        } u;
 };
 
 struct icp_qat_fw_cipher_cd_ctrl_hdr {
-       uint8_t cipher_state_sz;
-       uint8_t cipher_key_sz;
-       uint8_t cipher_cfg_offset;
-       uint8_t next_curr_id;
-       uint8_t cipher_padding_sz;
-       uint8_t resrvd1;
-       uint16_t resrvd2;
-       uint32_t resrvd3[ICP_QAT_FW_NUM_LONGWORDS_3];
+       __u8 cipher_state_sz;
+       __u8 cipher_key_sz;
+       __u8 cipher_cfg_offset;
+       __u8 next_curr_id;
+       __u8 cipher_padding_sz;
+       __u8 resrvd1;
+       __u16 resrvd2;
+       __u32 resrvd3[ICP_QAT_FW_NUM_LONGWORDS_3];
 };
 
 struct icp_qat_fw_auth_cd_ctrl_hdr {
-       uint32_t resrvd1;
-       uint8_t resrvd2;
-       uint8_t hash_flags;
-       uint8_t hash_cfg_offset;
-       uint8_t next_curr_id;
-       uint8_t resrvd3;
-       uint8_t outer_prefix_sz;
-       uint8_t final_sz;
-       uint8_t inner_res_sz;
-       uint8_t resrvd4;
-       uint8_t inner_state1_sz;
-       uint8_t inner_state2_offset;
-       uint8_t inner_state2_sz;
-       uint8_t outer_config_offset;
-       uint8_t outer_state1_sz;
-       uint8_t outer_res_sz;
-       uint8_t outer_prefix_offset;
+       __u32 resrvd1;
+       __u8 resrvd2;
+       __u8 hash_flags;
+       __u8 hash_cfg_offset;
+       __u8 next_curr_id;
+       __u8 resrvd3;
+       __u8 outer_prefix_sz;
+       __u8 final_sz;
+       __u8 inner_res_sz;
+       __u8 resrvd4;
+       __u8 inner_state1_sz;
+       __u8 inner_state2_offset;
+       __u8 inner_state2_sz;
+       __u8 outer_config_offset;
+       __u8 outer_state1_sz;
+       __u8 outer_res_sz;
+       __u8 outer_prefix_offset;
 };
 
 struct icp_qat_fw_cipher_auth_cd_ctrl_hdr {
-       uint8_t cipher_state_sz;
-       uint8_t cipher_key_sz;
-       uint8_t cipher_cfg_offset;
-       uint8_t next_curr_id_cipher;
-       uint8_t cipher_padding_sz;
-       uint8_t hash_flags;
-       uint8_t hash_cfg_offset;
-       uint8_t next_curr_id_auth;
-       uint8_t resrvd1;
-       uint8_t outer_prefix_sz;
-       uint8_t final_sz;
-       uint8_t inner_res_sz;
-       uint8_t resrvd2;
-       uint8_t inner_state1_sz;
-       uint8_t inner_state2_offset;
-       uint8_t inner_state2_sz;
-       uint8_t outer_config_offset;
-       uint8_t outer_state1_sz;
-       uint8_t outer_res_sz;
-       uint8_t outer_prefix_offset;
+       __u8 cipher_state_sz;
+       __u8 cipher_key_sz;
+       __u8 cipher_cfg_offset;
+       __u8 next_curr_id_cipher;
+       __u8 cipher_padding_sz;
+       __u8 hash_flags;
+       __u8 hash_cfg_offset;
+       __u8 next_curr_id_auth;
+       __u8 resrvd1;
+       __u8 outer_prefix_sz;
+       __u8 final_sz;
+       __u8 inner_res_sz;
+       __u8 resrvd2;
+       __u8 inner_state1_sz;
+       __u8 inner_state2_offset;
+       __u8 inner_state2_sz;
+       __u8 outer_config_offset;
+       __u8 outer_state1_sz;
+       __u8 outer_res_sz;
+       __u8 outer_prefix_offset;
 };
 
 #define ICP_QAT_FW_AUTH_HDR_FLAG_DO_NESTED 1
@@ -315,48 +271,48 @@ struct icp_qat_fw_cipher_auth_cd_ctrl_hdr {
 #define ICP_QAT_FW_CIPHER_REQUEST_PARAMETERS_OFFSET (0)
 
 struct icp_qat_fw_la_cipher_req_params {
-       uint32_t cipher_offset;
-       uint32_t cipher_length;
+       __u32 cipher_offset;
+       __u32 cipher_length;
        union {
-               uint32_t cipher_IV_array[ICP_QAT_FW_NUM_LONGWORDS_4];
+               __u32 cipher_IV_array[ICP_QAT_FW_NUM_LONGWORDS_4];
                struct {
-                       uint64_t cipher_IV_ptr;
-                       uint64_t resrvd1;
+                       __u64 cipher_IV_ptr;
+                       __u64 resrvd1;
                } s;
        } u;
 };
 
 struct icp_qat_fw_la_auth_req_params {
-       uint32_t auth_off;
-       uint32_t auth_len;
+       __u32 auth_off;
+       __u32 auth_len;
        union {
-               uint64_t auth_partial_st_prefix;
-               uint64_t aad_adr;
+               __u64 auth_partial_st_prefix;
+               __u64 aad_adr;
        } u1;
-       uint64_t auth_res_addr;
+       __u64 auth_res_addr;
        union {
-               uint8_t inner_prefix_sz;
-               uint8_t aad_sz;
+               __u8 inner_prefix_sz;
+               __u8 aad_sz;
        } u2;
-       uint8_t resrvd1;
-       uint8_t hash_state_sz;
-       uint8_t auth_res_sz;
+       __u8 resrvd1;
+       __u8 hash_state_sz;
+       __u8 auth_res_sz;
 } __packed;
 
 struct icp_qat_fw_la_auth_req_params_resrvd_flds {
-       uint32_t resrvd[ICP_QAT_FW_NUM_LONGWORDS_6];
+       __u32 resrvd[ICP_QAT_FW_NUM_LONGWORDS_6];
        union {
-               uint8_t inner_prefix_sz;
-               uint8_t aad_sz;
+               __u8 inner_prefix_sz;
+               __u8 aad_sz;
        } u2;
-       uint8_t resrvd1;
-       uint16_t resrvd2;
+       __u8 resrvd1;
+       __u16 resrvd2;
 };
 
 struct icp_qat_fw_la_resp {
        struct icp_qat_fw_comn_resp_hdr comn_resp;
-       uint64_t opaque_data;
-       uint32_t resrvd[ICP_QAT_FW_NUM_LONGWORDS_4];
+       __u64 opaque_data;
+       __u32 resrvd[ICP_QAT_FW_NUM_LONGWORDS_4];
 };
 
 #define ICP_QAT_FW_CIPHER_NEXT_ID_GET(cd_ctrl_hdr_t) \
index 2ffef3e..3e8e291 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef __ICP_QAT_FW_LOADER_HANDLE_H__
 #define __ICP_QAT_FW_LOADER_HANDLE_H__
 #include "icp_qat_uclo.h"
index 0d7a9b5..9dddae0 100644 (file)
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in
-         the documentation and/or other materials provided with the
-         distribution.
-       * Neither the name of Intel Corporation nor the names of its
-         contributors may be used to endorse or promote products derived
-         from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef _ICP_QAT_FW_PKE_
 #define _ICP_QAT_FW_PKE_
 
 #include "icp_qat_fw.h"
 
 struct icp_qat_fw_req_hdr_pke_cd_pars {
-       u64 content_desc_addr;
-       u32 content_desc_resrvd;
-       u32 func_id;
+       __u64 content_desc_addr;
+       __u32 content_desc_resrvd;
+       __u32 func_id;
 };
 
 struct icp_qat_fw_req_pke_mid {
-       u64 opaque;
-       u64 src_data_addr;
-       u64 dest_data_addr;
+       __u64 opaque;
+       __u64 src_data_addr;
+       __u64 dest_data_addr;
 };
 
 struct icp_qat_fw_req_pke_hdr {
-       u8 resrvd1;
-       u8 resrvd2;
-       u8 service_type;
-       u8 hdr_flags;
-       u16 comn_req_flags;
-       u16 resrvd4;
+       __u8 resrvd1;
+       __u8 resrvd2;
+       __u8 service_type;
+       __u8 hdr_flags;
+       __u16 comn_req_flags;
+       __u16 resrvd4;
        struct icp_qat_fw_req_hdr_pke_cd_pars cd_pars;
 };
 
 struct icp_qat_fw_pke_request {
        struct icp_qat_fw_req_pke_hdr pke_hdr;
        struct icp_qat_fw_req_pke_mid pke_mid;
-       u8 output_param_count;
-       u8 input_param_count;
-       u16 resrvd1;
-       u32 resrvd2;
-       u64 next_req_adr;
+       __u8 output_param_count;
+       __u8 input_param_count;
+       __u16 resrvd1;
+       __u32 resrvd2;
+       __u64 next_req_adr;
 };
 
 struct icp_qat_fw_resp_pke_hdr {
-       u8 resrvd1;
-       u8 resrvd2;
-       u8 response_type;
-       u8 hdr_flags;
-       u16 comn_resp_flags;
-       u16 resrvd4;
+       __u8 resrvd1;
+       __u8 resrvd2;
+       __u8 response_type;
+       __u8 hdr_flags;
+       __u16 comn_resp_flags;
+       __u16 resrvd4;
 };
 
 struct icp_qat_fw_pke_resp {
        struct icp_qat_fw_resp_pke_hdr pke_resp_hdr;
-       u64 opaque;
-       u64 src_data_addr;
-       u64 dest_data_addr;
+       __u64 opaque;
+       __u64 src_data_addr;
+       __u64 dest_data_addr;
 };
 
 #define ICP_QAT_FW_PKE_HDR_VALID_FLAG_BITPOS              7
index 7187917..c0e9fc0 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef __ICP_QAT_HAL_H
 #define __ICP_QAT_HAL_H
 #include "icp_qat_fw_loader_handle.h"
index 121d5e6..c4b6ef1 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef _ICP_QAT_HW_H_
 #define _ICP_QAT_HW_H_
 
@@ -105,8 +61,8 @@ enum icp_qat_hw_auth_mode {
 };
 
 struct icp_qat_hw_auth_config {
-       uint32_t config;
-       uint32_t reserved;
+       __u32 config;
+       __u32 reserved;
 };
 
 #define QAT_AUTH_MODE_BITPOS 4
@@ -131,7 +87,7 @@ struct icp_qat_hw_auth_config {
 
 struct icp_qat_hw_auth_counter {
        __be32 counter;
-       uint32_t reserved;
+       __u32 reserved;
 };
 
 #define QAT_AUTH_COUNT_MASK 0xFFFFFFFF
@@ -191,9 +147,9 @@ struct icp_qat_hw_auth_setup {
 
 struct icp_qat_hw_auth_sha512 {
        struct icp_qat_hw_auth_setup inner_setup;
-       uint8_t state1[ICP_QAT_HW_SHA512_STATE1_SZ];
+       __u8 state1[ICP_QAT_HW_SHA512_STATE1_SZ];
        struct icp_qat_hw_auth_setup outer_setup;
-       uint8_t state2[ICP_QAT_HW_SHA512_STATE2_SZ];
+       __u8 state2[ICP_QAT_HW_SHA512_STATE2_SZ];
 };
 
 struct icp_qat_hw_auth_algo_blk {
@@ -227,8 +183,8 @@ enum icp_qat_hw_cipher_mode {
 };
 
 struct icp_qat_hw_cipher_config {
-       uint32_t val;
-       uint32_t reserved;
+       __u32 val;
+       __u32 reserved;
 };
 
 enum icp_qat_hw_cipher_dir {
@@ -296,7 +252,7 @@ enum icp_qat_hw_cipher_convert {
 
 struct icp_qat_hw_cipher_aes256_f8 {
        struct icp_qat_hw_cipher_config cipher_config;
-       uint8_t key[ICP_QAT_HW_AES_256_F8_KEY_SZ];
+       __u8 key[ICP_QAT_HW_AES_256_F8_KEY_SZ];
 };
 
 struct icp_qat_hw_cipher_algo_blk {
index 5d1ee7e..8fe1ec3 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef __ICP_QAT_UCLO_H__
 #define __ICP_QAT_UCLO_H__
 
@@ -176,7 +132,7 @@ struct icp_qat_uof_encap_obj {
 struct icp_qat_uclo_encap_uwblock {
        unsigned int start_addr;
        unsigned int words_num;
-       uint64_t micro_words;
+       u64 micro_words;
 };
 
 struct icp_qat_uclo_encap_page {
@@ -215,7 +171,7 @@ struct icp_qat_uclo_objhdr {
 struct icp_qat_uof_strtable {
        unsigned int table_len;
        unsigned int reserved;
-       uint64_t strings;
+       u64 strings;
 };
 
 struct icp_qat_uclo_objhandle {
@@ -235,7 +191,7 @@ struct icp_qat_uclo_objhandle {
        unsigned int ae_num;
        unsigned int ustore_phy_size;
        void *obj_buf;
-       uint64_t *uword_buf;
+       u64 *uword_buf;
 };
 
 struct icp_qat_uof_uword_block {
index e14d3dd..72753b8 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/crypto.h>
@@ -55,6 +11,7 @@
 #include <crypto/hmac.h>
 #include <crypto/algapi.h>
 #include <crypto/authenc.h>
+#include <crypto/xts.h>
 #include <linux/dma-mapping.h>
 #include "adf_accel_devices.h"
 #include "adf_transport.h"
@@ -78,15 +35,15 @@ static DEFINE_MUTEX(algs_lock);
 static unsigned int active_devs;
 
 struct qat_alg_buf {
-       uint32_t len;
-       uint32_t resrvd;
-       uint64_t addr;
+       u32 len;
+       u32 resrvd;
+       u64 addr;
 } __packed;
 
 struct qat_alg_buf_list {
-       uint64_t resrvd;
-       uint32_t num_bufs;
-       uint32_t num_mapped_bufs;
+       u64 resrvd;
+       u32 num_bufs;
+       u32 num_mapped_bufs;
        struct qat_alg_buf bufers[];
 } __packed __aligned(64);
 
@@ -131,7 +88,8 @@ struct qat_alg_skcipher_ctx {
        struct icp_qat_fw_la_bulk_req enc_fw_req;
        struct icp_qat_fw_la_bulk_req dec_fw_req;
        struct qat_crypto_instance *inst;
-       struct crypto_skcipher *tfm;
+       struct crypto_skcipher *ftfm;
+       bool fallback;
 };
 
 static int qat_get_inter_state_size(enum icp_qat_hw_auth_algo qat_hash_alg)
@@ -151,7 +109,7 @@ static int qat_get_inter_state_size(enum icp_qat_hw_auth_algo qat_hash_alg)
 
 static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
                                  struct qat_alg_aead_ctx *ctx,
-                                 const uint8_t *auth_key,
+                                 const u8 *auth_key,
                                  unsigned int auth_keylen)
 {
        SHASH_DESC_ON_STACK(shash, ctx->hash_tfm);
@@ -467,7 +425,7 @@ static int qat_alg_aead_init_dec_session(struct crypto_aead *aead_tfm,
 static void qat_alg_skcipher_init_com(struct qat_alg_skcipher_ctx *ctx,
                                      struct icp_qat_fw_la_bulk_req *req,
                                      struct icp_qat_hw_cipher_algo_blk *cd,
-                                     const uint8_t *key, unsigned int keylen)
+                                     const u8 *key, unsigned int keylen)
 {
        struct icp_qat_fw_comn_req_hdr_cd_pars *cd_pars = &req->cd_pars;
        struct icp_qat_fw_comn_req_hdr *header = &req->comn_hdr;
@@ -487,7 +445,7 @@ static void qat_alg_skcipher_init_com(struct qat_alg_skcipher_ctx *ctx,
 }
 
 static void qat_alg_skcipher_init_enc(struct qat_alg_skcipher_ctx *ctx,
-                                     int alg, const uint8_t *key,
+                                     int alg, const u8 *key,
                                      unsigned int keylen, int mode)
 {
        struct icp_qat_hw_cipher_algo_blk *enc_cd = ctx->enc_cd;
@@ -500,7 +458,7 @@ static void qat_alg_skcipher_init_enc(struct qat_alg_skcipher_ctx *ctx,
 }
 
 static void qat_alg_skcipher_init_dec(struct qat_alg_skcipher_ctx *ctx,
-                                     int alg, const uint8_t *key,
+                                     int alg, const u8 *key,
                                      unsigned int keylen, int mode)
 {
        struct icp_qat_hw_cipher_algo_blk *dec_cd = ctx->dec_cd;
@@ -578,7 +536,7 @@ error:
 }
 
 static int qat_alg_skcipher_init_sessions(struct qat_alg_skcipher_ctx *ctx,
-                                         const uint8_t *key,
+                                         const u8 *key,
                                          unsigned int keylen,
                                          int mode)
 {
@@ -592,7 +550,7 @@ static int qat_alg_skcipher_init_sessions(struct qat_alg_skcipher_ctx *ctx,
        return 0;
 }
 
-static int qat_alg_aead_rekey(struct crypto_aead *tfm, const uint8_t *key,
+static int qat_alg_aead_rekey(struct crypto_aead *tfm, const u8 *key,
                              unsigned int keylen)
 {
        struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
@@ -606,7 +564,7 @@ static int qat_alg_aead_rekey(struct crypto_aead *tfm, const uint8_t *key,
                                          ICP_QAT_HW_CIPHER_CBC_MODE);
 }
 
-static int qat_alg_aead_newkey(struct crypto_aead *tfm, const uint8_t *key,
+static int qat_alg_aead_newkey(struct crypto_aead *tfm, const u8 *key,
                               unsigned int keylen)
 {
        struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
@@ -658,7 +616,7 @@ out_free_inst:
        return ret;
 }
 
-static int qat_alg_aead_setkey(struct crypto_aead *tfm, const uint8_t *key,
+static int qat_alg_aead_setkey(struct crypto_aead *tfm, const u8 *key,
                               unsigned int keylen)
 {
        struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
@@ -820,7 +778,7 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
        struct qat_alg_aead_ctx *ctx = qat_req->aead_ctx;
        struct qat_crypto_instance *inst = ctx->inst;
        struct aead_request *areq = qat_req->aead_req;
-       uint8_t stat_filed = qat_resp->comn_resp.comn_status;
+       u8 stat_filed = qat_resp->comn_resp.comn_status;
        int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed);
 
        qat_alg_free_bufl(inst, qat_req);
@@ -835,7 +793,7 @@ static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
        struct qat_alg_skcipher_ctx *ctx = qat_req->skcipher_ctx;
        struct qat_crypto_instance *inst = ctx->inst;
        struct skcipher_request *sreq = qat_req->skcipher_req;
-       uint8_t stat_filed = qat_resp->comn_resp.comn_status;
+       u8 stat_filed = qat_resp->comn_resp.comn_status;
        struct device *dev = &GET_DEV(ctx->inst->accel_dev);
        int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed);
 
@@ -880,18 +838,18 @@ static int qat_alg_aead_dec(struct aead_request *areq)
        qat_req->aead_ctx = ctx;
        qat_req->aead_req = areq;
        qat_req->cb = qat_aead_alg_callback;
-       qat_req->req.comn_mid.opaque_data = (uint64_t)(__force long)qat_req;
+       qat_req->req.comn_mid.opaque_data = (u64)(__force long)qat_req;
        qat_req->req.comn_mid.src_data_addr = qat_req->buf.blp;
        qat_req->req.comn_mid.dest_data_addr = qat_req->buf.bloutp;
        cipher_param = (void *)&qat_req->req.serv_specif_rqpars;
        cipher_param->cipher_length = areq->cryptlen - digst_size;
        cipher_param->cipher_offset = areq->assoclen;
        memcpy(cipher_param->u.cipher_IV_array, areq->iv, AES_BLOCK_SIZE);
-       auth_param = (void *)((uint8_t *)cipher_param + sizeof(*cipher_param));
+       auth_param = (void *)((u8 *)cipher_param + sizeof(*cipher_param));
        auth_param->auth_off = 0;
        auth_param->auth_len = areq->assoclen + cipher_param->cipher_length;
        do {
-               ret = adf_send_message(ctx->inst->sym_tx, (uint32_t *)msg);
+               ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
        } while (ret == -EAGAIN && ctr++ < 10);
 
        if (ret == -EAGAIN) {
@@ -910,7 +868,7 @@ static int qat_alg_aead_enc(struct aead_request *areq)
        struct icp_qat_fw_la_cipher_req_params *cipher_param;
        struct icp_qat_fw_la_auth_req_params *auth_param;
        struct icp_qat_fw_la_bulk_req *msg;
-       uint8_t *iv = areq->iv;
+       u8 *iv = areq->iv;
        int ret, ctr = 0;
 
        ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req);
@@ -922,11 +880,11 @@ static int qat_alg_aead_enc(struct aead_request *areq)
        qat_req->aead_ctx = ctx;
        qat_req->aead_req = areq;
        qat_req->cb = qat_aead_alg_callback;
-       qat_req->req.comn_mid.opaque_data = (uint64_t)(__force long)qat_req;
+       qat_req->req.comn_mid.opaque_data = (u64)(__force long)qat_req;
        qat_req->req.comn_mid.src_data_addr = qat_req->buf.blp;
        qat_req->req.comn_mid.dest_data_addr = qat_req->buf.bloutp;
        cipher_param = (void *)&qat_req->req.serv_specif_rqpars;
-       auth_param = (void *)((uint8_t *)cipher_param + sizeof(*cipher_param));
+       auth_param = (void *)((u8 *)cipher_param + sizeof(*cipher_param));
 
        memcpy(cipher_param->u.cipher_IV_array, iv, AES_BLOCK_SIZE);
        cipher_param->cipher_length = areq->cryptlen;
@@ -936,7 +894,7 @@ static int qat_alg_aead_enc(struct aead_request *areq)
        auth_param->auth_len = areq->assoclen + areq->cryptlen;
 
        do {
-               ret = adf_send_message(ctx->inst->sym_tx, (uint32_t *)msg);
+               ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
        } while (ret == -EAGAIN && ctr++ < 10);
 
        if (ret == -EAGAIN) {
@@ -1038,6 +996,25 @@ static int qat_alg_skcipher_ctr_setkey(struct crypto_skcipher *tfm,
 static int qat_alg_skcipher_xts_setkey(struct crypto_skcipher *tfm,
                                       const u8 *key, unsigned int keylen)
 {
+       struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       int ret;
+
+       ret = xts_verify_key(tfm, key, keylen);
+       if (ret)
+               return ret;
+
+       if (keylen >> 1 == AES_KEYSIZE_192) {
+               ret = crypto_skcipher_setkey(ctx->ftfm, key, keylen);
+               if (ret)
+                       return ret;
+
+               ctx->fallback = true;
+
+               return 0;
+       }
+
+       ctx->fallback = false;
+
        return qat_alg_skcipher_setkey(tfm, key, keylen,
                                       ICP_QAT_HW_CIPHER_XTS_MODE);
 }
@@ -1073,7 +1050,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
        qat_req->skcipher_ctx = ctx;
        qat_req->skcipher_req = req;
        qat_req->cb = qat_skcipher_alg_callback;
-       qat_req->req.comn_mid.opaque_data = (uint64_t)(__force long)qat_req;
+       qat_req->req.comn_mid.opaque_data = (u64)(__force long)qat_req;
        qat_req->req.comn_mid.src_data_addr = qat_req->buf.blp;
        qat_req->req.comn_mid.dest_data_addr = qat_req->buf.bloutp;
        cipher_param = (void *)&qat_req->req.serv_specif_rqpars;
@@ -1082,7 +1059,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
        cipher_param->u.s.cipher_IV_ptr = qat_req->iv_paddr;
        memcpy(qat_req->iv, req->iv, AES_BLOCK_SIZE);
        do {
-               ret = adf_send_message(ctx->inst->sym_tx, (uint32_t *)msg);
+               ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
        } while (ret == -EAGAIN && ctr++ < 10);
 
        if (ret == -EAGAIN) {
@@ -1102,6 +1079,24 @@ static int qat_alg_skcipher_blk_encrypt(struct skcipher_request *req)
        return qat_alg_skcipher_encrypt(req);
 }
 
+static int qat_alg_skcipher_xts_encrypt(struct skcipher_request *req)
+{
+       struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
+       struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(stfm);
+       struct skcipher_request *nreq = skcipher_request_ctx(req);
+
+       if (req->cryptlen < XTS_BLOCK_SIZE)
+               return -EINVAL;
+
+       if (ctx->fallback) {
+               memcpy(nreq, req, sizeof(*req));
+               skcipher_request_set_tfm(nreq, ctx->ftfm);
+               return crypto_skcipher_encrypt(nreq);
+       }
+
+       return qat_alg_skcipher_encrypt(req);
+}
+
 static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
 {
        struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
@@ -1133,7 +1128,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
        qat_req->skcipher_ctx = ctx;
        qat_req->skcipher_req = req;
        qat_req->cb = qat_skcipher_alg_callback;
-       qat_req->req.comn_mid.opaque_data = (uint64_t)(__force long)qat_req;
+       qat_req->req.comn_mid.opaque_data = (u64)(__force long)qat_req;
        qat_req->req.comn_mid.src_data_addr = qat_req->buf.blp;
        qat_req->req.comn_mid.dest_data_addr = qat_req->buf.bloutp;
        cipher_param = (void *)&qat_req->req.serv_specif_rqpars;
@@ -1142,7 +1137,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
        cipher_param->u.s.cipher_IV_ptr = qat_req->iv_paddr;
        memcpy(qat_req->iv, req->iv, AES_BLOCK_SIZE);
        do {
-               ret = adf_send_message(ctx->inst->sym_tx, (uint32_t *)msg);
+               ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
        } while (ret == -EAGAIN && ctr++ < 10);
 
        if (ret == -EAGAIN) {
@@ -1161,6 +1156,25 @@ static int qat_alg_skcipher_blk_decrypt(struct skcipher_request *req)
 
        return qat_alg_skcipher_decrypt(req);
 }
+
+static int qat_alg_skcipher_xts_decrypt(struct skcipher_request *req)
+{
+       struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
+       struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(stfm);
+       struct skcipher_request *nreq = skcipher_request_ctx(req);
+
+       if (req->cryptlen < XTS_BLOCK_SIZE)
+               return -EINVAL;
+
+       if (ctx->fallback) {
+               memcpy(nreq, req, sizeof(*req));
+               skcipher_request_set_tfm(nreq, ctx->ftfm);
+               return crypto_skcipher_decrypt(nreq);
+       }
+
+       return qat_alg_skcipher_decrypt(req);
+}
+
 static int qat_alg_aead_init(struct crypto_aead *tfm,
                             enum icp_qat_hw_auth_algo hash,
                             const char *hash_name)
@@ -1216,11 +1230,26 @@ static void qat_alg_aead_exit(struct crypto_aead *tfm)
 }
 
 static int qat_alg_skcipher_init_tfm(struct crypto_skcipher *tfm)
+{
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct qat_crypto_request));
+       return 0;
+}
+
+static int qat_alg_skcipher_init_xts_tfm(struct crypto_skcipher *tfm)
 {
        struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       int reqsize;
+
+       ctx->ftfm = crypto_alloc_skcipher("xts(aes)", 0,
+                                         CRYPTO_ALG_NEED_FALLBACK);
+       if (IS_ERR(ctx->ftfm))
+               return PTR_ERR(ctx->ftfm);
+
+       reqsize = max(sizeof(struct qat_crypto_request),
+                     sizeof(struct skcipher_request) +
+                     crypto_skcipher_reqsize(ctx->ftfm));
+       crypto_skcipher_set_reqsize(tfm, reqsize);
 
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct qat_crypto_request));
-       ctx->tfm = tfm;
        return 0;
 }
 
@@ -1251,13 +1280,22 @@ static void qat_alg_skcipher_exit_tfm(struct crypto_skcipher *tfm)
        qat_crypto_put_instance(inst);
 }
 
+static void qat_alg_skcipher_exit_xts_tfm(struct crypto_skcipher *tfm)
+{
+       struct qat_alg_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       if (ctx->ftfm)
+               crypto_free_skcipher(ctx->ftfm);
+
+       qat_alg_skcipher_exit_tfm(tfm);
+}
 
 static struct aead_alg qat_aeads[] = { {
        .base = {
                .cra_name = "authenc(hmac(sha1),cbc(aes))",
                .cra_driver_name = "qat_aes_cbc_hmac_sha1",
                .cra_priority = 4001,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct qat_alg_aead_ctx),
                .cra_module = THIS_MODULE,
@@ -1274,7 +1312,7 @@ static struct aead_alg qat_aeads[] = { {
                .cra_name = "authenc(hmac(sha256),cbc(aes))",
                .cra_driver_name = "qat_aes_cbc_hmac_sha256",
                .cra_priority = 4001,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct qat_alg_aead_ctx),
                .cra_module = THIS_MODULE,
@@ -1291,7 +1329,7 @@ static struct aead_alg qat_aeads[] = { {
                .cra_name = "authenc(hmac(sha512),cbc(aes))",
                .cra_driver_name = "qat_aes_cbc_hmac_sha512",
                .cra_priority = 4001,
-               .cra_flags = CRYPTO_ALG_ASYNC,
+               .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
                .cra_blocksize = AES_BLOCK_SIZE,
                .cra_ctxsize = sizeof(struct qat_alg_aead_ctx),
                .cra_module = THIS_MODULE,
@@ -1309,7 +1347,7 @@ static struct skcipher_alg qat_skciphers[] = { {
        .base.cra_name = "cbc(aes)",
        .base.cra_driver_name = "qat_aes_cbc",
        .base.cra_priority = 4001,
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = AES_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
        .base.cra_alignmask = 0,
@@ -1327,7 +1365,7 @@ static struct skcipher_alg qat_skciphers[] = { {
        .base.cra_name = "ctr(aes)",
        .base.cra_driver_name = "qat_aes_ctr",
        .base.cra_priority = 4001,
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = 1,
        .base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
        .base.cra_alignmask = 0,
@@ -1345,17 +1383,18 @@ static struct skcipher_alg qat_skciphers[] = { {
        .base.cra_name = "xts(aes)",
        .base.cra_driver_name = "qat_aes_xts",
        .base.cra_priority = 4001,
-       .base.cra_flags = CRYPTO_ALG_ASYNC,
+       .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK |
+                         CRYPTO_ALG_ALLOCATES_MEMORY,
        .base.cra_blocksize = AES_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx),
        .base.cra_alignmask = 0,
        .base.cra_module = THIS_MODULE,
 
-       .init = qat_alg_skcipher_init_tfm,
-       .exit = qat_alg_skcipher_exit_tfm,
+       .init = qat_alg_skcipher_init_xts_tfm,
+       .exit = qat_alg_skcipher_exit_xts_tfm,
        .setkey = qat_alg_skcipher_xts_setkey,
-       .decrypt = qat_alg_skcipher_blk_decrypt,
-       .encrypt = qat_alg_skcipher_blk_encrypt,
+       .decrypt = qat_alg_skcipher_xts_decrypt,
+       .encrypt = qat_alg_skcipher_xts_encrypt,
        .min_keysize = 2 * AES_MIN_KEY_SIZE,
        .max_keysize = 2 * AES_MAX_KEY_SIZE,
        .ivsize = AES_BLOCK_SIZE,
index 692a7aa..846569e 100644 (file)
@@ -1,50 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in
-         the documentation and/or other materials provided with the
-         distribution.
-       * Neither the name of Intel Corporation nor the names of its
-         contributors may be used to endorse or promote products derived
-         from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/module.h>
 #include <crypto/internal/rsa.h>
 #include <crypto/internal/akcipher.h>
@@ -384,12 +339,12 @@ static int qat_dh_compute_value(struct kpp_request *req)
 
        msg->pke_mid.src_data_addr = qat_req->phy_in;
        msg->pke_mid.dest_data_addr = qat_req->phy_out;
-       msg->pke_mid.opaque = (uint64_t)(__force long)qat_req;
+       msg->pke_mid.opaque = (u64)(__force long)qat_req;
        msg->input_param_count = n_input_params;
        msg->output_param_count = 1;
 
        do {
-               ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg);
+               ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
        } while (ret == -EBUSY && ctr++ < 100);
 
        if (!ret)
@@ -779,11 +734,11 @@ static int qat_rsa_enc(struct akcipher_request *req)
 
        msg->pke_mid.src_data_addr = qat_req->phy_in;
        msg->pke_mid.dest_data_addr = qat_req->phy_out;
-       msg->pke_mid.opaque = (uint64_t)(__force long)qat_req;
+       msg->pke_mid.opaque = (u64)(__force long)qat_req;
        msg->input_param_count = 3;
        msg->output_param_count = 1;
        do {
-               ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg);
+               ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
        } while (ret == -EBUSY && ctr++ < 100);
 
        if (!ret)
@@ -927,7 +882,7 @@ static int qat_rsa_dec(struct akcipher_request *req)
 
        msg->pke_mid.src_data_addr = qat_req->phy_in;
        msg->pke_mid.dest_data_addr = qat_req->phy_out;
-       msg->pke_mid.opaque = (uint64_t)(__force long)qat_req;
+       msg->pke_mid.opaque = (u64)(__force long)qat_req;
        if (ctx->crt_mode)
                msg->input_param_count = 6;
        else
@@ -935,7 +890,7 @@ static int qat_rsa_dec(struct akcipher_request *req)
 
        msg->output_param_count = 1;
        do {
-               ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg);
+               ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
        } while (ret == -EBUSY && ctr++ < 100);
 
        if (!ret)
index fb504ce..ab621b7 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/module.h>
 #include <linux/slab.h>
 #include "adf_accel_devices.h"
index 300bb91..12682d1 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef _QAT_CRYPTO_INSTANCE_H_
 #define _QAT_CRYPTO_INSTANCE_H_
 
index ff149e1..fa467e0 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/slab.h>
 #include <linux/delay.h>
 
 
 #define AE(handle, ae) handle->hal_handle->aes[ae]
 
-static const uint64_t inst_4b[] = {
+static const u64 inst_4b[] = {
        0x0F0400C0000ull, 0x0F4400C0000ull, 0x0F040000300ull, 0x0F440000300ull,
        0x0FC066C0000ull, 0x0F0000C0300ull, 0x0F0000C0300ull, 0x0F0000C0300ull,
        0x0A021000000ull
 };
 
-static const uint64_t inst[] = {
+static const u64 inst[] = {
        0x0F0000C0000ull, 0x0F000000380ull, 0x0D805000011ull, 0x0FC082C0300ull,
        0x0F0000C0300ull, 0x0F0000C0300ull, 0x0F0000C0300ull, 0x0F0000C0300ull,
        0x0A0643C0000ull, 0x0BAC0000301ull, 0x0D802000101ull, 0x0F0000C0001ull,
@@ -546,7 +502,7 @@ static void qat_hal_disable_ctx(struct icp_qat_fw_loader_handle *handle,
        qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx);
 }
 
-static uint64_t qat_hal_parity_64bit(uint64_t word)
+static u64 qat_hal_parity_64bit(u64 word)
 {
        word ^= word >> 1;
        word ^= word >> 2;
@@ -557,9 +513,9 @@ static uint64_t qat_hal_parity_64bit(uint64_t word)
        return word & 1;
 }
 
-static uint64_t qat_hal_set_uword_ecc(uint64_t uword)
+static u64 qat_hal_set_uword_ecc(u64 uword)
 {
-       uint64_t bit0_mask = 0xff800007fffULL, bit1_mask = 0x1f801ff801fULL,
+       u64 bit0_mask = 0xff800007fffULL, bit1_mask = 0x1f801ff801fULL,
                bit2_mask = 0xe387e0781e1ULL, bit3_mask = 0x7cb8e388e22ULL,
                bit4_mask = 0xaf5b2c93244ULL, bit5_mask = 0xf56d5525488ULL,
                bit6_mask = 0xdaf69a46910ULL;
@@ -578,7 +534,7 @@ static uint64_t qat_hal_set_uword_ecc(uint64_t uword)
 
 void qat_hal_wr_uwords(struct icp_qat_fw_loader_handle *handle,
                       unsigned char ae, unsigned int uaddr,
-                      unsigned int words_num, uint64_t *uword)
+                      unsigned int words_num, u64 *uword)
 {
        unsigned int ustore_addr;
        unsigned int i;
@@ -588,7 +544,7 @@ void qat_hal_wr_uwords(struct icp_qat_fw_loader_handle *handle,
        qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
        for (i = 0; i < words_num; i++) {
                unsigned int uwrd_lo, uwrd_hi;
-               uint64_t tmp;
+               u64 tmp;
 
                tmp = qat_hal_set_uword_ecc(uword[i]);
                uwrd_lo = (unsigned int)(tmp & 0xffffffff);
@@ -644,7 +600,7 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle)
                csr_val |= CE_NN_MODE;
                qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, csr_val);
                qat_hal_wr_uwords(handle, ae, 0, ARRAY_SIZE(inst),
-                                 (uint64_t *)inst);
+                                 (u64 *)inst);
                qat_hal_wr_indr_csr(handle, ae, ctx_mask, CTX_STS_INDIRECT,
                                    handle->hal_handle->upc_mask &
                                    INIT_PC_VALUE);
@@ -821,7 +777,7 @@ void qat_hal_set_pc(struct icp_qat_fw_loader_handle *handle,
 
 static void qat_hal_get_uwords(struct icp_qat_fw_loader_handle *handle,
                               unsigned char ae, unsigned int uaddr,
-                              unsigned int words_num, uint64_t *uword)
+                              unsigned int words_num, u64 *uword)
 {
        unsigned int i, uwrd_lo, uwrd_hi;
        unsigned int ustore_addr, misc_control;
@@ -871,11 +827,11 @@ void qat_hal_wr_umem(struct icp_qat_fw_loader_handle *handle,
 #define MAX_EXEC_INST 100
 static int qat_hal_exec_micro_inst(struct icp_qat_fw_loader_handle *handle,
                                   unsigned char ae, unsigned char ctx,
-                                  uint64_t *micro_inst, unsigned int inst_num,
+                                  u64 *micro_inst, unsigned int inst_num,
                                   int code_off, unsigned int max_cycle,
                                   unsigned int *endpc)
 {
-       uint64_t savuwords[MAX_EXEC_INST];
+       u64 savuwords[MAX_EXEC_INST];
        unsigned int ind_lm_addr0, ind_lm_addr1;
        unsigned int ind_lm_addr_byte0, ind_lm_addr_byte1;
        unsigned int ind_cnt_sig;
@@ -972,7 +928,7 @@ static int qat_hal_rd_rel_reg(struct icp_qat_fw_loader_handle *handle,
        unsigned int ctxarb_cntl, ustore_addr, ctx_enables;
        unsigned short reg_addr;
        int status = 0;
-       uint64_t insts, savuword;
+       u64 insts, savuword;
 
        reg_addr = qat_hal_get_reg_addr(reg_type, reg_num);
        if (reg_addr == BAD_REGADDR) {
@@ -984,7 +940,7 @@ static int qat_hal_rd_rel_reg(struct icp_qat_fw_loader_handle *handle,
                insts = 0xA070000000ull | (reg_addr & 0x3ff);
                break;
        default:
-               insts = (uint64_t)0xA030000000ull | ((reg_addr & 0x3ff) << 10);
+               insts = (u64)0xA030000000ull | ((reg_addr & 0x3ff) << 10);
                break;
        }
        savctx = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
@@ -1030,7 +986,7 @@ static int qat_hal_wr_rel_reg(struct icp_qat_fw_loader_handle *handle,
                              unsigned short reg_num, unsigned int data)
 {
        unsigned short src_hiaddr, src_lowaddr, dest_addr, data16hi, data16lo;
-       uint64_t insts[] = {
+       u64 insts[] = {
                0x0F440000000ull,
                0x0F040000000ull,
                0x0F0000C0300ull,
@@ -1076,13 +1032,13 @@ int qat_hal_get_ins_num(void)
        return ARRAY_SIZE(inst_4b);
 }
 
-static int qat_hal_concat_micro_code(uint64_t *micro_inst,
+static int qat_hal_concat_micro_code(u64 *micro_inst,
                                     unsigned int inst_num, unsigned int size,
                                     unsigned int addr, unsigned int *value)
 {
        int i;
        unsigned int cur_value;
-       const uint64_t *inst_arr;
+       const u64 *inst_arr;
        int fixup_offset;
        int usize = 0;
        int orig_num;
@@ -1107,7 +1063,7 @@ static int qat_hal_concat_micro_code(uint64_t *micro_inst,
 
 static int qat_hal_exec_micro_init_lm(struct icp_qat_fw_loader_handle *handle,
                                      unsigned char ae, unsigned char ctx,
-                                     int *pfirst_exec, uint64_t *micro_inst,
+                                     int *pfirst_exec, u64 *micro_inst,
                                      unsigned int inst_num)
 {
        int stat = 0;
@@ -1140,7 +1096,7 @@ int qat_hal_batch_wr_lm(struct icp_qat_fw_loader_handle *handle,
                        struct icp_qat_uof_batch_init *lm_init_header)
 {
        struct icp_qat_uof_batch_init *plm_init;
-       uint64_t *micro_inst_arry;
+       u64 *micro_inst_arry;
        int micro_inst_num;
        int alloc_inst_size;
        int first_exec = 1;
@@ -1150,7 +1106,7 @@ int qat_hal_batch_wr_lm(struct icp_qat_fw_loader_handle *handle,
        alloc_inst_size = lm_init_header->size;
        if ((unsigned int)alloc_inst_size > handle->hal_handle->max_ustore)
                alloc_inst_size = handle->hal_handle->max_ustore;
-       micro_inst_arry = kmalloc_array(alloc_inst_size, sizeof(uint64_t),
+       micro_inst_arry = kmalloc_array(alloc_inst_size, sizeof(u64),
                                        GFP_KERNEL);
        if (!micro_inst_arry)
                return -ENOMEM;
@@ -1229,7 +1185,7 @@ static int qat_hal_put_rel_wr_xfer(struct icp_qat_fw_loader_handle *handle,
            data16low;
        unsigned short reg_mask;
        int status = 0;
-       uint64_t micro_inst[] = {
+       u64 micro_inst[] = {
                0x0F440000000ull,
                0x0F040000000ull,
                0x0A000000000ull,
index 6bd8f6a..bff759e 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/slab.h>
 #include <linux/ctype.h>
 #include <linux/kernel.h>
@@ -332,13 +288,18 @@ static int qat_uclo_create_batch_init_list(struct icp_qat_fw_loader_handle
        }
        return 0;
 out_err:
+       /* Do not free the list head unless we allocated it. */
+       tail_old = tail_old->next;
+       if (flag) {
+               kfree(*init_tab_base);
+               *init_tab_base = NULL;
+       }
+
        while (tail_old) {
                mem_init = tail_old->next;
                kfree(tail_old);
                tail_old = mem_init;
        }
-       if (flag)
-               kfree(*init_tab_base);
        return -ENOMEM;
 }
 
@@ -411,16 +372,16 @@ static int qat_uclo_init_ustore(struct icp_qat_fw_loader_handle *handle,
        unsigned int ustore_size;
        unsigned int patt_pos;
        struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle;
-       uint64_t *fill_data;
+       u64 *fill_data;
 
        uof_image = image->img_ptr;
-       fill_data = kcalloc(ICP_QAT_UCLO_MAX_USTORE, sizeof(uint64_t),
+       fill_data = kcalloc(ICP_QAT_UCLO_MAX_USTORE, sizeof(u64),
                            GFP_KERNEL);
        if (!fill_data)
                return -ENOMEM;
        for (i = 0; i < ICP_QAT_UCLO_MAX_USTORE; i++)
                memcpy(&fill_data[i], &uof_image->fill_pattern,
-                      sizeof(uint64_t));
+                      sizeof(u64));
        page = image->page;
 
        for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
@@ -981,7 +942,7 @@ static int qat_uclo_parse_uof_obj(struct icp_qat_fw_loader_handle *handle)
                pr_err("QAT: UOF incompatible\n");
                return -EINVAL;
        }
-       obj_handle->uword_buf = kcalloc(UWORD_CPYBUF_SIZE, sizeof(uint64_t),
+       obj_handle->uword_buf = kcalloc(UWORD_CPYBUF_SIZE, sizeof(u64),
                                        GFP_KERNEL);
        if (!obj_handle->uword_buf)
                return -ENOMEM;
@@ -1185,7 +1146,7 @@ static int qat_uclo_map_suof(struct icp_qat_fw_loader_handle *handle,
        return 0;
 }
 
-#define ADD_ADDR(high, low)  ((((uint64_t)high) << 32) + low)
+#define ADD_ADDR(high, low)  ((((u64)high) << 32) + low)
 #define BITS_IN_DWORD 32
 
 static int qat_uclo_auth_fw(struct icp_qat_fw_loader_handle *handle,
@@ -1514,10 +1475,10 @@ void qat_uclo_del_uof_obj(struct icp_qat_fw_loader_handle *handle)
 
 static void qat_uclo_fill_uwords(struct icp_qat_uclo_objhandle *obj_handle,
                                 struct icp_qat_uclo_encap_page *encap_page,
-                                uint64_t *uword, unsigned int addr_p,
-                                unsigned int raddr, uint64_t fill)
+                                u64 *uword, unsigned int addr_p,
+                                unsigned int raddr, u64 fill)
 {
-       uint64_t uwrd = 0;
+       u64 uwrd = 0;
        unsigned int i;
 
        if (!encap_page) {
@@ -1547,12 +1508,12 @@ static void qat_uclo_wr_uimage_raw_page(struct icp_qat_fw_loader_handle *handle,
 {
        unsigned int uw_physical_addr, uw_relative_addr, i, words_num, cpylen;
        struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle;
-       uint64_t fill_pat;
+       u64 fill_pat;
 
        /* load the page starting at appropriate ustore address */
        /* get fill-pattern from an image -- they are all the same */
        memcpy(&fill_pat, obj_handle->ae_uimage[0].img_ptr->fill_pattern,
-              sizeof(uint64_t));
+              sizeof(u64));
        uw_physical_addr = encap_page->beg_addr_p;
        uw_relative_addr = 0;
        words_num = encap_page->micro_words_num;
index 1dfcab3..b975c26 100644 (file)
@@ -1,62 +1,18 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_pf2vf_msg.h>
 #include <adf_common_drv.h>
 #include "adf_dh895xcc_hw_data.h"
 
 /* Worker thread to service arbiter mappings based on dev SKUs */
-static const uint32_t thrd_to_arb_map_sku4[] = {
+static const u32 thrd_to_arb_map_sku4[] = {
        0x12222AAA, 0x11666666, 0x12222AAA, 0x11666666,
        0x12222AAA, 0x11222222, 0x12222AAA, 0x11222222,
        0x00000000, 0x00000000, 0x00000000, 0x00000000
 };
 
-static const uint32_t thrd_to_arb_map_sku6[] = {
+static const u32 thrd_to_arb_map_sku6[] = {
        0x12222AAA, 0x11666666, 0x12222AAA, 0x11666666,
        0x12222AAA, 0x11222222, 0x12222AAA, 0x11222222,
        0x12222AAA, 0x11222222, 0x12222AAA, 0x11222222
@@ -68,20 +24,20 @@ static struct adf_hw_device_class dh895xcc_class = {
        .instances = 0
 };
 
-static uint32_t get_accel_mask(uint32_t fuse)
+static u32 get_accel_mask(u32 fuse)
 {
        return (~fuse) >> ADF_DH895XCC_ACCELERATORS_REG_OFFSET &
                          ADF_DH895XCC_ACCELERATORS_MASK;
 }
 
-static uint32_t get_ae_mask(uint32_t fuse)
+static u32 get_ae_mask(u32 fuse)
 {
        return (~fuse) & ADF_DH895XCC_ACCELENGINES_MASK;
 }
 
-static uint32_t get_num_accels(struct adf_hw_device_data *self)
+static u32 get_num_accels(struct adf_hw_device_data *self)
 {
-       uint32_t i, ctr = 0;
+       u32 i, ctr = 0;
 
        if (!self || !self->accel_mask)
                return 0;
@@ -93,9 +49,9 @@ static uint32_t get_num_accels(struct adf_hw_device_data *self)
        return ctr;
 }
 
-static uint32_t get_num_aes(struct adf_hw_device_data *self)
+static u32 get_num_aes(struct adf_hw_device_data *self)
 {
-       uint32_t i, ctr = 0;
+       u32 i, ctr = 0;
 
        if (!self || !self->ae_mask)
                return 0;
@@ -107,17 +63,17 @@ static uint32_t get_num_aes(struct adf_hw_device_data *self)
        return ctr;
 }
 
-static uint32_t get_misc_bar_id(struct adf_hw_device_data *self)
+static u32 get_misc_bar_id(struct adf_hw_device_data *self)
 {
        return ADF_DH895XCC_PMISC_BAR;
 }
 
-static uint32_t get_etr_bar_id(struct adf_hw_device_data *self)
+static u32 get_etr_bar_id(struct adf_hw_device_data *self)
 {
        return ADF_DH895XCC_ETR_BAR;
 }
 
-static uint32_t get_sram_bar_id(struct adf_hw_device_data *self)
+static u32 get_sram_bar_id(struct adf_hw_device_data *self)
 {
        return ADF_DH895XCC_SRAM_BAR;
 }
@@ -161,12 +117,12 @@ static void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev,
        }
 }
 
-static uint32_t get_pf2vf_offset(uint32_t i)
+static u32 get_pf2vf_offset(u32 i)
 {
        return ADF_DH895XCC_PF2VF_OFFSET(i);
 }
 
-static uint32_t get_vintmsk_offset(uint32_t i)
+static u32 get_vintmsk_offset(u32 i)
 {
        return ADF_DH895XCC_VINTMSK_OFFSET(i);
 }
index 092f735..082a044 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #ifndef ADF_DH895x_HW_DATA_H_
 #define ADF_DH895x_HW_DATA_H_
 
index b11bf8c..4e877b7 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
index a3b4dd8..5246f05 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_pf2vf_msg.h>
 #include <adf_common_drv.h>
index 6ddc19b..2bfcc67 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2015 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2015 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2015 - 2020 Intel Corporation */
 #ifndef ADF_DH895XVF_HW_DATA_H_
 #define ADF_DH895XVF_HW_DATA_H_
 
index 1b762ee..7d6e1db 100644 (file)
@@ -1,49 +1,5 @@
-/*
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-  Copyright(c) 2014 Intel Corporation.
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  Contact Information:
-  qat-linux@intel.com
-
-  BSD LICENSE
-  Copyright(c) 2014 Intel Corporation.
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2014 - 2020 Intel Corporation */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
index 7770660..cffa9fc 100644 (file)
@@ -14,7 +14,7 @@
 struct qce_cipher_ctx {
        u8 enc_key[QCE_MAX_KEY_SIZE];
        unsigned int enc_keylen;
-       struct crypto_sync_skcipher *fallback;
+       struct crypto_skcipher *fallback;
 };
 
 /**
@@ -43,6 +43,7 @@ struct qce_cipher_reqctx {
        struct sg_table src_tbl;
        struct scatterlist *src_sg;
        unsigned int cryptlen;
+       struct skcipher_request fallback_req;   // keep at the end
 };
 
 static inline struct qce_alg_template *to_cipher_tmpl(struct crypto_skcipher *tfm)
index 9f989cb..85ba164 100644 (file)
@@ -87,6 +87,8 @@ struct qce_alg_template {
                struct ahash_alg ahash;
        } alg;
        struct qce_device *qce;
+       const u8 *hash_zero;
+       const u32 digest_size;
 };
 
 void qce_cpu_to_be32p_array(__be32 *dst, const u8 *src, unsigned int len);
index 1ab62e7..c230843 100644 (file)
@@ -203,10 +203,18 @@ static int qce_import_common(struct ahash_request *req, u64 in_count,
 
 static int qce_ahash_import(struct ahash_request *req, const void *in)
 {
-       struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
-       unsigned long flags = rctx->flags;
-       bool hmac = IS_SHA_HMAC(flags);
-       int ret = -EINVAL;
+       struct qce_sha_reqctx *rctx;
+       unsigned long flags;
+       bool hmac;
+       int ret;
+
+       ret = qce_ahash_init(req);
+       if (ret)
+               return ret;
+
+       rctx = ahash_request_ctx(req);
+       flags = rctx->flags;
+       hmac = IS_SHA_HMAC(flags);
 
        if (IS_SHA1(flags) || IS_SHA1_HMAC(flags)) {
                const struct sha1_state *state = in;
@@ -284,8 +292,6 @@ static int qce_ahash_update(struct ahash_request *req)
        if (!sg_last)
                return -EINVAL;
 
-       sg_mark_end(sg_last);
-
        if (rctx->buflen) {
                sg_init_table(rctx->sg, 2);
                sg_set_buf(rctx->sg, rctx->tmpbuf, rctx->buflen);
@@ -305,8 +311,12 @@ static int qce_ahash_final(struct ahash_request *req)
        struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm);
        struct qce_device *qce = tmpl->qce;
 
-       if (!rctx->buflen)
+       if (!rctx->buflen) {
+               if (tmpl->hash_zero)
+                       memcpy(req->result, tmpl->hash_zero,
+                                       tmpl->alg.ahash.halg.digestsize);
                return 0;
+       }
 
        rctx->last_blk = true;
 
@@ -338,6 +348,13 @@ static int qce_ahash_digest(struct ahash_request *req)
        rctx->first_blk = true;
        rctx->last_blk = true;
 
+       if (!rctx->nbytes_orig) {
+               if (tmpl->hash_zero)
+                       memcpy(req->result, tmpl->hash_zero,
+                                       tmpl->alg.ahash.halg.digestsize);
+               return 0;
+       }
+
        return qce->async_req_enqueue(tmpl->qce, &req->base);
 }
 
@@ -490,6 +507,11 @@ static int qce_ahash_register_one(const struct qce_ahash_def *def,
        alg->halg.digestsize = def->digestsize;
        alg->halg.statesize = def->statesize;
 
+       if (IS_SHA1(def->flags))
+               tmpl->hash_zero = sha1_zero_message_hash;
+       else if (IS_SHA256(def->flags))
+               tmpl->hash_zero = sha256_zero_message_hash;
+
        base = &alg->halg.base;
        base->cra_blocksize = def->blocksize;
        base->cra_priority = 300;
index 9412433..5630c5a 100644 (file)
@@ -178,7 +178,7 @@ static int qce_skcipher_setkey(struct crypto_skcipher *ablk, const u8 *key,
                break;
        }
 
-       ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
+       ret = crypto_skcipher_setkey(ctx->fallback, key, keylen);
        if (!ret)
                ctx->enc_keylen = keylen;
        return ret;
@@ -235,16 +235,15 @@ static int qce_skcipher_crypt(struct skcipher_request *req, int encrypt)
              req->cryptlen <= aes_sw_max_len) ||
             (IS_XTS(rctx->flags) && req->cryptlen > QCE_SECTOR_SIZE &&
              req->cryptlen % QCE_SECTOR_SIZE))) {
-               SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
-
-               skcipher_request_set_sync_tfm(subreq, ctx->fallback);
-               skcipher_request_set_callback(subreq, req->base.flags,
-                                             NULL, NULL);
-               skcipher_request_set_crypt(subreq, req->src, req->dst,
-                                          req->cryptlen, req->iv);
-               ret = encrypt ? crypto_skcipher_encrypt(subreq) :
-                               crypto_skcipher_decrypt(subreq);
-               skcipher_request_zero(subreq);
+               skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+               skcipher_request_set_callback(&rctx->fallback_req,
+                                             req->base.flags,
+                                             req->base.complete,
+                                             req->base.data);
+               skcipher_request_set_crypt(&rctx->fallback_req, req->src,
+                                          req->dst, req->cryptlen, req->iv);
+               ret = encrypt ? crypto_skcipher_encrypt(&rctx->fallback_req) :
+                               crypto_skcipher_decrypt(&rctx->fallback_req);
                return ret;
        }
 
@@ -263,10 +262,9 @@ static int qce_skcipher_decrypt(struct skcipher_request *req)
 
 static int qce_skcipher_init(struct crypto_skcipher *tfm)
 {
-       struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-       memset(ctx, 0, sizeof(*ctx));
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct qce_cipher_reqctx));
+       /* take the size without the fallback skcipher_request at the end */
+       crypto_skcipher_set_reqsize(tfm, offsetof(struct qce_cipher_reqctx,
+                                                 fallback_req));
        return 0;
 }
 
@@ -274,17 +272,21 @@ static int qce_skcipher_init_fallback(struct crypto_skcipher *tfm)
 {
        struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       qce_skcipher_init(tfm);
-       ctx->fallback = crypto_alloc_sync_skcipher(crypto_tfm_alg_name(&tfm->base),
-                                                  0, CRYPTO_ALG_NEED_FALLBACK);
-       return PTR_ERR_OR_ZERO(ctx->fallback);
+       ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(&tfm->base),
+                                             0, CRYPTO_ALG_NEED_FALLBACK);
+       if (IS_ERR(ctx->fallback))
+               return PTR_ERR(ctx->fallback);
+
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct qce_cipher_reqctx) +
+                                        crypto_skcipher_reqsize(ctx->fallback));
+       return 0;
 }
 
 static void qce_skcipher_exit(struct crypto_skcipher *tfm)
 {
        struct qce_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       crypto_free_sync_skcipher(ctx->fallback);
+       crypto_free_skcipher(ctx->fallback);
 }
 
 struct qce_skcipher_def {
@@ -404,6 +406,7 @@ static int qce_skcipher_register_one(const struct qce_skcipher_def *def,
 
        alg->base.cra_priority          = 300;
        alg->base.cra_flags             = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY |
                                          CRYPTO_ALG_KERN_DRIVER_ONLY;
        alg->base.cra_ctxsize           = sizeof(struct qce_cipher_ctx);
        alg->base.cra_alignmask         = 0;
diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
new file mode 100644 (file)
index 0000000..5bc0990
--- /dev/null
@@ -0,0 +1,2420 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 SA2UL crypto accelerator driver
+ *
+ * Copyright (C) 2018-2020 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors:    Keerthy
+ *             Vitaly Andrianov
+ *             Tero Kristo
+ */
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <crypto/aes.h>
+#include <crypto/authenc.h>
+#include <crypto/des.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+
+#include "sa2ul.h"
+
+/* Byte offset for key in encryption security context */
+#define SC_ENC_KEY_OFFSET (1 + 27 + 4)
+/* Byte offset for Aux-1 in encryption security context */
+#define SC_ENC_AUX1_OFFSET (1 + 27 + 4 + 32)
+
+#define SA_CMDL_UPD_ENC         0x0001
+#define SA_CMDL_UPD_AUTH        0x0002
+#define SA_CMDL_UPD_ENC_IV      0x0004
+#define SA_CMDL_UPD_AUTH_IV     0x0008
+#define SA_CMDL_UPD_AUX_KEY     0x0010
+
+#define SA_AUTH_SUBKEY_LEN     16
+#define SA_CMDL_PAYLOAD_LENGTH_MASK    0xFFFF
+#define SA_CMDL_SOP_BYPASS_LEN_MASK    0xFF000000
+
+#define MODE_CONTROL_BYTES     27
+#define SA_HASH_PROCESSING     0
+#define SA_CRYPTO_PROCESSING   0
+#define SA_UPLOAD_HASH_TO_TLR  BIT(6)
+
+#define SA_SW0_FLAGS_MASK      0xF0000
+#define SA_SW0_CMDL_INFO_MASK  0x1F00000
+#define SA_SW0_CMDL_PRESENT    BIT(4)
+#define SA_SW0_ENG_ID_MASK     0x3E000000
+#define SA_SW0_DEST_INFO_PRESENT       BIT(30)
+#define SA_SW2_EGRESS_LENGTH           0xFF000000
+#define SA_BASIC_HASH          0x10
+
+#define SHA256_DIGEST_WORDS    8
+/* Make 32-bit word from 4 bytes */
+#define SA_MK_U32(b0, b1, b2, b3) (((b0) << 24) | ((b1) << 16) | \
+                                  ((b2) << 8) | (b3))
+
+/* size of SCCTL structure in bytes */
+#define SA_SCCTL_SZ 16
+
+/* Max Authentication tag size */
+#define SA_MAX_AUTH_TAG_SZ 64
+
+#define PRIV_ID        0x1
+#define PRIV   0x1
+
+static struct device *sa_k3_dev;
+
+/**
+ * struct sa_cmdl_cfg - Command label configuration descriptor
+ * @aalg: authentication algorithm ID
+ * @enc_eng_id: Encryption Engine ID supported by the SA hardware
+ * @auth_eng_id: Authentication Engine ID
+ * @iv_size: Initialization Vector size
+ * @akey: Authentication key
+ * @akey_len: Authentication key length
+ * @enc: True, if this is an encode request
+ */
+struct sa_cmdl_cfg {
+       int aalg;
+       u8 enc_eng_id;
+       u8 auth_eng_id;
+       u8 iv_size;
+       const u8 *akey;
+       u16 akey_len;
+       bool enc;
+};
+
+/**
+ * struct algo_data - Crypto algorithm specific data
+ * @enc_eng: Encryption engine info structure
+ * @auth_eng: Authentication engine info structure
+ * @auth_ctrl: Authentication control word
+ * @hash_size: Size of digest
+ * @iv_idx: iv index in psdata
+ * @iv_out_size: iv out size
+ * @ealg_id: Encryption Algorithm ID
+ * @aalg_id: Authentication algorithm ID
+ * @mci_enc: Mode Control Instruction for Encryption algorithm
+ * @mci_dec: Mode Control Instruction for Decryption
+ * @inv_key: Whether the encryption algorithm demands key inversion
+ * @ctx: Pointer to the algorithm context
+ * @keyed_mac: Whether the authentication algorithm has key
+ * @prep_iopad: Function pointer to generate intermediate ipad/opad
+ */
+struct algo_data {
+       struct sa_eng_info enc_eng;
+       struct sa_eng_info auth_eng;
+       u8 auth_ctrl;
+       u8 hash_size;
+       u8 iv_idx;
+       u8 iv_out_size;
+       u8 ealg_id;
+       u8 aalg_id;
+       u8 *mci_enc;
+       u8 *mci_dec;
+       bool inv_key;
+       struct sa_tfm_ctx *ctx;
+       bool keyed_mac;
+       void (*prep_iopad)(struct algo_data *algo, const u8 *key,
+                          u16 key_sz, __be32 *ipad, __be32 *opad);
+};
+
+/**
+ * struct sa_alg_tmpl - A generic template encompassing crypto/aead algorithms
+ * @type: Type of the crypto algorithm.
+ * @alg: Union of crypto algorithm definitions.
+ * @registered: Flag indicating if the crypto algorithm is already registered
+ */
+struct sa_alg_tmpl {
+       u32 type;               /* CRYPTO_ALG_TYPE from <linux/crypto.h> */
+       union {
+               struct skcipher_alg skcipher;
+               struct ahash_alg ahash;
+               struct aead_alg aead;
+       } alg;
+       bool registered;
+};
+
+/**
+ * struct sa_rx_data - RX Packet miscellaneous data place holder
+ * @req: crypto request data pointer
+ * @ddev: pointer to the DMA device
+ * @tx_in: dma_async_tx_descriptor pointer for rx channel
+ * @split_src_sg: Set if the src sg is split and needs to be freed up
+ * @split_dst_sg: Set if the dst sg is split and needs to be freed up
+ * @enc: Flag indicating either encryption or decryption
+ * @enc_iv_size: Initialisation vector size
+ * @iv_idx: Initialisation vector index
+ * @rx_sg: Static scatterlist entry for overriding RX data
+ * @tx_sg: Static scatterlist entry for overriding TX data
+ * @src: Source data pointer
+ * @dst: Destination data pointer
+ */
+struct sa_rx_data {
+       void *req;
+       struct device *ddev;
+       struct dma_async_tx_descriptor *tx_in;
+       struct scatterlist *split_src_sg;
+       struct scatterlist *split_dst_sg;
+       u8 enc;
+       u8 enc_iv_size;
+       u8 iv_idx;
+       struct scatterlist rx_sg;
+       struct scatterlist tx_sg;
+       struct scatterlist *src;
+       struct scatterlist *dst;
+};
+
+/**
+ * struct sa_req - SA request definition
+ * @dev: device for the request
+ * @size: total data to be transmitted via DMA
+ * @enc_offset: offset of cipher data
+ * @enc_size: data to be passed to cipher engine
+ * @enc_iv: cipher IV
+ * @auth_offset: offset of the authentication data
+ * @auth_size: size of the authentication data
+ * @auth_iv: authentication IV
+ * @type: algorithm type for the request
+ * @cmdl: command label pointer
+ * @base: pointer to the base request
+ * @ctx: pointer to the algorithm context data
+ * @enc: true if this is an encode request
+ * @src: source data
+ * @dst: destination data
+ * @callback: DMA callback for the request
+ * @mdata_size: metadata size passed to DMA
+ */
+struct sa_req {
+       struct device *dev;
+       u16 size;
+       u8 enc_offset;
+       u16 enc_size;
+       u8 *enc_iv;
+       u8 auth_offset;
+       u16 auth_size;
+       u8 *auth_iv;
+       u32 type;
+       u32 *cmdl;
+       struct crypto_async_request *base;
+       struct sa_tfm_ctx *ctx;
+       bool enc;
+       struct scatterlist *src;
+       struct scatterlist *dst;
+       dma_async_tx_callback callback;
+       u16 mdata_size;
+};
+
+/*
+ * Mode Control Instructions for various Key lengths 128, 192, 256
+ * For CBC (Cipher Block Chaining) mode for encryption
+ *
+ * One row per key length, in the order 128, 192, 256 bits; the row is
+ * selected with the key size index computed in sa_aes_cbc_setkey().
+ */
+static u8 mci_cbc_enc_array[3][MODE_CONTROL_BYTES] = {
+       {       0x61, 0x00, 0x00, 0x18, 0x88, 0x0a, 0xaa, 0x4b, 0x7e, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x61, 0x00, 0x00, 0x18, 0x88, 0x4a, 0xaa, 0x4b, 0x7e, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x61, 0x00, 0x00, 0x18, 0x88, 0x8a, 0xaa, 0x4b, 0x7e, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+};
+
+/*
+ * Mode Control Instructions for various Key lengths 128, 192, 256
+ * For CBC (Cipher Block Chaining) mode for decryption
+ *
+ * One row per key length, in the order 128, 192, 256 bits; the row is
+ * selected with the key size index computed in sa_aes_cbc_setkey().
+ */
+static u8 mci_cbc_dec_array[3][MODE_CONTROL_BYTES] = {
+       {       0x71, 0x00, 0x00, 0x80, 0x8a, 0xca, 0x98, 0xf4, 0x40, 0xc0,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x71, 0x00, 0x00, 0x84, 0x8a, 0xca, 0x98, 0xf4, 0x40, 0xc0,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x71, 0x00, 0x00, 0x88, 0x8a, 0xca, 0x98, 0xf4, 0x40, 0xc0,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+};
+
+/*
+ * Mode Control Instructions for various Key lengths 128, 192, 256
+ * For CBC (Cipher Block Chaining) mode for encryption, "no IV" variant
+ *
+ * Each row matches the corresponding mci_cbc_enc_array row except for the
+ * first byte (0x21 here instead of 0x61).
+ * NOTE(review): presumably used when the IV is not passed with the
+ * request; confirm against the users of this table.
+ */
+static u8 mci_cbc_enc_no_iv_array[3][MODE_CONTROL_BYTES] = {
+       {       0x21, 0x00, 0x00, 0x18, 0x88, 0x0a, 0xaa, 0x4b, 0x7e, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x21, 0x00, 0x00, 0x18, 0x88, 0x4a, 0xaa, 0x4b, 0x7e, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x21, 0x00, 0x00, 0x18, 0x88, 0x8a, 0xaa, 0x4b, 0x7e, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+};
+
+/*
+ * Mode Control Instructions for various Key lengths 128, 192, 256
+ * For CBC (Cipher Block Chaining) mode for decryption, "no IV" variant
+ *
+ * Each row matches the corresponding mci_cbc_dec_array row except for the
+ * first byte (0x31 here instead of 0x71).
+ * NOTE(review): presumably used when the IV is not passed with the
+ * request; confirm against the users of this table.
+ */
+static u8 mci_cbc_dec_no_iv_array[3][MODE_CONTROL_BYTES] = {
+       {       0x31, 0x00, 0x00, 0x80, 0x8a, 0xca, 0x98, 0xf4, 0x40, 0xc0,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x31, 0x00, 0x00, 0x84, 0x8a, 0xca, 0x98, 0xf4, 0x40, 0xc0,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x31, 0x00, 0x00, 0x88, 0x8a, 0xca, 0x98, 0xf4, 0x40, 0xc0,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+};
+
+/*
+ * Mode Control Instructions for various Key lengths 128, 192, 256
+ * For ECB (Electronic Code Book) mode for encryption
+ *
+ * One row per key length, in the order 128, 192, 256 bits; the row is
+ * selected with the key size index computed in sa_aes_ecb_setkey().
+ * The row length is MODE_CONTROL_BYTES, like the other MCI tables, because
+ * sa_set_sc_enc() copies MODE_CONTROL_BYTES bytes out of the selected row.
+ */
+static u8 mci_ecb_enc_array[3][MODE_CONTROL_BYTES] = {
+       {       0x21, 0x00, 0x00, 0x80, 0x8a, 0x04, 0xb7, 0x90, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x21, 0x00, 0x00, 0x84, 0x8a, 0x04, 0xb7, 0x90, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x21, 0x00, 0x00, 0x88, 0x8a, 0x04, 0xb7, 0x90, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+};
+
+/*
+ * Mode Control Instructions for various Key lengths 128, 192, 256
+ * For ECB (Electronic Code Book) mode for decryption
+ *
+ * One row per key length, in the order 128, 192, 256 bits; the row is
+ * selected with the key size index computed in sa_aes_ecb_setkey().
+ * The row length is MODE_CONTROL_BYTES, like the other MCI tables, because
+ * sa_set_sc_enc() copies MODE_CONTROL_BYTES bytes out of the selected row.
+ */
+static u8 mci_ecb_dec_array[3][MODE_CONTROL_BYTES] = {
+       {       0x31, 0x00, 0x00, 0x80, 0x8a, 0x04, 0xb7, 0x90, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x31, 0x00, 0x00, 0x84, 0x8a, 0x04, 0xb7, 0x90, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+       {       0x31, 0x00, 0x00, 0x88, 0x8a, 0x04, 0xb7, 0x90, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00        },
+};
+
+/*
+ * Mode Control Instructions for the 3DES (triple DES) algorithm.
+ * Four single-row tables follow: CBC (Cipher Block Chaining) encryption,
+ * CBC decryption, ECB (Electronic Code Book) encryption and ECB decryption,
+ * in that order. They are selected by sa_3des_cbc_setkey() and
+ * sa_3des_ecb_setkey().
+ */
+static u8 mci_cbc_3des_enc_array[MODE_CONTROL_BYTES] = {
+       0x60, 0x00, 0x00, 0x18, 0x88, 0x52, 0xaa, 0x4b, 0x7e, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00,
+};
+
+static u8 mci_cbc_3des_dec_array[MODE_CONTROL_BYTES] = {
+       0x70, 0x00, 0x00, 0x85, 0x0a, 0xca, 0x98, 0xf4, 0x40, 0xc0, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00,
+};
+
+static u8 mci_ecb_3des_enc_array[MODE_CONTROL_BYTES] = {
+       0x20, 0x00, 0x00, 0x85, 0x0a, 0x04, 0xb7, 0x90, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00,
+};
+
+static u8 mci_ecb_3des_dec_array[MODE_CONTROL_BYTES] = {
+       0x30, 0x00, 0x00, 0x85, 0x0a, 0x04, 0xb7, 0x90, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00,
+};
+
+/*
+ * 128-bit (16 byte) swizzle, done in place.
+ * The SA2UL expects the security context in little endian and the bus is
+ * 128 bits (16 bytes) wide, so every 16-byte group of the buffer is
+ * mirrored: its last byte becomes the first and so on.
+ * Groups are processed while their start offset is below @len.
+ */
+static void sa_swiz_128(u8 *in, u16 len)
+{
+       u8 saved[16];
+       u8 *group;
+       int off, k;
+
+       for (off = 0; off < len; off += 16) {
+               group = &in[off];
+               memcpy(saved, group, sizeof(saved));
+
+               /* Walk the saved copy backwards, the group forwards */
+               for (k = 15; k >= 0; k--)
+                       group[15 - k] = saved[k];
+       }
+}
+
+/*
+ * Prepare the ipad and opad from key as per SHA algorithm step 1:
+ * each key byte is XORed with 0x36 (ipad) and 0x5c (opad) and both pads
+ * are filled up to SHA1_BLOCK_SIZE bytes with the bare pad constants.
+ *
+ * @k_ipad and @k_opad must hold at least SHA1_BLOCK_SIZE bytes. At most
+ * SHA1_BLOCK_SIZE key bytes are consumed so that an oversized @key_sz can
+ * never write past the end of the pad buffers; callers that want to
+ * support longer keys have to shorten them (e.g. by hashing) beforehand.
+ */
+static void prepare_kiopad(u8 *k_ipad, u8 *k_opad, const u8 *key, u16 key_sz)
+{
+       int i;
+
+       /* Never write beyond the SHA1_BLOCK_SIZE sized pad buffers */
+       if (key_sz > SHA1_BLOCK_SIZE)
+               key_sz = SHA1_BLOCK_SIZE;
+
+       for (i = 0; i < key_sz; i++) {
+               k_ipad[i] = key[i] ^ 0x36;
+               k_opad[i] = key[i] ^ 0x5c;
+       }
+
+       /* Instead of XOR with 0 */
+       for (; i < SHA1_BLOCK_SIZE; i++) {
+               k_ipad[i] = 0x36;
+               k_opad[i] = 0x5c;
+       }
+}
+
+/*
+ * Export the intermediate state of @hash and store its state words in
+ * @out as big endian 32-bit values (digest_size / 4 words).
+ * Only SHA1_DIGEST_SIZE and SHA256_DIGEST_SIZE are handled; any other
+ * @digest_size is logged and nothing is written. @block_size is unused.
+ */
+static void sa_export_shash(struct shash_desc *hash, int block_size,
+                           int digest_size, __be32 *out)
+{
+       union {
+               struct sha1_state sha1;
+               struct sha256_state sha256;
+               struct sha512_state sha512;
+       } sha;
+       int nwords = digest_size >> 2;
+       void *state;
+       u32 *words;
+       int w;
+
+       if (digest_size == SHA1_DIGEST_SIZE) {
+               state = &sha.sha1;
+               words = sha.sha1.state;
+       } else if (digest_size == SHA256_DIGEST_SIZE) {
+               state = &sha.sha256;
+               words = sha.sha256.state;
+       } else {
+               dev_err(sa_k3_dev, "%s: bad digest_size=%d\n", __func__,
+                       digest_size);
+               return;
+       }
+
+       crypto_shash_export(hash, state);
+
+       for (w = 0; w < nwords; w++)
+               out[w] = cpu_to_be32(words[w]);
+}
+
+/*
+ * Compute the partial hash states for the HMAC inner and outer pads.
+ *
+ * The key is expanded into k_ipad/k_opad, one block of each is run through
+ * the shash of @data->ctx and the exported intermediate state is stored in
+ * @ipad and @opad respectively. The first block_size bytes of @ipad and
+ * @opad are cleared before the states are written.
+ *
+ * The pad buffers are derived directly from the key, so they are wiped
+ * before returning to avoid leaving key material on the stack.
+ */
+static void sa_prepare_iopads(struct algo_data *data, const u8 *key,
+                             u16 key_sz, __be32 *ipad, __be32 *opad)
+{
+       SHASH_DESC_ON_STACK(shash, data->ctx->shash);
+       int block_size = crypto_shash_blocksize(data->ctx->shash);
+       int digest_size = crypto_shash_digestsize(data->ctx->shash);
+       u8 k_ipad[SHA1_BLOCK_SIZE];
+       u8 k_opad[SHA1_BLOCK_SIZE];
+
+       shash->tfm = data->ctx->shash;
+
+       prepare_kiopad(k_ipad, k_opad, key, key_sz);
+
+       memzero_explicit(ipad, block_size);
+       memzero_explicit(opad, block_size);
+
+       /* Inner pad: hash one block and export the intermediate state */
+       crypto_shash_init(shash);
+       crypto_shash_update(shash, k_ipad, block_size);
+       sa_export_shash(shash, block_size, digest_size, ipad);
+
+       /* Outer pad: same procedure with the opad block */
+       crypto_shash_init(shash);
+       crypto_shash_update(shash, k_opad, block_size);
+
+       sa_export_shash(shash, block_size, digest_size, opad);
+
+       /* Do not leave key-derived data behind on the stack */
+       memzero_explicit(k_ipad, sizeof(k_ipad));
+       memzero_explicit(k_opad, sizeof(k_opad));
+}
+
+/*
+ * Derive the inverse key used in AES-CBC decryption operation.
+ *
+ * The key is expanded with aes_expandkey() and @key_sz bytes are copied to
+ * @inv_key starting at word key_pos of the expanded encryption key.
+ *
+ * Returns 0 on success or -EINVAL for an unsupported key length. The
+ * expanded key schedule is wiped from the stack on every return path.
+ */
+static inline int sa_aes_inv_key(u8 *inv_key, const u8 *key, u16 key_sz)
+{
+       struct crypto_aes_ctx ctx;
+       int key_pos;
+       int ret = 0;
+
+       if (aes_expandkey(&ctx, key, key_sz)) {
+               dev_err(sa_k3_dev, "%s: bad key len(%d)\n", __func__, key_sz);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       /* work around to get the right inverse for AES_KEYSIZE_192 size keys */
+       if (key_sz == AES_KEYSIZE_192) {
+               ctx.key_enc[52] = ctx.key_enc[51] ^ ctx.key_enc[46];
+               ctx.key_enc[53] = ctx.key_enc[52] ^ ctx.key_enc[47];
+       }
+
+       /* Based crypto_aes_expand_key logic */
+       switch (key_sz) {
+       case AES_KEYSIZE_128:
+       case AES_KEYSIZE_192:
+               key_pos = key_sz + 24;
+               break;
+
+       case AES_KEYSIZE_256:
+               key_pos = key_sz + 24 - 4;
+               break;
+
+       default:
+               dev_err(sa_k3_dev, "%s: bad key len(%d)\n", __func__, key_sz);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       memcpy(inv_key, &ctx.key_enc[key_pos], key_sz);
+
+out:
+       /* The expanded key is secret material: do not leave it on the stack */
+       memzero_explicit(&ctx, sizeof(ctx));
+       return ret;
+}
+
+/*
+ * Set Security context for the encryption engine.
+ *
+ * Writes the mode selector, the mode control instructions of the requested
+ * direction (when present) and the key into @sc_buf. When @ad->inv_key is
+ * set and the direction is decryption, the inverse AES key is stored
+ * instead of the plain key.
+ *
+ * Returns 0 on success, -EINVAL if the inverse key cannot be derived.
+ */
+static int sa_set_sc_enc(struct algo_data *ad, const u8 *key, u16 key_sz,
+                        u8 enc, u8 *sc_buf)
+{
+       const u8 *mci = enc ? ad->mci_enc : ad->mci_dec;
+       u8 *key_dst = &sc_buf[SC_ENC_KEY_OFFSET];
+
+       /* Set Encryption mode selector to crypto processing */
+       sc_buf[0] = SA_CRYPTO_PROCESSING;
+
+       /* Set the mode control instructions in security context */
+       if (mci)
+               memcpy(&sc_buf[1], mci, MODE_CONTROL_BYTES);
+
+       /* Everything but inverse-key decryption uses the key as is */
+       if (enc || !ad->inv_key) {
+               memcpy(key_dst, key, key_sz);
+               return 0;
+       }
+
+       /* For AES-CBC decryption get the inverse key */
+       if (sa_aes_inv_key(key_dst, key, key_sz))
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Set Security context for the authentication engine.
+ *
+ * Fills in the mode selector and the auth control byte of @sc_buf. For a
+ * keyed MAC the ipad/opad values produced by @ad->prep_iopad are copied to
+ * offsets 32 and 64; otherwise the context is flagged as a basic hash.
+ */
+static void sa_set_sc_auth(struct algo_data *ad, const u8 *key, u16 key_sz,
+                          u8 *sc_buf)
+{
+       __be32 ipad[64], opad[64];
+
+       /* Set Authentication mode selector to hash processing */
+       sc_buf[0] = SA_HASH_PROCESSING;
+       /* Auth SW ctrl word: bit[6]=1 (upload computed hash to TLR section) */
+       sc_buf[1] = SA_UPLOAD_HASH_TO_TLR | ad->auth_ctrl;
+
+       if (!ad->keyed_mac) {
+               /* basic hash */
+               sc_buf[1] |= SA_BASIC_HASH;
+               return;
+       }
+
+       /* Keyed MAC: derive the pads and copy them into the context */
+       ad->prep_iopad(ad, key, key_sz, ipad, opad);
+
+       /* Copy ipad to AuthKey */
+       memcpy(&sc_buf[32], ipad, ad->hash_size);
+       /* Copy opad to Aux-1 */
+       memcpy(&sc_buf[64], opad, ad->hash_size);
+}
+
+/*
+ * Copy an IV to @out as big endian 32-bit words: four words when @size16
+ * is set, two words otherwise.
+ */
+static inline void sa_copy_iv(__be32 *out, const u8 *iv, bool size16)
+{
+       const int nwords = size16 ? 4 : 2;
+       int w;
+
+       for (w = 0; w < nwords; w++)
+               out[w] = cpu_to_be32(*((u32 *)(iv + 4 * w)));
+}
+
+/*
+ * Format general command label
+ *
+ * Builds the command label template in @cmdl from @cfg and records in
+ * @upd_info the word indices that sa_update_cmdl() patches per request.
+ * Both @cmdl (SA_MAX_CMDL_WORDS words) and @upd_info are cleared first.
+ *
+ * Returns the label length in bytes, rounded up to a multiple of 8.
+ */
+static int sa_format_cmdl_gen(struct sa_cmdl_cfg *cfg, u8 *cmdl,
+                             struct sa_cmdl_upd_info *upd_info)
+{
+       u8 enc_offset = 0, auth_offset = 0, total = 0;
+       u8 enc_next_eng = SA_ENG_ID_OUTPORT2;
+       u8 auth_next_eng = SA_ENG_ID_OUTPORT2;
+       u32 *word_ptr = (u32 *)cmdl;
+       int i;
+
+       /* Clear the command label */
+       memzero_explicit(cmdl, (SA_MAX_CMDL_WORDS * sizeof(u32)));
+
+       /* Initialize the command update structure */
+       memzero_explicit(upd_info, sizeof(*upd_info));
+
+       /*
+        * With both engines in use the labels are chained: on encryption the
+        * encryption label comes first and hands over to the authentication
+        * engine, on decryption the authentication label comes first.
+        */
+       if (cfg->enc_eng_id && cfg->auth_eng_id) {
+               if (cfg->enc) {
+                       auth_offset = SA_CMDL_HEADER_SIZE_BYTES;
+                       enc_next_eng = cfg->auth_eng_id;
+
+                       if (cfg->iv_size)
+                               auth_offset += cfg->iv_size;
+               } else {
+                       enc_offset = SA_CMDL_HEADER_SIZE_BYTES;
+                       auth_next_eng = cfg->enc_eng_id;
+               }
+       }
+
+       if (cfg->enc_eng_id) {
+               upd_info->flags |= SA_CMDL_UPD_ENC;
+               /* Byte offsets are converted to 32-bit word indices */
+               upd_info->enc_size.index = enc_offset >> 2;
+               upd_info->enc_offset.index = upd_info->enc_size.index + 1;
+               /* Encryption command label */
+               cmdl[enc_offset + SA_CMDL_OFFSET_NESC] = enc_next_eng;
+
+               /* Encryption modes requiring IV */
+               if (cfg->iv_size) {
+                       upd_info->flags |= SA_CMDL_UPD_ENC_IV;
+                       upd_info->enc_iv.index =
+                               (enc_offset + SA_CMDL_HEADER_SIZE_BYTES) >> 2;
+                       upd_info->enc_iv.size = cfg->iv_size;
+
+                       cmdl[enc_offset + SA_CMDL_OFFSET_LABEL_LEN] =
+                               SA_CMDL_HEADER_SIZE_BYTES + cfg->iv_size;
+
+                       cmdl[enc_offset + SA_CMDL_OFFSET_OPTION_CTRL1] =
+                               (SA_CTX_ENC_AUX2_OFFSET | (cfg->iv_size >> 3));
+                       total += SA_CMDL_HEADER_SIZE_BYTES + cfg->iv_size;
+               } else {
+                       cmdl[enc_offset + SA_CMDL_OFFSET_LABEL_LEN] =
+                                               SA_CMDL_HEADER_SIZE_BYTES;
+                       total += SA_CMDL_HEADER_SIZE_BYTES;
+               }
+       }
+
+       if (cfg->auth_eng_id) {
+               upd_info->flags |= SA_CMDL_UPD_AUTH;
+               upd_info->auth_size.index = auth_offset >> 2;
+               upd_info->auth_offset.index = upd_info->auth_size.index + 1;
+               cmdl[auth_offset + SA_CMDL_OFFSET_NESC] = auth_next_eng;
+               cmdl[auth_offset + SA_CMDL_OFFSET_LABEL_LEN] =
+                       SA_CMDL_HEADER_SIZE_BYTES;
+               total += SA_CMDL_HEADER_SIZE_BYTES;
+       }
+
+       total = roundup(total, 8);
+
+       /* Byte-swap every 32-bit word of the finished label */
+       for (i = 0; i < total / 4; i++)
+               word_ptr[i] = swab32(word_ptr[i]);
+
+       return total;
+}
+
+/*
+ * Update Command label
+ *
+ * Patches the per-request values (payload sizes, bypass offsets, IVs and
+ * the optional aux key) into the command label @cmdl, at the word indices
+ * recorded in @upd_info.
+ */
+static inline void sa_update_cmdl(struct sa_req *req, u32 *cmdl,
+                                 struct sa_cmdl_upd_info *upd_info)
+{
+       int pos, word;
+
+       if (likely(upd_info->flags & SA_CMDL_UPD_ENC)) {
+               u32 *size_word = &cmdl[upd_info->enc_size.index];
+               u32 *offset_word = &cmdl[upd_info->enc_offset.index];
+
+               /* Replace the payload length field */
+               *size_word &= ~SA_CMDL_PAYLOAD_LENGTH_MASK;
+               *size_word |= req->enc_size;
+
+               /* Replace the bypass length field */
+               *offset_word &= ~SA_CMDL_SOP_BYPASS_LEN_MASK;
+               *offset_word |= ((u32)req->enc_offset <<
+                                __ffs(SA_CMDL_SOP_BYPASS_LEN_MASK));
+
+               if (likely(upd_info->flags & SA_CMDL_UPD_ENC_IV)) {
+                       __be32 *iv_out = (__be32 *)&cmdl[upd_info->enc_iv.index];
+                       u32 *iv_in = (u32 *)req->enc_iv;
+
+                       /* One 32-bit word per four bytes of IV */
+                       for (pos = 0, word = 0; pos < upd_info->enc_iv.size;
+                            pos += 4, word++)
+                               iv_out[word] = cpu_to_be32(iv_in[word]);
+               }
+       }
+
+       if (likely(upd_info->flags & SA_CMDL_UPD_AUTH)) {
+               u32 *size_word = &cmdl[upd_info->auth_size.index];
+               u32 *offset_word = &cmdl[upd_info->auth_offset.index];
+
+               *size_word &= ~SA_CMDL_PAYLOAD_LENGTH_MASK;
+               *size_word |= req->auth_size;
+
+               *offset_word &= ~SA_CMDL_SOP_BYPASS_LEN_MASK;
+               *offset_word |= ((u32)req->auth_offset <<
+                                __ffs(SA_CMDL_SOP_BYPASS_LEN_MASK));
+
+               if (upd_info->flags & SA_CMDL_UPD_AUTH_IV)
+                       sa_copy_iv((void *)&cmdl[upd_info->auth_iv.index],
+                                  req->auth_iv,
+                                  (upd_info->auth_iv.size > 8));
+
+               if (upd_info->flags & SA_CMDL_UPD_AUX_KEY) {
+                       /* Second aux key slot when auth_size is not 16-aligned */
+                       int offset = (req->auth_size & 0xF) ? 4 : 0;
+
+                       memcpy(&cmdl[upd_info->aux_key_info.index],
+                              &upd_info->aux_key[offset], 16);
+               }
+       }
+}
+
+/*
+ * Format SWINFO words to be sent to SA
+ *
+ * swinfo[0] carries the context id, flags, optional command label info and
+ * the engine id; swinfo[1] and swinfo[2] carry the low and high halves of
+ * the security context address, with the hash size merged into swinfo[2].
+ */
+static
+void sa_set_swinfo(u8 eng_id, u16 sc_id, dma_addr_t sc_phys,
+                  u8 cmdl_present, u8 cmdl_offset, u8 flags,
+                  u8 hash_size, u32 *swinfo)
+{
+       u32 sw0 = sc_id;
+       u32 sw2;
+
+       sw0 |= (flags << __ffs(SA_SW0_FLAGS_MASK));
+       if (likely(cmdl_present))
+               sw0 |= ((cmdl_offset | SA_SW0_CMDL_PRESENT) <<
+                                               __ffs(SA_SW0_CMDL_INFO_MASK));
+       sw0 |= (eng_id << __ffs(SA_SW0_ENG_ID_MASK));
+       sw0 |= SA_SW0_DEST_INFO_PRESENT;
+
+       sw2 = (u32)((sc_phys & 0xFFFFFFFF00000000ULL) >> 32);
+       sw2 |= (hash_size << __ffs(SA_SW2_EGRESS_LENGTH));
+
+       swinfo[0] = sw0;
+       swinfo[1] = (u32)(sc_phys & 0xFFFFFFFFULL);
+       swinfo[2] = sw2;
+}
+
+/*
+ * Dump the security context
+ *
+ * Logs the DMA address and a hex dump of SA_CTX_MAX_SZ bytes of @buf.
+ * Compiles to an empty function unless DEBUG is defined.
+ */
+static void sa_dump_sc(u8 *buf, dma_addr_t dma_addr)
+{
+#ifdef DEBUG
+       dev_info(sa_k3_dev, "Security context dump:: 0x%pad\n", &dma_addr);
+       print_hex_dump(KERN_CONT, "", DUMP_PREFIX_OFFSET,
+                      16, 1, buf, SA_CTX_MAX_SZ, false);
+#endif
+}
+
+/*
+ * Build the security context of @ctx and the matching SWINFO words.
+ *
+ * The context buffer is cleared, the header and the per-engine sections
+ * are filled in, the buffer is swizzled in 16-byte groups and @swinfo is
+ * formatted for the first engine in the chain.
+ *
+ * Note that @ad->hash_size is modified: it is rounded up to a multiple of
+ * 8 when an authentication engine is used and set to @ad->iv_out_size for
+ * encryption-only setups.
+ *
+ * Returns 0 on success, -EINVAL if the hash size is zero with an
+ * authentication engine or if the encryption context cannot be set up.
+ */
+static
+int sa_init_sc(struct sa_ctx_info *ctx, const u8 *enc_key,
+              u16 enc_key_sz, const u8 *auth_key, u16 auth_key_sz,
+              struct algo_data *ad, u8 enc, u32 *swinfo)
+{
+       int enc_sc_offset = 0;
+       int auth_sc_offset = 0;
+       u8 *sc_buf = ctx->sc;
+       u16 sc_id = ctx->sc_id;
+       u8 first_engine = 0;
+
+       memzero_explicit(sc_buf, SA_CTX_MAX_SZ);
+
+       if (ad->auth_eng.eng_id) {
+               /* The engine that sees the data first depends on direction */
+               if (enc)
+                       first_engine = ad->enc_eng.eng_id;
+               else
+                       first_engine = ad->auth_eng.eng_id;
+
+               /* Auth section follows the encryption section */
+               enc_sc_offset = SA_CTX_PHP_PE_CTX_SZ;
+               auth_sc_offset = enc_sc_offset + ad->enc_eng.sc_size;
+               sc_buf[1] = SA_SCCTL_FE_AUTH_ENC;
+               if (!ad->hash_size)
+                       return -EINVAL;
+               ad->hash_size = roundup(ad->hash_size, 8);
+
+       } else if (ad->enc_eng.eng_id && !ad->auth_eng.eng_id) {
+               enc_sc_offset = SA_CTX_PHP_PE_CTX_SZ;
+               first_engine = ad->enc_eng.eng_id;
+               sc_buf[1] = SA_SCCTL_FE_ENC;
+               ad->hash_size = ad->iv_out_size;
+       }
+
+       /* SCCTL Owner info: 0=host, 1=CP_ACE */
+       sc_buf[SA_CTX_SCCTL_OWNER_OFFSET] = 0;
+       /* Bytes 2-3 hold the security context id */
+       memcpy(&sc_buf[2], &sc_id, 2);
+       sc_buf[4] = 0x0;
+       sc_buf[5] = PRIV_ID;
+       sc_buf[6] = PRIV;
+       sc_buf[7] = 0x0;
+
+       /* Prepare context for encryption engine */
+       if (ad->enc_eng.sc_size) {
+               if (sa_set_sc_enc(ad, enc_key, enc_key_sz, enc,
+                                 &sc_buf[enc_sc_offset]))
+                       return -EINVAL;
+       }
+
+       /* Prepare context for authentication engine */
+       if (ad->auth_eng.sc_size)
+               sa_set_sc_auth(ad, auth_key, auth_key_sz,
+                              &sc_buf[auth_sc_offset]);
+
+       /* Set the ownership of context to CP_ACE */
+       sc_buf[SA_CTX_SCCTL_OWNER_OFFSET] = 0x80;
+
+       /* swizzle the security context */
+       sa_swiz_128(sc_buf, SA_CTX_MAX_SZ);
+
+       sa_set_swinfo(first_engine, ctx->sc_id, ctx->sc_phys, 1, 0,
+                     SA_SW_INFO_FLAG_EVICT, ad->hash_size, swinfo);
+
+       sa_dump_sc(sc_buf, ctx->sc_phys);
+
+       return 0;
+}
+
+/*
+ * Free the per direction context memory: give the security context ID back
+ * to the bitmap and return the context buffer, if any, to the DMA pool.
+ */
+static void sa_free_ctx_info(struct sa_ctx_info *ctx,
+                            struct sa_crypto_data *data)
+{
+       const unsigned long slot = ctx->sc_id - data->sc_id_start;
+
+       spin_lock(&data->scid_lock);
+       __clear_bit(slot, data->ctx_bm);
+       data->sc_id--;
+       spin_unlock(&data->scid_lock);
+
+       if (!ctx->sc)
+               return;
+
+       dma_pool_free(data->sc_pool, ctx->sc, ctx->sc_phys);
+       ctx->sc = NULL;
+}
+
+/*
+ * Set up the per direction context: reserve a free security context ID in
+ * the ctx_bm bitmap and allocate the security context buffer from the DMA
+ * pool.
+ *
+ * Returns 0 on success, -ENOSPC when all SA_MAX_NUM_CTX IDs are in use, or
+ * -ENOMEM when the DMA pool allocation fails (the ID is released again).
+ */
+static int sa_init_ctx_info(struct sa_ctx_info *ctx,
+                           struct sa_crypto_data *data)
+{
+       unsigned long bn;
+       int err;
+
+       spin_lock(&data->scid_lock);
+       bn = find_first_zero_bit(data->ctx_bm, SA_MAX_NUM_CTX);
+       /*
+        * find_first_zero_bit() returns the bitmap size when no bit is
+        * free; setting that bit would write outside of the bitmap and
+        * hand out an invalid context ID.
+        */
+       if (bn >= SA_MAX_NUM_CTX) {
+               spin_unlock(&data->scid_lock);
+               dev_err(&data->pdev->dev, "No free security context ID\n");
+               return -ENOSPC;
+       }
+       __set_bit(bn, data->ctx_bm);
+       data->sc_id++;
+       spin_unlock(&data->scid_lock);
+
+       ctx->sc_id = (u16)(data->sc_id_start + bn);
+
+       ctx->sc = dma_pool_alloc(data->sc_pool, GFP_KERNEL, &ctx->sc_phys);
+       if (!ctx->sc) {
+               dev_err(&data->pdev->dev, "Failed to allocate SC memory\n");
+               err = -ENOMEM;
+               goto scid_rollback;
+       }
+
+       return 0;
+
+scid_rollback:
+       /* Undo the ID reservation made above */
+       spin_lock(&data->scid_lock);
+       __clear_bit(bn, data->ctx_bm);
+       data->sc_id--;
+       spin_unlock(&data->scid_lock);
+
+       return err;
+}
+
+/*
+ * skcipher exit hook: release the encryption and decryption contexts and
+ * free the software fallback cipher.
+ */
+static void sa_cipher_cra_exit(struct crypto_skcipher *tfm)
+{
+       struct sa_crypto_data *data = dev_get_drvdata(sa_k3_dev);
+       struct sa_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct sa_ctx_info *enc_info = &ctx->enc;
+       struct sa_ctx_info *dec_info = &ctx->dec;
+
+       dev_dbg(sa_k3_dev, "%s(0x%p) sc-ids(0x%x(0x%pad), 0x%x(0x%pad))\n",
+               __func__, tfm, enc_info->sc_id, &enc_info->sc_phys,
+               dec_info->sc_id, &dec_info->sc_phys);
+
+       sa_free_ctx_info(enc_info, data);
+       sa_free_ctx_info(dec_info, data);
+
+       crypto_free_sync_skcipher(ctx->fallback.skcipher);
+}
+
+/*
+ * skcipher init hook: clear the transform context, set up the encryption
+ * and decryption contexts and allocate the software fallback cipher.
+ *
+ * Returns 0 on success or a negative error code. On failure everything
+ * that was set up so far is released again, so nothing is leaked when the
+ * fallback cipher cannot be allocated.
+ */
+static int sa_cipher_cra_init(struct crypto_skcipher *tfm)
+{
+       struct sa_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct sa_crypto_data *data = dev_get_drvdata(sa_k3_dev);
+       const char *name = crypto_tfm_alg_name(&tfm->base);
+       int ret;
+
+       memzero_explicit(ctx, sizeof(*ctx));
+       ctx->dev_data = data;
+
+       ret = sa_init_ctx_info(&ctx->enc, data);
+       if (ret)
+               return ret;
+       ret = sa_init_ctx_info(&ctx->dec, data);
+       if (ret) {
+               sa_free_ctx_info(&ctx->enc, data);
+               return ret;
+       }
+
+       ctx->fallback.skcipher =
+               crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+
+       if (IS_ERR(ctx->fallback.skcipher)) {
+               ret = PTR_ERR(ctx->fallback.skcipher);
+               dev_err(sa_k3_dev, "Error allocating fallback algo %s\n", name);
+               /* Release both contexts, in reverse order of their setup */
+               sa_free_ctx_info(&ctx->dec, data);
+               sa_free_ctx_info(&ctx->enc, data);
+               return ret;
+       }
+
+       dev_dbg(sa_k3_dev, "%s(0x%p) sc-ids(0x%x(0x%pad), 0x%x(0x%pad))\n",
+               __func__, tfm, ctx->enc.sc_id, &ctx->enc.sc_phys,
+               ctx->dec.sc_id, &ctx->dec.sc_phys);
+       return 0;
+}
+
+/*
+ * Common setkey helper for the skcipher algorithms.
+ *
+ * Accepts 16, 24 and 32 byte keys only. The key is passed on to the
+ * fallback cipher, then the security context and command label template
+ * are built for the encryption and for the decryption direction.
+ *
+ * Returns 0 on success, the fallback setkey error, or -EINVAL.
+ */
+static int sa_cipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                           unsigned int keylen, struct algo_data *ad)
+{
+       struct sa_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct sa_cmdl_cfg cfg;
+       int cmdl_len;
+       int ret;
+
+       switch (keylen) {
+       case AES_KEYSIZE_128:
+       case AES_KEYSIZE_192:
+       case AES_KEYSIZE_256:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       ad->enc_eng.eng_id = SA_ENG_ID_EM1;
+       ad->enc_eng.sc_size = SA_CTX_ENC_TYPE1_SZ;
+
+       memzero_explicit(&cfg, sizeof(cfg));
+       cfg.enc_eng_id = ad->enc_eng.eng_id;
+       cfg.iv_size = crypto_skcipher_ivsize(tfm);
+
+       /* Mirror the request flags of this tfm onto the fallback cipher */
+       crypto_sync_skcipher_clear_flags(ctx->fallback.skcipher,
+                                        CRYPTO_TFM_REQ_MASK);
+       crypto_sync_skcipher_set_flags(ctx->fallback.skcipher,
+                                      tfm->base.crt_flags &
+                                      CRYPTO_TFM_REQ_MASK);
+       ret = crypto_sync_skcipher_setkey(ctx->fallback.skcipher, key, keylen);
+       if (ret)
+               return ret;
+
+       /* Setup Encryption Security Context & Command label template */
+       if (sa_init_sc(&ctx->enc, key, keylen, NULL, 0, ad, 1,
+                      &ctx->enc.epib[1]))
+               goto badkey;
+
+       cmdl_len = sa_format_cmdl_gen(&cfg, (u8 *)ctx->enc.cmdl,
+                                     &ctx->enc.cmdl_upd_info);
+       if (cmdl_len <= 0 || (cmdl_len > SA_MAX_CMDL_WORDS * sizeof(u32)))
+               goto badkey;
+
+       ctx->enc.cmdl_size = cmdl_len;
+
+       /* Setup Decryption Security Context & Command label template */
+       if (sa_init_sc(&ctx->dec, key, keylen, NULL, 0, ad, 0,
+                      &ctx->dec.epib[1]))
+               goto badkey;
+
+       cfg.enc_eng_id = ad->enc_eng.eng_id;
+       cmdl_len = sa_format_cmdl_gen(&cfg, (u8 *)ctx->dec.cmdl,
+                                     &ctx->dec.cmdl_upd_info);
+       if (cmdl_len <= 0 || (cmdl_len > SA_MAX_CMDL_WORDS * sizeof(u32)))
+               goto badkey;
+
+       ctx->dec.cmdl_size = cmdl_len;
+       ctx->iv_idx = ad->iv_idx;
+
+       return 0;
+
+badkey:
+       dev_err(sa_k3_dev, "%s: badkey\n", __func__);
+       return -EINVAL;
+}
+
+/*
+ * setkey handler for AES-CBC: selects the mode control instructions for
+ * the key length and hands over to sa_cipher_setkey().
+ *
+ * Returns -EINVAL for key lengths that do not map to a table row.
+ */
+static int sa_aes_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                            unsigned int keylen)
+{
+       struct algo_data ad = { 0 };
+       /* Convert the key size (16/24/32) to the key size index (0/1/2) */
+       int key_idx = (keylen >> 3) - 2;
+
+       /* Keys shorter than 16 bytes yield a negative index */
+       if (key_idx < 0 || key_idx >= 3)
+               return -EINVAL;
+
+       ad.mci_enc = mci_cbc_enc_array[key_idx];
+       ad.mci_dec = mci_cbc_dec_array[key_idx];
+       ad.inv_key = true;
+       ad.ealg_id = SA_EALG_ID_AES_CBC;
+       ad.iv_idx = 4;
+       ad.iv_out_size = 16;
+
+       return sa_cipher_setkey(tfm, key, keylen, &ad);
+}
+
+/*
+ * setkey handler for AES-ECB: selects the mode control instructions for
+ * the key length and hands over to sa_cipher_setkey().
+ *
+ * Returns -EINVAL for key lengths that do not map to a table row.
+ */
+static int sa_aes_ecb_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                            unsigned int keylen)
+{
+       struct algo_data ad = { 0 };
+       /* Convert the key size (16/24/32) to the key size index (0/1/2) */
+       int key_idx = (keylen >> 3) - 2;
+
+       /* Keys shorter than 16 bytes yield a negative index */
+       if (key_idx < 0 || key_idx >= 3)
+               return -EINVAL;
+
+       ad.mci_enc = mci_ecb_enc_array[key_idx];
+       ad.mci_dec = mci_ecb_dec_array[key_idx];
+       ad.inv_key = true;
+       ad.ealg_id = SA_EALG_ID_AES_ECB;
+
+       return sa_cipher_setkey(tfm, key, keylen, &ad);
+}
+
+/*
+ * setkey handler for 3DES-CBC: describes the algorithm (mode control
+ * instructions, algorithm id, IV index and IV output size) and hands over
+ * to sa_cipher_setkey().
+ */
+static int sa_3des_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                             unsigned int keylen)
+{
+       struct algo_data ad = {
+               .mci_enc = mci_cbc_3des_enc_array,
+               .mci_dec = mci_cbc_3des_dec_array,
+               .ealg_id = SA_EALG_ID_3DES_CBC,
+               .iv_idx = 6,
+               .iv_out_size = 8,
+       };
+
+       return sa_cipher_setkey(tfm, key, keylen, &ad);
+}
+
+/*
+ * setkey handler for 3DES-ECB: selects the ECB mode control instructions
+ * and hands over to sa_cipher_setkey().
+ */
+static int sa_3des_ecb_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                             unsigned int keylen)
+{
+       struct algo_data ad = {
+               .mci_enc = mci_ecb_3des_enc_array,
+               .mci_dec = mci_ecb_3des_dec_array,
+       };
+
+       return sa_cipher_setkey(tfm, key, keylen, &ad);
+}
+
+/*
+ * DMA completion callback for skcipher requests.
+ *
+ * Copies the output IV from the descriptor metadata back into the request
+ * (when the request has an IV), unmaps the source and, if different, the
+ * destination scatterlists, frees the per-request bookkeeping and finally
+ * completes the request with status 0.
+ */
+static void sa_aes_dma_in_callback(void *data)
+{
+       struct sa_rx_data *rxd = data;
+       struct skcipher_request *req;
+       enum dma_data_direction dir_src;
+       size_t ml, pl;
+       bool diff_dst;
+       int nents;
+       int w;
+
+       req = container_of(rxd->req, struct skcipher_request, base);
+
+       diff_dst = req->src != req->dst;
+       dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+
+       if (req->iv) {
+               u32 *iv_out = (u32 *)req->iv;
+               __be32 *mdptr;
+
+               mdptr = (__be32 *)dmaengine_desc_get_metadata_ptr(rxd->tx_in,
+                                                                  &pl, &ml);
+
+               /* The IV starts iv_idx words into the metadata */
+               for (w = 0; w < (rxd->enc_iv_size / 4); w++)
+                       iv_out[w] = be32_to_cpu(mdptr[w + rxd->iv_idx]);
+       }
+
+       nents = sg_nents_for_len(req->src, req->cryptlen);
+       dma_unmap_sg(rxd->ddev, req->src, nents, dir_src);
+       kfree(rxd->split_src_sg);
+
+       if (diff_dst) {
+               nents = sg_nents_for_len(req->dst, req->cryptlen);
+
+               dma_unmap_sg(rxd->ddev, req->dst, nents,
+                            DMA_FROM_DEVICE);
+               kfree(rxd->split_dst_sg);
+       }
+
+       kfree(rxd);
+
+       skcipher_request_complete(req, 0);
+}
+
+/*
+ * Fill the descriptor metadata at @mdptr: the EPIB words come first, word
+ * 4 is then set to 0xFFFF << 16 and the PS data words follow from word 5.
+ * @epiblen and @pslen are byte counts.
+ */
+static void
+sa_prepare_tx_desc(u32 *mdptr, u32 pslen, u32 *psdata, u32 epiblen, u32 *epib)
+{
+       const size_t epib_words = epiblen / sizeof(u32);
+       const size_t ps_words = pslen / sizeof(u32);
+       size_t w;
+
+       for (w = 0; w < epib_words; w++)
+               mdptr[w] = epib[w];
+
+       mdptr[4] = (0xFFFF << 16);
+
+       for (w = 0; w < ps_words; w++)
+               mdptr[5 + w] = psdata[w];
+}
+
+/*
+ * sa_run() - map a request and queue it on the SA2UL DMA channels
+ * @req: request descriptor filled in by the skcipher/ahash/AEAD entry points
+ *
+ * Copies the context's command label template, patches it for this request,
+ * maps the source (and, when it differs, the destination) scatterlist,
+ * prepares one RX and one TX slave descriptor, attaches the command label as
+ * descriptor metadata and submits both descriptors.
+ *
+ * Return: -EINPROGRESS once both descriptors have been submitted (completion
+ * is signalled through req->callback), -ENOMEM if the per-request bookkeeping
+ * cannot be allocated, -EIO if the source could not be mapped as expected, or
+ * -EINVAL if a descriptor could not be prepared.
+ */
+static int sa_run(struct sa_req *req)
+{
+       struct sa_rx_data *rxd;
+       gfp_t gfp_flags;
+       u32 cmdl[SA_MAX_CMDL_WORDS];
+       struct sa_crypto_data *pdata = dev_get_drvdata(sa_k3_dev);
+       struct device *ddev;
+       struct dma_chan *dma_rx;
+       int sg_nents, src_nents, dst_nents;
+       int mapped_src_nents, mapped_dst_nents;
+       struct scatterlist *src, *dst;
+       size_t pl, ml, split_size;
+       struct sa_ctx_info *sa_ctx = req->enc ? &req->ctx->enc : &req->ctx->dec;
+       int ret;
+       struct dma_async_tx_descriptor *tx_out;
+       u32 *mdptr;
+       bool diff_dst;
+       enum dma_data_direction dir_src;
+
+       gfp_flags = req->base->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+               GFP_KERNEL : GFP_ATOMIC;
+
+       rxd = kzalloc(sizeof(*rxd), gfp_flags);
+       if (!rxd)
+               return -ENOMEM;
+
+       /* In-place requests share one mapping that is both read and written */
+       if (req->src != req->dst) {
+               diff_dst = true;
+               dir_src = DMA_TO_DEVICE;
+       } else {
+               diff_dst = false;
+               dir_src = DMA_BIDIRECTIONAL;
+       }
+
+       /*
+        * SA2UL has an interesting feature where the receive DMA channel
+        * is selected based on the data passed to the engine. Within the
+        * transition range, there is also a space where it is impossible
+        * to determine where the data will end up, and this should be
+        * avoided. This will be handled by the SW fallback mechanism by
+        * the individual algorithm implementations.
+        */
+       if (req->size >= 256)
+               dma_rx = pdata->dma_rx2;
+       else
+               dma_rx = pdata->dma_rx1;
+
+       ddev = dma_rx->device->dev;
+
+       memcpy(cmdl, sa_ctx->cmdl, sa_ctx->cmdl_size);
+
+       sa_update_cmdl(req, cmdl, &sa_ctx->cmdl_upd_info);
+
+       if (req->type != CRYPTO_ALG_TYPE_AHASH) {
+               if (req->enc)
+                       req->type |=
+                               (SA_REQ_SUBTYPE_ENC << SA_REQ_SUBTYPE_SHIFT);
+               else
+                       req->type |=
+                               (SA_REQ_SUBTYPE_DEC << SA_REQ_SUBTYPE_SHIFT);
+       }
+
+       /* The request type word is stored right after the command label */
+       cmdl[sa_ctx->cmdl_size / sizeof(u32)] = req->type;
+
+       /*
+        * Map the packets, first we check if the data fits into a single
+        * sg entry and use that if possible. If it does not fit, we check
+        * if we need to do sg_split to align the scatterlist data on the
+        * actual data size being processed by the crypto engine.
+        */
+       src = req->src;
+       sg_nents = sg_nents_for_len(src, req->size);
+
+       split_size = req->size;
+
+       if (sg_nents == 1 && split_size <= req->src->length) {
+               src = &rxd->rx_sg;
+               sg_init_table(src, 1);
+               sg_set_page(src, sg_page(req->src), split_size,
+                           req->src->offset);
+               src_nents = 1;
+               dma_map_sg(ddev, src, sg_nents, dir_src);
+       } else {
+               mapped_src_nents = dma_map_sg(ddev, req->src, sg_nents,
+                                             dir_src);
+               ret = sg_split(req->src, mapped_src_nents, 0, 1, &split_size,
+                              &src, &src_nents, gfp_flags);
+               if (ret) {
+                       /* Splitting failed: fall back to the caller's list */
+                       src_nents = sg_nents;
+                       src = req->src;
+               } else {
+                       rxd->split_src_sg = src;
+               }
+       }
+
+       if (!diff_dst) {
+               dst_nents = src_nents;
+               dst = src;
+       } else {
+               dst_nents = sg_nents_for_len(req->dst, req->size);
+
+               if (dst_nents == 1 && split_size <= req->dst->length) {
+                       dst = &rxd->tx_sg;
+                       sg_init_table(dst, 1);
+                       sg_set_page(dst, sg_page(req->dst), split_size,
+                                   req->dst->offset);
+                       dst_nents = 1;
+                       dma_map_sg(ddev, dst, dst_nents, DMA_FROM_DEVICE);
+               } else {
+                       mapped_dst_nents = dma_map_sg(ddev, req->dst, dst_nents,
+                                                     DMA_FROM_DEVICE);
+                       ret = sg_split(req->dst, mapped_dst_nents, 0, 1,
+                                      &split_size, &dst, &dst_nents,
+                                      gfp_flags);
+                       /*
+                        * On failure keep dst_nents as computed above and use
+                        * the caller's list unmodified.
+                        */
+                       if (ret)
+                               dst = req->dst;
+                       else
+                               rxd->split_dst_sg = dst;
+               }
+       }
+
+       if (unlikely(src_nents != sg_nents)) {
+               dev_warn_ratelimited(sa_k3_dev, "failed to map tx pkt\n");
+               ret = -EIO;
+               goto err_cleanup;
+       }
+
+       rxd->tx_in = dmaengine_prep_slave_sg(dma_rx, dst, dst_nents,
+                                            DMA_DEV_TO_MEM,
+                                            DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       if (!rxd->tx_in) {
+               dev_err(pdata->dev, "IN prep_slave_sg() failed\n");
+               ret = -EINVAL;
+               goto err_cleanup;
+       }
+
+       rxd->req = (void *)req->base;
+       rxd->enc = req->enc;
+       rxd->ddev = ddev;
+       rxd->src = src;
+       rxd->dst = dst;
+       rxd->iv_idx = req->ctx->iv_idx;
+       rxd->enc_iv_size = sa_ctx->cmdl_upd_info.enc_iv.size;
+       rxd->tx_in->callback = req->callback;
+       rxd->tx_in->callback_param = rxd;
+
+       tx_out = dmaengine_prep_slave_sg(pdata->dma_tx, src,
+                                        src_nents, DMA_MEM_TO_DEV,
+                                        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+       if (!tx_out) {
+               dev_err(pdata->dev, "OUT prep_slave_sg() failed\n");
+               ret = -EINVAL;
+               goto err_cleanup;
+       }
+
+       /*
+        * Prepare metadata for DMA engine. This essentially describes the
+        * crypto algorithm to be used, data sizes, different keys etc.
+        */
+       mdptr = (u32 *)dmaengine_desc_get_metadata_ptr(tx_out, &pl, &ml);
+
+       sa_prepare_tx_desc(mdptr, (sa_ctx->cmdl_size + (SA_PSDATA_CTX_WORDS *
+                                  sizeof(u32))), cmdl, sizeof(sa_ctx->epib),
+                          sa_ctx->epib);
+
+       dmaengine_desc_set_metadata_len(tx_out, req->mdata_size);
+
+       dmaengine_submit(tx_out);
+       dmaengine_submit(rxd->tx_in);
+
+       dma_async_issue_pending(dma_rx);
+       dma_async_issue_pending(pdata->dma_tx);
+
+       return -EINPROGRESS;
+
+err_cleanup:
+       /* Unmap with the same direction that was used for the mapping */
+       dma_unmap_sg(ddev, req->src, sg_nents, dir_src);
+       kfree(rxd->split_src_sg);
+
+       if (req->src != req->dst) {
+               dst_nents = sg_nents_for_len(req->dst, req->size);
+               dma_unmap_sg(ddev, req->dst, dst_nents, DMA_FROM_DEVICE);
+               kfree(rxd->split_dst_sg);
+       }
+
+       kfree(rxd);
+
+       return ret;
+}
+
+/*
+ * Common skcipher path shared by sa_encrypt() and sa_decrypt().
+ *
+ * Empty requests complete immediately with 0 and lengths that are not a
+ * multiple of the algorithm block size are rejected with -EINVAL. Lengths
+ * above SA_MAX_DATA_SZ or inside the SA_UNSAFE_DATA_SZ_MIN..MAX window are
+ * processed synchronously by the fallback skcipher; everything else is
+ * handed to sa_run().
+ */
+static int sa_cipher_run(struct skcipher_request *req, u8 *iv, int enc)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct sa_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_alg *alg = req->base.tfm->__crt_alg;
+       struct sa_req sa_req = {
+               .size           = req->cryptlen,
+               .enc_size       = req->cryptlen,
+               .src            = req->src,
+               .dst            = req->dst,
+               .enc_iv         = iv,
+               .type           = CRYPTO_ALG_TYPE_SKCIPHER,
+               .enc            = enc,
+               .callback       = sa_aes_dma_in_callback,
+               .mdata_size     = 44,
+               .base           = &req->base,
+               .ctx            = ctx,
+       };
+       bool in_unsafe_window;
+       int ret;
+
+       if (req->cryptlen == 0)
+               return 0;
+
+       if (req->cryptlen % alg->cra_blocksize != 0)
+               return -EINVAL;
+
+       in_unsafe_window = req->cryptlen >= SA_UNSAFE_DATA_SZ_MIN &&
+                          req->cryptlen <= SA_UNSAFE_DATA_SZ_MAX;
+
+       /* Use SW fallback if the data size is not supported */
+       if (req->cryptlen > SA_MAX_DATA_SZ || in_unsafe_window) {
+               SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback.skcipher);
+
+               skcipher_request_set_sync_tfm(subreq, ctx->fallback.skcipher);
+               skcipher_request_set_callback(subreq, req->base.flags,
+                                             NULL, NULL);
+               skcipher_request_set_crypt(subreq, req->src, req->dst,
+                                          req->cryptlen, req->iv);
+
+               ret = enc ? crypto_skcipher_encrypt(subreq) :
+                           crypto_skcipher_decrypt(subreq);
+
+               /* Wipe the on-stack sub-request before it goes out of scope */
+               skcipher_request_zero(subreq);
+               return ret;
+       }
+
+       return sa_run(&sa_req);
+}
+
+/* skcipher .encrypt entry point: run the request with enc = 1 */
+static int sa_encrypt(struct skcipher_request *req)
+{
+       u8 *iv = req->iv;
+
+       return sa_cipher_run(req, iv, 1);
+}
+
+/* skcipher .decrypt entry point: run the request with enc = 0 */
+static int sa_decrypt(struct skcipher_request *req)
+{
+       u8 *iv = req->iv;
+
+       return sa_cipher_run(req, iv, 0);
+}
+
+/*
+ * RX DMA completion callback for hash requests.
+ * @data: the struct sa_rx_data that sa_run() attached to the RX descriptor
+ *
+ * Copies the digest out of the descriptor metadata (starting at word 4,
+ * converting each word from big endian), releases the DMA mapping and the
+ * per-request bookkeeping, then completes the ahash request with status 0.
+ */
+static void sa_sha_dma_in_callback(void *data)
+{
+       struct sa_rx_data *rxd = (struct sa_rx_data *)data;
+       struct ahash_request *req;
+       struct crypto_ahash *tfm;
+       unsigned int authsize;
+       int i, sg_nents;
+       size_t ml, pl;
+       u32 *result;
+       __be32 *mdptr;
+
+       req = container_of(rxd->req, struct ahash_request, base);
+       tfm = crypto_ahash_reqtfm(req);
+       authsize = crypto_ahash_digestsize(tfm);
+
+       mdptr = (__be32 *)dmaengine_desc_get_metadata_ptr(rxd->tx_in, &pl, &ml);
+       result = (u32 *)req->result;
+
+       for (i = 0; i < (authsize / 4); i++)
+               result[i] = be32_to_cpu(mdptr[i + 4]);
+
+       sg_nents = sg_nents_for_len(req->src, req->nbytes);
+       /*
+        * sa_sha_run() submits the request with dst == src, so sa_run()
+        * mapped the list DMA_BIDIRECTIONAL; unmap with the same direction.
+        */
+       dma_unmap_sg(rxd->ddev, req->src, sg_nents, DMA_BIDIRECTIONAL);
+
+       kfree(rxd->split_src_sg);
+
+       kfree(rxd);
+
+       ahash_request_complete(req, 0);
+}
+
+/*
+ * Produce the digest of an empty message without involving the hardware.
+ *
+ * The precomputed zero-message hash is selected by the transform's digest
+ * size and copied into req->result. Returns 0 on success or -EINVAL when the
+ * digest size is not one of SHA1/SHA256/SHA512.
+ */
+static int zero_message_process(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       int sa_digest_size = crypto_ahash_digestsize(tfm);
+       const void *zero_hash;
+
+       if (sa_digest_size == SHA1_DIGEST_SIZE)
+               zero_hash = sha1_zero_message_hash;
+       else if (sa_digest_size == SHA256_DIGEST_SIZE)
+               zero_hash = sha256_zero_message_hash;
+       else if (sa_digest_size == SHA512_DIGEST_SIZE)
+               zero_hash = sha512_zero_message_hash;
+       else
+               return -EINVAL;
+
+       memcpy(req->result, zero_hash, sa_digest_size);
+
+       return 0;
+}
+
+/*
+ * One-shot digest of req->src into req->result.
+ *
+ * Empty messages are answered from the precomputed zero-message hashes.
+ * Lengths above SA_MAX_DATA_SZ or inside the SA_UNSAFE_DATA_SZ_MIN..MAX
+ * window go through the fallback ahash (init, update, final); all other
+ * lengths are submitted to the hardware through sa_run().
+ *
+ * Return: 0 or a negative errno from the fallback or zero-message path,
+ * otherwise the return value of sa_run().
+ */
+static int sa_sha_run(struct ahash_request *req)
+{
+       struct sa_tfm_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+       struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
+       struct sa_req sa_req = { 0 };
+       size_t auth_len;
+
+       auth_len = req->nbytes;
+
+       if (!auth_len)
+               return zero_message_process(req);
+
+       if (auth_len > SA_MAX_DATA_SZ ||
+           (auth_len >= SA_UNSAFE_DATA_SZ_MIN &&
+            auth_len <= SA_UNSAFE_DATA_SZ_MAX)) {
+               struct ahash_request *subreq = &rctx->fallback_req;
+               int ret;
+
+               ahash_request_set_tfm(subreq, ctx->fallback.ahash);
+               subreq->base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+               /*
+                * Stop at the first failing step and report its errno as is;
+                * OR-ing several negative errnos together would yield a
+                * meaningless value.
+                * NOTE(review): no completion callback is set on subreq, so
+                * this presumably relies on a synchronous fallback - confirm.
+                */
+               ret = crypto_ahash_init(subreq);
+               if (ret)
+                       return ret;
+
+               subreq->nbytes = auth_len;
+               subreq->src = req->src;
+               subreq->result = req->result;
+
+               ret = crypto_ahash_update(subreq);
+               if (ret)
+                       return ret;
+
+               subreq->nbytes = 0;
+
+               return crypto_ahash_final(subreq);
+       }
+
+       sa_req.size = auth_len;
+       sa_req.auth_size = auth_len;
+       sa_req.src = req->src;
+       /* Hashing is submitted as an in-place request: dst is the source */
+       sa_req.dst = req->src;
+       sa_req.enc = true;
+       sa_req.type = CRYPTO_ALG_TYPE_AHASH;
+       sa_req.callback = sa_sha_dma_in_callback;
+       sa_req.mdata_size = 28;
+       sa_req.ctx = ctx;
+       sa_req.base = &req->base;
+
+       return sa_run(&sa_req);
+}
+
+/*
+ * Build the security context and command label template for a hash
+ * transform described by @ad, storing both in ctx->enc.
+ *
+ * Returns 0 on success. If the security context cannot be initialised or
+ * the generated command label length is not within
+ * (0, SA_MAX_CMDL_WORDS * sizeof(u32)], logs "badkey" and returns -EINVAL.
+ */
+static int sa_sha_setup(struct sa_tfm_ctx *ctx, struct  algo_data *ad)
+{
+       struct sa_cmdl_cfg cfg;
+       int len;
+
+       /* Clear one hash block worth of the stored authentication key */
+       memset(ctx->authkey, 0, crypto_shash_blocksize(ctx->shash));
+
+       ad->enc_eng.sc_size = SA_CTX_ENC_TYPE1_SZ;
+       ad->auth_eng.sc_size = SA_CTX_AUTH_TYPE2_SZ;
+       ad->auth_eng.eng_id = SA_ENG_ID_AM1;
+
+       memset(&cfg, 0, sizeof(cfg));
+       cfg.akey = NULL;
+       cfg.akey_len = 0;
+       cfg.iv_size = 0;
+       cfg.aalg = ad->aalg_id;
+       cfg.auth_eng_id = ad->auth_eng.eng_id;
+       cfg.enc_eng_id = ad->enc_eng.eng_id;
+
+       /* Setup Encryption Security Context & Command label template */
+       if (sa_init_sc(&ctx->enc, NULL, 0, NULL, 0, ad, 0,
+                      &ctx->enc.epib[1]))
+               goto badkey;
+
+       len = sa_format_cmdl_gen(&cfg, (u8 *)ctx->enc.cmdl,
+                                &ctx->enc.cmdl_upd_info);
+       if (len <= 0 || (len > SA_MAX_CMDL_WORDS * sizeof(u32)))
+               goto badkey;
+
+       ctx->enc.cmdl_size = len;
+
+       return 0;
+
+badkey:
+       dev_err(sa_k3_dev, "%s: badkey\n", __func__);
+       return -EINVAL;
+}
+
+/*
+ * Common ahash transform initialisation.
+ * @tfm:      transform being initialised
+ * @alg_base: name of the software algorithm used both as the shash helper
+ *            and as the ahash fallback
+ *
+ * Zeroes the transform context, allocates the hardware security context for
+ * ctx->enc, allocates the shash and fallback ahash and sets the request
+ * context size so that it can hold the fallback request.
+ *
+ * Return: 0 on success or a negative errno. On failure everything acquired
+ * here is released again, so callers must check the return value and must
+ * not use the context after an error.
+ */
+static int sa_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
+{
+       struct sa_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct sa_crypto_data *data = dev_get_drvdata(sa_k3_dev);
+       int ret;
+
+       memset(ctx, 0, sizeof(*ctx));
+       ctx->dev_data = data;
+       ret = sa_init_ctx_info(&ctx->enc, data);
+       if (ret)
+               return ret;
+
+       if (alg_base) {
+               ctx->shash = crypto_alloc_shash(alg_base, 0,
+                                               CRYPTO_ALG_NEED_FALLBACK);
+               if (IS_ERR(ctx->shash)) {
+                       dev_err(sa_k3_dev, "base driver %s couldn't be loaded\n",
+                               alg_base);
+                       ret = PTR_ERR(ctx->shash);
+                       goto err_free_ctx_info;
+               }
+               /* for fallback */
+               ctx->fallback.ahash =
+                       crypto_alloc_ahash(alg_base, 0,
+                                          CRYPTO_ALG_NEED_FALLBACK);
+               if (IS_ERR(ctx->fallback.ahash)) {
+                       dev_err(ctx->dev_data->dev,
+                               "Could not load fallback driver\n");
+                       ret = PTR_ERR(ctx->fallback.ahash);
+                       goto err_free_shash;
+               }
+       }
+
+       dev_dbg(sa_k3_dev, "%s(0x%p) sc-ids(0x%x(0x%pad), 0x%x(0x%pad))\n",
+               __func__, tfm, ctx->enc.sc_id, &ctx->enc.sc_phys,
+               ctx->dec.sc_id, &ctx->dec.sc_phys);
+
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct sa_sha_req_ctx) +
+                                crypto_ahash_reqsize(ctx->fallback.ahash));
+
+       return 0;
+
+err_free_shash:
+       crypto_free_shash(ctx->shash);
+err_free_ctx_info:
+       /* Do not leave freed or ERR_PTR values behind in the context */
+       ctx->shash = NULL;
+       ctx->fallback.ahash = NULL;
+       sa_free_ctx_info(&ctx->enc, data);
+       return ret;
+}
+
+/* ahash .digest entry point: delegates the whole request to sa_sha_run() */
+static int sa_sha_digest(struct ahash_request *req)
+{
+       int ret = sa_sha_run(req);
+
+       return ret;
+}
+
+/*
+ * ahash .init entry point. Binds the embedded fallback request to the
+ * fallback transform, forwards the MAY_SLEEP flag and initialises it.
+ */
+static int sa_sha_init(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
+       struct ahash_request *subreq = &rctx->fallback_req;
+
+       dev_dbg(sa_k3_dev, "init: digest size: %d, rctx=%llx\n",
+               crypto_ahash_digestsize(tfm), (u64)rctx);
+
+       ahash_request_set_tfm(subreq, ctx->fallback.ahash);
+       subreq->base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       return crypto_ahash_init(subreq);
+}
+
+/*
+ * ahash .update entry point. Forwards the source list and length of this
+ * chunk to the fallback request and lets the fallback transform absorb it.
+ */
+static int sa_sha_update(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
+       struct ahash_request *subreq = &rctx->fallback_req;
+
+       ahash_request_set_tfm(subreq, ctx->fallback.ahash);
+       subreq->base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+       subreq->src = req->src;
+       subreq->nbytes = req->nbytes;
+
+       return crypto_ahash_update(subreq);
+}
+
+/*
+ * ahash .final entry point. Points the fallback request at the caller's
+ * result buffer and lets the fallback transform finalise the digest.
+ */
+static int sa_sha_final(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
+       struct ahash_request *subreq = &rctx->fallback_req;
+
+       ahash_request_set_tfm(subreq, ctx->fallback.ahash);
+       subreq->base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+       subreq->result = req->result;
+
+       return crypto_ahash_final(subreq);
+}
+
+/*
+ * ahash .finup entry point. Forwards the last chunk (source and length) and
+ * the result buffer to the fallback request and runs its finup operation.
+ */
+static int sa_sha_finup(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
+       struct ahash_request *subreq = &rctx->fallback_req;
+
+       ahash_request_set_tfm(subreq, ctx->fallback.ahash);
+       subreq->base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       subreq->src = req->src;
+       subreq->nbytes = req->nbytes;
+       subreq->result = req->result;
+
+       return crypto_ahash_finup(subreq);
+}
+
+/*
+ * ahash .import entry point. Restores a previously exported hash state from
+ * @in into the fallback request.
+ */
+static int sa_sha_import(struct ahash_request *req, const void *in)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
+       struct ahash_request *subreq = &rctx->fallback_req;
+
+       ahash_request_set_tfm(subreq, ctx->fallback.ahash);
+       subreq->base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       return crypto_ahash_import(subreq, in);
+}
+
+/*
+ * ahash .export entry point. Saves the fallback request's hash state
+ * into @out.
+ */
+static int sa_sha_export(struct ahash_request *req, void *out)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct sa_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sa_sha_req_ctx *rctx = ahash_request_ctx(req);
+
+       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback.ahash);
+       rctx->fallback_req.base.flags =
+               req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       return crypto_ahash_export(&rctx->fallback_req, out);
+}
+
+/*
+ * Transform init for SHA1: allocate the common ahash resources with "sha1"
+ * as base/fallback algorithm, then program the SHA1 security context.
+ *
+ * Return: 0 on success or the negative errno of the failing step. If the
+ * security context setup fails, the resources acquired by the common init
+ * are released again before returning.
+ */
+static int sa_sha1_cra_init(struct crypto_tfm *tfm)
+{
+       struct algo_data ad = { 0 };
+       struct sa_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+       int ret;
+
+       ret = sa_sha_cra_init_alg(tfm, "sha1");
+       if (ret)
+               return ret;
+
+       ad.aalg_id = SA_AALG_ID_SHA1;
+       ad.hash_size = SHA1_DIGEST_SIZE;
+       ad.auth_ctrl = SA_AUTH_SW_CTRL_SHA1;
+
+       ret = sa_sha_setup(ctx, &ad);
+       if (ret) {
+               /* Undo sa_sha_cra_init_alg(), mirroring sa_sha_cra_exit() */
+               crypto_free_ahash(ctx->fallback.ahash);
+               crypto_free_shash(ctx->shash);
+               sa_free_ctx_info(&ctx->enc, dev_get_drvdata(sa_k3_dev));
+       }
+
+       return ret;
+}
+
+/*
+ * Transform init for SHA256: allocate the common ahash resources with
+ * "sha256" as base/fallback algorithm, then program the SHA256 security
+ * context.
+ *
+ * Return: 0 on success or the negative errno of the failing step. If the
+ * security context setup fails, the resources acquired by the common init
+ * are released again before returning.
+ */
+static int sa_sha256_cra_init(struct crypto_tfm *tfm)
+{
+       struct algo_data ad = { 0 };
+       struct sa_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+       int ret;
+
+       ret = sa_sha_cra_init_alg(tfm, "sha256");
+       if (ret)
+               return ret;
+
+       ad.aalg_id = SA_AALG_ID_SHA2_256;
+       ad.hash_size = SHA256_DIGEST_SIZE;
+       ad.auth_ctrl = SA_AUTH_SW_CTRL_SHA256;
+
+       ret = sa_sha_setup(ctx, &ad);
+       if (ret) {
+               /* Undo sa_sha_cra_init_alg(), mirroring sa_sha_cra_exit() */
+               crypto_free_ahash(ctx->fallback.ahash);
+               crypto_free_shash(ctx->shash);
+               sa_free_ctx_info(&ctx->enc, dev_get_drvdata(sa_k3_dev));
+       }
+
+       return ret;
+}
+
+/*
+ * Transform init for SHA512: allocate the common ahash resources with
+ * "sha512" as base/fallback algorithm, then program the SHA512 security
+ * context.
+ *
+ * Return: 0 on success or the negative errno of the failing step. If the
+ * security context setup fails, the resources acquired by the common init
+ * are released again before returning.
+ */
+static int sa_sha512_cra_init(struct crypto_tfm *tfm)
+{
+       struct algo_data ad = { 0 };
+       struct sa_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+       int ret;
+
+       ret = sa_sha_cra_init_alg(tfm, "sha512");
+       if (ret)
+               return ret;
+
+       ad.aalg_id = SA_AALG_ID_SHA2_512;
+       ad.hash_size = SHA512_DIGEST_SIZE;
+       ad.auth_ctrl = SA_AUTH_SW_CTRL_SHA512;
+
+       ret = sa_sha_setup(ctx, &ad);
+       if (ret) {
+               /* Undo sa_sha_cra_init_alg(), mirroring sa_sha_cra_exit() */
+               crypto_free_ahash(ctx->fallback.ahash);
+               crypto_free_shash(ctx->shash);
+               sa_free_ctx_info(&ctx->enc, dev_get_drvdata(sa_k3_dev));
+       }
+
+       return ret;
+}
+
+/*
+ * Transform teardown for the hash algorithms: releases the ctx->enc security
+ * context (for ahash transforms) and frees the shash and fallback ahash.
+ */
+static void sa_sha_cra_exit(struct crypto_tfm *tfm)
+{
+       struct sa_crypto_data *pdata = dev_get_drvdata(sa_k3_dev);
+       struct sa_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+       bool is_ahash;
+
+       dev_dbg(sa_k3_dev, "%s(0x%p) sc-ids(0x%x(0x%pad), 0x%x(0x%pad))\n",
+               __func__, tfm, ctx->enc.sc_id, &ctx->enc.sc_phys,
+               ctx->dec.sc_id, &ctx->dec.sc_phys);
+
+       is_ahash = crypto_tfm_alg_type(tfm) == CRYPTO_ALG_TYPE_AHASH;
+       if (is_ahash)
+               sa_free_ctx_info(&ctx->enc, pdata);
+
+       crypto_free_shash(ctx->shash);
+       crypto_free_ahash(ctx->fallback.ahash);
+}
+
+/*
+ * RX DMA completion callback for AEAD requests.
+ *
+ * Byte-swaps the authentication tag words found at metadata word 4, releases
+ * the DMA mappings and split scatterlists, and then either appends the tag
+ * to the destination (encrypt) or compares it against the tag stored at the
+ * end of the source (decrypt), completing the request with -EBADMSG on a
+ * mismatch.
+ */
+static void sa_aead_dma_in_callback(void *data)
+{
+       struct sa_rx_data *rxd = (struct sa_rx_data *)data;
+       struct aead_request *req;
+       struct crypto_aead *tfm;
+       enum dma_data_direction dir_src;
+       u8 auth_tag[SA_MAX_AUTH_TAG_SZ];
+       unsigned int authsize, tag_off, i;
+       u32 *mdptr, *tag;
+       size_t pl, ml;
+       bool diff_dst;
+       u16 auth_len;
+       int nents;
+       int err = 0;
+
+       req = container_of(rxd->req, struct aead_request, base);
+       tfm = crypto_aead_reqtfm(req);
+       authsize = crypto_aead_authsize(tfm);
+       tag_off = req->assoclen + req->cryptlen;
+
+       diff_dst = req->src != req->dst;
+       if (diff_dst)
+               dir_src = DMA_TO_DEVICE;
+       else
+               dir_src = DMA_BIDIRECTIONAL;
+
+       mdptr = (u32 *)dmaengine_desc_get_metadata_ptr(rxd->tx_in, &pl, &ml);
+       tag = &mdptr[4];
+       for (i = 0; i < authsize / 4; i++)
+               tag[i] = swab32(tag[i]);
+
+       /* Authenticated length excludes the trailing tag when decrypting */
+       auth_len = req->assoclen + req->cryptlen;
+       if (!rxd->enc)
+               auth_len -= authsize;
+
+       nents = sg_nents_for_len(rxd->src, auth_len);
+       dma_unmap_sg(rxd->ddev, rxd->src, nents, dir_src);
+       kfree(rxd->split_src_sg);
+
+       if (diff_dst) {
+               nents = sg_nents_for_len(rxd->dst, auth_len);
+               dma_unmap_sg(rxd->ddev, rxd->dst, nents, DMA_FROM_DEVICE);
+               kfree(rxd->split_dst_sg);
+       }
+
+       if (rxd->enc) {
+               /* Append the computed tag right after the ciphertext */
+               scatterwalk_map_and_copy(tag, req->dst, tag_off, authsize, 1);
+       } else {
+               /* Fetch the received tag and compare it with the computed one */
+               tag_off -= authsize;
+               scatterwalk_map_and_copy(auth_tag, req->src, tag_off, authsize,
+                                        0);
+
+               if (memcmp(tag, auth_tag, authsize))
+                       err = -EBADMSG;
+       }
+
+       kfree(rxd);
+
+       aead_request_complete(req, err);
+}
+
+/*
+ * Common AEAD transform initialisation.
+ * @tfm:      transform being initialised
+ * @hash:     name of the shash algorithm to allocate
+ * @fallback: name of the AEAD algorithm used as software fallback
+ *
+ * Zeroes the context, allocates the shash and the fallback AEAD, sizes the
+ * request context so it can hold a fallback request, and allocates the
+ * encrypt and decrypt security contexts.
+ *
+ * Return: 0 on success or a negative errno; on failure every resource
+ * acquired so far is released again.
+ */
+static int sa_cra_init_aead(struct crypto_aead *tfm, const char *hash,
+                           const char *fallback)
+{
+       struct sa_tfm_ctx *ctx = crypto_aead_ctx(tfm);
+       struct sa_crypto_data *data = dev_get_drvdata(sa_k3_dev);
+       int ret;
+
+       memzero_explicit(ctx, sizeof(*ctx));
+
+       ctx->shash = crypto_alloc_shash(hash, 0, CRYPTO_ALG_NEED_FALLBACK);
+       if (IS_ERR(ctx->shash)) {
+               dev_err(sa_k3_dev, "base driver %s couldn't be loaded\n", hash);
+               return PTR_ERR(ctx->shash);
+       }
+
+       ctx->fallback.aead = crypto_alloc_aead(fallback, 0,
+                                              CRYPTO_ALG_NEED_FALLBACK);
+
+       if (IS_ERR(ctx->fallback.aead)) {
+               dev_err(sa_k3_dev, "fallback driver %s couldn't be loaded\n",
+                       fallback);
+               ret = PTR_ERR(ctx->fallback.aead);
+               goto err_free_shash;
+       }
+
+       crypto_aead_set_reqsize(tfm, sizeof(struct aead_request) +
+                               crypto_aead_reqsize(ctx->fallback.aead));
+
+       ret = sa_init_ctx_info(&ctx->enc, data);
+       if (ret)
+               goto err_free_fallback;
+
+       ret = sa_init_ctx_info(&ctx->dec, data);
+       if (ret)
+               goto err_free_enc;
+
+       dev_dbg(sa_k3_dev, "%s(0x%p) sc-ids(0x%x(0x%pad), 0x%x(0x%pad))\n",
+               __func__, tfm, ctx->enc.sc_id, &ctx->enc.sc_phys,
+               ctx->dec.sc_id, &ctx->dec.sc_phys);
+
+       return 0;
+
+       /* Unwind in reverse order of acquisition */
+err_free_enc:
+       sa_free_ctx_info(&ctx->enc, data);
+err_free_fallback:
+       crypto_free_aead(ctx->fallback.aead);
+err_free_shash:
+       crypto_free_shash(ctx->shash);
+       return ret;
+}
+
+/* AEAD .init hook for the SHA1 variant: "sha1" shash plus authenc fallback */
+static int sa_cra_init_aead_sha1(struct crypto_aead *tfm)
+{
+       const char *hash = "sha1";
+       const char *fallback = "authenc(hmac(sha1-ce),cbc(aes-ce))";
+
+       return sa_cra_init_aead(tfm, hash, fallback);
+}
+
+/* AEAD .init hook for the SHA256 variant: "sha256" shash plus authenc fallback */
+static int sa_cra_init_aead_sha256(struct crypto_aead *tfm)
+{
+       const char *hash = "sha256";
+       const char *fallback = "authenc(hmac(sha256-ce),cbc(aes-ce))";
+
+       return sa_cra_init_aead(tfm, hash, fallback);
+}
+
+/*
+ * AEAD transform teardown: frees the shash and fallback AEAD and releases
+ * both the encrypt and decrypt security contexts.
+ */
+static void sa_exit_tfm_aead(struct crypto_aead *tfm)
+{
+       struct sa_crypto_data *pdata = dev_get_drvdata(sa_k3_dev);
+       struct sa_tfm_ctx *ctx = crypto_aead_ctx(tfm);
+
+       crypto_free_shash(ctx->shash);
+       crypto_free_aead(ctx->fallback.aead);
+
+       sa_free_ctx_info(&ctx->enc, pdata);
+       sa_free_ctx_info(&ctx->dec, pdata);
+}
+
+/*
+ * AEAD algorithm configuration interface function.
+ * @authenc: AEAD transform being keyed
+ * @key:     authenc-encoded key blob (cipher key plus authentication key)
+ * @keylen:  length of @key in bytes
+ * @ad:      algorithm description pre-filled by the per-algorithm wrapper
+ *
+ * Splits the key blob, builds the encrypt and decrypt security contexts and
+ * command label templates, and finally keys the software fallback with the
+ * same blob.
+ *
+ * Return: 0 on success, -EINVAL if the key blob or cipher key size is not
+ * acceptable or a security context / command label cannot be built, or the
+ * error reported by the fallback's setkey.
+ */
+static int sa_aead_setkey(struct crypto_aead *authenc,
+                         const u8 *key, unsigned int keylen,
+                         struct algo_data *ad)
+{
+       struct sa_tfm_ctx *ctx = crypto_aead_ctx(authenc);
+       struct crypto_authenc_keys keys;
+       int cmdl_len;
+       struct sa_cmdl_cfg cfg;
+       int key_idx;
+
+       if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
+               return -EINVAL;
+
+       /* Convert the key size (16/24/32) to the key size index (0/1/2) */
+       key_idx = (keys.enckeylen >> 3) - 2;
+       /* Keys shorter than 16 bytes give a negative index: reject them too */
+       if (key_idx < 0 || key_idx >= 3)
+               return -EINVAL;
+
+       ad->ctx = ctx;
+       ad->enc_eng.eng_id = SA_ENG_ID_EM1;
+       ad->enc_eng.sc_size = SA_CTX_ENC_TYPE1_SZ;
+       ad->auth_eng.eng_id = SA_ENG_ID_AM1;
+       ad->auth_eng.sc_size = SA_CTX_AUTH_TYPE2_SZ;
+       ad->mci_enc = mci_cbc_enc_no_iv_array[key_idx];
+       ad->mci_dec = mci_cbc_dec_no_iv_array[key_idx];
+       ad->inv_key = true;
+       ad->keyed_mac = true;
+       ad->ealg_id = SA_EALG_ID_AES_CBC;
+       ad->prep_iopad = sa_prepare_iopads;
+
+       memset(&cfg, 0, sizeof(cfg));
+       cfg.enc = true;
+       cfg.aalg = ad->aalg_id;
+       cfg.enc_eng_id = ad->enc_eng.eng_id;
+       cfg.auth_eng_id = ad->auth_eng.eng_id;
+       cfg.iv_size = crypto_aead_ivsize(authenc);
+       cfg.akey = keys.authkey;
+       cfg.akey_len = keys.authkeylen;
+
+       /* Setup Encryption Security Context & Command label template */
+       if (sa_init_sc(&ctx->enc, keys.enckey, keys.enckeylen,
+                      keys.authkey, keys.authkeylen,
+                      ad, 1, &ctx->enc.epib[1]))
+               return -EINVAL;
+
+       cmdl_len = sa_format_cmdl_gen(&cfg,
+                                     (u8 *)ctx->enc.cmdl,
+                                     &ctx->enc.cmdl_upd_info);
+       if (cmdl_len <= 0 || (cmdl_len > SA_MAX_CMDL_WORDS * sizeof(u32)))
+               return -EINVAL;
+
+       ctx->enc.cmdl_size = cmdl_len;
+
+       /* Setup Decryption Security Context & Command label template */
+       if (sa_init_sc(&ctx->dec, keys.enckey, keys.enckeylen,
+                      keys.authkey, keys.authkeylen,
+                      ad, 0, &ctx->dec.epib[1]))
+               return -EINVAL;
+
+       cfg.enc = false;
+       cmdl_len = sa_format_cmdl_gen(&cfg, (u8 *)ctx->dec.cmdl,
+                                     &ctx->dec.cmdl_upd_info);
+
+       if (cmdl_len <= 0 || (cmdl_len > SA_MAX_CMDL_WORDS * sizeof(u32)))
+               return -EINVAL;
+
+       ctx->dec.cmdl_size = cmdl_len;
+
+       /* Mirror the request flags onto the fallback before keying it */
+       crypto_aead_clear_flags(ctx->fallback.aead, CRYPTO_TFM_REQ_MASK);
+       crypto_aead_set_flags(ctx->fallback.aead,
+                             crypto_aead_get_flags(authenc) &
+                             CRYPTO_TFM_REQ_MASK);
+
+       /* An unkeyed fallback would fail later, so report its error now */
+       return crypto_aead_setkey(ctx->fallback.aead, key, keylen);
+}
+
+/* AEAD .setauthsize hook: forwards the tag size to the fallback transform */
+static int sa_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+       struct sa_tfm_ctx *ctx = crypto_aead_ctx(tfm);
+       struct crypto_aead *fallback = ctx->fallback.aead;
+
+       return crypto_aead_setauthsize(fallback, authsize);
+}
+
+/* AEAD .setkey hook for AES-CBC with HMAC-SHA1 */
+static int sa_aead_cbc_sha1_setkey(struct crypto_aead *authenc,
+                                  const u8 *key, unsigned int keylen)
+{
+       struct algo_data ad = {
+               .ealg_id        = SA_EALG_ID_AES_CBC,
+               .aalg_id        = SA_AALG_ID_HMAC_SHA1,
+               .hash_size      = SHA1_DIGEST_SIZE,
+               .auth_ctrl      = SA_AUTH_SW_CTRL_SHA1,
+       };
+
+       return sa_aead_setkey(authenc, key, keylen, &ad);
+}
+
+/* AEAD .setkey hook for AES-CBC with HMAC-SHA256 */
+static int sa_aead_cbc_sha256_setkey(struct crypto_aead *authenc,
+                                    const u8 *key, unsigned int keylen)
+{
+       struct algo_data ad = {
+               .ealg_id        = SA_EALG_ID_AES_CBC,
+               .aalg_id        = SA_AALG_ID_HMAC_SHA2_256,
+               .hash_size      = SHA256_DIGEST_SIZE,
+               .auth_ctrl      = SA_AUTH_SW_CTRL_SHA256,
+       };
+
+       return sa_aead_setkey(authenc, key, keylen, &ad);
+}
+
+/*
+ * Common AEAD path shared by sa_aead_encrypt() and sa_aead_decrypt().
+ *
+ * Computes the encrypted and authenticated lengths (both reduced by the tag
+ * size when decrypting). Authenticated lengths above SA_MAX_DATA_SZ or
+ * inside the SA_UNSAFE_DATA_SZ_MIN..MAX window are passed to the fallback
+ * AEAD using the caller's completion callback; everything else is submitted
+ * through sa_run().
+ */
+static int sa_aead_run(struct aead_request *req, u8 *iv, int enc)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct sa_tfm_ctx *ctx = crypto_aead_ctx(tfm);
+       struct sa_req sa_req = { 0 };
+       size_t enc_size = req->cryptlen;
+       size_t auth_size = req->assoclen + req->cryptlen;
+       bool in_unsafe_window;
+
+       if (!enc) {
+               enc_size -= crypto_aead_authsize(tfm);
+               auth_size -= crypto_aead_authsize(tfm);
+       }
+
+       in_unsafe_window = auth_size >= SA_UNSAFE_DATA_SZ_MIN &&
+                          auth_size <= SA_UNSAFE_DATA_SZ_MAX;
+
+       if (auth_size > SA_MAX_DATA_SZ || in_unsafe_window) {
+               struct aead_request *subreq = aead_request_ctx(req);
+
+               aead_request_set_tfm(subreq, ctx->fallback.aead);
+               aead_request_set_callback(subreq, req->base.flags,
+                                         req->base.complete, req->base.data);
+               aead_request_set_crypt(subreq, req->src, req->dst,
+                                      req->cryptlen, req->iv);
+               aead_request_set_ad(subreq, req->assoclen);
+
+               if (enc)
+                       return crypto_aead_encrypt(subreq);
+
+               return crypto_aead_decrypt(subreq);
+       }
+
+       sa_req.base = &req->base;
+       sa_req.ctx = ctx;
+       sa_req.src = req->src;
+       sa_req.dst = req->dst;
+       sa_req.type = CRYPTO_ALG_TYPE_AEAD;
+       sa_req.enc = enc;
+       sa_req.enc_iv = iv;
+       /* The cipher payload starts right after the associated data */
+       sa_req.enc_offset = req->assoclen;
+       sa_req.enc_size = enc_size;
+       sa_req.auth_size = auth_size;
+       sa_req.size = auth_size;
+       sa_req.mdata_size = 52;
+       sa_req.callback = sa_aead_dma_in_callback;
+
+       return sa_run(&sa_req);
+}
+
+/* AEAD algorithm encrypt interface function: runs the request with enc = 1 */
+static int sa_aead_encrypt(struct aead_request *req)
+{
+       u8 *iv = req->iv;
+
+       return sa_aead_run(req, iv, 1);
+}
+
+/* AEAD algorithm decrypt interface function: runs the request with enc = 0 */
+static int sa_aead_decrypt(struct aead_request *req)
+{
+       u8 *iv = req->iv;
+
+       return sa_aead_run(req, iv, 0);
+}
+
+static struct sa_alg_tmpl sa_algs[] = {
+       {
+               .type = CRYPTO_ALG_TYPE_SKCIPHER,
+               .alg.skcipher = {
+                       .base.cra_name          = "cbc(aes)",
+                       .base.cra_driver_name   = "cbc-aes-sa2ul",
+                       .base.cra_priority      = 30000,
+                       .base.cra_flags         = CRYPTO_ALG_TYPE_SKCIPHER |
+                                                 CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_NEED_FALLBACK,
+                       .base.cra_blocksize     = AES_BLOCK_SIZE,
+                       .base.cra_ctxsize       = sizeof(struct sa_tfm_ctx),
+                       .base.cra_module        = THIS_MODULE,
+                       .init                   = sa_cipher_cra_init,
+                       .exit                   = sa_cipher_cra_exit,
+                       .min_keysize            = AES_MIN_KEY_SIZE,
+                       .max_keysize            = AES_MAX_KEY_SIZE,
+                       .ivsize                 = AES_BLOCK_SIZE,
+                       .setkey                 = sa_aes_cbc_setkey,
+                       .encrypt                = sa_encrypt,
+                       .decrypt                = sa_decrypt,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_SKCIPHER,
+               .alg.skcipher = {
+                       .base.cra_name          = "ecb(aes)",
+                       .base.cra_driver_name   = "ecb-aes-sa2ul",
+                       .base.cra_priority      = 30000,
+                       .base.cra_flags         = CRYPTO_ALG_TYPE_SKCIPHER |
+                                                 CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_NEED_FALLBACK,
+                       .base.cra_blocksize     = AES_BLOCK_SIZE,
+                       .base.cra_ctxsize       = sizeof(struct sa_tfm_ctx),
+                       .base.cra_module        = THIS_MODULE,
+                       .init                   = sa_cipher_cra_init,
+                       .exit                   = sa_cipher_cra_exit,
+                       .min_keysize            = AES_MIN_KEY_SIZE,
+                       .max_keysize            = AES_MAX_KEY_SIZE,
+                       .setkey                 = sa_aes_ecb_setkey,
+                       .encrypt                = sa_encrypt,
+                       .decrypt                = sa_decrypt,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_SKCIPHER,
+               .alg.skcipher = {
+                       .base.cra_name          = "cbc(des3_ede)",
+                       .base.cra_driver_name   = "cbc-des3-sa2ul",
+                       .base.cra_priority      = 30000,
+                       .base.cra_flags         = CRYPTO_ALG_TYPE_SKCIPHER |
+                                                 CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_NEED_FALLBACK,
+                       .base.cra_blocksize     = DES_BLOCK_SIZE,
+                       .base.cra_ctxsize       = sizeof(struct sa_tfm_ctx),
+                       .base.cra_module        = THIS_MODULE,
+                       .init                   = sa_cipher_cra_init,
+                       .exit                   = sa_cipher_cra_exit,
+                       .min_keysize            = 3 * DES_KEY_SIZE,
+                       .max_keysize            = 3 * DES_KEY_SIZE,
+                       .ivsize                 = DES_BLOCK_SIZE,
+                       .setkey                 = sa_3des_cbc_setkey,
+                       .encrypt                = sa_encrypt,
+                       .decrypt                = sa_decrypt,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_SKCIPHER,
+               .alg.skcipher = {
+                       .base.cra_name          = "ecb(des3_ede)",
+                       .base.cra_driver_name   = "ecb-des3-sa2ul",
+                       .base.cra_priority      = 30000,
+                       .base.cra_flags         = CRYPTO_ALG_TYPE_SKCIPHER |
+                                                 CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_NEED_FALLBACK,
+                       .base.cra_blocksize     = DES_BLOCK_SIZE,
+                       .base.cra_ctxsize       = sizeof(struct sa_tfm_ctx),
+                       .base.cra_module        = THIS_MODULE,
+                       .init                   = sa_cipher_cra_init,
+                       .exit                   = sa_cipher_cra_exit,
+                       .min_keysize            = 3 * DES_KEY_SIZE,
+                       .max_keysize            = 3 * DES_KEY_SIZE,
+                       .setkey                 = sa_3des_ecb_setkey,
+                       .encrypt                = sa_encrypt,
+                       .decrypt                = sa_decrypt,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.ahash = {
+                       .halg.base = {
+                               .cra_name       = "sha1",
+                               .cra_driver_name        = "sha1-sa2ul",
+                               .cra_priority   = 400,
+                               .cra_flags      = CRYPTO_ALG_TYPE_AHASH |
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                 CRYPTO_ALG_NEED_FALLBACK,
+                               .cra_blocksize  = SHA1_BLOCK_SIZE,
+                               .cra_ctxsize    = sizeof(struct sa_tfm_ctx),
+                               .cra_module     = THIS_MODULE,
+                               .cra_init       = sa_sha1_cra_init,
+                               .cra_exit       = sa_sha_cra_exit,
+                       },
+                       .halg.digestsize        = SHA1_DIGEST_SIZE,
+                       .halg.statesize         = sizeof(struct sa_sha_req_ctx) +
+                                                 sizeof(struct sha1_state),
+                       .init                   = sa_sha_init,
+                       .update                 = sa_sha_update,
+                       .final                  = sa_sha_final,
+                       .finup                  = sa_sha_finup,
+                       .digest                 = sa_sha_digest,
+                       .export                 = sa_sha_export,
+                       .import                 = sa_sha_import,
+               },
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.ahash = {
+                       .halg.base = {
+                               .cra_name       = "sha256",
+                               .cra_driver_name        = "sha256-sa2ul",
+                               .cra_priority   = 400,
+                               .cra_flags      = CRYPTO_ALG_TYPE_AHASH |
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                 CRYPTO_ALG_NEED_FALLBACK,
+                               .cra_blocksize  = SHA256_BLOCK_SIZE,
+                               .cra_ctxsize    = sizeof(struct sa_tfm_ctx),
+                               .cra_module     = THIS_MODULE,
+                               .cra_init       = sa_sha256_cra_init,
+                               .cra_exit       = sa_sha_cra_exit,
+                       },
+                       .halg.digestsize        = SHA256_DIGEST_SIZE,
+                       .halg.statesize         = sizeof(struct sa_sha_req_ctx) +
+                                                 sizeof(struct sha256_state),
+                       .init                   = sa_sha_init,
+                       .update                 = sa_sha_update,
+                       .final                  = sa_sha_final,
+                       .finup                  = sa_sha_finup,
+                       .digest                 = sa_sha_digest,
+                       .export                 = sa_sha_export,
+                       .import                 = sa_sha_import,
+               },
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.ahash = {
+                       .halg.base = {
+                               .cra_name       = "sha512",
+                               .cra_driver_name        = "sha512-sa2ul",
+                               .cra_priority   = 400,
+                               .cra_flags      = CRYPTO_ALG_TYPE_AHASH |
+                                                 CRYPTO_ALG_ASYNC |
+                                                 CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                 CRYPTO_ALG_NEED_FALLBACK,
+                               .cra_blocksize  = SHA512_BLOCK_SIZE,
+                               .cra_ctxsize    = sizeof(struct sa_tfm_ctx),
+                               .cra_module     = THIS_MODULE,
+                               .cra_init       = sa_sha512_cra_init,
+                               .cra_exit       = sa_sha_cra_exit,
+                       },
+                       .halg.digestsize        = SHA512_DIGEST_SIZE,
+                       .halg.statesize         = sizeof(struct sa_sha_req_ctx) +
+                                                 sizeof(struct sha512_state),
+                       .init                   = sa_sha_init,
+                       .update                 = sa_sha_update,
+                       .final                  = sa_sha_final,
+                       .finup                  = sa_sha_finup,
+                       .digest                 = sa_sha_digest,
+                       .export                 = sa_sha_export,
+                       .import                 = sa_sha_import,
+               },
+       },
+       {
+               .type   = CRYPTO_ALG_TYPE_AEAD,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "authenc(hmac(sha1),cbc(aes))",
+                               .cra_driver_name =
+                                       "authenc(hmac(sha1),cbc(aes))-sa2ul",
+                               .cra_blocksize = AES_BLOCK_SIZE,
+                               .cra_flags = CRYPTO_ALG_TYPE_AEAD |
+                                       CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                       CRYPTO_ALG_ASYNC |
+                                       CRYPTO_ALG_NEED_FALLBACK,
+                               .cra_ctxsize = sizeof(struct sa_tfm_ctx),
+                               .cra_module = THIS_MODULE,
+                               .cra_priority = 3000,
+                       },
+                       .ivsize = AES_BLOCK_SIZE,
+                       .maxauthsize = SHA1_DIGEST_SIZE,
+
+                       .init = sa_cra_init_aead_sha1,
+                       .exit = sa_exit_tfm_aead,
+                       .setkey = sa_aead_cbc_sha1_setkey,
+                       .setauthsize = sa_aead_setauthsize,
+                       .encrypt = sa_aead_encrypt,
+                       .decrypt = sa_aead_decrypt,
+               },
+       },
+       {
+               .type   = CRYPTO_ALG_TYPE_AEAD,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "authenc(hmac(sha256),cbc(aes))",
+                               .cra_driver_name =
+                                       "authenc(hmac(sha256),cbc(aes))-sa2ul",
+                               .cra_blocksize = AES_BLOCK_SIZE,
+                               .cra_flags = CRYPTO_ALG_TYPE_AEAD |
+                                       CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                       CRYPTO_ALG_ASYNC |
+                                       CRYPTO_ALG_NEED_FALLBACK,
+                               .cra_ctxsize = sizeof(struct sa_tfm_ctx),
+                               .cra_module = THIS_MODULE,
+                               .cra_alignmask = 0,
+                               .cra_priority = 3000,
+                       },
+                       .ivsize = AES_BLOCK_SIZE,
+                       .maxauthsize = SHA256_DIGEST_SIZE,
+
+                       .init = sa_cra_init_aead_sha256,
+                       .exit = sa_exit_tfm_aead,
+                       .setkey = sa_aead_cbc_sha256_setkey,
+                       .setauthsize = sa_aead_setauthsize,
+                       .encrypt = sa_aead_encrypt,
+                       .decrypt = sa_aead_decrypt,
+               },
+       },
+};
+
+/*
+ * Register every entry of sa_algs[] with the crypto framework.
+ *
+ * Registration is best-effort: a failure for one algorithm is logged and the
+ * loop moves on to the next entry. Only entries that registered successfully
+ * get ->registered set; sa_unregister_algos() relies on that flag.
+ */
+static void sa_register_algos(const struct device *dev)
+{
+       char *alg_name;
+       u32 type;
+       int i, err;
+
+       for (i = 0; i < ARRAY_SIZE(sa_algs); i++) {
+               type = sa_algs[i].type;
+               if (type == CRYPTO_ALG_TYPE_SKCIPHER) {
+                       alg_name = sa_algs[i].alg.skcipher.base.cra_name;
+                       err = crypto_register_skcipher(&sa_algs[i].alg.skcipher);
+               } else if (type == CRYPTO_ALG_TYPE_AHASH) {
+                       alg_name = sa_algs[i].alg.ahash.halg.base.cra_name;
+                       err = crypto_register_ahash(&sa_algs[i].alg.ahash);
+               } else if (type == CRYPTO_ALG_TYPE_AEAD) {
+                       alg_name = sa_algs[i].alg.aead.base.cra_name;
+                       err = crypto_register_aead(&sa_algs[i].alg.aead);
+               } else {
+                       /* type is a u32: print it unsigned, end the log line */
+                       dev_err(dev,
+                               "un-supported crypto algorithm (%u)\n",
+                               type);
+                       continue;
+               }
+
+               if (err)
+                       dev_err(dev, "Failed to register '%s'\n", alg_name);
+               else
+                       sa_algs[i].registered = true;
+       }
+}
+
+/*
+ * Drop from the crypto framework every sa_algs[] entry that is currently
+ * flagged as registered, and clear the flag afterwards.
+ */
+static void sa_unregister_algos(const struct device *dev)
+{
+       u32 kind;
+       int idx;
+
+       for (idx = 0; idx < ARRAY_SIZE(sa_algs); idx++) {
+               /* entries that never made it into the framework are skipped */
+               if (!sa_algs[idx].registered)
+                       continue;
+
+               kind = sa_algs[idx].type;
+               switch (kind) {
+               case CRYPTO_ALG_TYPE_SKCIPHER:
+                       crypto_unregister_skcipher(&sa_algs[idx].alg.skcipher);
+                       break;
+               case CRYPTO_ALG_TYPE_AHASH:
+                       crypto_unregister_ahash(&sa_algs[idx].alg.ahash);
+                       break;
+               case CRYPTO_ALG_TYPE_AEAD:
+                       crypto_unregister_aead(&sa_algs[idx].alg.aead);
+                       break;
+               default:
+                       /* no unregister call exists for other types */
+                       break;
+               }
+
+               sa_algs[idx].registered = false;
+       }
+}
+
+/*
+ * Create the DMA pool that backs the security context buffers.
+ *
+ * Pool elements are SA_CTX_MAX_SZ bytes each, 64-byte aligned.
+ * Returns 0 on success or -ENOMEM when the pool cannot be created.
+ */
+static int sa_init_mem(struct sa_crypto_data *dev_data)
+{
+       struct device *dev = &dev_data->pdev->dev;
+
+       /* Setup dma pool for security context buffers */
+       dev_data->sc_pool = dma_pool_create("keystone-sc", dev,
+                                           SA_CTX_MAX_SZ, 64, 0);
+       if (!dev_data->sc_pool) {
+               /* log lines must be newline terminated */
+               dev_err(dev, "Failed to create dma pool\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Request and configure the three DMA channels used by the accelerator
+ * ("rx1", "rx2" and "tx").
+ *
+ * Returns 0 on success or a negative errno. On every failure path the
+ * channels acquired so far are released again before returning, so the
+ * caller has no channel to clean up after an error.
+ */
+static int sa_dma_init(struct sa_crypto_data *dd)
+{
+       int ret;
+       struct dma_slave_config cfg;
+
+       dd->dma_rx1 = NULL;
+       dd->dma_tx = NULL;
+       dd->dma_rx2 = NULL;
+
+       ret = dma_coerce_mask_and_coherent(dd->dev, DMA_BIT_MASK(48));
+       if (ret)
+               return ret;
+
+       dd->dma_rx1 = dma_request_chan(dd->dev, "rx1");
+       if (IS_ERR(dd->dma_rx1)) {
+               /* -EPROBE_DEFER is not reported as an error in the log */
+               if (PTR_ERR(dd->dma_rx1) != -EPROBE_DEFER)
+                       dev_err(dd->dev, "Unable to request rx1 DMA channel\n");
+               return PTR_ERR(dd->dma_rx1);
+       }
+
+       dd->dma_rx2 = dma_request_chan(dd->dev, "rx2");
+       if (IS_ERR(dd->dma_rx2)) {
+               if (PTR_ERR(dd->dma_rx2) != -EPROBE_DEFER)
+                       dev_err(dd->dev, "Unable to request rx2 DMA channel\n");
+               ret = PTR_ERR(dd->dma_rx2);
+               goto err_dma_rx2;
+       }
+
+       dd->dma_tx = dma_request_chan(dd->dev, "tx");
+       if (IS_ERR(dd->dma_tx)) {
+               if (PTR_ERR(dd->dma_tx) != -EPROBE_DEFER)
+                       dev_err(dd->dev, "Unable to request tx DMA channel\n");
+               ret = PTR_ERR(dd->dma_tx);
+               goto err_dma_tx;
+       }
+
+       memzero_explicit(&cfg, sizeof(cfg));
+
+       /* the same slave configuration is applied to all three channels */
+       cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       cfg.src_maxburst = 4;
+       cfg.dst_maxburst = 4;
+
+       ret = dmaengine_slave_config(dd->dma_rx1, &cfg);
+       if (ret) {
+               dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
+                       ret);
+               goto err_dma_config;
+       }
+
+       ret = dmaengine_slave_config(dd->dma_rx2, &cfg);
+       if (ret) {
+               dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
+                       ret);
+               goto err_dma_config;
+       }
+
+       ret = dmaengine_slave_config(dd->dma_tx, &cfg);
+       if (ret) {
+               dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n",
+                       ret);
+               goto err_dma_config;
+       }
+
+       return 0;
+
+       /* unwind in reverse order of acquisition */
+err_dma_config:
+       dma_release_channel(dd->dma_tx);
+err_dma_tx:
+       dma_release_channel(dd->dma_rx2);
+err_dma_rx2:
+       dma_release_channel(dd->dma_rx1);
+
+       return ret;
+}
+
+/*
+ * Callback handed to device_for_each_child() by the probe routine: adds a
+ * device link between @dev and the device passed through @data, using
+ * DL_FLAG_AUTOPROBE_CONSUMER. The result of device_link_add() is not
+ * inspected; the callback always returns 0.
+ */
+static int sa_link_child(struct device *dev, void *data)
+{
+       device_link_add(dev, data, DL_FLAG_AUTOPROBE_CONSUMER);
+
+       return 0;
+}
+
+/*
+ * Probe one SA2UL instance.
+ *
+ * Allocates the driver data, powers the device up through runtime PM,
+ * creates the security context DMA pool, acquires the DMA channels, maps
+ * the register space, enables the engines, registers the crypto algorithms
+ * and finally populates and links the child devices.
+ *
+ * Returns 0 on success or a negative errno; every resource taken before the
+ * failing step is released again on the error paths.
+ */
+static int sa_ul_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *node = dev->of_node;
+       struct resource *res;
+       void __iomem *saul_base;
+       struct sa_crypto_data *dev_data;
+       u32 val;
+       int ret;
+
+       dev_data = devm_kzalloc(dev, sizeof(*dev_data), GFP_KERNEL);
+       if (!dev_data)
+               return -ENOMEM;
+
+       sa_k3_dev = dev;
+       dev_data->dev = dev;
+       dev_data->pdev = pdev;
+       platform_set_drvdata(pdev, dev_data);
+       dev_set_drvdata(sa_k3_dev, dev_data);
+
+       pm_runtime_enable(dev);
+       ret = pm_runtime_get_sync(dev);
+       /* a positive return means "already active" and is not an error */
+       if (ret < 0) {
+               dev_err(&pdev->dev, "%s: failed to get sync: %d\n", __func__,
+                       ret);
+               /* get_sync() raises the usage count even when it fails */
+               pm_runtime_put_noidle(dev);
+               pm_runtime_disable(dev);
+               return ret;
+       }
+
+       ret = sa_init_mem(dev_data);
+       if (ret)
+               goto disable_pm_runtime;
+
+       ret = sa_dma_init(dev_data);
+       if (ret)
+               goto destroy_dma_pool;
+
+       spin_lock_init(&dev_data->scid_lock);
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       saul_base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(saul_base)) {
+               /* never write through an ERR_PTR "mapping" */
+               ret = PTR_ERR(saul_base);
+               goto release_dma;
+       }
+
+       dev_data->base = saul_base;
+       val = SA_EEC_ENCSS_EN | SA_EEC_AUTHSS_EN | SA_EEC_CTXCACH_EN |
+           SA_EEC_CPPI_PORT_IN_EN | SA_EEC_CPPI_PORT_OUT_EN |
+           SA_EEC_TRNG_EN;
+
+       writel_relaxed(val, saul_base + SA_ENGINE_ENABLE_CONTROL);
+
+       sa_register_algos(dev);
+
+       ret = of_platform_populate(node, NULL, NULL, &pdev->dev);
+       if (ret)
+               goto unregister_algos;
+
+       device_for_each_child(&pdev->dev, &pdev->dev, sa_link_child);
+
+       return 0;
+
+unregister_algos:
+       sa_unregister_algos(&pdev->dev);
+
+release_dma:
+       dma_release_channel(dev_data->dma_rx2);
+       dma_release_channel(dev_data->dma_rx1);
+       dma_release_channel(dev_data->dma_tx);
+
+destroy_dma_pool:
+       dma_pool_destroy(dev_data->sc_pool);
+
+disable_pm_runtime:
+       pm_runtime_put_sync(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+
+       return ret;
+}
+
+/*
+ * Tear down one SA2UL instance: the reverse of sa_ul_probe().
+ *
+ * Removes the child devices created by of_platform_populate() in probe,
+ * unregisters the crypto algorithms, releases the DMA channels and the
+ * security context pool, and drops the runtime PM reference. Always
+ * returns 0.
+ */
+static int sa_ul_remove(struct platform_device *pdev)
+{
+       struct sa_crypto_data *dev_data = platform_get_drvdata(pdev);
+
+       /* children were populated in probe; remove them before the parent */
+       of_platform_depopulate(&pdev->dev);
+
+       sa_unregister_algos(&pdev->dev);
+
+       dma_release_channel(dev_data->dma_rx2);
+       dma_release_channel(dev_data->dma_rx1);
+       dma_release_channel(dev_data->dma_tx);
+
+       dma_pool_destroy(dev_data->sc_pool);
+
+       platform_set_drvdata(pdev, NULL);
+
+       pm_runtime_put_sync(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+
+       return 0;
+}
+
+/* Device tree compatible strings handled by this driver */
+static const struct of_device_id of_match[] = {
+       { .compatible = "ti,j721e-sa2ul" },
+       { .compatible = "ti,am654-sa2ul" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, of_match);
+
+/* Platform driver description, registered through module_platform_driver() */
+static struct platform_driver sa_ul_driver = {
+       .driver = {
+               .name           = "saul-crypto",
+               .of_match_table = of_match,
+       },
+       .probe  = sa_ul_probe,
+       .remove = sa_ul_remove,
+};
+module_platform_driver(sa_ul_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/sa2ul.h b/drivers/crypto/sa2ul.h
new file mode 100644 (file)
index 0000000..7f7e3fe
--- /dev/null
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * K3 SA2UL crypto accelerator driver
+ *
+ * Copyright (C) 2018-2020 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors:    Keerthy
+ *             Vitaly Andrianov
+ *             Tero Kristo
+ */
+
+#ifndef _K3_SA2UL_
+#define _K3_SA2UL_
+
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/hw_random.h>
+#include <crypto/aes.h>
+
+#define SA_ENGINE_ENABLE_CONTROL       0x1000
+
+struct sa_tfm_ctx;
+/*
+ * SA_ENGINE_ENABLE_CONTROL register bits
+ */
+#define SA_EEC_ENCSS_EN                        0x00000001
+#define SA_EEC_AUTHSS_EN               0x00000002
+#define SA_EEC_TRNG_EN                 0x00000008
+#define SA_EEC_PKA_EN                  0x00000010
+#define SA_EEC_CTXCACH_EN              0x00000080
+#define SA_EEC_CPPI_PORT_IN_EN         0x00000200
+#define SA_EEC_CPPI_PORT_OUT_EN                0x00000800
+
+/*
+ * Encoding used to identify the type of crypto operation
+ * performed on the packet when the packet is returned
+ * by SA
+ */
+#define SA_REQ_SUBTYPE_ENC     0x0001
+#define SA_REQ_SUBTYPE_DEC     0x0002
+#define SA_REQ_SUBTYPE_SHIFT   16
+#define SA_REQ_SUBTYPE_MASK    0xffff
+
+/* Number of 32 bit words in EPIB  */
+#define SA_DMA_NUM_EPIB_WORDS   4
+
+/* Number of 32 bit words in PS data  */
+#define SA_DMA_NUM_PS_WORDS     16
+#define NKEY_SZ                        3
+#define MCI_SZ                 27
+
+/*
+ * Maximum number of simultaneous security contexts
+ * supported by the driver
+ */
+#define SA_MAX_NUM_CTX 512
+
+/*
+ * Assumption: CTX size is multiple of 32
+ */
+#define SA_CTX_SIZE_TO_DMA_SIZE(ctx_sz) \
+               ((ctx_sz) ? ((ctx_sz) / 32 - 1) : 0)
+
+#define SA_CTX_ENC_KEY_OFFSET   32
+#define SA_CTX_ENC_AUX1_OFFSET  64
+#define SA_CTX_ENC_AUX2_OFFSET  96
+#define SA_CTX_ENC_AUX3_OFFSET  112
+#define SA_CTX_ENC_AUX4_OFFSET  128
+
+/* Next Engine Select code in CP_ACE */
+#define SA_ENG_ID_EM1   2       /* Enc/Dec engine with AES/DES core */
+#define SA_ENG_ID_EM2   3       /* Encryption/Decryption engine for pass 2 */
+#define SA_ENG_ID_AM1   4       /* Auth. engine with SHA1/MD5/SHA2 core */
+#define SA_ENG_ID_AM2   5       /* Authentication engine for pass 2 */
+#define SA_ENG_ID_OUTPORT2 20   /* Egress module 2 */
+
+/*
+ * Command Label Definitions
+ */
+#define SA_CMDL_OFFSET_NESC           0      /* Next Engine Select Code */
+#define SA_CMDL_OFFSET_LABEL_LEN      1      /* Engine Command Label Length */
+/* 16-bit Length of Data to be processed */
+#define SA_CMDL_OFFSET_DATA_LEN       2
+#define SA_CMDL_OFFSET_DATA_OFFSET    4      /* Stat Data Offset */
+#define SA_CMDL_OFFSET_OPTION_CTRL1   5      /* Option Control Byte 1 */
+#define SA_CMDL_OFFSET_OPTION_CTRL2   6      /* Option Control Byte 2 */
+#define SA_CMDL_OFFSET_OPTION_CTRL3   7      /* Option Control Byte 3 */
+#define SA_CMDL_OFFSET_OPTION_BYTE    8
+
+#define SA_CMDL_HEADER_SIZE_BYTES      8
+
+#define SA_CMDL_OPTION_BYTES_MAX_SIZE     72
+#define SA_CMDL_MAX_SIZE_BYTES (SA_CMDL_HEADER_SIZE_BYTES + \
+                               SA_CMDL_OPTION_BYTES_MAX_SIZE)
+
+/* SWINFO word-0 flags */
+#define SA_SW_INFO_FLAG_EVICT   0x0001
+#define SA_SW_INFO_FLAG_TEAR    0x0002
+#define SA_SW_INFO_FLAG_NOPD    0x0004
+
+/*
+ * This type represents the various packet types to be processed
+ * by the PHP engine in SA.
+ * It is used to identify the corresponding PHP processing function.
+ */
+#define SA_CTX_PE_PKT_TYPE_3GPP_AIR    0    /* 3GPP Air Cipher */
+#define SA_CTX_PE_PKT_TYPE_SRTP        1    /* SRTP */
+#define SA_CTX_PE_PKT_TYPE_IPSEC_AH    2    /* IPSec Authentication Header */
+/* IPSec Encapsulating Security Payload */
+#define SA_CTX_PE_PKT_TYPE_IPSEC_ESP   3
+/* Indicates that it is in data mode, It may not be used by PHP */
+#define SA_CTX_PE_PKT_TYPE_NONE        4
+#define SA_CTX_ENC_TYPE1_SZ     64      /* Encryption SC with Key only */
+#define SA_CTX_ENC_TYPE2_SZ     96      /* Encryption SC with Key and Aux1 */
+
+#define SA_CTX_AUTH_TYPE1_SZ    64      /* Auth SC with Key only */
+#define SA_CTX_AUTH_TYPE2_SZ    96      /* Auth SC with Key and Aux1 */
+/* Size of security context for PHP engine */
+#define SA_CTX_PHP_PE_CTX_SZ    64
+
+#define SA_CTX_MAX_SZ (64 + SA_CTX_ENC_TYPE2_SZ + SA_CTX_AUTH_TYPE2_SZ)
+
+/*
+ * Encoding of F/E control in SCCTL
+ *  Bit 0-1: Fetch PHP Bytes
+ *  Bit 2-3: Fetch Encryption/Air Ciphering Bytes
+ *  Bit 4-5: Fetch Authentication Bytes or Encr pass 2
+ *  Bit 6-7: Evict PHP Bytes
+ *
+ *  where   00 = 0 bytes
+ *          01 = 64 bytes
+ *          10 = 96 bytes
+ *          11 = 128 bytes
+ */
+#define SA_CTX_DMA_SIZE_0       0
+#define SA_CTX_DMA_SIZE_64      1
+#define SA_CTX_DMA_SIZE_96      2
+#define SA_CTX_DMA_SIZE_128     3
+
+/*
+ * Byte offset of the owner word in SCCTL
+ * in the security context
+ */
+#define SA_CTX_SCCTL_OWNER_OFFSET 0
+
+#define SA_CTX_ENC_KEY_OFFSET   32
+#define SA_CTX_ENC_AUX1_OFFSET  64
+#define SA_CTX_ENC_AUX2_OFFSET  96
+#define SA_CTX_ENC_AUX3_OFFSET  112
+#define SA_CTX_ENC_AUX4_OFFSET  128
+
+#define SA_SCCTL_FE_AUTH_ENC   0x65
+#define SA_SCCTL_FE_ENC                0x8D
+
+#define SA_ALIGN_MASK          (sizeof(u32) - 1)
+#define SA_ALIGNED             __aligned(32)
+
+#define SA_AUTH_SW_CTRL_MD5    1
+#define SA_AUTH_SW_CTRL_SHA1   2
+#define SA_AUTH_SW_CTRL_SHA224 3
+#define SA_AUTH_SW_CTRL_SHA256 4
+#define SA_AUTH_SW_CTRL_SHA384 5
+#define SA_AUTH_SW_CTRL_SHA512 6
+
+/* SA2UL can only handle maximum data size of 64KB */
+#define SA_MAX_DATA_SZ         U16_MAX
+
+/*
+ * SA2UL can provide unpredictable results with packet sizes that fall within
+ * the following range, so avoid using it for those sizes.
+ */
+#define SA_UNSAFE_DATA_SZ_MIN  240
+#define SA_UNSAFE_DATA_SZ_MAX  256
+
+/**
+ * struct sa_crypto_data - Crypto driver instance data
+ * @base: Base address of the register space
+ * @pdev: Platform device pointer
+ * @sc_pool: DMA pool for security context buffers
+ * @dev: Device pointer
+ * @scid_lock: Lock for security context ID allocation
+ * @sc_id_start: Starting index for SC ID
+ * @sc_id_end: Ending index for SC ID
+ * @sc_id: Security Context ID
+ * @ctx_bm: Bitmap to keep track of security context IDs; sized to hold
+ *         SA_MAX_NUM_CTX bits
+ * @ctx: SA tfm context pointer
+ * @dma_rx1: Pointer to DMA rx channel for sizes < 256 Bytes
+ * @dma_rx2: Pointer to DMA rx channel for sizes > 256 Bytes
+ * @dma_tx: Pointer to DMA TX channel
+ */
+struct sa_crypto_data {
+       void __iomem *base;
+       struct platform_device  *pdev;
+       struct dma_pool         *sc_pool;
+       struct device *dev;
+       spinlock_t      scid_lock; /* lock for SC-ID allocation */
+       /* Security context data */
+       u16             sc_id_start;
+       u16             sc_id_end;
+       u16             sc_id;
+       unsigned long   ctx_bm[DIV_ROUND_UP(SA_MAX_NUM_CTX,
+                               BITS_PER_LONG)];
+       struct sa_tfm_ctx       *ctx;
+       struct dma_chan         *dma_rx1;
+       struct dma_chan         *dma_rx2;
+       struct dma_chan         *dma_tx;
+};
+
+/**
+ * struct sa_cmdl_param_info - Command label parameters info
+ * @index: Index of the parameter in the command label format
+ * @offset: The offset of the parameter
+ * @size: Size of the parameter
+ */
+struct sa_cmdl_param_info {
+       u16     index;
+       u16     offset;
+       u16     size;
+};
+
+/* Maximum length of Auxiliary data in 32bit words */
+#define SA_MAX_AUX_DATA_WORDS  8
+
+/**
+ * struct sa_cmdl_upd_info - Command label update info
+ * @flags: flags in command label
+ * @submode: Encryption submodes
+ * @enc_size: Size of first pass encryption size
+ * @enc_size2: Size of second pass encryption size
+ * @enc_offset: Encryption payload offset in the packet
+ * @enc_iv: Encryption initialization vector for pass 1
+ *         (NOTE(review): the original text said "pass2" for both @enc_iv and
+ *         @enc_iv2, which looks like a copy-paste slip - confirm)
+ * @enc_iv2: Encryption initialization vector for pass2
+ * @aad: Associated data
+ * @payload: Payload info
+ * @auth_size: Authentication size for pass 1
+ * @auth_size2: Authentication size for pass 2
+ * @auth_offset: Authentication payload offset
+ * @auth_iv: Authentication initialization vector
+ * @aux_key_info: Authentication aux key information
+ * @aux_key: Aux key for authentication (SA_MAX_AUX_DATA_WORDS 32-bit words)
+ */
+struct sa_cmdl_upd_info {
+       u16     flags;
+       u16     submode;
+       struct sa_cmdl_param_info       enc_size;
+       struct sa_cmdl_param_info       enc_size2;
+       struct sa_cmdl_param_info       enc_offset;
+       struct sa_cmdl_param_info       enc_iv;
+       struct sa_cmdl_param_info       enc_iv2;
+       struct sa_cmdl_param_info       aad;
+       struct sa_cmdl_param_info       payload;
+       struct sa_cmdl_param_info       auth_size;
+       struct sa_cmdl_param_info       auth_size2;
+       struct sa_cmdl_param_info       auth_offset;
+       struct sa_cmdl_param_info       auth_iv;
+       struct sa_cmdl_param_info       aux_key_info;
+       u32                             aux_key[SA_MAX_AUX_DATA_WORDS];
+};
+
+/*
+ * Number of 32bit words appended after the command label
+ * in PSDATA to identify the crypto request context.
+ * word-0: Request type
+ * word-1: pointer to request
+ */
+#define SA_PSDATA_CTX_WORDS 4
+
+/* Maximum size of Command label in 32-bit words */
+#define SA_MAX_CMDL_WORDS (SA_DMA_NUM_PS_WORDS - SA_PSDATA_CTX_WORDS)
+
+/**
+ * struct sa_ctx_info - SA context information
+ * @sc: Pointer to security context
+ * @sc_phys: Security context physical address that is passed on to SA2UL
+ * @sc_id: Security context ID
+ * @cmdl_size: Command label size
+ * @cmdl: Command label for a particular iteration (SA_MAX_CMDL_WORDS words)
+ * @cmdl_upd_info: structure holding command label update info
+ * @epib: Extended protocol information block words
+ */
+struct sa_ctx_info {
+       u8              *sc;
+       dma_addr_t      sc_phys;
+       u16             sc_id;
+       u16             cmdl_size;
+       u32             cmdl[SA_MAX_CMDL_WORDS];
+       struct sa_cmdl_upd_info cmdl_upd_info;
+       /*
+        * Store Auxiliary data such as K2/K3 subkeys in AES-XCBC
+        * NOTE(review): this does not match the @epib description above
+        * (EPIB words) - confirm which member this comment belongs to.
+        */
+       u32             epib[SA_DMA_NUM_EPIB_WORDS];
+};
+
+/**
+ * struct sa_tfm_ctx - TFM context structure
+ * @dev_data: struct sa_crypto_data pointer
+ * @enc: struct sa_ctx_info for encryption
+ * @dec: struct sa_ctx_info for decryption
+ * @auth: struct sa_ctx_info for authentication (by analogy with @enc and
+ *       @dec - confirm against the users in sa2ul.c)
+ * @keylen: encryption/decryption key length
+ * @iv_idx: Initialization vector index
+ * @key: encryption key
+ * @authkey: authentication key buffer, SHA512_BLOCK_SIZE bytes
+ * @shash: shash transform handle (NOTE(review): its use is not visible in
+ *        this header - confirm)
+ * @fallback: SW fallback algorithm; a union holding the skcipher, ahash or
+ *           aead fallback handle
+ */
+struct sa_tfm_ctx {
+       struct sa_crypto_data *dev_data;
+       struct sa_ctx_info enc;
+       struct sa_ctx_info dec;
+       struct sa_ctx_info auth;
+       int keylen;
+       int iv_idx;
+       u32 key[AES_KEYSIZE_256 / sizeof(u32)];
+       u8 authkey[SHA512_BLOCK_SIZE];
+       struct crypto_shash     *shash;
+       /* for fallback */
+       union {
+               struct crypto_sync_skcipher     *skcipher;
+               struct crypto_ahash             *ahash;
+               struct crypto_aead              *aead;
+       } fallback;
+};
+
+/**
+ * struct sa_sha_req_ctx - Structure used for sha request
+ * @dev_data: struct sa_crypto_data pointer
+ * @cmdl: Complete command label with psdata and epib included
+ *       (SA_MAX_CMDL_WORDS + SA_PSDATA_CTX_WORDS words)
+ * @fallback_req: SW fallback request container
+ */
+struct sa_sha_req_ctx {
+       struct sa_crypto_data   *dev_data;
+       u32                     cmdl[SA_MAX_CMDL_WORDS + SA_PSDATA_CTX_WORDS];
+       /* NOTE(review): presumably has to remain the last member - confirm */
+       struct ahash_request    fallback_req;
+};
+
+/*
+ * Encryption submodes.
+ * NOTE(review): presumably the values kept in sa_cmdl_upd_info.submode -
+ * confirm against the users in sa2ul.c.
+ */
+enum sa_submode {
+       SA_MODE_GEN = 0,
+       SA_MODE_CCM,
+       SA_MODE_GCM,
+       SA_MODE_GMAC
+};
+
+/* Encryption algorithms */
+enum sa_ealg_id {
+       SA_EALG_ID_NONE = 0,        /* No encryption */
+       SA_EALG_ID_NULL,            /* NULL encryption */
+       SA_EALG_ID_AES_CTR,         /* AES Counter mode */
+       SA_EALG_ID_AES_F8,          /* AES F8 mode */
+       SA_EALG_ID_AES_CBC,         /* AES CBC mode */
+       SA_EALG_ID_DES_CBC,         /* DES CBC mode */
+       SA_EALG_ID_3DES_CBC,        /* 3DES CBC mode */
+       SA_EALG_ID_CCM,             /* Counter with CBC-MAC mode */
+       SA_EALG_ID_GCM,             /* Galois Counter mode */
+       SA_EALG_ID_AES_ECB,         /* AES ECB mode */
+       /* End marker; enum sa_aalg_id uses it as the value of its NULL entry */
+       SA_EALG_ID_LAST
+};
+
+/*
+ * Authentication algorithms.
+ * Apart from SA_AALG_ID_NONE (0), the values start at SA_EALG_ID_LAST, i.e.
+ * they continue the numbering of enum sa_ealg_id.
+ */
+enum sa_aalg_id {
+       SA_AALG_ID_NONE = 0,      /* No Authentication  */
+       SA_AALG_ID_NULL = SA_EALG_ID_LAST, /* NULL Authentication  */
+       SA_AALG_ID_MD5,           /* MD5 mode */
+       SA_AALG_ID_SHA1,          /* SHA1 mode */
+       SA_AALG_ID_SHA2_224,      /* 224-bit SHA2 mode */
+       SA_AALG_ID_SHA2_256,      /* 256-bit SHA2 mode */
+       SA_AALG_ID_SHA2_512,      /* 512-bit SHA2 mode */
+       SA_AALG_ID_HMAC_MD5,      /* HMAC with MD5 mode */
+       SA_AALG_ID_HMAC_SHA1,     /* HMAC with SHA1 mode */
+       SA_AALG_ID_HMAC_SHA2_224, /* HMAC with 224-bit SHA2 mode */
+       SA_AALG_ID_HMAC_SHA2_256, /* HMAC with 256-bit SHA2 mode */
+       SA_AALG_ID_GMAC,          /* Galois Message Auth. Code mode */
+       SA_AALG_ID_CMAC,          /* Cipher-based Message Auth. Code mode */
+       SA_AALG_ID_CBC_MAC,       /* Cipher Block Chaining */
+       SA_AALG_ID_AES_XCBC       /* AES Extended Cipher Block Chaining */
+};
+
+/*
+ * Mode control engine algorithms used to index the
+ * mode control instruction tables
+ */
+enum sa_eng_algo_id {
+       SA_ENG_ALGO_ECB = 0,
+       SA_ENG_ALGO_CBC,
+       SA_ENG_ALGO_CFB,
+       SA_ENG_ALGO_OFB,
+       SA_ENG_ALGO_CTR,
+       SA_ENG_ALGO_F8,
+       SA_ENG_ALGO_F8F9,
+       SA_ENG_ALGO_GCM,
+       SA_ENG_ALGO_GMAC,
+       SA_ENG_ALGO_CCM,
+       SA_ENG_ALGO_CMAC,
+       SA_ENG_ALGO_CBCMAC,
+       /* Number of algorithm IDs above; keep as the last enumerator */
+       SA_NUM_ENG_ALGOS
+};
+
+/**
+ * struct sa_eng_info - Security accelerator engine info
+ * @eng_id: Engine ID
+ * @sc_size: Security context size
+ */
+struct sa_eng_info {
+       u8      eng_id;
+       u16     sc_size;
+};
+
+#endif /* _K3_SA2UL_ */
index 466e30b..0c8cb23 100644 (file)
@@ -146,11 +146,12 @@ struct sahara_ctx {
        /* AES-specific context */
        int keylen;
        u8 key[AES_KEYSIZE_128];
-       struct crypto_sync_skcipher *fallback;
+       struct crypto_skcipher *fallback;
 };
 
 struct sahara_aes_reqctx {
        unsigned long mode;
+       struct skcipher_request fallback_req;   /* must be last; fallback ctx follows */
 };
 
 /*
@@ -617,10 +618,10 @@ static int sahara_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
        /*
         * The requested key size is not supported by HW, do a fallback.
         */
-       crypto_sync_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
-       crypto_sync_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
+       crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
                                                 CRYPTO_TFM_REQ_MASK);
-       return crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
+       return crypto_skcipher_setkey(ctx->fallback, key, keylen);
 }
 
 static int sahara_aes_crypt(struct skcipher_request *req, unsigned long mode)
@@ -651,21 +652,19 @@ static int sahara_aes_crypt(struct skcipher_request *req, unsigned long mode)
 
 static int sahara_aes_ecb_encrypt(struct skcipher_request *req)
 {
+       struct sahara_aes_reqctx *rctx = skcipher_request_ctx(req);
        struct sahara_ctx *ctx = crypto_skcipher_ctx(
                crypto_skcipher_reqtfm(req));
-       int err;
 
        if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
-               SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
-
-               skcipher_request_set_sync_tfm(subreq, ctx->fallback);
-               skcipher_request_set_callback(subreq, req->base.flags,
-                                             NULL, NULL);
-               skcipher_request_set_crypt(subreq, req->src, req->dst,
-                                          req->cryptlen, req->iv);
-               err = crypto_skcipher_encrypt(subreq);
-               skcipher_request_zero(subreq);
-               return err;
+               skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+               skcipher_request_set_callback(&rctx->fallback_req,
+                                             req->base.flags,
+                                             req->base.complete,
+                                             req->base.data);
+               skcipher_request_set_crypt(&rctx->fallback_req, req->src,
+                                          req->dst, req->cryptlen, req->iv);
+               return crypto_skcipher_encrypt(&rctx->fallback_req);
        }
 
        return sahara_aes_crypt(req, FLAGS_ENCRYPT);
@@ -673,21 +672,19 @@ static int sahara_aes_ecb_encrypt(struct skcipher_request *req)
 
 static int sahara_aes_ecb_decrypt(struct skcipher_request *req)
 {
+       struct sahara_aes_reqctx *rctx = skcipher_request_ctx(req);
        struct sahara_ctx *ctx = crypto_skcipher_ctx(
                crypto_skcipher_reqtfm(req));
-       int err;
 
        if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
-               SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
-
-               skcipher_request_set_sync_tfm(subreq, ctx->fallback);
-               skcipher_request_set_callback(subreq, req->base.flags,
-                                             NULL, NULL);
-               skcipher_request_set_crypt(subreq, req->src, req->dst,
-                                          req->cryptlen, req->iv);
-               err = crypto_skcipher_decrypt(subreq);
-               skcipher_request_zero(subreq);
-               return err;
+               skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+               skcipher_request_set_callback(&rctx->fallback_req,
+                                             req->base.flags,
+                                             req->base.complete,
+                                             req->base.data);
+               skcipher_request_set_crypt(&rctx->fallback_req, req->src,
+                                          req->dst, req->cryptlen, req->iv);
+               return crypto_skcipher_decrypt(&rctx->fallback_req);
        }
 
        return sahara_aes_crypt(req, 0);
@@ -695,21 +692,19 @@ static int sahara_aes_ecb_decrypt(struct skcipher_request *req)
 
 static int sahara_aes_cbc_encrypt(struct skcipher_request *req)
 {
+       struct sahara_aes_reqctx *rctx = skcipher_request_ctx(req);
        struct sahara_ctx *ctx = crypto_skcipher_ctx(
                crypto_skcipher_reqtfm(req));
-       int err;
 
        if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
-               SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
-
-               skcipher_request_set_sync_tfm(subreq, ctx->fallback);
-               skcipher_request_set_callback(subreq, req->base.flags,
-                                             NULL, NULL);
-               skcipher_request_set_crypt(subreq, req->src, req->dst,
-                                          req->cryptlen, req->iv);
-               err = crypto_skcipher_encrypt(subreq);
-               skcipher_request_zero(subreq);
-               return err;
+               skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+               skcipher_request_set_callback(&rctx->fallback_req,
+                                             req->base.flags,
+                                             req->base.complete,
+                                             req->base.data);
+               skcipher_request_set_crypt(&rctx->fallback_req, req->src,
+                                          req->dst, req->cryptlen, req->iv);
+               return crypto_skcipher_encrypt(&rctx->fallback_req);
        }
 
        return sahara_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC);
@@ -717,21 +712,19 @@ static int sahara_aes_cbc_encrypt(struct skcipher_request *req)
 
 static int sahara_aes_cbc_decrypt(struct skcipher_request *req)
 {
+       struct sahara_aes_reqctx *rctx = skcipher_request_ctx(req);
        struct sahara_ctx *ctx = crypto_skcipher_ctx(
                crypto_skcipher_reqtfm(req));
-       int err;
 
        if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
-               SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
-
-               skcipher_request_set_sync_tfm(subreq, ctx->fallback);
-               skcipher_request_set_callback(subreq, req->base.flags,
-                                             NULL, NULL);
-               skcipher_request_set_crypt(subreq, req->src, req->dst,
-                                          req->cryptlen, req->iv);
-               err = crypto_skcipher_decrypt(subreq);
-               skcipher_request_zero(subreq);
-               return err;
+               skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+               skcipher_request_set_callback(&rctx->fallback_req,
+                                             req->base.flags,
+                                             req->base.complete,
+                                             req->base.data);
+               skcipher_request_set_crypt(&rctx->fallback_req, req->src,
+                                          req->dst, req->cryptlen, req->iv);
+               return crypto_skcipher_decrypt(&rctx->fallback_req);
        }
 
        return sahara_aes_crypt(req, FLAGS_CBC);
@@ -742,14 +735,15 @@ static int sahara_aes_init_tfm(struct crypto_skcipher *tfm)
        const char *name = crypto_tfm_alg_name(&tfm->base);
        struct sahara_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
+       ctx->fallback = crypto_alloc_skcipher(name, 0,
                                              CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(ctx->fallback)) {
                pr_err("Error allocating fallback algo %s\n", name);
                return PTR_ERR(ctx->fallback);
        }
 
-       crypto_skcipher_set_reqsize(tfm, sizeof(struct sahara_aes_reqctx));
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct sahara_aes_reqctx) +
+                                        crypto_skcipher_reqsize(ctx->fallback));
 
        return 0;
 }
@@ -758,7 +752,7 @@ static void sahara_aes_exit_tfm(struct crypto_skcipher *tfm)
 {
        struct sahara_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       crypto_free_sync_skcipher(ctx->fallback);
+       crypto_free_skcipher(ctx->fallback);
 }
 
 static u32 sahara_sha_init_hdr(struct sahara_dev *dev,
index 9c6db7f..7c54735 100644 (file)
@@ -2264,7 +2264,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha1-"
                                                   "cbc-aes-talitos",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA1_DIGEST_SIZE,
@@ -2285,7 +2286,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha1-"
                                                   "cbc-aes-talitos-hsna",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA1_DIGEST_SIZE,
@@ -2306,7 +2308,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha1-"
                                                   "cbc-3des-talitos",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA1_DIGEST_SIZE,
@@ -2330,7 +2333,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha1-"
                                                   "cbc-3des-talitos-hsna",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA1_DIGEST_SIZE,
@@ -2352,7 +2356,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha224-"
                                                   "cbc-aes-talitos",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA224_DIGEST_SIZE,
@@ -2373,7 +2378,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha224-"
                                                   "cbc-aes-talitos-hsna",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA224_DIGEST_SIZE,
@@ -2394,7 +2400,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha224-"
                                                   "cbc-3des-talitos",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA224_DIGEST_SIZE,
@@ -2418,7 +2425,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha224-"
                                                   "cbc-3des-talitos-hsna",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA224_DIGEST_SIZE,
@@ -2440,7 +2448,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha256-"
                                                   "cbc-aes-talitos",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA256_DIGEST_SIZE,
@@ -2461,7 +2470,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha256-"
                                                   "cbc-aes-talitos-hsna",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA256_DIGEST_SIZE,
@@ -2482,7 +2492,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha256-"
                                                   "cbc-3des-talitos",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA256_DIGEST_SIZE,
@@ -2506,7 +2517,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha256-"
                                                   "cbc-3des-talitos-hsna",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA256_DIGEST_SIZE,
@@ -2528,7 +2540,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha384-"
                                                   "cbc-aes-talitos",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA384_DIGEST_SIZE,
@@ -2549,7 +2562,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha384-"
                                                   "cbc-3des-talitos",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA384_DIGEST_SIZE,
@@ -2571,7 +2585,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha512-"
                                                   "cbc-aes-talitos",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA512_DIGEST_SIZE,
@@ -2592,7 +2607,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-sha512-"
                                                   "cbc-3des-talitos",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA512_DIGEST_SIZE,
@@ -2614,7 +2630,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-md5-"
                                                   "cbc-aes-talitos",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = MD5_DIGEST_SIZE,
@@ -2635,7 +2652,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-md5-"
                                                   "cbc-aes-talitos-hsna",
                                .cra_blocksize = AES_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = MD5_DIGEST_SIZE,
@@ -2655,7 +2673,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-md5-"
                                                   "cbc-3des-talitos",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = MD5_DIGEST_SIZE,
@@ -2678,7 +2697,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_driver_name = "authenc-hmac-md5-"
                                                   "cbc-3des-talitos-hsna",
                                .cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        },
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = MD5_DIGEST_SIZE,
@@ -2699,7 +2719,8 @@ static struct talitos_alg_template driver_algs[] = {
                        .base.cra_name = "ecb(aes)",
                        .base.cra_driver_name = "ecb-aes-talitos",
                        .base.cra_blocksize = AES_BLOCK_SIZE,
-                       .base.cra_flags = CRYPTO_ALG_ASYNC,
+                       .base.cra_flags = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY,
                        .min_keysize = AES_MIN_KEY_SIZE,
                        .max_keysize = AES_MAX_KEY_SIZE,
                        .setkey = skcipher_aes_setkey,
@@ -2712,7 +2733,8 @@ static struct talitos_alg_template driver_algs[] = {
                        .base.cra_name = "cbc(aes)",
                        .base.cra_driver_name = "cbc-aes-talitos",
                        .base.cra_blocksize = AES_BLOCK_SIZE,
-                       .base.cra_flags = CRYPTO_ALG_ASYNC,
+                       .base.cra_flags = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY,
                        .min_keysize = AES_MIN_KEY_SIZE,
                        .max_keysize = AES_MAX_KEY_SIZE,
                        .ivsize = AES_BLOCK_SIZE,
@@ -2727,7 +2749,8 @@ static struct talitos_alg_template driver_algs[] = {
                        .base.cra_name = "ctr(aes)",
                        .base.cra_driver_name = "ctr-aes-talitos",
                        .base.cra_blocksize = 1,
-                       .base.cra_flags = CRYPTO_ALG_ASYNC,
+                       .base.cra_flags = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY,
                        .min_keysize = AES_MIN_KEY_SIZE,
                        .max_keysize = AES_MAX_KEY_SIZE,
                        .ivsize = AES_BLOCK_SIZE,
@@ -2742,7 +2765,8 @@ static struct talitos_alg_template driver_algs[] = {
                        .base.cra_name = "ecb(des)",
                        .base.cra_driver_name = "ecb-des-talitos",
                        .base.cra_blocksize = DES_BLOCK_SIZE,
-                       .base.cra_flags = CRYPTO_ALG_ASYNC,
+                       .base.cra_flags = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY,
                        .min_keysize = DES_KEY_SIZE,
                        .max_keysize = DES_KEY_SIZE,
                        .setkey = skcipher_des_setkey,
@@ -2755,7 +2779,8 @@ static struct talitos_alg_template driver_algs[] = {
                        .base.cra_name = "cbc(des)",
                        .base.cra_driver_name = "cbc-des-talitos",
                        .base.cra_blocksize = DES_BLOCK_SIZE,
-                       .base.cra_flags = CRYPTO_ALG_ASYNC,
+                       .base.cra_flags = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY,
                        .min_keysize = DES_KEY_SIZE,
                        .max_keysize = DES_KEY_SIZE,
                        .ivsize = DES_BLOCK_SIZE,
@@ -2770,7 +2795,8 @@ static struct talitos_alg_template driver_algs[] = {
                        .base.cra_name = "ecb(des3_ede)",
                        .base.cra_driver_name = "ecb-3des-talitos",
                        .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .base.cra_flags = CRYPTO_ALG_ASYNC,
+                       .base.cra_flags = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY,
                        .min_keysize = DES3_EDE_KEY_SIZE,
                        .max_keysize = DES3_EDE_KEY_SIZE,
                        .setkey = skcipher_des3_setkey,
@@ -2784,7 +2810,8 @@ static struct talitos_alg_template driver_algs[] = {
                        .base.cra_name = "cbc(des3_ede)",
                        .base.cra_driver_name = "cbc-3des-talitos",
                        .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-                       .base.cra_flags = CRYPTO_ALG_ASYNC,
+                       .base.cra_flags = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY,
                        .min_keysize = DES3_EDE_KEY_SIZE,
                        .max_keysize = DES3_EDE_KEY_SIZE,
                        .ivsize = DES3_EDE_BLOCK_SIZE,
@@ -2804,7 +2831,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "md5",
                                .cra_driver_name = "md5-talitos",
                                .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2819,7 +2847,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "sha1",
                                .cra_driver_name = "sha1-talitos",
                                .cra_blocksize = SHA1_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2834,7 +2863,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "sha224",
                                .cra_driver_name = "sha224-talitos",
                                .cra_blocksize = SHA224_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2849,7 +2879,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "sha256",
                                .cra_driver_name = "sha256-talitos",
                                .cra_blocksize = SHA256_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2864,7 +2895,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "sha384",
                                .cra_driver_name = "sha384-talitos",
                                .cra_blocksize = SHA384_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2879,7 +2911,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "sha512",
                                .cra_driver_name = "sha512-talitos",
                                .cra_blocksize = SHA512_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2894,7 +2927,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "hmac(md5)",
                                .cra_driver_name = "hmac-md5-talitos",
                                .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2909,7 +2943,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "hmac(sha1)",
                                .cra_driver_name = "hmac-sha1-talitos",
                                .cra_blocksize = SHA1_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2924,7 +2959,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "hmac(sha224)",
                                .cra_driver_name = "hmac-sha224-talitos",
                                .cra_blocksize = SHA224_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2939,7 +2975,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "hmac(sha256)",
                                .cra_driver_name = "hmac-sha256-talitos",
                                .cra_blocksize = SHA256_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2954,7 +2991,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "hmac(sha384)",
                                .cra_driver_name = "hmac-sha384-talitos",
                                .cra_blocksize = SHA384_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2969,7 +3007,8 @@ static struct talitos_alg_template driver_algs[] = {
                                .cra_name = "hmac(sha512)",
                                .cra_driver_name = "hmac-sha512-talitos",
                                .cra_blocksize = SHA512_BLOCK_SIZE,
-                               .cra_flags = CRYPTO_ALG_ASYNC,
+                               .cra_flags = CRYPTO_ALG_ASYNC |
+                                            CRYPTO_ALG_ALLOCATES_MEMORY,
                        }
                },
                .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
index c24f2db..a5ee8c2 100644 (file)
@@ -545,7 +545,7 @@ static bool hash_dma_valid_data(struct scatterlist *sg, int datasize)
  *
  * Initialize structures.
  */
-static int hash_init(struct ahash_request *req)
+static int ux500_hash_init(struct ahash_request *req)
 {
        struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
        struct hash_ctx *ctx = crypto_ahash_ctx(tfm);
@@ -1359,7 +1359,7 @@ static int ahash_sha1_init(struct ahash_request *req)
        ctx->config.oper_mode = HASH_OPER_MODE_HASH;
        ctx->digestsize = SHA1_DIGEST_SIZE;
 
-       return hash_init(req);
+       return ux500_hash_init(req);
 }
 
 static int ahash_sha256_init(struct ahash_request *req)
@@ -1372,7 +1372,7 @@ static int ahash_sha256_init(struct ahash_request *req)
        ctx->config.oper_mode = HASH_OPER_MODE_HASH;
        ctx->digestsize = SHA256_DIGEST_SIZE;
 
-       return hash_init(req);
+       return ux500_hash_init(req);
 }
 
 static int ahash_sha1_digest(struct ahash_request *req)
@@ -1425,7 +1425,7 @@ static int hmac_sha1_init(struct ahash_request *req)
        ctx->config.oper_mode   = HASH_OPER_MODE_HMAC;
        ctx->digestsize         = SHA1_DIGEST_SIZE;
 
-       return hash_init(req);
+       return ux500_hash_init(req);
 }
 
 static int hmac_sha256_init(struct ahash_request *req)
@@ -1438,7 +1438,7 @@ static int hmac_sha256_init(struct ahash_request *req)
        ctx->config.oper_mode   = HASH_OPER_MODE_HMAC;
        ctx->digestsize         = SHA256_DIGEST_SIZE;
 
-       return hash_init(req);
+       return ux500_hash_init(req);
 }
 
 static int hmac_sha1_digest(struct ahash_request *req)
@@ -1515,7 +1515,7 @@ static struct hash_algo_template hash_algs[] = {
                .conf.algorithm = HASH_ALGO_SHA1,
                .conf.oper_mode = HASH_OPER_MODE_HASH,
                .hash = {
-                       .init = hash_init,
+                       .init = ux500_hash_init,
                        .update = ahash_update,
                        .final = ahash_final,
                        .digest = ahash_sha1_digest,
@@ -1538,7 +1538,7 @@ static struct hash_algo_template hash_algs[] = {
                .conf.algorithm = HASH_ALGO_SHA256,
                .conf.oper_mode = HASH_OPER_MODE_HASH,
                .hash = {
-                       .init = hash_init,
+                       .init = ux500_hash_init,
                        .update = ahash_update,
                        .final = ahash_final,
                        .digest = ahash_sha256_digest,
@@ -1561,7 +1561,7 @@ static struct hash_algo_template hash_algs[] = {
                .conf.algorithm = HASH_ALGO_SHA1,
                .conf.oper_mode = HASH_OPER_MODE_HMAC,
                        .hash = {
-                       .init = hash_init,
+                       .init = ux500_hash_init,
                        .update = ahash_update,
                        .final = ahash_final,
                        .digest = hmac_sha1_digest,
@@ -1585,7 +1585,7 @@ static struct hash_algo_template hash_algs[] = {
                .conf.algorithm = HASH_ALGO_SHA256,
                .conf.oper_mode = HASH_OPER_MODE_HMAC,
                .hash = {
-                       .init = hash_init,
+                       .init = ux500_hash_init,
                        .update = ahash_update,
                        .final = ahash_final,
                        .digest = hmac_sha256_digest,
index cb8a6ea..b260195 100644 (file)
@@ -597,7 +597,8 @@ static struct virtio_crypto_algo virtio_crypto_algs[] = { {
                .base.cra_name          = "cbc(aes)",
                .base.cra_driver_name   = "virtio_crypto_aes_cbc",
                .base.cra_priority      = 150,
-               .base.cra_flags         = CRYPTO_ALG_ASYNC,
+               .base.cra_flags         = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY,
                .base.cra_blocksize     = AES_BLOCK_SIZE,
                .base.cra_ctxsize       = sizeof(struct virtio_crypto_skcipher_ctx),
                .base.cra_module        = THIS_MODULE,
index c8a962c..77e744e 100644 (file)
@@ -498,11 +498,11 @@ free_vqs:
 }
 #endif
 
-static unsigned int features[] = {
+static const unsigned int features[] = {
        /* none */
 };
 
-static struct virtio_device_id id_table[] = {
+static const struct virtio_device_id id_table[] = {
        { VIRTIO_ID_CRYPTO, VIRTIO_DEV_ANY_ID },
        { 0 },
 };
index cd11558..2707935 100644 (file)
@@ -364,6 +364,7 @@ static struct zynqmp_aead_drv_ctx aes_drv_ctx = {
                .cra_priority           = 200,
                .cra_flags              = CRYPTO_ALG_TYPE_AEAD |
                                          CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY |
                                          CRYPTO_ALG_KERN_DRIVER_ONLY |
                                          CRYPTO_ALG_NEED_FALLBACK,
                .cra_blocksize          = ZYNQMP_AES_BLK_SIZE,
index 8e32345..f508285 100644 (file)
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(bdev_dax_pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 {
-       if (!blk_queue_dax(bdev->bd_queue))
+       if (!blk_queue_dax(bdev->bd_disk->queue))
                return NULL;
        return dax_get_by_host(bdev->bd_disk->disk_name);
 }
index a1b199d..e97a9c9 100644 (file)
@@ -37,9 +37,8 @@ static const struct dmi_system_id * const embedded_fw_table[] = {
 static int __init efi_check_md_for_embedded_firmware(
        efi_memory_desc_t *md, const struct efi_embedded_fw_desc *desc)
 {
-       struct sha256_state sctx;
        struct efi_embedded_fw *fw;
-       u8 sha256[32];
+       u8 hash[32];
        u64 i, size;
        u8 *map;
 
@@ -54,10 +53,8 @@ static int __init efi_check_md_for_embedded_firmware(
                if (memcmp(map + i, desc->prefix, EFI_EMBEDDED_FW_PREFIX_LEN))
                        continue;
 
-               sha256_init(&sctx);
-               sha256_update(&sctx, map + i, desc->length);
-               sha256_final(&sctx, sha256);
-               if (memcmp(sha256, desc->sha256, 32) == 0)
+               sha256(map + i, desc->length, hash);
+               if (memcmp(hash, desc->sha256, 32) == 0)
                        break;
        }
        if ((i + desc->length) > size) {
index 039e0f9..6945c3c 100644 (file)
@@ -605,8 +605,10 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
        /* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
        err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
                                   fw_cfg_sel_ko, "%d", entry->select);
-       if (err)
-               goto err_register;
+       if (err) {
+               kobject_put(&entry->kobj);
+               return err;
+       }
 
        /* add raw binary content access */
        err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw);
@@ -622,7 +624,6 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
 
 err_add_raw:
        kobject_del(&entry->kobj);
-err_register:
        kfree(entry);
        return err;
 }
index d7e17e3..2129209 100644 (file)
@@ -692,9 +692,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                return n ? -EFAULT : 0;
        }
        case AMDGPU_INFO_DEV_INFO: {
-               struct drm_amdgpu_info_device dev_info = {};
+               struct drm_amdgpu_info_device dev_info;
                uint64_t vm_size;
 
+               memset(&dev_info, 0, sizeof(dev_info));
                dev_info.device_id = dev->pdev->device;
                dev_info.chip_rev = adev->rev_id;
                dev_info.external_rev = adev->external_rev_id;
index ebb8a28..02e6f8c 100644 (file)
@@ -778,7 +778,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
                tmp_str++;
        while (isspace(*++tmp_str));
 
-       while ((sub_str = strsep(&tmp_str, delimiter)) != NULL) {
+       while (tmp_str[0]) {
+               sub_str = strsep(&tmp_str, delimiter);
                ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
                if (ret)
                        return -EINVAL;
@@ -1038,7 +1039,8 @@ static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask)
        memcpy(buf_cpy, buf, bytes);
        buf_cpy[bytes] = '\0';
        tmp = buf_cpy;
-       while ((sub_str = strsep(&tmp, delimiter)) != NULL) {
+       while (tmp[0]) {
+               sub_str = strsep(&tmp, delimiter);
                if (strlen(sub_str)) {
                        ret = kstrtol(sub_str, 0, &level);
                        if (ret)
@@ -1635,7 +1637,8 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
                        i++;
                memcpy(buf_cpy, buf, count-i);
                tmp_str = buf_cpy;
-               while ((sub_str = strsep(&tmp_str, delimiter)) != NULL) {
+               while (tmp_str[0]) {
+                       sub_str = strsep(&tmp_str, delimiter);
                        ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
                        if (ret)
                                return -EINVAL;
index 86ffa0c..710edc7 100644 (file)
@@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
                 * the same resource. If we have a new DC context as part of
                 * the DM atomic state from validation we need to free it and
                 * retain the existing one instead.
+                *
+                * Furthermore, since the DM atomic state only contains the DC
+                * context and can safely be annulled, we can free the state
+                * and clear the associated private object now to free
+                * some memory and avoid a possible use-after-free later.
                 */
-               struct dm_atomic_state *new_dm_state, *old_dm_state;
 
-               new_dm_state = dm_atomic_get_new_state(state);
-               old_dm_state = dm_atomic_get_old_state(state);
+               for (i = 0; i < state->num_private_objs; i++) {
+                       struct drm_private_obj *obj = state->private_objs[i].ptr;
 
-               if (new_dm_state && old_dm_state) {
-                       if (new_dm_state->context)
-                               dc_release_state(new_dm_state->context);
+                       if (obj->funcs == adev->dm.atomic_obj.funcs) {
+                               int j = state->num_private_objs-1;
 
-                       new_dm_state->context = old_dm_state->context;
+                               dm_atomic_destroy_state(obj,
+                                               state->private_objs[i].state);
+
+                               /* If i is not at the end of the array then the
+                                * last element needs to be moved to where i was
+                                * before the array can safely be truncated.
+                                */
+                               if (i != j)
+                                       state->private_objs[i] =
+                                               state->private_objs[j];
 
-                       if (old_dm_state->context)
-                               dc_retain_state(old_dm_state->context);
+                               state->private_objs[j].ptr = NULL;
+                               state->private_objs[j].state = NULL;
+                               state->private_objs[j].old_state = NULL;
+                               state->private_objs[j].new_state = NULL;
+
+                               state->num_private_objs = j;
+                               break;
+                       }
                }
        }
 
index 05d8373..079f46f 100644 (file)
@@ -146,6 +146,7 @@ int bochs_kms_init(struct bochs_device *bochs)
        bochs->dev->mode_config.preferred_depth = 24;
        bochs->dev->mode_config.prefer_shadow = 0;
        bochs->dev->mode_config.prefer_shadow_fbdev = 1;
+       bochs->dev->mode_config.fbdev_use_iomem = true;
        bochs->dev->mode_config.quirk_addfb_prefer_host_byte_order = true;
 
        bochs->dev->mode_config.funcs = &bochs_mode_funcs;
index 87b58c1..648eb23 100644 (file)
@@ -1224,6 +1224,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 
        adv7511->bridge.funcs = &adv7511_bridge_funcs;
        adv7511->bridge.of_node = dev->of_node;
+       adv7511->bridge.type = DRM_MODE_CONNECTOR_HDMIA;
 
        drm_bridge_add(&adv7511->bridge);
 
index b14d725..c7bc194 100644 (file)
@@ -917,11 +917,6 @@ static int nwl_dsi_bridge_attach(struct drm_bridge *bridge,
        struct drm_panel *panel;
        int ret;
 
-       if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) {
-               DRM_ERROR("Fix bridge driver to make connector optional!");
-               return -EINVAL;
-       }
-
        ret = drm_of_find_panel_or_bridge(dsi->dev->of_node, 1, 0, &panel,
                                          &panel_bridge);
        if (ret)
index 5609e16..89cfd68 100644 (file)
@@ -399,7 +399,11 @@ static void drm_fb_helper_dirty_blit_real(struct drm_fb_helper *fb_helper,
        unsigned int y;
 
        for (y = clip->y1; y < clip->y2; y++) {
-               memcpy(dst, src, len);
+               if (!fb_helper->dev->mode_config.fbdev_use_iomem)
+                       memcpy(dst, src, len);
+               else
+                       memcpy_toio((void __iomem *)dst, src, len);
+
                src += fb->pitches[0];
                dst += fb->pitches[0];
        }
index 7bf628e..ee2058a 100644 (file)
@@ -871,9 +871,6 @@ err:
  * @file_priv: drm file-private structure
  *
  * Open an object using the global name, returning a handle and the size.
- *
- * This handle (of course) holds a reference to the object, so the object
- * will not go away until the handle is deleted.
  */
 int
 drm_gem_open_ioctl(struct drm_device *dev, void *data,
@@ -898,14 +895,15 @@ drm_gem_open_ioctl(struct drm_device *dev, void *data,
 
        /* drm_gem_handle_create_tail unlocks dev->object_name_lock. */
        ret = drm_gem_handle_create_tail(file_priv, obj, &handle);
-       drm_gem_object_put_unlocked(obj);
        if (ret)
-               return ret;
+               goto err;
 
        args->handle = handle;
        args->size = obj->size;
 
-       return 0;
+err:
+       drm_gem_object_put_unlocked(obj);
+       return ret;
 }
 
 /**
index bb27c82..bf7888a 100644 (file)
@@ -923,7 +923,7 @@ static int mipi_dbi_spi1_transfer(struct mipi_dbi *dbi, int dc,
                        }
                }
 
-               tr.len = chunk;
+               tr.len = chunk * 2;
                len -= chunk;
 
                ret = spi_sync(spi, &m);
index b50b44e..8fc3f67 100644 (file)
@@ -322,10 +322,8 @@ static int drm_of_lvds_get_remote_pixels_type(
                 * configurations by passing the endpoints explicitly to
                 * drm_of_lvds_get_dual_link_pixel_order().
                 */
-               if (!current_pt || pixels_type != current_pt) {
-                       of_node_put(remote_port);
+               if (!current_pt || pixels_type != current_pt)
                        return -EINVAL;
-               }
        }
 
        return pixels_type;
index 08802e5..4d2290f 100644 (file)
@@ -1060,9 +1060,14 @@ static void mcde_display_update(struct drm_simple_display_pipe *pipe,
         */
        if (fb) {
                mcde_set_extsrc(mcde, drm_fb_cma_get_gem_addr(fb, pstate, 0));
-               if (!mcde->video_mode)
-                       /* Send a single frame using software sync */
-                       mcde_display_send_one_frame(mcde);
+               if (!mcde->video_mode) {
+                       /*
+                        * Send a single frame using software sync if the flow
+                        * is not active yet.
+                        */
+                       if (mcde->flow_active == 0)
+                               mcde_display_send_one_frame(mcde);
+               }
                dev_info_once(mcde->dev, "sent first display update\n");
        } else {
                /*
index 519f998..800b775 100644 (file)
@@ -2073,7 +2073,7 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
         */
        if (core->assign_windows) {
                core->func->wndw.owner(core);
-               core->func->update(core, interlock, false);
+               nv50_disp_atomic_commit_core(state, interlock);
                core->assign_windows = false;
                interlock[NV50_DISP_INTERLOCK_CORE] = 0;
        }
@@ -2506,7 +2506,7 @@ nv50_display_create(struct drm_device *dev)
        if (disp->disp->object.oclass >= TU102_DISP)
                nouveau_display(dev)->format_modifiers = wndwc57e_modifiers;
        else
-       if (disp->disp->object.oclass >= GF110_DISP)
+       if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI)
                nouveau_display(dev)->format_modifiers = disp90xx_modifiers;
        else
                nouveau_display(dev)->format_modifiers = disp50xx_modifiers;
index 496c462..07373bb 100644 (file)
@@ -191,6 +191,7 @@ nouveau_decode_mod(struct nouveau_drm *drm,
                   uint32_t *tile_mode,
                   uint8_t *kind)
 {
+       struct nouveau_display *disp = nouveau_display(drm->dev);
        BUG_ON(!tile_mode || !kind);
 
        if (modifier == DRM_FORMAT_MOD_LINEAR) {
@@ -202,6 +203,12 @@ nouveau_decode_mod(struct nouveau_drm *drm,
                 * Extract the block height and kind from the corresponding
                 * modifier fields.  See drm_fourcc.h for details.
                 */
+
+               if ((modifier & (0xffull << 12)) == 0ull) {
+                       /* Legacy modifier.  Translate to this dev's 'kind.' */
+                       modifier |= disp->format_modifiers[0] & (0xffull << 12);
+               }
+
                *tile_mode = (uint32_t)(modifier & 0xF);
                *kind = (uint8_t)((modifier >> 12) & 0xFF);
 
@@ -227,6 +234,16 @@ nouveau_framebuffer_get_layout(struct drm_framebuffer *fb,
        }
 }
 
+static const u64 legacy_modifiers[] = {
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0),
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1),
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2),
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3),
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4),
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5),
+       DRM_FORMAT_MOD_INVALID
+};
+
 static int
 nouveau_validate_decode_mod(struct nouveau_drm *drm,
                            uint64_t modifier,
@@ -247,8 +264,14 @@ nouveau_validate_decode_mod(struct nouveau_drm *drm,
             (disp->format_modifiers[mod] != modifier);
             mod++);
 
-       if (disp->format_modifiers[mod] == DRM_FORMAT_MOD_INVALID)
-               return -EINVAL;
+       if (disp->format_modifiers[mod] == DRM_FORMAT_MOD_INVALID) {
+               for (mod = 0;
+                    (legacy_modifiers[mod] != DRM_FORMAT_MOD_INVALID) &&
+                    (legacy_modifiers[mod] != modifier);
+                    mod++);
+               if (legacy_modifiers[mod] == DRM_FORMAT_MOD_INVALID)
+                       return -EINVAL;
+       }
 
        nouveau_decode_mod(drm, modifier, tile_mode, kind);
 
index 3d11b84..d5c23d1 100644 (file)
@@ -315,7 +315,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
        struct drm_framebuffer *fb;
        struct nouveau_channel *chan;
        struct nouveau_bo *nvbo;
-       struct drm_mode_fb_cmd2 mode_cmd;
+       struct drm_mode_fb_cmd2 mode_cmd = {};
        int ret;
 
        mode_cmd.width = sizes->surface_width;
@@ -590,6 +590,7 @@ fini:
        drm_fb_helper_fini(&fbcon->helper);
 free:
        kfree(fbcon);
+       drm->fbcon = NULL;
        return ret;
 }
 
index dcf0824..dffcac2 100644 (file)
@@ -117,15 +117,6 @@ nvkm_outp_acquire_hda(struct nvkm_outp *outp, enum nvkm_ior_type type,
 {
        struct nvkm_ior *ior;
 
-       /* First preference is to reuse the OR that is currently armed
-        * on HW, if any, in order to prevent unnecessary switching.
-        */
-       list_for_each_entry(ior, &outp->disp->ior, head) {
-               if (!ior->identity && !!ior->func->hda.hpd == hda &&
-                   !ior->asy.outp && ior->arm.outp == outp)
-                       return nvkm_outp_acquire_ior(outp, user, ior);
-       }
-
        /* Failing that, a completely unused OR is the next best thing. */
        list_for_each_entry(ior, &outp->disp->ior, head) {
                if (!ior->identity && !!ior->func->hda.hpd == hda &&
@@ -173,6 +164,27 @@ nvkm_outp_acquire(struct nvkm_outp *outp, u8 user, bool hda)
                return nvkm_outp_acquire_ior(outp, user, ior);
        }
 
+       /* First preference is to reuse the OR that is currently armed
+        * on HW, if any, in order to prevent unnecessary switching.
+        */
+       list_for_each_entry(ior, &outp->disp->ior, head) {
+               if (!ior->identity && !ior->asy.outp && ior->arm.outp == outp) {
+                       /*XXX: For various complicated reasons, we can't outright switch
+                        *     the boot-time OR on the first modeset without some fairly
+                        *     invasive changes.
+                        *
+                        *     The systems that were fixed by modifying the OR selection
+                        *     code to account for HDA support shouldn't regress here as
+                        *     the HDA-enabled ORs match the relevant output's pad macro
+                        *     index, and the firmware seems to select an OR this way.
+                        *
+                        *     This warning is to make it obvious if that proves wrong.
+                        */
+                       WARN_ON(hda && !ior->func->hda.hpd);
+                       return nvkm_outp_acquire_ior(outp, user, ior);
+               }
+       }
+
        /* If we don't need HDA, first try to acquire an OR that doesn't
         * support it to leave free the ones that do.
         */
index 46fe180..2649469 100644 (file)
@@ -615,9 +615,9 @@ static const struct panel_desc boe_tv101wum_nl6_desc = {
 static const struct drm_display_mode auo_kd101n80_45na_default_mode = {
        .clock = 157000,
        .hdisplay = 1200,
-       .hsync_start = 1200 + 80,
-       .hsync_end = 1200 + 80 + 24,
-       .htotal = 1200 + 80 + 24 + 36,
+       .hsync_start = 1200 + 60,
+       .hsync_end = 1200 + 60 + 24,
+       .htotal = 1200 + 60 + 24 + 56,
        .vdisplay = 1920,
        .vsync_start = 1920 + 16,
        .vsync_end = 1920 + 16 + 4,
index 5178f87..4aeb960 100644 (file)
@@ -1250,7 +1250,21 @@ static const struct panel_desc boe_nv133fhm_n61 = {
                .height = 165,
        },
        .delay = {
-               .hpd_absent_delay = 200,
+               /*
+                * When power is first given to the panel there's a short
+                * spike on the HPD line.  It was explained that this spike
+                * was until the TCON data download was complete.  On
+                * one system this was measured at 8 ms.  We'll put 15 ms
+                * in the prepare delay just to be safe and take it away
+                * from the hpd_absent_delay (which would otherwise be 200 ms)
+                * to handle this.  That means:
+                * - If HPD isn't hooked up you still have 200 ms delay.
+                * - If HPD is hooked up we won't try to look at it for the
+                *   first 15 ms.
+                */
+               .prepare = 15,
+               .hpd_absent_delay = 185,
+
                .unprepare = 500,
        },
        .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
index 267eac0..29f5fed 100644 (file)
@@ -41,10 +41,6 @@ MODULE_LICENSE("GPL");
 /* set maximum interval as 1 second */
 #define MAX_INTERVAL                   1000
 
-#define MSR_F15H_CU_PWR_ACCUMULATOR    0xc001007a
-#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR        0xc001007b
-#define MSR_F15H_PTSC                  0xc0010280
-
 #define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
 
 struct fam15h_power_data {
index 735bf31..88639e5 100644 (file)
@@ -866,17 +866,6 @@ config I2C_PNX
          This driver can also be built as a module.  If so, the module
          will be called i2c-pnx.
 
-config I2C_PUV3
-       tristate "PKUnity v3 I2C bus support"
-       depends on UNICORE32 && ARCH_PUV3
-       select I2C_ALGOBIT
-       help
-         This driver supports the I2C IP inside the PKUnity-v3 SoC.
-         This I2C bus controller is under AMBA/AXI bus.
-
-         This driver can also be built as a module.  If so, the module
-         will be called i2c-puv3.
-
 config I2C_PXA
        tristate "Intel PXA2XX I2C adapter"
        depends on ARCH_PXA || ARCH_MMP || ARCH_MVEBU || (X86_32 && PCI && OF) || COMPILE_TEST
index 306d5dc..19aff0e 100644 (file)
@@ -88,7 +88,6 @@ obj-$(CONFIG_I2C_PASEMI)      += i2c-pasemi.o
 obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
 obj-$(CONFIG_I2C_PMCMSP)       += i2c-pmcmsp.o
 obj-$(CONFIG_I2C_PNX)          += i2c-pnx.o
-obj-$(CONFIG_I2C_PUV3)         += i2c-puv3.o
 obj-$(CONFIG_I2C_PXA)          += i2c-pxa.o
 obj-$(CONFIG_I2C_PXA_PCI)      += i2c-pxa-pci.o
 obj-$(CONFIG_I2C_QCOM_CCI)     += i2c-qcom-cci.o
diff --git a/drivers/i2c/busses/i2c-puv3.c b/drivers/i2c/busses/i2c-puv3.c
deleted file mode 100644 (file)
index 5cec5a3..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * I2C driver for PKUnity-v3 SoC
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/i2c.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <mach/hardware.h>
-
-/*
- * Poll the i2c status register until the specified bit is set.
- * Returns 0 if timed out (100 msec).
- */
-static short poll_status(unsigned long bit)
-{
-       int loop_cntr = 1000;
-
-       if (bit & I2C_STATUS_TFNF) {
-               do {
-                       udelay(10);
-               } while (!(readl(I2C_STATUS) & bit) && (--loop_cntr > 0));
-       } else {
-               /* RXRDY handler */
-               do {
-                       if (readl(I2C_TAR) == I2C_TAR_EEPROM)
-                               msleep(20);
-                       else
-                               udelay(10);
-               } while (!(readl(I2C_RXFLR) & 0xf) && (--loop_cntr > 0));
-       }
-
-       return (loop_cntr > 0);
-}
-
-static int xfer_read(struct i2c_adapter *adap, unsigned char *buf, int length)
-{
-       int i2c_reg = *buf;
-
-       /* Read data */
-       while (length--) {
-               if (!poll_status(I2C_STATUS_TFNF)) {
-                       dev_dbg(&adap->dev, "Tx FIFO Not Full timeout\n");
-                       return -ETIMEDOUT;
-               }
-
-               /* send addr */
-               writel(i2c_reg | I2C_DATACMD_WRITE, I2C_DATACMD);
-
-               /* get ready to next write */
-               i2c_reg++;
-
-               /* send read CMD */
-               writel(I2C_DATACMD_READ, I2C_DATACMD);
-
-               /* wait until the Rx FIFO have available */
-               if (!poll_status(I2C_STATUS_RFNE)) {
-                       dev_dbg(&adap->dev, "RXRDY timeout\n");
-                       return -ETIMEDOUT;
-               }
-
-               /* read the data to buf */
-               *buf = (readl(I2C_DATACMD) & I2C_DATACMD_DAT_MASK);
-               buf++;
-       }
-
-       return 0;
-}
-
-static int xfer_write(struct i2c_adapter *adap, unsigned char *buf, int length)
-{
-       int i2c_reg = *buf;
-
-       /* Do nothing but storing the reg_num to a static variable */
-       if (i2c_reg == -1) {
-               printk(KERN_WARNING "Error i2c reg\n");
-               return -ETIMEDOUT;
-       }
-
-       if (length == 1)
-               return 0;
-
-       buf++;
-       length--;
-       while (length--) {
-               /* send addr */
-               writel(i2c_reg | I2C_DATACMD_WRITE, I2C_DATACMD);
-
-               /* send write CMD */
-               writel(*buf | I2C_DATACMD_WRITE, I2C_DATACMD);
-
-               /* wait until the Rx FIFO have available */
-               msleep(20);
-
-               /* read the data to buf */
-               i2c_reg++;
-               buf++;
-       }
-
-       return 0;
-}
-
-/*
- * Generic i2c master transfer entrypoint.
- *
- */
-static int puv3_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *pmsg,
-               int num)
-{
-       int i, ret;
-       unsigned char swap;
-
-       /* Disable i2c */
-       writel(I2C_ENABLE_DISABLE, I2C_ENABLE);
-
-       /* Set the work mode and speed*/
-       writel(I2C_CON_MASTER | I2C_CON_SPEED_STD | I2C_CON_SLAVEDISABLE, I2C_CON);
-
-       writel(pmsg->addr, I2C_TAR);
-
-       /* Enable i2c */
-       writel(I2C_ENABLE_ENABLE, I2C_ENABLE);
-
-       dev_dbg(&adap->dev, "puv3_i2c_xfer: processing %d messages:\n", num);
-
-       for (i = 0; i < num; i++) {
-               dev_dbg(&adap->dev, " #%d: %sing %d byte%s %s 0x%02x\n", i,
-                       pmsg->flags & I2C_M_RD ? "read" : "writ",
-                       pmsg->len, pmsg->len > 1 ? "s" : "",
-                       pmsg->flags & I2C_M_RD ? "from" : "to", pmsg->addr);
-
-               if (pmsg->len && pmsg->buf) {   /* sanity check */
-                       if (pmsg->flags & I2C_M_RD)
-                               ret = xfer_read(adap, pmsg->buf, pmsg->len);
-                       else
-                               ret = xfer_write(adap, pmsg->buf, pmsg->len);
-
-                       if (ret)
-                               return ret;
-
-               }
-               dev_dbg(&adap->dev, "transfer complete\n");
-               pmsg++;         /* next message */
-       }
-
-       /* XXX: fixup be16_to_cpu in bq27x00_battery.c */
-       if (pmsg->addr == I2C_TAR_PWIC) {
-               swap = pmsg->buf[0];
-               pmsg->buf[0] = pmsg->buf[1];
-               pmsg->buf[1] = swap;
-       }
-
-       return i;
-}
-
-/*
- * Return list of supported functionality.
- */
-static u32 puv3_i2c_func(struct i2c_adapter *adapter)
-{
-       return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
-}
-
-static const struct i2c_algorithm puv3_i2c_algorithm = {
-       .master_xfer    = puv3_i2c_xfer,
-       .functionality  = puv3_i2c_func,
-};
-
-/*
- * Main initialization routine.
- */
-static int puv3_i2c_probe(struct platform_device *pdev)
-{
-       struct i2c_adapter *adapter;
-       struct resource *mem;
-       int rc;
-
-       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!mem)
-               return -ENODEV;
-
-       if (!request_mem_region(mem->start, resource_size(mem), "puv3_i2c"))
-               return -EBUSY;
-
-       adapter = kzalloc(sizeof(struct i2c_adapter), GFP_KERNEL);
-       if (adapter == NULL) {
-               dev_err(&pdev->dev, "can't allocate interface!\n");
-               rc = -ENOMEM;
-               goto fail_nomem;
-       }
-       snprintf(adapter->name, sizeof(adapter->name), "PUV3-I2C at 0x%08x",
-                       mem->start);
-       adapter->algo = &puv3_i2c_algorithm;
-       adapter->class = I2C_CLASS_HWMON;
-       adapter->dev.parent = &pdev->dev;
-
-       platform_set_drvdata(pdev, adapter);
-
-       adapter->nr = pdev->id;
-       rc = i2c_add_numbered_adapter(adapter);
-       if (rc)
-               goto fail_add_adapter;
-
-       dev_info(&pdev->dev, "PKUnity v3 i2c bus adapter.\n");
-       return 0;
-
-fail_add_adapter:
-       kfree(adapter);
-fail_nomem:
-       release_mem_region(mem->start, resource_size(mem));
-
-       return rc;
-}
-
-static int puv3_i2c_remove(struct platform_device *pdev)
-{
-       struct i2c_adapter *adapter = platform_get_drvdata(pdev);
-       struct resource *mem;
-
-       i2c_del_adapter(adapter);
-
-       put_device(&pdev->dev);
-
-       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       release_mem_region(mem->start, resource_size(mem));
-
-       return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int puv3_i2c_suspend(struct device *dev)
-{
-       int poll_count;
-       /* Disable the IIC */
-       writel(I2C_ENABLE_DISABLE, I2C_ENABLE);
-       for (poll_count = 0; poll_count < 50; poll_count++) {
-               if (readl(I2C_ENSTATUS) & I2C_ENSTATUS_ENABLE)
-                       udelay(25);
-       }
-
-       return 0;
-}
-
-static SIMPLE_DEV_PM_OPS(puv3_i2c_pm, puv3_i2c_suspend, NULL);
-#define PUV3_I2C_PM    (&puv3_i2c_pm)
-
-#else
-#define PUV3_I2C_PM    NULL
-#endif
-
-static struct platform_driver puv3_i2c_driver = {
-       .probe          = puv3_i2c_probe,
-       .remove         = puv3_i2c_remove,
-       .driver         = {
-               .name   = "PKUnity-v3-I2C",
-               .pm     = PUV3_I2C_PM,
-       }
-};
-
-module_platform_driver(puv3_i2c_driver);
-
-MODULE_DESCRIPTION("PKUnity v3 I2C driver");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:puv3_i2c");
index 5427f04..1589179 100644 (file)
@@ -18,10 +18,8 @@ int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
 {
        int ret;
 
-       if (!client || !slave_cb) {
-               WARN(1, "insufficient data\n");
+       if (WARN(IS_ERR_OR_NULL(client) || !slave_cb, "insufficient data\n"))
                return -EINVAL;
-       }
 
        if (!(client->flags & I2C_CLIENT_SLAVE))
                dev_warn(&client->dev, "%s: client slave flag not set. You might see address collisions\n",
@@ -60,6 +58,9 @@ int i2c_slave_unregister(struct i2c_client *client)
 {
        int ret;
 
+       if (IS_ERR_OR_NULL(client))
+               return -EINVAL;
+
        if (!client->adapter->algo->unreg_slave) {
                dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
                return -EOPNOTSUPP;
index 655795b..513825e 100644 (file)
@@ -72,6 +72,15 @@ static void rdma_dim_init(struct ib_cq *cq)
        INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
 }
 
+static void rdma_dim_destroy(struct ib_cq *cq)
+{
+       if (!cq->dim)
+               return;
+
+       cancel_work_sync(&cq->dim->work);
+       kfree(cq->dim);
+}
+
 static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
 {
        int rc;
@@ -266,6 +275,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
        return cq;
 
 out_destroy_cq:
+       rdma_dim_destroy(cq);
        rdma_restrack_del(&cq->res);
        cq->device->ops.destroy_cq(cq, udata);
 out_free_wc:
@@ -331,12 +341,10 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
                WARN_ON_ONCE(1);
        }
 
+       rdma_dim_destroy(cq);
        trace_cq_free(cq);
        rdma_restrack_del(&cq->res);
        cq->device->ops.destroy_cq(cq, udata);
-       if (cq->dim)
-               cancel_work_sync(&cq->dim->work);
-       kfree(cq->dim);
        kfree(cq->wc);
        kfree(cq);
 }
index 5b87eee..d03daca 100644 (file)
@@ -1084,6 +1084,8 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
        size_t in_size;
        int ret;
 
+       if (in_len < offsetofend(typeof(cmd), reserved))
+               return -EINVAL;
        in_size = min_t(size_t, in_len, sizeof(cmd));
        if (copy_from_user(&cmd, inbuf, in_size))
                return -EFAULT;
@@ -1141,6 +1143,8 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
        size_t in_size;
        int ret;
 
+       if (in_len < offsetofend(typeof(cmd), reserved))
+               return -EINVAL;
        in_size = min_t(size_t, in_len, sizeof(cmd));
        if (copy_from_user(&cmd, inbuf, in_size))
                return -EFAULT;
index 1ab676b..77dca1e 100644 (file)
@@ -1797,9 +1797,7 @@ static bool init_prefetch_work(struct ib_pd *pd,
                work->frags[i].mr =
                        get_prefetchable_mr(pd, advice, sg_list[i].lkey);
                if (!work->frags[i].mr) {
-                       work->num_sge = i - 1;
-                       if (i)
-                               destroy_prefetch_work(work);
+                       work->num_sge = i;
                        return false;
                }
 
@@ -1865,6 +1863,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
        srcu_key = srcu_read_lock(&dev->odp_srcu);
        if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
                srcu_read_unlock(&dev->odp_srcu, srcu_key);
+               destroy_prefetch_work(work);
                return -EINVAL;
        }
        queue_work(system_unbound_wq, &work->work);
index e050ead..1225b8d 100644 (file)
@@ -1766,15 +1766,14 @@ err:
 }
 
 static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
+                                        struct mlx5_ib_qp *qp,
                                         struct ib_qp_init_attr *init_attr,
-                                        struct mlx5_ib_create_qp *ucmd,
                                         void *qpc)
 {
        int scqe_sz;
        bool allow_scat_cqe = false;
 
-       if (ucmd)
-               allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
+       allow_scat_cqe = qp->flags_en & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
 
        if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
                return;
@@ -1853,8 +1852,6 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        u32 *in;
        int err;
 
-       mutex_init(&qp->mutex);
-
        if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 
@@ -1938,7 +1935,6 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        u32 *in;
        int err;
 
-       mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
        spin_lock_init(&qp->rq.lock);
 
@@ -2012,7 +2008,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        }
        if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
            (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC))
-               configure_requester_scat_cqe(dev, init_attr, ucmd, qpc);
+               configure_requester_scat_cqe(dev, qp, init_attr, qpc);
 
        if (qp->rq.wqe_cnt) {
                MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
@@ -2129,7 +2125,6 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        u32 *in;
        int err;
 
-       mutex_init(&qp->mutex);
        spin_lock_init(&qp->sq.lock);
        spin_lock_init(&qp->rq.lock);
 
@@ -2543,13 +2538,18 @@ static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
                return;
        }
 
-       if (flag == MLX5_QP_FLAG_SCATTER_CQE) {
+       switch (flag) {
+       case MLX5_QP_FLAG_SCATTER_CQE:
+       case MLX5_QP_FLAG_ALLOW_SCATTER_CQE:
                /*
-                * We don't return error if this flag was provided,
-                * and mlx5 doesn't have right capability.
-                */
-               *flags &= ~MLX5_QP_FLAG_SCATTER_CQE;
+                        * We don't return error if these flags were provided,
+                        * and mlx5 doesn't have right capability.
+                        */
+               *flags &= ~(MLX5_QP_FLAG_SCATTER_CQE |
+                           MLX5_QP_FLAG_ALLOW_SCATTER_CQE);
                return;
+       default:
+               break;
        }
        mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag);
 }
@@ -2589,6 +2589,8 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
                            MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
+       process_vendor_flag(dev, &flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE,
+                           MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
 
        if (qp->type == IB_QPT_RAW_PACKET) {
                cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) ||
@@ -2963,6 +2965,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
                goto free_ucmd;
        }
 
+       mutex_init(&qp->mutex);
        qp->type = type;
        if (udata) {
                err = process_vendor_flags(dev, qp, params.ucmd, attr);
index 7db35dd..332a8ba 100644 (file)
@@ -901,8 +901,6 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
        qp->s_tail_ack_queue = 0;
        qp->s_acked_ack_queue = 0;
        qp->s_num_rd_atomic = 0;
-       if (qp->r_rq.kwq)
-               qp->r_rq.kwq->count = qp->r_rq.size;
        qp->r_sge.num_sge = 0;
        atomic_set(&qp->s_reserved_used, 0);
 }
@@ -2366,31 +2364,6 @@ bad_lkey:
        return 0;
 }
 
-/**
- * get_count - count numbers of request work queue entries
- * in circular buffer
- * @rq: data structure for request queue entry
- * @tail: tail indices of the circular buffer
- * @head: head indices of the circular buffer
- *
- * Return - total number of entries in the circular buffer
- */
-static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head)
-{
-       u32 count;
-
-       count = head;
-
-       if (count >= rq->size)
-               count = 0;
-       if (count < tail)
-               count += rq->size - tail;
-       else
-               count -= tail;
-
-       return count;
-}
-
 /**
  * get_rvt_head - get head indices of the circular buffer
  * @rq: data structure for request queue entry
@@ -2465,7 +2438,7 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
 
        if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) {
                head = get_rvt_head(rq, ip);
-               kwq->count = get_count(rq, tail, head);
+               kwq->count = rvt_get_rq_count(rq, head, tail);
        }
        if (unlikely(kwq->count == 0)) {
                ret = 0;
@@ -2500,7 +2473,9 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
                 * the number of remaining WQEs.
                 */
                if (kwq->count < srq->limit) {
-                       kwq->count = get_count(rq, tail, get_rvt_head(rq, ip));
+                       kwq->count =
+                               rvt_get_rq_count(rq,
+                                                get_rvt_head(rq, ip), tail);
                        if (kwq->count < srq->limit) {
                                struct ib_event ev;
 
index 977906c..c58735f 100644 (file)
@@ -127,9 +127,7 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp)
                         * not atomic, which is OK, since the fuzziness is
                         * resolved as further ACKs go out.
                         */
-                       credits = head - tail;
-                       if ((int)credits < 0)
-                               credits += qp->r_rq.size;
+                       credits = rvt_get_rq_count(&qp->r_rq, head, tail);
                }
                /*
                 * Binary search the credit table to find the code to
diff --git a/drivers/input/serio/i8042-unicore32io.h b/drivers/input/serio/i8042-unicore32io.h
deleted file mode 100644 (file)
index 50bb3ed..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2011 Guan Xuetao
- */
-#ifndef _I8042_UNICORE32_H
-#define _I8042_UNICORE32_H
-
-#include <mach/hardware.h>
-
-/*
- * Names.
- */
-#define I8042_KBD_PHYS_DESC "isa0060/serio0"
-#define I8042_AUX_PHYS_DESC "isa0060/serio1"
-#define I8042_MUX_PHYS_DESC "isa0060/serio%d"
-
-/*
- * IRQs.
- */
-#define I8042_KBD_IRQ           IRQ_PS2_KBD
-#define I8042_AUX_IRQ           IRQ_PS2_AUX
-
-/*
- * Register numbers.
- */
-#define I8042_COMMAND_REG      PS2_COMMAND
-#define I8042_STATUS_REG       PS2_STATUS
-#define I8042_DATA_REG         PS2_DATA
-
-#define I8042_REGION_START     (resource_size_t)(PS2_DATA)
-#define I8042_REGION_SIZE      (resource_size_t)(16)
-
-static inline int i8042_read_data(void)
-{
-       return readb(I8042_DATA_REG);
-}
-
-static inline int i8042_read_status(void)
-{
-       return readb(I8042_STATUS_REG);
-}
-
-static inline void i8042_write_data(int val)
-{
-       writeb(val, I8042_DATA_REG);
-}
-
-static inline void i8042_write_command(int val)
-{
-       writeb(val, I8042_COMMAND_REG);
-}
-
-static inline int i8042_platform_init(void)
-{
-       if (!request_mem_region(I8042_REGION_START, I8042_REGION_SIZE, "i8042"))
-               return -EBUSY;
-
-       i8042_reset = I8042_RESET_ALWAYS;
-       return 0;
-}
-
-static inline void i8042_platform_exit(void)
-{
-       release_mem_region(I8042_REGION_START, I8042_REGION_SIZE);
-}
-
-#endif /* _I8042_UNICORE32_H */
index eb37670..5538178 100644 (file)
@@ -21,8 +21,6 @@
 #include "i8042-sparcio.h"
 #elif defined(CONFIG_X86) || defined(CONFIG_IA64)
 #include "i8042-x86ia64io.h"
-#elif defined(CONFIG_UNICORE32)
-#include "i8042-unicore32io.h"
 #else
 #include "i8042-io.h"
 #endif
index 9564d23..aa096b3 100644 (file)
@@ -628,13 +628,21 @@ out_free_table:
 
 static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
 {
+       struct fwnode_handle *fn;
+
        if (iommu && iommu->ir_table) {
                if (iommu->ir_msi_domain) {
+                       fn = iommu->ir_msi_domain->fwnode;
+
                        irq_domain_remove(iommu->ir_msi_domain);
+                       irq_domain_free_fwnode(fn);
                        iommu->ir_msi_domain = NULL;
                }
                if (iommu->ir_domain) {
+                       fn = iommu->ir_domain->fwnode;
+
                        irq_domain_remove(iommu->ir_domain);
+                       irq_domain_free_fwnode(fn);
                        iommu->ir_domain = NULL;
                }
                free_pages((unsigned long)iommu->ir_table->base,
index 20738aa..e505b91 100644 (file)
@@ -118,46 +118,66 @@ static int of_iommu_xlate(struct device *dev,
        return ret;
 }
 
-struct of_pci_iommu_alias_info {
-       struct device *dev;
-       struct device_node *np;
-};
-
-static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
+static int of_iommu_configure_dev_id(struct device_node *master_np,
+                                    struct device *dev,
+                                    const u32 *id)
 {
-       struct of_pci_iommu_alias_info *info = data;
        struct of_phandle_args iommu_spec = { .args_count = 1 };
        int err;
 
-       err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask",
-                        &iommu_spec.np, iommu_spec.args);
+       err = of_map_id(master_np, *id, "iommu-map",
+                        "iommu-map-mask", &iommu_spec.np,
+                        iommu_spec.args);
        if (err)
                return err == -ENODEV ? NO_IOMMU : err;
 
-       err = of_iommu_xlate(info->dev, &iommu_spec);
+       err = of_iommu_xlate(dev, &iommu_spec);
        of_node_put(iommu_spec.np);
        return err;
 }
 
-static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev,
-                               struct device_node *master_np)
+static int of_iommu_configure_dev(struct device_node *master_np,
+                                 struct device *dev)
 {
-       struct of_phandle_args iommu_spec = { .args_count = 1 };
-       int err;
-
-       err = of_map_rid(master_np, mc_dev->icid, "iommu-map",
-                        "iommu-map-mask", &iommu_spec.np,
-                        iommu_spec.args);
-       if (err)
-               return err == -ENODEV ? NO_IOMMU : err;
+       struct of_phandle_args iommu_spec;
+       int err = NO_IOMMU, idx = 0;
+
+       while (!of_parse_phandle_with_args(master_np, "iommus",
+                                          "#iommu-cells",
+                                          idx, &iommu_spec)) {
+               err = of_iommu_xlate(dev, &iommu_spec);
+               of_node_put(iommu_spec.np);
+               idx++;
+               if (err)
+                       break;
+       }
 
-       err = of_iommu_xlate(&mc_dev->dev, &iommu_spec);
-       of_node_put(iommu_spec.np);
        return err;
 }
 
+struct of_pci_iommu_alias_info {
+       struct device *dev;
+       struct device_node *np;
+};
+
+static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
+{
+       struct of_pci_iommu_alias_info *info = data;
+       u32 input_id = alias;
+
+       return of_iommu_configure_dev_id(info->np, info->dev, &input_id);
+}
+
+static int of_iommu_configure_device(struct device_node *master_np,
+                                    struct device *dev, const u32 *id)
+{
+       return (id) ? of_iommu_configure_dev_id(master_np, dev, id) :
+                     of_iommu_configure_dev(master_np, dev);
+}
+
 const struct iommu_ops *of_iommu_configure(struct device *dev,
-                                          struct device_node *master_np)
+                                          struct device_node *master_np,
+                                          const u32 *id)
 {
        const struct iommu_ops *ops = NULL;
        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@@ -188,21 +208,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
                pci_request_acs();
                err = pci_for_each_dma_alias(to_pci_dev(dev),
                                             of_pci_iommu_init, &info);
-       } else if (dev_is_fsl_mc(dev)) {
-               err = of_fsl_mc_iommu_init(to_fsl_mc_device(dev), master_np);
        } else {
-               struct of_phandle_args iommu_spec;
-               int idx = 0;
-
-               while (!of_parse_phandle_with_args(master_np, "iommus",
-                                                  "#iommu-cells",
-                                                  idx, &iommu_spec)) {
-                       err = of_iommu_xlate(dev, &iommu_spec);
-                       of_node_put(iommu_spec.np);
-                       idx++;
-                       if (err)
-                               break;
-               }
+               err = of_iommu_configure_device(master_np, dev, id);
 
                fwspec = dev_iommu_fwspec_get(dev);
                if (!err && fwspec)
index 606efa6..634263d 100644 (file)
@@ -7,6 +7,8 @@
  *
  */
 
+#include <linux/acpi.h>
+#include <linux/acpi_iort.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/irq.h>
@@ -23,6 +25,19 @@ static struct irq_chip its_msi_irq_chip = {
        .irq_set_affinity = msi_domain_set_affinity
 };
 
+static u32 fsl_mc_msi_domain_get_msi_id(struct irq_domain *domain,
+                                       struct fsl_mc_device *mc_dev)
+{
+       struct device_node *of_node;
+       u32 out_id;
+
+       of_node = irq_domain_get_of_node(domain);
+       out_id = of_node ? of_msi_map_id(&mc_dev->dev, of_node, mc_dev->icid) :
+                       iort_msi_map_id(&mc_dev->dev, mc_dev->icid);
+
+       return out_id;
+}
+
 static int its_fsl_mc_msi_prepare(struct irq_domain *msi_domain,
                                  struct device *dev,
                                  int nvec, msi_alloc_info_t *info)
@@ -43,7 +58,8 @@ static int its_fsl_mc_msi_prepare(struct irq_domain *msi_domain,
         * NOTE: This device id corresponds to the IOMMU stream ID
         * associated with the DPRC object (ICID).
         */
-       info->scratchpad[0].ul = mc_bus_dev->icid;
+       info->scratchpad[0].ul = fsl_mc_msi_domain_get_msi_id(msi_domain,
+                                                             mc_bus_dev);
        msi_info = msi_get_domain_info(msi_domain->parent);
 
        /* Allocate at least 32 MSIs, and always as a power of 2 */
@@ -66,12 +82,71 @@ static const struct of_device_id its_device_id[] = {
        {},
 };
 
-static int __init its_fsl_mc_msi_init(void)
+static void __init its_fsl_mc_msi_init_one(struct fwnode_handle *handle,
+                                         const char *name)
 {
-       struct device_node *np;
        struct irq_domain *parent;
        struct irq_domain *mc_msi_domain;
 
+       parent = irq_find_matching_fwnode(handle, DOMAIN_BUS_NEXUS);
+       if (!parent || !msi_get_domain_info(parent)) {
+               pr_err("%s: unable to locate ITS domain\n", name);
+               return;
+       }
+
+       mc_msi_domain = fsl_mc_msi_create_irq_domain(handle,
+                                               &its_fsl_mc_msi_domain_info,
+                                               parent);
+       if (!mc_msi_domain) {
+               pr_err("%s: unable to create fsl-mc domain\n", name);
+               return;
+       }
+
+       pr_info("fsl-mc MSI: %s domain created\n", name);
+}
+
+#ifdef CONFIG_ACPI
+static int __init
+its_fsl_mc_msi_parse_madt(union acpi_subtable_headers *header,
+                         const unsigned long end)
+{
+       struct acpi_madt_generic_translator *its_entry;
+       struct fwnode_handle *dom_handle;
+       const char *node_name;
+       int err = 0;
+
+       its_entry = (struct acpi_madt_generic_translator *)header;
+       node_name = kasprintf(GFP_KERNEL, "ITS@0x%lx",
+                             (long)its_entry->base_address);
+
+       dom_handle = iort_find_domain_token(its_entry->translation_id);
+       if (!dom_handle) {
+               pr_err("%s: Unable to locate ITS domain handle\n", node_name);
+               err = -ENXIO;
+               goto out;
+       }
+
+       its_fsl_mc_msi_init_one(dom_handle, node_name);
+
+out:
+       kfree(node_name);
+       return err;
+}
+
+
+static void __init its_fsl_mc_acpi_msi_init(void)
+{
+       acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
+                             its_fsl_mc_msi_parse_madt, 0);
+}
+#else
+static inline void its_fsl_mc_acpi_msi_init(void) { }
+#endif
+
+static void __init its_fsl_mc_of_msi_init(void)
+{
+       struct device_node *np;
+
        for (np = of_find_matching_node(NULL, its_device_id); np;
             np = of_find_matching_node(np, its_device_id)) {
                if (!of_device_is_available(np))
@@ -79,23 +154,15 @@ static int __init its_fsl_mc_msi_init(void)
                if (!of_property_read_bool(np, "msi-controller"))
                        continue;
 
-               parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS);
-               if (!parent || !msi_get_domain_info(parent)) {
-                       pr_err("%pOF: unable to locate ITS domain\n", np);
-                       continue;
-               }
-
-               mc_msi_domain = fsl_mc_msi_create_irq_domain(
-                                                of_node_to_fwnode(np),
-                                                &its_fsl_mc_msi_domain_info,
-                                                parent);
-               if (!mc_msi_domain) {
-                       pr_err("%pOF: unable to create fsl-mc domain\n", np);
-                       continue;
-               }
-
-               pr_info("fsl-mc MSI: %pOF domain created\n", np);
+               its_fsl_mc_msi_init_one(of_node_to_fwnode(np),
+                                       np->full_name);
        }
+}
+
+static int __init its_fsl_mc_msi_init(void)
+{
+       its_fsl_mc_of_msi_init();
+       its_fsl_mc_acpi_msi_init();
 
        return 0;
 }
index beac4ca..103d850 100644 (file)
@@ -3523,6 +3523,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
        msi_alloc_info_t *info = args;
        struct its_device *its_dev = info->scratchpad[0].ptr;
        struct its_node *its = its_dev->its;
+       struct irq_data *irqd;
        irq_hw_number_t hwirq;
        int err;
        int i;
@@ -3542,7 +3543,9 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 
                irq_domain_set_hwirq_and_chip(domain, virq + i,
                                              hwirq + i, &its_irq_chip, its_dev);
-               irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i)));
+               irqd = irq_get_irq_data(virq + i);
+               irqd_set_single_target(irqd);
+               irqd_set_affinity_on_activate(irqd);
                pr_debug("ID:%d pID:%d vID:%d\n",
                         (int)(hwirq + i - its_dev->event_map.lpi_base),
                         (int)(hwirq + i), virq + i);
index db38a68..fe78bf0 100644 (file)
@@ -236,10 +236,6 @@ err_dev:
        return tgt_dev;
 }
 
-static const struct block_device_operations nvm_fops = {
-       .owner          = THIS_MODULE,
-};
-
 static struct nvm_tgt_type *__nvm_find_target_type(const char *name)
 {
        struct nvm_tgt_type *tt;
@@ -380,7 +376,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
                goto err_dev;
        }
 
-       tqueue = blk_alloc_queue(tt->make_rq, dev->q->node);
+       tqueue = blk_alloc_queue(dev->q->node);
        if (!tqueue) {
                ret = -ENOMEM;
                goto err_disk;
@@ -390,7 +386,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
        tdisk->flags = GENHD_FL_EXT_DEVT;
        tdisk->major = 0;
        tdisk->first_minor = 0;
-       tdisk->fops = &nvm_fops;
+       tdisk->fops = tt->bops;
        tdisk->queue = tqueue;
 
        targetdata = tt->init(tgt_dev, tdisk, create->flags);
index 6e677ff..b6246f7 100644 (file)
@@ -47,9 +47,9 @@ static struct pblk_global_caches pblk_caches = {
 
 struct bio_set pblk_bio_set;
 
-static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
+static blk_qc_t pblk_submit_bio(struct bio *bio)
 {
-       struct pblk *pblk = q->queuedata;
+       struct pblk *pblk = bio->bi_disk->queue->queuedata;
 
        if (bio_op(bio) == REQ_OP_DISCARD) {
                pblk_discard(pblk, bio);
@@ -63,7 +63,7 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
         * constraint. Writes can be of arbitrary size.
         */
        if (bio_data_dir(bio) == READ) {
-               blk_queue_split(q, &bio);
+               blk_queue_split(&bio);
                pblk_submit_read(pblk, bio);
        } else {
                /* Prevent deadlock in the case of a modest LUN configuration
@@ -71,7 +71,7 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
                 * leaves at least 256KB available for user I/O.
                 */
                if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
-                       blk_queue_split(q, &bio);
+                       blk_queue_split(&bio);
 
                pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
        }
@@ -79,6 +79,12 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
        return BLK_QC_T_NONE;
 }
 
+static const struct block_device_operations pblk_bops = {
+       .owner          = THIS_MODULE,
+       .submit_bio     = pblk_submit_bio,
+};
+
+
 static size_t pblk_trans_map_size(struct pblk *pblk)
 {
        int entry_size = 8;
@@ -1280,7 +1286,7 @@ static struct nvm_tgt_type tt_pblk = {
        .name           = "pblk",
        .version        = {1, 0, 0},
 
-       .make_rq        = pblk_make_rq,
+       .bops           = &pblk_bops,
        .capacity       = pblk_capacity,
 
        .init           = pblk_init,
index 140927e..c28537a 100644 (file)
@@ -320,7 +320,7 @@ split_retry:
                split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
                                        &pblk_bio_set);
                bio_chain(split_bio, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
 
                /* New bio contains first N sectors of the previous one, so
                 * we can continue to use existing rqd, but we need to shrink
index 221e019..3c708e8 100644 (file)
@@ -929,7 +929,7 @@ static inline void closure_bio_submit(struct cache_set *c,
                bio_endio(bio);
                return;
        }
-       generic_make_request(bio);
+       submit_bio_noacct(bio);
 }
 
 /*
index 6548a60..d5c51e3 100644 (file)
@@ -959,7 +959,7 @@ err:
  * bch_btree_node_get - find a btree node in the cache and lock it, reading it
  * in from disk if necessary.
  *
- * If IO is necessary and running under generic_make_request, returns -EAGAIN.
+ * If IO is necessary and running under submit_bio_noacct, returns -EAGAIN.
  *
  * The btree node will have either a read or a write lock held, depending on
  * level and op->lock.
index 7acf024..a190bf4 100644 (file)
@@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
            !blk_queue_discard(bdev_get_queue(dc->bdev)))
                bio->bi_end_io(bio);
        else
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
 }
 
 static void quit_max_writeback_rate(struct cache_set *c,
@@ -1158,7 +1158,7 @@ static void quit_max_writeback_rate(struct cache_set *c,
 
 /* Cached devices - read & write stuff */
 
-blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
+blk_qc_t cached_dev_submit_bio(struct bio *bio)
 {
        struct search *s;
        struct bcache_device *d = bio->bi_disk->private_data;
@@ -1197,7 +1197,7 @@ blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
                if (!bio->bi_iter.bi_size) {
                        /*
                         * can't call bch_journal_meta from under
-                        * generic_make_request
+                        * submit_bio_noacct
                         */
                        continue_at_nobarrier(&s->cl,
                                              cached_dev_nodata,
@@ -1228,36 +1228,8 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
        return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
 }
 
-static int cached_dev_congested(void *data, int bits)
-{
-       struct bcache_device *d = data;
-       struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-       struct request_queue *q = bdev_get_queue(dc->bdev);
-       int ret = 0;
-
-       if (bdi_congested(q->backing_dev_info, bits))
-               return 1;
-
-       if (cached_dev_get(dc)) {
-               unsigned int i;
-               struct cache *ca;
-
-               for_each_cache(ca, d->c, i) {
-                       q = bdev_get_queue(ca->bdev);
-                       ret |= bdi_congested(q->backing_dev_info, bits);
-               }
-
-               cached_dev_put(dc);
-       }
-
-       return ret;
-}
-
 void bch_cached_dev_request_init(struct cached_dev *dc)
 {
-       struct gendisk *g = dc->disk.disk;
-
-       g->queue->backing_dev_info->congested_fn = cached_dev_congested;
        dc->disk.cache_miss                     = cached_dev_cache_miss;
        dc->disk.ioctl                          = cached_dev_ioctl;
 }
@@ -1291,7 +1263,7 @@ static void flash_dev_nodata(struct closure *cl)
        continue_at(cl, search_free, NULL);
 }
 
-blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
+blk_qc_t flash_dev_submit_bio(struct bio *bio)
 {
        struct search *s;
        struct closure *cl;
@@ -1311,8 +1283,7 @@ blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
 
        if (!bio->bi_iter.bi_size) {
                /*
-                * can't call bch_journal_meta from under
-                * generic_make_request
+                * can't call bch_journal_meta from under submit_bio_noacct
                 */
                continue_at_nobarrier(&s->cl,
                                      flash_dev_nodata,
@@ -1342,27 +1313,8 @@ static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
        return -ENOTTY;
 }
 
-static int flash_dev_congested(void *data, int bits)
-{
-       struct bcache_device *d = data;
-       struct request_queue *q;
-       struct cache *ca;
-       unsigned int i;
-       int ret = 0;
-
-       for_each_cache(ca, d->c, i) {
-               q = bdev_get_queue(ca->bdev);
-               ret |= bdi_congested(q->backing_dev_info, bits);
-       }
-
-       return ret;
-}
-
 void bch_flash_dev_request_init(struct bcache_device *d)
 {
-       struct gendisk *g = d->disk;
-
-       g->queue->backing_dev_info->congested_fn = flash_dev_congested;
        d->cache_miss                           = flash_dev_cache_miss;
        d->ioctl                                = flash_dev_ioctl;
 }
index bb005c9..82b3836 100644 (file)
@@ -37,10 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c);
 void bch_data_insert(struct closure *cl);
 
 void bch_cached_dev_request_init(struct cached_dev *dc);
-blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio);
+blk_qc_t cached_dev_submit_bio(struct bio *bio);
 
 void bch_flash_dev_request_init(struct bcache_device *d);
-blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio);
+blk_qc_t flash_dev_submit_bio(struct bio *bio);
 
 extern struct kmem_cache *bch_search_cache;
 
index 2014016..9e45faa 100644 (file)
@@ -680,7 +680,16 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
        return d->ioctl(d, mode, cmd, arg);
 }
 
-static const struct block_device_operations bcache_ops = {
+static const struct block_device_operations bcache_cached_ops = {
+       .submit_bio     = cached_dev_submit_bio,
+       .open           = open_dev,
+       .release        = release_dev,
+       .ioctl          = ioctl_dev,
+       .owner          = THIS_MODULE,
+};
+
+static const struct block_device_operations bcache_flash_ops = {
+       .submit_bio     = flash_dev_submit_bio,
        .open           = open_dev,
        .release        = release_dev,
        .ioctl          = ioctl_dev,
@@ -820,8 +829,8 @@ static void bcache_device_free(struct bcache_device *d)
 }
 
 static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
-                             sector_t sectors, make_request_fn make_request_fn,
-                             struct block_device *cached_bdev)
+               sector_t sectors, struct block_device *cached_bdev,
+               const struct block_device_operations *ops)
 {
        struct request_queue *q;
        const size_t max_stripes = min_t(size_t, INT_MAX,
@@ -868,16 +877,14 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 
        d->disk->major          = bcache_major;
        d->disk->first_minor    = idx_to_first_minor(idx);
-       d->disk->fops           = &bcache_ops;
+       d->disk->fops           = ops;
        d->disk->private_data   = d;
 
-       q = blk_alloc_queue(make_request_fn, NUMA_NO_NODE);
+       q = blk_alloc_queue(NUMA_NO_NODE);
        if (!q)
                return -ENOMEM;
 
        d->disk->queue                  = q;
-       q->queuedata                    = d;
-       q->backing_dev_info->congested_data = d;
        q->limits.max_hw_sectors        = UINT_MAX;
        q->limits.max_sectors           = UINT_MAX;
        q->limits.max_segment_size      = UINT_MAX;
@@ -1356,7 +1363,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
 
        ret = bcache_device_init(&dc->disk, block_size,
                         dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
-                        cached_dev_make_request, dc->bdev);
+                        dc->bdev, &bcache_cached_ops);
        if (ret)
                return ret;
 
@@ -1469,7 +1476,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
        kobject_init(&d->kobj, &bch_flash_dev_ktype);
 
        if (bcache_device_init(d, block_bytes(c), u->sectors,
-                       flash_dev_make_request, NULL))
+                       NULL, &bcache_flash_ops))
                goto err;
 
        bcache_device_attach(d, c, u - c->uuids);
index d3bb355..96c9380 100644 (file)
@@ -421,8 +421,6 @@ struct cache {
 
        struct rw_semaphore quiesce_lock;
 
-       struct dm_target_callbacks callbacks;
-
        /*
         * origin_blocks entries, discarded if set.
         */
@@ -886,7 +884,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
 static void accounted_request(struct cache *cache, struct bio *bio)
 {
        accounted_begin(cache, bio);
-       generic_make_request(bio);
+       submit_bio_noacct(bio);
 }
 
 static void issue_op(struct bio *bio, void *context)
@@ -1792,7 +1790,7 @@ static bool process_bio(struct cache *cache, struct bio *bio)
        bool commit_needed;
 
        if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
 
        return commit_needed;
 }
@@ -1858,7 +1856,7 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio)
 
        if (cache->features.discard_passdown) {
                remap_to_origin(cache, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
        } else
                bio_endio(bio);
 
@@ -2423,20 +2421,6 @@ static void set_cache_size(struct cache *cache, dm_cblock_t size)
        cache->cache_size = size;
 }
 
-static int is_congested(struct dm_dev *dev, int bdi_bits)
-{
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-       return bdi_congested(q->backing_dev_info, bdi_bits);
-}
-
-static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-       struct cache *cache = container_of(cb, struct cache, callbacks);
-
-       return is_congested(cache->origin_dev, bdi_bits) ||
-               is_congested(cache->cache_dev, bdi_bits);
-}
-
 #define DEFAULT_MIGRATION_THRESHOLD 2048
 
 static int cache_create(struct cache_args *ca, struct cache **result)
@@ -2471,9 +2455,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
                        goto bad;
        }
 
-       cache->callbacks.congested_fn = cache_is_congested;
-       dm_table_add_target_callbacks(ti->table, &cache->callbacks);
-
        cache->metadata_dev = ca->metadata_dev;
        cache->origin_dev = ca->origin_dev;
        cache->cache_dev = ca->cache_dev;
index 5ce96dd..bdb255e 100644 (file)
@@ -68,7 +68,6 @@ struct hash_table_bucket;
 
 struct clone {
        struct dm_target *ti;
-       struct dm_target_callbacks callbacks;
 
        struct dm_dev *metadata_dev;
        struct dm_dev *dest_dev;
@@ -330,7 +329,7 @@ static void submit_bios(struct bio_list *bios)
        blk_start_plug(&plug);
 
        while ((bio = bio_list_pop(bios)))
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
 
        blk_finish_plug(&plug);
 }
@@ -346,7 +345,7 @@ static void submit_bios(struct bio_list *bios)
 static void issue_bio(struct clone *clone, struct bio *bio)
 {
        if (!bio_triggers_commit(clone, bio)) {
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                return;
        }
 
@@ -473,7 +472,7 @@ static void complete_discard_bio(struct clone *clone, struct bio *bio, bool succ
                bio_region_range(clone, bio, &rs, &nr_regions);
                trim_bio(bio, region_to_sector(clone, rs),
                         nr_regions << clone->region_shift);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
        } else
                bio_endio(bio);
 }
@@ -865,7 +864,7 @@ static void hydration_overwrite(struct dm_clone_region_hydration *hd, struct bio
        bio->bi_private = hd;
 
        atomic_inc(&hd->clone->hydrations_in_flight);
-       generic_make_request(bio);
+       submit_bio_noacct(bio);
 }
 
 /*
@@ -1281,7 +1280,7 @@ static void process_deferred_flush_bios(struct clone *clone)
                         */
                        bio_endio(bio);
                } else {
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                }
        }
 }
@@ -1518,18 +1517,6 @@ error:
        DMEMIT("Error");
 }
 
-static int clone_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-       struct request_queue *dest_q, *source_q;
-       struct clone *clone = container_of(cb, struct clone, callbacks);
-
-       source_q = bdev_get_queue(clone->source_dev->bdev);
-       dest_q = bdev_get_queue(clone->dest_dev->bdev);
-
-       return (bdi_congested(dest_q->backing_dev_info, bdi_bits) |
-               bdi_congested(source_q->backing_dev_info, bdi_bits));
-}
-
 static sector_t get_dev_size(struct dm_dev *dev)
 {
        return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
@@ -1930,8 +1917,6 @@ static int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto out_with_mempool;
 
        mutex_init(&clone->commit_lock);
-       clone->callbacks.congested_fn = clone_is_congested;
-       dm_table_add_target_callbacks(ti->table, &clone->callbacks);
 
        /* Enable flushes */
        ti->num_flush_bios = 1;
index 000ddfa..ad324ab 100644 (file)
@@ -1789,7 +1789,7 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
                return 1;
        }
 
-       generic_make_request(clone);
+       submit_bio_noacct(clone);
        return 0;
 }
 
@@ -1815,7 +1815,7 @@ static void kcryptd_io_write(struct dm_crypt_io *io)
 {
        struct bio *clone = io->ctx.bio_out;
 
-       generic_make_request(clone);
+       submit_bio_noacct(clone);
 }
 
 #define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
@@ -1893,7 +1893,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
        clone->bi_iter.bi_sector = cc->start + io->sector;
 
        if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
-               generic_make_request(clone);
+               submit_bio_noacct(clone);
                return;
        }
 
index f496213..2628a83 100644 (file)
@@ -72,7 +72,7 @@ static void flush_bios(struct bio *bio)
        while (bio) {
                n = bio->bi_next;
                bio->bi_next = NULL;
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                bio = n;
        }
 }
index bdb84b8..b24e383 100644 (file)
@@ -1137,7 +1137,6 @@ static int metadata_get_stats(struct era_metadata *md, void *ptr)
 
 struct era {
        struct dm_target *ti;
-       struct dm_target_callbacks callbacks;
 
        struct dm_dev *metadata_dev;
        struct dm_dev *origin_dev;
@@ -1265,7 +1264,7 @@ static void process_deferred_bios(struct era *era)
                        bio_io_error(bio);
        else
                while ((bio = bio_list_pop(&marked_bios)))
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
 }
 
 static void process_rpc_calls(struct era *era)
@@ -1375,18 +1374,6 @@ static void stop_worker(struct era *era)
 /*----------------------------------------------------------------
  * Target methods
  *--------------------------------------------------------------*/
-static int dev_is_congested(struct dm_dev *dev, int bdi_bits)
-{
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-       return bdi_congested(q->backing_dev_info, bdi_bits);
-}
-
-static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-       struct era *era = container_of(cb, struct era, callbacks);
-       return dev_is_congested(era->origin_dev, bdi_bits);
-}
-
 static void era_destroy(struct era *era)
 {
        if (era->md)
@@ -1514,8 +1501,6 @@ static int era_ctr(struct dm_target *ti, unsigned argc, char **argv)
        ti->flush_supported = true;
 
        ti->num_discard_bios = 1;
-       era->callbacks.congested_fn = era_is_congested;
-       dm_table_add_target_callbacks(ti->table, &era->callbacks);
 
        return 0;
 }
index a83a1de..5da3eb6 100644 (file)
@@ -2115,12 +2115,12 @@ offload_to_thread:
                dio->in_flight = (atomic_t)ATOMIC_INIT(1);
                dio->completion = NULL;
 
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
 
                return;
        }
 
-       generic_make_request(bio);
+       submit_bio_noacct(bio);
 
        if (need_sync_io) {
                wait_for_completion_io(&read_comp);
index 78cff42..73bb23d 100644 (file)
@@ -677,7 +677,7 @@ static void process_queued_bios(struct work_struct *work)
                        bio_endio(bio);
                        break;
                case DM_MAPIO_REMAPPED:
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                        break;
                case DM_MAPIO_SUBMITTED:
                        break;
index 10e8b2f..d9e2709 100644 (file)
@@ -242,7 +242,6 @@ struct raid_set {
 
        struct mddev md;
        struct raid_type *raid_type;
-       struct dm_target_callbacks callbacks;
 
        sector_t array_sectors;
        sector_t dev_sectors;
@@ -1705,13 +1704,6 @@ static void do_table_event(struct work_struct *ws)
        dm_table_event(rs->ti->table);
 }
 
-static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
-{
-       struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
-
-       return mddev_congested(&rs->md, bits);
-}
-
 /*
  * Make sure a valid takover (level switch) is being requested on @rs
  *
@@ -3248,9 +3240,6 @@ size_check:
                goto bad_md_start;
        }
 
-       rs->callbacks.congested_fn = raid_is_congested;
-       dm_table_add_target_callbacks(ti->table, &rs->callbacks);
-
        /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
        if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
                r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
@@ -3310,7 +3299,6 @@ static void raid_dtr(struct dm_target *ti)
 {
        struct raid_set *rs = ti->private;
 
-       list_del_init(&rs->callbacks.list);
        md_stop(&rs->md);
        raid_set_free(rs);
 }
index 2f655d9..fa09bc4 100644 (file)
@@ -779,7 +779,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
                        wakeup_mirrord(ms);
                } else {
                        map_bio(get_default_mirror(ms), bio);
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                }
        }
 }
index 85e0daa..7ce387a 100644 (file)
@@ -284,7 +284,8 @@ static void dm_complete_request(struct request *rq, blk_status_t error)
        struct dm_rq_target_io *tio = tio_from_request(rq);
 
        tio->error = error;
-       blk_mq_complete_request(rq);
+       if (likely(!blk_should_fake_timeout(rq->q)))
+               blk_mq_complete_request(rq);
 }
 
 /*
index 963d377..2d1d4a4 100644 (file)
@@ -252,7 +252,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
 
        /*
         * Issue the synchronous I/O from a different thread
-        * to avoid generic_make_request recursion.
+        * to avoid submit_bio_noacct recursion.
         */
        INIT_WORK_ONSTACK(&req.work, do_metadata);
        queue_work(ps->metadata_wq, &req.work);
index 6b11a26..4668b2c 100644 (file)
@@ -1568,7 +1568,7 @@ static void flush_bios(struct bio *bio)
        while (bio) {
                n = bio->bi_next;
                bio->bi_next = NULL;
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                bio = n;
        }
 }
@@ -1588,7 +1588,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
                bio->bi_next = NULL;
                r = do_origin(s->origin, bio, false);
                if (r == DM_MAPIO_REMAPPED)
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                bio = n;
        }
 }
@@ -1829,7 +1829,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
        bio->bi_end_io = full_bio_end_io;
        bio->bi_private = callback_data;
 
-       generic_make_request(bio);
+       submit_bio_noacct(bio);
 }
 
 static struct dm_snap_pending_exception *
index 8277b95..0ea5b73 100644 (file)
@@ -64,8 +64,6 @@ struct dm_table {
        void *event_context;
 
        struct dm_md_mempools *mempools;
-
-       struct list_head target_callbacks;
 };
 
 /*
@@ -190,7 +188,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
                return -ENOMEM;
 
        INIT_LIST_HEAD(&t->devices);
-       INIT_LIST_HEAD(&t->target_callbacks);
 
        if (!num_targets)
                num_targets = KEYS_PER_NODE;
@@ -361,7 +358,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
  * This upgrades the mode on an already open dm_dev, being
  * careful to leave things as they were if we fail to reopen the
  * device and not to touch the existing bdev field in case
- * it is accessed concurrently inside dm_table_any_congested().
+ * it is accessed concurrently.
  */
 static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
                        struct mapped_device *md)
@@ -2052,38 +2049,6 @@ int dm_table_resume_targets(struct dm_table *t)
        return 0;
 }
 
-void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb)
-{
-       list_add(&cb->list, &t->target_callbacks);
-}
-EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks);
-
-int dm_table_any_congested(struct dm_table *t, int bdi_bits)
-{
-       struct dm_dev_internal *dd;
-       struct list_head *devices = dm_table_get_devices(t);
-       struct dm_target_callbacks *cb;
-       int r = 0;
-
-       list_for_each_entry(dd, devices, list) {
-               struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
-               char b[BDEVNAME_SIZE];
-
-               if (likely(q))
-                       r |= bdi_congested(q->backing_dev_info, bdi_bits);
-               else
-                       DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
-                                    dm_device_name(t->md),
-                                    bdevname(dd->dm_dev->bdev, b));
-       }
-
-       list_for_each_entry(cb, &t->target_callbacks, list)
-               if (cb->congested_fn)
-                       r |= cb->congested_fn(cb, bdi_bits);
-
-       return r;
-}
-
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
        return t->md;
index fa8d546..fff4c50 100644 (file)
@@ -326,7 +326,6 @@ struct pool_c {
        struct pool *pool;
        struct dm_dev *data_dev;
        struct dm_dev *metadata_dev;
-       struct dm_target_callbacks callbacks;
 
        dm_block_t low_water_blocks;
        struct pool_features requested_pf; /* Features requested during table load */
@@ -758,7 +757,7 @@ static void issue(struct thin_c *tc, struct bio *bio)
        struct pool *pool = tc->pool;
 
        if (!bio_triggers_commit(tc, bio)) {
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                return;
        }
 
@@ -2394,7 +2393,7 @@ static void process_deferred_bios(struct pool *pool)
                if (bio->bi_opf & REQ_PREFLUSH)
                        bio_endio(bio);
                else
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
        }
 }
 
@@ -2796,18 +2795,6 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
        }
 }
 
-static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-       struct pool_c *pt = container_of(cb, struct pool_c, callbacks);
-       struct request_queue *q;
-
-       if (get_pool_mode(pt->pool) == PM_OUT_OF_DATA_SPACE)
-               return 1;
-
-       q = bdev_get_queue(pt->data_dev->bdev);
-       return bdi_congested(q->backing_dev_info, bdi_bits);
-}
-
 static void requeue_bios(struct pool *pool)
 {
        struct thin_c *tc;
@@ -3420,9 +3407,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
        dm_pool_register_pre_commit_callback(pool->pmd,
                                             metadata_pre_commit_callback, pool);
 
-       pt->callbacks.congested_fn = pool_is_congested;
-       dm_table_add_target_callbacks(ti->table, &pt->callbacks);
-
        mutex_unlock(&dm_thin_pool_table.mutex);
 
        return 0;
index eec9f25..75fa4d9 100644 (file)
@@ -681,7 +681,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 
        verity_submit_prefetch(v, io);
 
-       generic_make_request(bio);
+       submit_bio_noacct(bio);
 
        return DM_MAPIO_SUBMITTED;
 }
index 5358894..8aa306e 100644 (file)
@@ -1244,7 +1244,7 @@ static int writecache_flush_thread(void *data)
                                           bio_end_sector(bio));
                        wc_unlock(wc);
                        bio_set_dev(bio, wc->dev->bdev);
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                } else {
                        writecache_flush(wc);
                        wc_unlock(wc);
index 42aa513..697f9de 100644 (file)
@@ -140,7 +140,7 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
        bio_advance(bio, clone->bi_iter.bi_size);
 
        refcount_inc(&bioctx->ref);
-       generic_make_request(clone);
+       submit_bio_noacct(clone);
 
        if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
                zone->wp_block += nr_blocks;
index 5b9de2f..87cf45f 100644 (file)
@@ -1273,7 +1273,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
        sector_t sector;
        struct bio *clone = &tio->clone;
        struct dm_io *io = tio->io;
-       struct mapped_device *md = io->md;
        struct dm_target *ti = tio->ti;
        blk_qc_t ret = BLK_QC_T_NONE;
 
@@ -1295,10 +1294,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
                /* the bio has been remapped so dispatch it */
                trace_block_bio_remap(clone->bi_disk->queue, clone,
                                      bio_dev(io->orig_bio), sector);
-               if (md->type == DM_TYPE_NVME_BIO_BASED)
-                       ret = direct_make_request(clone);
-               else
-                       ret = generic_make_request(clone);
+               ret = submit_bio_noacct(clone);
                break;
        case DM_MAPIO_KILL:
                free_tio(tio);
@@ -1645,7 +1641,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
                        error = __split_and_process_non_flush(&ci);
                        if (current->bio_list && ci.sector_count && !error) {
                                /*
-                                * Remainder must be passed to generic_make_request()
+                                * Remainder must be passed to submit_bio_noacct()
                                 * so that it gets handled *after* bios already submitted
                                 * have been completely processed.
                                 * We take a clone of the original to store in
@@ -1670,7 +1666,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 
                                bio_chain(b, bio);
                                trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
-                               ret = generic_make_request(bio);
+                               ret = submit_bio_noacct(bio);
                                break;
                        }
                }
@@ -1738,7 +1734,7 @@ static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struc
 
                bio_chain(split, *bio);
                trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
-               generic_make_request(*bio);
+               submit_bio_noacct(*bio);
                *bio = split;
        }
 }
@@ -1763,13 +1759,13 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
        }
 
        /*
-        * If in ->make_request_fn we need to use blk_queue_split(), otherwise
+        * If in ->submit_bio we need to use blk_queue_split(), otherwise
         * queue_limits for abnormal requests (e.g. discard, writesame, etc)
         * won't be imposed.
         */
        if (current->bio_list) {
                if (is_abnormal_io(bio))
-                       blk_queue_split(md->queue, &bio);
+                       blk_queue_split(&bio);
                else
                        dm_queue_split(md, ti, &bio);
        }
@@ -1780,9 +1776,9 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
                return __split_and_process_bio(md, map, bio);
 }
 
-static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t dm_submit_bio(struct bio *bio)
 {
-       struct mapped_device *md = q->queuedata;
+       struct mapped_device *md = bio->bi_disk->private_data;
        blk_qc_t ret = BLK_QC_T_NONE;
        int srcu_idx;
        struct dm_table *map;
@@ -1791,12 +1787,12 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
                /*
                 * We are called with a live reference on q_usage_counter, but
                 * that one will be released as soon as we return.  Grab an
-                * extra one as blk_mq_make_request expects to be able to
-                * consume a reference (which lives until the request is freed
-                * in case a request is allocated).
+                * extra one as blk_mq_submit_bio expects to be able to consume
+                * a reference (which lives until the request is freed in case a
+                * request is allocated).
                 */
-               percpu_ref_get(&q->q_usage_counter);
-               return blk_mq_make_request(q, bio);
+               percpu_ref_get(&bio->bi_disk->queue->q_usage_counter);
+               return blk_mq_submit_bio(bio);
        }
 
        map = dm_get_live_table(md, &srcu_idx);
@@ -1818,31 +1814,6 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
        return ret;
 }
 
-static int dm_any_congested(void *congested_data, int bdi_bits)
-{
-       int r = bdi_bits;
-       struct mapped_device *md = congested_data;
-       struct dm_table *map;
-
-       if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-               if (dm_request_based(md)) {
-                       /*
-                        * With request-based DM we only need to check the
-                        * top-level queue for congestion.
-                        */
-                       struct backing_dev_info *bdi = md->queue->backing_dev_info;
-                       r = bdi->wb.congested->state & bdi_bits;
-               } else {
-                       map = dm_get_live_table_fast(md);
-                       if (map)
-                               r = dm_table_any_congested(map, bdi_bits);
-                       dm_put_live_table_fast(md);
-               }
-       }
-
-       return r;
-}
-
 /*-----------------------------------------------------------------
  * An IDR is used to keep track of allocated minor numbers.
  *---------------------------------------------------------------*/
@@ -1981,14 +1952,13 @@ static struct mapped_device *alloc_dev(int minor)
        spin_lock_init(&md->uevent_lock);
 
        /*
-        * default to bio-based required ->make_request_fn until DM
-        * table is loaded and md->type established. If request-based
-        * table is loaded: blk-mq will override accordingly.
+        * default to bio-based until DM table is loaded and md->type
+        * established. If request-based table is loaded: blk-mq will
+        * override accordingly.
         */
-       md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
+       md->queue = blk_alloc_queue(numa_node_id);
        if (!md->queue)
                goto bad;
-       md->queue->queuedata = md;
 
        md->disk = alloc_disk_node(1, md->numa_node_id);
        if (!md->disk)
@@ -2282,12 +2252,6 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_get_queue_limits);
 
-static void dm_init_congested_fn(struct mapped_device *md)
-{
-       md->queue->backing_dev_info->congested_data = md;
-       md->queue->backing_dev_info->congested_fn = dm_any_congested;
-}
-
 /*
  * Setup the DM device's queue based on md's type
  */
@@ -2304,12 +2268,10 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
                        DMERR("Cannot initialize queue for request-based dm-mq mapped device");
                        return r;
                }
-               dm_init_congested_fn(md);
                break;
        case DM_TYPE_BIO_BASED:
        case DM_TYPE_DAX_BIO_BASED:
        case DM_TYPE_NVME_BIO_BASED:
-               dm_init_congested_fn(md);
                break;
        case DM_TYPE_NONE:
                WARN_ON_ONCE(true);
@@ -2531,7 +2493,7 @@ static void dm_wq_work(struct work_struct *work)
                        break;
 
                if (dm_request_based(md))
-                       (void) generic_make_request(c);
+                       (void) submit_bio_noacct(c);
                else
                        (void) dm_process_bio(md, map, c);
        }
@@ -3286,6 +3248,7 @@ static const struct pr_ops dm_pr_ops = {
 };
 
 static const struct block_device_operations dm_blk_dops = {
+       .submit_bio = dm_submit_bio,
        .open = dm_blk_open,
        .release = dm_blk_close,
        .ioctl = dm_blk_ioctl,
index d7c4f66..4f5fe66 100644 (file)
@@ -63,7 +63,6 @@ void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_presuspend_undo_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
-int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 enum dm_queue_mode dm_table_get_type(struct dm_table *t);
 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
index 50ad4ba..fda4cb3 100644 (file)
@@ -169,7 +169,7 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
        if (bio_data_dir(bio) == WRITE) {
                /* write request */
                if (atomic_read(&conf->counters[WriteAll])) {
-                       /* special case - don't decrement, don't generic_make_request,
+                       /* special case - don't decrement, don't submit_bio_noacct,
                         * just fail immediately
                         */
                        bio_io_error(bio);
@@ -214,7 +214,7 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
        } else
                bio_set_dev(bio, conf->rdev->bdev);
 
-       generic_make_request(bio);
+       submit_bio_noacct(bio);
        return true;
 }
 
index 26c75c0..c2ae912 100644 (file)
@@ -46,29 +46,6 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
        return conf->disks + lo;
 }
 
-/*
- * In linear_congested() conf->raid_disks is used as a copy of
- * mddev->raid_disks to iterate conf->disks[], because conf->raid_disks
- * and conf->disks[] are created in linear_conf(), they are always
- * consitent with each other, but mddev->raid_disks does not.
- */
-static int linear_congested(struct mddev *mddev, int bits)
-{
-       struct linear_conf *conf;
-       int i, ret = 0;
-
-       rcu_read_lock();
-       conf = rcu_dereference(mddev->private);
-
-       for (i = 0; i < conf->raid_disks && !ret ; i++) {
-               struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
-               ret |= bdi_congested(q->backing_dev_info, bits);
-       }
-
-       rcu_read_unlock();
-       return ret;
-}
-
 static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 {
        struct linear_conf *conf;
@@ -267,7 +244,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
                struct bio *split = bio_split(bio, end_sector - bio_sector,
                                              GFP_NOIO, &mddev->bio_set);
                bio_chain(split, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                bio = split;
        }
 
@@ -286,7 +263,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
                                              bio_sector);
                mddev_check_writesame(mddev, bio);
                mddev_check_write_zeroes(mddev, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
        }
        return true;
 
@@ -322,7 +299,6 @@ static struct md_personality linear_personality =
        .hot_add_disk   = linear_add,
        .size           = linear_size,
        .quiesce        = linear_quiesce,
-       .congested      = linear_congested,
 };
 
 static int __init linear_init (void)
index 152f9e6..776bbe5 100644 (file)
@@ -131,7 +131,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
        mp_bh->bio.bi_private = mp_bh;
        mddev_check_writesame(mddev, &mp_bh->bio);
        mddev_check_write_zeroes(mddev, &mp_bh->bio);
-       generic_make_request(&mp_bh->bio);
+       submit_bio_noacct(&mp_bh->bio);
        return true;
 }
 
@@ -151,28 +151,6 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
        seq_putc(seq, ']');
 }
 
-static int multipath_congested(struct mddev *mddev, int bits)
-{
-       struct mpconf *conf = mddev->private;
-       int i, ret = 0;
-
-       rcu_read_lock();
-       for (i = 0; i < mddev->raid_disks ; i++) {
-               struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
-               if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                       struct request_queue *q = bdev_get_queue(rdev->bdev);
-
-                       ret |= bdi_congested(q->backing_dev_info, bits);
-                       /* Just like multipath_map, we just check the
-                        * first available device
-                        */
-                       break;
-               }
-       }
-       rcu_read_unlock();
-       return ret;
-}
-
 /*
  * Careful, this can execute in IRQ contexts as well!
  */
@@ -348,7 +326,7 @@ static void multipathd(struct md_thread *thread)
                        bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
                        bio->bi_end_io = multipath_end_request;
                        bio->bi_private = mp_bh;
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                }
        }
        spin_unlock_irqrestore(&conf->device_lock, flags);
@@ -478,7 +456,6 @@ static struct md_personality multipath_personality =
        .hot_add_disk   = multipath_add_disk,
        .hot_remove_disk= multipath_remove_disk,
        .size           = multipath_size,
-       .congested      = multipath_congested,
 };
 
 static int __init multipath_init (void)
index f567f53..96b28f6 100644 (file)
@@ -199,7 +199,7 @@ static int rdevs_init_serial(struct mddev *mddev)
 static int rdev_need_serial(struct md_rdev *rdev)
 {
        return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
-               rdev->bdev->bd_queue->nr_hw_queues != 1 &&
+               rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
                test_bit(WriteMostly, &rdev->flags));
 }
 
@@ -463,7 +463,7 @@ check_suspended:
 }
 EXPORT_SYMBOL(md_handle_request);
 
-static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t md_submit_bio(struct bio *bio)
 {
        const int rw = bio_data_dir(bio);
        const int sgrp = op_stat_group(bio_op(bio));
@@ -475,7 +475,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
                return BLK_QC_T_NONE;
        }
 
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
        if (mddev == NULL || mddev->pers == NULL) {
                bio_io_error(bio);
@@ -549,26 +549,6 @@ void mddev_resume(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(mddev_resume);
 
-int mddev_congested(struct mddev *mddev, int bits)
-{
-       struct md_personality *pers = mddev->pers;
-       int ret = 0;
-
-       rcu_read_lock();
-       if (mddev->suspended)
-               ret = 1;
-       else if (pers && pers->congested)
-               ret = pers->congested(mddev, bits);
-       rcu_read_unlock();
-       return ret;
-}
-EXPORT_SYMBOL_GPL(mddev_congested);
-static int md_congested(void *data, int bits)
-{
-       struct mddev *mddev = data;
-       return mddev_congested(mddev, bits);
-}
-
 /*
  * Generic flush handling for md
  */
@@ -5641,7 +5621,7 @@ static int md_alloc(dev_t dev, char *name)
                mddev->hold_active = UNTIL_STOP;
 
        error = -ENOMEM;
-       mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
+       mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!mddev->queue)
                goto abort;
 
@@ -5670,6 +5650,7 @@ static int md_alloc(dev_t dev, char *name)
         * remove it now.
         */
        disk->flags |= GENHD_FL_EXT_DEVT;
+       disk->events |= DISK_EVENT_MEDIA_CHANGE;
        mddev->gendisk = disk;
        /* As soon as we call add_disk(), another thread could get
         * through to md_open, so make sure it doesn't get too far
@@ -5964,8 +5945,6 @@ int md_run(struct mddev *mddev)
                        blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
                else
                        blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
-               mddev->queue->backing_dev_info->congested_data = mddev;
-               mddev->queue->backing_dev_info->congested_fn = md_congested;
        }
        if (pers->sync_request) {
                if (mddev->kobj.sd &&
@@ -6350,7 +6329,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
 
                __md_stop_writes(mddev);
                __md_stop(mddev);
-               mddev->queue->backing_dev_info->congested_fn = NULL;
 
                /* tell userspace to handle 'inactive' */
                sysfs_notify_dirent_safe(mddev->sysfs_state);
@@ -7806,23 +7784,21 @@ static void md_release(struct gendisk *disk, fmode_t mode)
        mddev_put(mddev);
 }
 
-static int md_media_changed(struct gendisk *disk)
-{
-       struct mddev *mddev = disk->private_data;
-
-       return mddev->changed;
-}
-
-static int md_revalidate(struct gendisk *disk)
+static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
 {
        struct mddev *mddev = disk->private_data;
+       unsigned int ret = 0;
 
+       if (mddev->changed)
+               ret = DISK_EVENT_MEDIA_CHANGE;
        mddev->changed = 0;
-       return 0;
+       return ret;
 }
+
 static const struct block_device_operations md_fops =
 {
        .owner          = THIS_MODULE,
+       .submit_bio     = md_submit_bio,
        .open           = md_open,
        .release        = md_release,
        .ioctl          = md_ioctl,
@@ -7830,8 +7806,7 @@ static const struct block_device_operations md_fops =
        .compat_ioctl   = md_compat_ioctl,
 #endif
        .getgeo         = md_getgeo,
-       .media_changed  = md_media_changed,
-       .revalidate_disk= md_revalidate,
+       .check_events   = md_check_events,
 };
 
 static int md_thread(void *arg)
index 612814d..e2f1ad9 100644 (file)
@@ -597,9 +597,6 @@ struct md_personality
         * array.
         */
        void *(*takeover) (struct mddev *mddev);
-       /* congested implements bdi.congested_fn().
-        * Will not be called while array is 'suspended' */
-       int (*congested)(struct mddev *mddev, int bits);
        /* Changes the consistency policy of an active array. */
        int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
 };
@@ -710,7 +707,6 @@ extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
 extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
 
-extern int mddev_congested(struct mddev *mddev, int bits);
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
                           sector_t sector, int size, struct page *page);
index 322386f..f54a449 100644 (file)
@@ -29,21 +29,6 @@ module_param(default_layout, int, 0644);
         (1L << MD_HAS_PPL) |           \
         (1L << MD_HAS_MULTIPLE_PPLS))
 
-static int raid0_congested(struct mddev *mddev, int bits)
-{
-       struct r0conf *conf = mddev->private;
-       struct md_rdev **devlist = conf->devlist;
-       int raid_disks = conf->strip_zone[0].nb_dev;
-       int i, ret = 0;
-
-       for (i = 0; i < raid_disks && !ret ; i++) {
-               struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
-
-               ret |= bdi_congested(q->backing_dev_info, bits);
-       }
-       return ret;
-}
-
 /*
  * inform the user of the raid configuration
 */
@@ -495,7 +480,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
                        zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
                        &mddev->bio_set);
                bio_chain(split, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                bio = split;
                end = zone->zone_end;
        } else
@@ -559,7 +544,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
                        trace_block_bio_remap(bdev_get_queue(rdev->bdev),
                                discard_bio, disk_devt(mddev->gendisk),
                                bio->bi_iter.bi_sector);
-               generic_make_request(discard_bio);
+               submit_bio_noacct(discard_bio);
        }
        bio_endio(bio);
 }
@@ -600,7 +585,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
                struct bio *split = bio_split(bio, sectors, GFP_NOIO,
                                              &mddev->bio_set);
                bio_chain(split, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                bio = split;
        }
 
@@ -633,7 +618,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
                                disk_devt(mddev->gendisk), bio_sector);
        mddev_check_writesame(mddev, bio);
        mddev_check_write_zeroes(mddev, bio);
-       generic_make_request(bio);
+       submit_bio_noacct(bio);
        return true;
 }
 
@@ -818,7 +803,6 @@ static struct md_personality raid0_personality=
        .size           = raid0_size,
        .takeover       = raid0_takeover,
        .quiesce        = raid0_quiesce,
-       .congested      = raid0_congested,
 };
 
 static int __init raid0_init (void)
index dcd27f3..960d854 100644 (file)
@@ -786,36 +786,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
        return best_disk;
 }
 
-static int raid1_congested(struct mddev *mddev, int bits)
-{
-       struct r1conf *conf = mddev->private;
-       int i, ret = 0;
-
-       if ((bits & (1 << WB_async_congested)) &&
-           conf->pending_count >= max_queued_requests)
-               return 1;
-
-       rcu_read_lock();
-       for (i = 0; i < conf->raid_disks * 2; i++) {
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-               if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                       struct request_queue *q = bdev_get_queue(rdev->bdev);
-
-                       BUG_ON(!q);
-
-                       /* Note the '|| 1' - when read_balance prefers
-                        * non-congested targets, it can be removed
-                        */
-                       if ((bits & (1 << WB_async_congested)) || 1)
-                               ret |= bdi_congested(q->backing_dev_info, bits);
-                       else
-                               ret &= bdi_congested(q->backing_dev_info, bits);
-               }
-       }
-       rcu_read_unlock();
-       return ret;
-}
-
 static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 {
        /* flush any pending bitmap writes to disk before proceeding w/ I/O */
@@ -834,7 +804,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
                        /* Just ignore it */
                        bio_endio(bio);
                else
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                bio = next;
                cond_resched();
        }
@@ -1312,7 +1282,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
                struct bio *split = bio_split(bio, max_sectors,
                                              gfp, &conf->bio_split);
                bio_chain(split, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                bio = split;
                r1_bio->master_bio = bio;
                r1_bio->sectors = max_sectors;
@@ -1338,7 +1308,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
                trace_block_bio_remap(read_bio->bi_disk->queue, read_bio,
                                disk_devt(mddev->gendisk), r1_bio->sector);
 
-       generic_make_request(read_bio);
+       submit_bio_noacct(read_bio);
 }
 
 static void raid1_write_request(struct mddev *mddev, struct bio *bio,
@@ -1483,7 +1453,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                struct bio *split = bio_split(bio, max_sectors,
                                              GFP_NOIO, &conf->bio_split);
                bio_chain(split, bio);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                bio = split;
                r1_bio->master_bio = bio;
                r1_bio->sectors = max_sectors;
@@ -2240,7 +2210,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
                atomic_inc(&r1_bio->remaining);
                md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
 
-               generic_make_request(wbio);
+               submit_bio_noacct(wbio);
        }
 
        put_sync_write_buf(r1_bio, 1);
@@ -2926,7 +2896,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                                md_sync_acct_bio(bio, nr_sectors);
                                if (read_targets == 1)
                                        bio->bi_opf &= ~MD_FAILFAST;
-                               generic_make_request(bio);
+                               submit_bio_noacct(bio);
                        }
                }
        } else {
@@ -2935,7 +2905,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                md_sync_acct_bio(bio, nr_sectors);
                if (read_targets == 1)
                        bio->bi_opf &= ~MD_FAILFAST;
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
        }
        return nr_sectors;
 }
@@ -3396,7 +3366,6 @@ static struct md_personality raid1_personality =
        .check_reshape  = raid1_reshape,
        .quiesce        = raid1_quiesce,
        .takeover       = raid1_takeover,
-       .congested      = raid1_congested,
 };
 
 static int __init raid_init(void)
index ec136e4..353288b 100644 (file)
@@ -848,31 +848,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
        return rdev;
 }
 
-static int raid10_congested(struct mddev *mddev, int bits)
-{
-       struct r10conf *conf = mddev->private;
-       int i, ret = 0;
-
-       if ((bits & (1 << WB_async_congested)) &&
-           conf->pending_count >= max_queued_requests)
-               return 1;
-
-       rcu_read_lock();
-       for (i = 0;
-            (i < conf->geo.raid_disks || i < conf->prev.raid_disks)
-                    && ret == 0;
-            i++) {
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-               if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                       struct request_queue *q = bdev_get_queue(rdev->bdev);
-
-                       ret |= bdi_congested(q->backing_dev_info, bits);
-               }
-       }
-       rcu_read_unlock();
-       return ret;
-}
-
 static void flush_pending_writes(struct r10conf *conf)
 {
        /* Any writes that have been queued but are awaiting
@@ -917,7 +892,7 @@ static void flush_pending_writes(struct r10conf *conf)
                                /* Just ignore it */
                                bio_endio(bio);
                        else
-                               generic_make_request(bio);
+                               submit_bio_noacct(bio);
                        bio = next;
                }
                blk_finish_plug(&plug);
@@ -1102,7 +1077,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
                        /* Just ignore it */
                        bio_endio(bio);
                else
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                bio = next;
        }
        kfree(plug);
@@ -1194,7 +1169,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
                                              gfp, &conf->bio_split);
                bio_chain(split, bio);
                allow_barrier(conf);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                wait_barrier(conf);
                bio = split;
                r10_bio->master_bio = bio;
@@ -1221,7 +1196,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
                trace_block_bio_remap(read_bio->bi_disk->queue,
                                      read_bio, disk_devt(mddev->gendisk),
                                      r10_bio->sector);
-       generic_make_request(read_bio);
+       submit_bio_noacct(read_bio);
        return;
 }
 
@@ -1479,7 +1454,7 @@ retry_write:
                                              GFP_NOIO, &conf->bio_split);
                bio_chain(split, bio);
                allow_barrier(conf);
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
                wait_barrier(conf);
                bio = split;
                r10_bio->master_bio = bio;
@@ -2099,7 +2074,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                        tbio->bi_opf |= MD_FAILFAST;
                tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
                bio_set_dev(tbio, conf->mirrors[d].rdev->bdev);
-               generic_make_request(tbio);
+               submit_bio_noacct(tbio);
        }
 
        /* Now write out to any replacement devices
@@ -2118,7 +2093,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                atomic_inc(&r10_bio->remaining);
                md_sync_acct(conf->mirrors[d].replacement->bdev,
                             bio_sectors(tbio));
-               generic_make_request(tbio);
+               submit_bio_noacct(tbio);
        }
 
 done:
@@ -2241,7 +2216,7 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
        wbio = r10_bio->devs[1].bio;
        wbio2 = r10_bio->devs[1].repl_bio;
        /* Need to test wbio2->bi_end_io before we call
-        * generic_make_request as if the former is NULL,
+        * submit_bio_noacct as if the former is NULL,
         * the latter is free to free wbio2.
         */
        if (wbio2 && !wbio2->bi_end_io)
@@ -2249,13 +2224,13 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
        if (wbio->bi_end_io) {
                atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
-               generic_make_request(wbio);
+               submit_bio_noacct(wbio);
        }
        if (wbio2) {
                atomic_inc(&conf->mirrors[d].replacement->nr_pending);
                md_sync_acct(conf->mirrors[d].replacement->bdev,
                             bio_sectors(wbio2));
-               generic_make_request(wbio2);
+               submit_bio_noacct(wbio2);
        }
 }
 
@@ -2889,7 +2864,7 @@ static void raid10_set_cluster_sync_high(struct r10conf *conf)
  * a number of r10_bio structures, one for each out-of-sync device.
  * As we setup these structures, we collect all bio's together into a list
  * which we then process collectively to add pages, and then process again
- * to pass to generic_make_request.
+ * to pass to submit_bio_noacct.
  *
  * The r10_bio structures are linked using a borrowed master_bio pointer.
  * This link is counted in ->remaining.  When the r10_bio that points to NULL
@@ -3496,7 +3471,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                if (bio->bi_end_io == end_sync_read) {
                        md_sync_acct_bio(bio, nr_sectors);
                        bio->bi_status = 0;
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                }
        }
 
@@ -4654,7 +4629,7 @@ read_more:
        md_sync_acct_bio(read_bio, r10_bio->sectors);
        atomic_inc(&r10_bio->remaining);
        read_bio->bi_next = NULL;
-       generic_make_request(read_bio);
+       submit_bio_noacct(read_bio);
        sectors_done += nr_sectors;
        if (sector_nr <= last)
                goto read_more;
@@ -4717,7 +4692,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                md_sync_acct_bio(b, r10_bio->sectors);
                atomic_inc(&r10_bio->remaining);
                b->bi_next = NULL;
-               generic_make_request(b);
+               submit_bio_noacct(b);
        }
        end_reshape_request(r10_bio);
 }
@@ -4929,7 +4904,6 @@ static struct md_personality raid10_personality =
        .start_reshape  = raid10_start_reshape,
        .finish_reshape = raid10_finish_reshape,
        .update_reshape_pos = raid10_update_reshape_pos,
-       .congested      = raid10_congested,
 };
 
 static int __init raid_init(void)
index ab8067f..774ea89 100644 (file)
@@ -873,7 +873,7 @@ static void dispatch_bio_list(struct bio_list *tmp)
        struct bio *bio;
 
        while ((bio = bio_list_pop(tmp)))
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
 }
 
 static int cmp_stripe(void *priv, struct list_head *a, struct list_head *b)
@@ -1151,7 +1151,7 @@ again:
                        if (should_defer && op_is_write(op))
                                bio_list_add(&pending_bios, bi);
                        else
-                               generic_make_request(bi);
+                               submit_bio_noacct(bi);
                }
                if (rrdev) {
                        if (s->syncing || s->expanding || s->expanded
@@ -1201,7 +1201,7 @@ again:
                        if (should_defer && op_is_write(op))
                                bio_list_add(&pending_bios, rbi);
                        else
-                               generic_make_request(rbi);
+                               submit_bio_noacct(rbi);
                }
                if (!rdev && !rrdev) {
                        if (op_is_write(op))
@@ -5099,28 +5099,6 @@ static void activate_bit_delay(struct r5conf *conf,
        }
 }
 
-static int raid5_congested(struct mddev *mddev, int bits)
-{
-       struct r5conf *conf = mddev->private;
-
-       /* No difference between reads and writes.  Just check
-        * how busy the stripe_cache is
-        */
-
-       if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
-               return 1;
-
-       /* Also checks whether there is pressure on r5cache log space */
-       if (test_bit(R5C_LOG_TIGHT, &conf->cache_state))
-               return 1;
-       if (conf->quiesce)
-               return 1;
-       if (atomic_read(&conf->empty_inactive_list_nr))
-               return 1;
-
-       return 0;
-}
-
 static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
        struct r5conf *conf = mddev->private;
@@ -5289,7 +5267,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
                        trace_block_bio_remap(align_bi->bi_disk->queue,
                                              align_bi, disk_devt(mddev->gendisk),
                                              raid_bio->bi_iter.bi_sector);
-               generic_make_request(align_bi);
+               submit_bio_noacct(align_bi);
                return 1;
        } else {
                rcu_read_unlock();
@@ -5309,7 +5287,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
                struct r5conf *conf = mddev->private;
                split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
                bio_chain(split, raid_bio);
-               generic_make_request(raid_bio);
+               submit_bio_noacct(raid_bio);
                raid_bio = split;
        }
 
@@ -8427,7 +8405,6 @@ static struct md_personality raid6_personality =
        .finish_reshape = raid5_finish_reshape,
        .quiesce        = raid5_quiesce,
        .takeover       = raid6_takeover,
-       .congested      = raid5_congested,
        .change_consistency_policy = raid5_change_consistency_policy,
 };
 static struct md_personality raid5_personality =
@@ -8452,7 +8429,6 @@ static struct md_personality raid5_personality =
        .finish_reshape = raid5_finish_reshape,
        .quiesce        = raid5_quiesce,
        .takeover       = raid5_takeover,
-       .congested      = raid5_congested,
        .change_consistency_policy = raid5_change_consistency_policy,
 };
 
@@ -8478,7 +8454,6 @@ static struct md_personality raid4_personality =
        .finish_reshape = raid5_finish_reshape,
        .quiesce        = raid5_quiesce,
        .takeover       = raid4_takeover,
-       .congested      = raid5_congested,
        .change_consistency_policy = raid5_change_consistency_policy,
 };
 
index 74cee7c..d939ccc 100644 (file)
@@ -616,7 +616,10 @@ static int ioc3_mfd_probe(struct pci_dev *pdev,
                /* Remove all already added MFD devices */
                mfd_remove_devices(&ipd->pdev->dev);
                if (ipd->domain) {
+                       struct fwnode_handle *fn = ipd->domain->fwnode;
+
                        irq_domain_remove(ipd->domain);
+                       irq_domain_free_fwnode(fn);
                        free_irq(ipd->domain_irq, (void *)ipd);
                }
                pci_iounmap(pdev, regs);
@@ -643,7 +646,10 @@ static void ioc3_mfd_remove(struct pci_dev *pdev)
        /* Release resources */
        mfd_remove_devices(&ipd->pdev->dev);
        if (ipd->domain) {
+               struct fwnode_handle *fn = ipd->domain->fwnode;
+
                irq_domain_remove(ipd->domain);
+               irq_domain_free_fwnode(fn);
                free_irq(ipd->domain_irq, (void *)ipd);
        }
        pci_iounmap(pdev, ipd->regs);
index 7896952..fa313b6 100644 (file)
@@ -312,10 +312,7 @@ static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
 
        mutex_lock(&block_mutex);
        if (md) {
-               if (md->usage == 2)
-                       check_disk_change(bdev);
                ret = 0;
-
                if ((mode & FMODE_WRITE) && md->read_only) {
                        mmc_blk_put(md);
                        ret = -EROFS;
@@ -1446,7 +1443,7 @@ static void mmc_blk_cqe_req_done(struct mmc_request *mrq)
         */
        if (mq->in_recovery)
                mmc_blk_cqe_complete_rq(mq, req);
-       else
+       else if (likely(!blk_should_fake_timeout(req->q)))
                blk_mq_complete_request(req);
 }
 
@@ -1926,7 +1923,7 @@ static void mmc_blk_hsq_req_done(struct mmc_request *mrq)
         */
        if (mq->in_recovery)
                mmc_blk_cqe_complete_rq(mq, req);
-       else
+       else if (likely(!blk_should_fake_timeout(req->q)))
                blk_mq_complete_request(req);
 }
 
@@ -1936,7 +1933,7 @@ void mmc_blk_mq_complete(struct request *req)
 
        if (mq->use_cqe)
                mmc_blk_cqe_complete_rq(mq, req);
-       else
+       else if (likely(!blk_should_fake_timeout(req->q)))
                mmc_blk_mq_complete_rq(mq, req);
 }
 
@@ -1988,7 +1985,7 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
         */
        if (mq->in_recovery)
                mmc_blk_mq_complete_rq(mq, req);
-       else
+       else if (likely(!blk_should_fake_timeout(req->q)))
                blk_mq_complete_request(req);
 
        mmc_blk_mq_dec_in_flight(mq, req);
index c5935b2..b40f46a 100644 (file)
@@ -355,9 +355,6 @@ static int mtdchar_writeoob(struct file *file, struct mtd_info *mtd,
        uint32_t retlen;
        int ret = 0;
 
-       if (!(file->f_mode & FMODE_WRITE))
-               return -EPERM;
-
        if (length > 4096)
                return -EINVAL;
 
@@ -643,6 +640,48 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 
        pr_debug("MTD_ioctl\n");
 
+       /*
+        * Check the file mode to require "dangerous" commands to have write
+        * permissions.
+        */
+       switch (cmd) {
+       /* "safe" commands */
+       case MEMGETREGIONCOUNT:
+       case MEMGETREGIONINFO:
+       case MEMGETINFO:
+       case MEMREADOOB:
+       case MEMREADOOB64:
+       case MEMLOCK:
+       case MEMUNLOCK:
+       case MEMISLOCKED:
+       case MEMGETOOBSEL:
+       case MEMGETBADBLOCK:
+       case MEMSETBADBLOCK:
+       case OTPSELECT:
+       case OTPGETREGIONCOUNT:
+       case OTPGETREGIONINFO:
+       case OTPLOCK:
+       case ECCGETLAYOUT:
+       case ECCGETSTATS:
+       case MTDFILEMODE:
+       case BLKPG:
+       case BLKRRPART:
+               break;
+
+       /* "dangerous" commands */
+       case MEMERASE:
+       case MEMERASE64:
+       case MEMWRITEOOB:
+       case MEMWRITEOOB64:
+       case MEMWRITE:
+               if (!(file->f_mode & FMODE_WRITE))
+                       return -EPERM;
+               break;
+
+       default:
+               return -ENOTTY;
+       }
+
        switch (cmd) {
        case MEMGETREGIONCOUNT:
                if (copy_to_user(argp, &(mtd->numeraseregions), sizeof(int)))
@@ -690,9 +729,6 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
        {
                struct erase_info *erase;
 
-               if(!(file->f_mode & FMODE_WRITE))
-                       return -EPERM;
-
                erase=kzalloc(sizeof(struct erase_info),GFP_KERNEL);
                if (!erase)
                        ret = -ENOMEM;
@@ -985,9 +1021,6 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
                ret = 0;
                break;
        }
-
-       default:
-               ret = -ENOTTY;
        }
 
        return ret;
@@ -1031,6 +1064,11 @@ static long mtdchar_compat_ioctl(struct file *file, unsigned int cmd,
                struct mtd_oob_buf32 buf;
                struct mtd_oob_buf32 __user *buf_user = argp;
 
+               if (!(file->f_mode & FMODE_WRITE)) {
+                       ret = -EPERM;
+                       break;
+               }
+
                if (copy_from_user(&buf, argp, sizeof(buf)))
                        ret = -EFAULT;
                else
index 3dd46cd..88e7900 100644 (file)
@@ -407,19 +407,34 @@ free_dst:
        return err;
 }
 
+static bool bareudp_proto_valid(struct bareudp_dev *bareudp, __be16 proto)
+{
+       if (bareudp->ethertype == proto)
+               return true;
+
+       if (!bareudp->multi_proto_mode)
+               return false;
+
+       if (bareudp->ethertype == htons(ETH_P_MPLS_UC) &&
+           proto == htons(ETH_P_MPLS_MC))
+               return true;
+
+       if (bareudp->ethertype == htons(ETH_P_IP) &&
+           proto == htons(ETH_P_IPV6))
+               return true;
+
+       return false;
+}
+
 static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct bareudp_dev *bareudp = netdev_priv(dev);
        struct ip_tunnel_info *info = NULL;
        int err;
 
-       if (skb->protocol != bareudp->ethertype) {
-               if (!bareudp->multi_proto_mode ||
-                   (skb->protocol !=  htons(ETH_P_MPLS_MC) &&
-                    skb->protocol !=  htons(ETH_P_IPV6))) {
-                       err = -EINVAL;
-                       goto tx_error;
-               }
+       if (!bareudp_proto_valid(bareudp, skb->protocol)) {
+               err = -EINVAL;
+               goto tx_error;
        }
 
        info = skb_tunnel_info(skb);
index 8d13ea3..66e67b2 100644 (file)
@@ -2446,6 +2446,7 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
        port->reset = devm_reset_control_get_exclusive(dev, NULL);
        if (IS_ERR(port->reset)) {
                dev_err(dev, "no reset\n");
+               clk_disable_unprepare(port->pclk);
                return PTR_ERR(port->reset);
        }
        reset_control_reset(port->reset);
@@ -2501,8 +2502,10 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
                                        IRQF_SHARED,
                                        port_names[port->id],
                                        port);
-       if (ret)
+       if (ret) {
+               clk_disable_unprepare(port->pclk);
                return ret;
+       }
 
        ret = register_netdev(netdev);
        if (!ret) {
index 33c481d..71ed4c5 100644 (file)
@@ -1093,16 +1093,8 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
        int k, sizeoflast;
        dma_addr_t dma;
 
-       if (type == DESC_TYPE_SKB) {
-               struct sk_buff *skb = (struct sk_buff *)priv;
-               int ret;
-
-               ret = hns3_fill_skb_desc(ring, skb, desc);
-               if (unlikely(ret < 0))
-                       return ret;
-
-               dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
-       } else if (type == DESC_TYPE_FRAGLIST_SKB) {
+       if (type == DESC_TYPE_FRAGLIST_SKB ||
+           type == DESC_TYPE_SKB) {
                struct sk_buff *skb = (struct sk_buff *)priv;
 
                dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
@@ -1439,6 +1431,10 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        next_to_use_head = ring->next_to_use;
 
+       ret = hns3_fill_skb_desc(ring, skb, &ring->desc[ring->next_to_use]);
+       if (unlikely(ret < 0))
+               goto fill_err;
+
        ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
        if (unlikely(ret < 0))
                goto fill_err;
@@ -4140,8 +4136,8 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup)
                return;
 
        if (linkup) {
-               netif_carrier_on(netdev);
                netif_tx_wake_all_queues(netdev);
+               netif_carrier_on(netdev);
                if (netif_msg_link(handle))
                        netdev_info(netdev, "link up\n");
        } else {
index bb4a632..36575e7 100644 (file)
@@ -5806,9 +5806,9 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
        /* to avoid rule conflict, when user configure rule by ethtool,
         * we need to clear all arfs rules
         */
+       spin_lock_bh(&hdev->fd_rule_lock);
        hclge_clear_arfs_rules(handle);
 
-       spin_lock_bh(&hdev->fd_rule_lock);
        ret = hclge_fd_config_rule(hdev, rule);
 
        spin_unlock_bh(&hdev->fd_rule_lock);
@@ -5851,6 +5851,7 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle,
        return ret;
 }
 
+/* make sure being called after lock up with fd_rule_lock */
 static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
                                     bool clear_list)
 {
@@ -5863,7 +5864,6 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
        if (!hnae3_dev_fd_supported(hdev))
                return;
 
-       spin_lock_bh(&hdev->fd_rule_lock);
        for_each_set_bit(location, hdev->fd_bmap,
                         hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
                hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location,
@@ -5880,8 +5880,6 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
                bitmap_zero(hdev->fd_bmap,
                            hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]);
        }
-
-       spin_unlock_bh(&hdev->fd_rule_lock);
 }
 
 static int hclge_restore_fd_entries(struct hnae3_handle *handle)
@@ -6263,7 +6261,7 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
                                      u16 flow_id, struct flow_keys *fkeys)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
-       struct hclge_fd_rule_tuples new_tuples;
+       struct hclge_fd_rule_tuples new_tuples = {};
        struct hclge_dev *hdev = vport->back;
        struct hclge_fd_rule *rule;
        u16 tmp_queue_id;
@@ -6273,19 +6271,17 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
        if (!hnae3_dev_fd_supported(hdev))
                return -EOPNOTSUPP;
 
-       memset(&new_tuples, 0, sizeof(new_tuples));
-       hclge_fd_get_flow_tuples(fkeys, &new_tuples);
-
-       spin_lock_bh(&hdev->fd_rule_lock);
-
        /* when there is already fd rule existed add by user,
         * arfs should not work
         */
+       spin_lock_bh(&hdev->fd_rule_lock);
        if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) {
                spin_unlock_bh(&hdev->fd_rule_lock);
                return -EOPNOTSUPP;
        }
 
+       hclge_fd_get_flow_tuples(fkeys, &new_tuples);
+
        /* check is there flow director filter existed for this flow,
         * if not, create a new filter for it;
         * if filter exist with different queue id, modify the filter;
@@ -6368,6 +6364,7 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev)
 #endif
 }
 
+/* make sure being called after lock up with fd_rule_lock */
 static void hclge_clear_arfs_rules(struct hnae3_handle *handle)
 {
 #ifdef CONFIG_RFS_ACCEL
@@ -6420,10 +6417,14 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
 
        hdev->fd_en = enable;
        clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
-       if (!enable)
+
+       if (!enable) {
+               spin_lock_bh(&hdev->fd_rule_lock);
                hclge_del_all_fd_entries(handle, clear);
-       else
+               spin_unlock_bh(&hdev->fd_rule_lock);
+       } else {
                hclge_restore_fd_entries(handle);
+       }
 }
 
 static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
@@ -6886,8 +6887,9 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
        int i;
 
        set_bit(HCLGE_STATE_DOWN, &hdev->state);
-
+       spin_lock_bh(&hdev->fd_rule_lock);
        hclge_clear_arfs_rules(handle);
+       spin_unlock_bh(&hdev->fd_rule_lock);
 
        /* If it is not PF reset, the firmware will disable the MAC,
         * so it only need to stop phy here.
@@ -9040,11 +9042,12 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
        bool writen_to_tbl = false;
        int ret = 0;
 
-       /* When device is resetting, firmware is unable to handle
-        * mailbox. Just record the vlan id, and remove it after
+       /* When device is resetting or reset failed, firmware is unable to
+        * handle mailbox. Just record the vlan id, and remove it after
         * reset finished.
         */
-       if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) && is_kill) {
+       if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
+            test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) {
                set_bit(vlan_id, vport->vlan_del_fail_bmap);
                return -EBUSY;
        }
index a10b022..9162856 100644 (file)
@@ -1592,11 +1592,12 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
        if (proto != htons(ETH_P_8021Q))
                return -EPROTONOSUPPORT;
 
-       /* When device is resetting, firmware is unable to handle
-        * mailbox. Just record the vlan id, and remove it after
+       /* When device is resetting or reset failed, firmware is unable to
+        * handle mailbox. Just record the vlan id, and remove it after
         * reset finished.
         */
-       if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) && is_kill) {
+       if ((test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) ||
+            test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) {
                set_bit(vlan_id, hdev->vlan_del_fail_bmap);
                return -EBUSY;
        }
@@ -3439,23 +3440,36 @@ void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state,
 {
        struct hnae3_handle *nic = &hdev->nic;
        struct hclge_vf_to_pf_msg send_msg;
+       int ret;
 
        rtnl_lock();
-       hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
-       rtnl_unlock();
+
+       if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) ||
+           test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) {
+               dev_warn(&hdev->pdev->dev,
+                        "is resetting when updating port based vlan info\n");
+               rtnl_unlock();
+               return;
+       }
+
+       ret = hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+       if (ret) {
+               rtnl_unlock();
+               return;
+       }
 
        /* send msg to PF and wait update port based vlan info */
        hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN,
                               HCLGE_MBX_PORT_BASE_VLAN_CFG);
        memcpy(send_msg.data, port_base_vlan_info, data_size);
-       hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
-
-       if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
-               nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE;
-       else
-               nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+       ret = hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
+       if (!ret) {
+               if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
+                       nic->port_base_vlan_state = state;
+               else
+                       nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+       }
 
-       rtnl_lock();
        hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
        rtnl_unlock();
 }
index 0fd7eae..5afb3c9 100644 (file)
@@ -3206,7 +3206,7 @@ req_rx_irq_failed:
 req_tx_irq_failed:
        for (j = 0; j < i; j++) {
                free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
-               irq_dispose_mapping(adapter->rx_scrq[j]->irq);
+               irq_dispose_mapping(adapter->tx_scrq[j]->irq);
        }
        release_sub_crqs(adapter, 1);
        return rc;
index f999cca..489bb5b 100644 (file)
@@ -301,10 +301,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
         */
        hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_unknown;
        ret_val = e1000_disable_ulp_lpt_lp(hw, true);
-       if (ret_val) {
+       if (ret_val)
                e_warn("Failed to disable ULP\n");
-               goto out;
-       }
 
        ret_val = hw->phy.ops.acquire(hw);
        if (ret_val) {
index 8bb3db2..6e5861b 100644 (file)
@@ -6224,9 +6224,18 @@ static void igb_reset_task(struct work_struct *work)
        struct igb_adapter *adapter;
        adapter = container_of(work, struct igb_adapter, reset_task);
 
+       rtnl_lock();
+       /* If we're already down or resetting, just bail */
+       if (test_bit(__IGB_DOWN, &adapter->state) ||
+           test_bit(__IGB_RESETTING, &adapter->state)) {
+               rtnl_unlock();
+               return;
+       }
+
        igb_dump(adapter);
        netdev_err(adapter->netdev, "Reset adapter\n");
        igb_reinit_locked(adapter);
+       rtnl_unlock();
 }
 
 /**
index 6478656..75a8c40 100644 (file)
@@ -1730,10 +1730,12 @@ static void otx2_reset_task(struct work_struct *work)
        if (!netif_running(pf->netdev))
                return;
 
+       rtnl_lock();
        otx2_stop(pf->netdev);
        pf->reset_count++;
        otx2_open(pf->netdev);
        netif_trans_update(pf->netdev);
+       rtnl_unlock();
 }
 
 static const struct net_device_ops otx2_netdev_ops = {
@@ -2111,6 +2113,7 @@ static void otx2_remove(struct pci_dev *pdev)
 
        pf = netdev_priv(netdev);
 
+       cancel_work_sync(&pf->reset_task);
        /* Disable link notifications */
        otx2_cgx_config_linkevents(pf, false);
 
index f422751..92a3db6 100644 (file)
@@ -617,6 +617,8 @@ static void otx2vf_remove(struct pci_dev *pdev)
 
        vf = netdev_priv(netdev);
 
+       cancel_work_sync(&vf->reset_task);
+       unregister_netdev(netdev);
        otx2vf_disable_mbox_intr(vf);
 
        otx2_detach_resources(&vf->mbox);
index f6a1f86..a1c45b3 100644 (file)
@@ -171,11 +171,21 @@ static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth,
        return 0;
 }
 
-static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
+static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth,
+                                  phy_interface_t interface, int speed)
 {
        u32 val;
        int ret;
 
+       if (interface == PHY_INTERFACE_MODE_TRGMII) {
+               mtk_w32(eth, TRGMII_MODE, INTF_MODE);
+               val = 500000000;
+               ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val);
+               if (ret)
+                       dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret);
+               return;
+       }
+
        val = (speed == SPEED_1000) ?
                INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100;
        mtk_w32(eth, val, INTF_MODE);
@@ -262,10 +272,9 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
                                                              state->interface))
                                        goto err_phy;
                        } else {
-                               if (state->interface !=
-                                   PHY_INTERFACE_MODE_TRGMII)
-                                       mtk_gmac0_rgmii_adjust(mac->hw,
-                                                              state->speed);
+                               mtk_gmac0_rgmii_adjust(mac->hw,
+                                                      state->interface,
+                                                      state->speed);
 
                                /* mt7623_pad_clk_setup */
                                for (i = 0 ; i < NUM_TRGMII_CTRL; i++)
@@ -2882,6 +2891,8 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
        eth->netdev[id]->irq = eth->irq[0];
        eth->netdev[id]->dev.of_node = np;
 
+       eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
+
        return 0;
 
 free_netdev:
index 3d9aa7d..2d3e457 100644 (file)
@@ -4356,12 +4356,14 @@ end:
 static void mlx4_shutdown(struct pci_dev *pdev)
 {
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+       struct mlx4_dev *dev = persist->dev;
 
        mlx4_info(persist->dev, "mlx4_shutdown was called\n");
        mutex_lock(&persist->interface_state_mutex);
        if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
                mlx4_unload_one(pdev);
        mutex_unlock(&persist->interface_state_mutex);
+       mlx4_pci_disable_device(dev);
 }
 
 static const struct pci_error_handlers mlx4_err_handler = {
index bdb7133..3e44e4d 100644 (file)
@@ -183,13 +183,16 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
 
 static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
 {
-       struct mlx5e_priv *priv = netdev_priv(netdev);
-       struct mlx5e_rep_priv *rpriv = priv->ppriv;
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_priv *priv;
 
        /* A given netdev is not a representor or not a slave of LAG configuration */
        if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
                return false;
 
+       priv = netdev_priv(netdev);
+       rpriv = priv->ppriv;
+
        /* Egress acl forward to vport is supported only non-uplink representor */
        return rpriv->rep->vport != MLX5_VPORT_UPLINK;
 }
index eefeb1c..245a99f 100644 (file)
@@ -551,19 +551,31 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
                }
        }
 
-       tun_dst = tun_rx_dst(enc_opts.key.len);
+       if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+               tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
+                                          key.enc_ip.tos, key.enc_ip.ttl,
+                                          key.enc_tp.dst, TUNNEL_KEY,
+                                          key32_to_tunnel_id(key.enc_key_id.keyid),
+                                          enc_opts.key.len);
+       } else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+               tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
+                                            key.enc_ip.tos, key.enc_ip.ttl,
+                                            key.enc_tp.dst, 0, TUNNEL_KEY,
+                                            key32_to_tunnel_id(key.enc_key_id.keyid),
+                                            enc_opts.key.len);
+       } else {
+               netdev_dbg(priv->netdev,
+                          "Couldn't restore tunnel, unsupported addr_type: %d\n",
+                          key.enc_control.addr_type);
+               return false;
+       }
+
        if (!tun_dst) {
-               WARN_ON_ONCE(true);
+               netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
                return false;
        }
 
-       ip_tunnel_key_init(&tun_dst->u.tun_info.key,
-                          key.enc_ipv4.src, key.enc_ipv4.dst,
-                          key.enc_ip.tos, key.enc_ip.ttl,
-                          0, /* label */
-                          key.enc_tp.src, key.enc_tp.dst,
-                          key32_to_tunnel_id(key.enc_key_id.keyid),
-                          TUNNEL_KEY);
+       tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
 
        if (enc_opts.key.len)
                ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
index 951ea26..e472ed0 100644 (file)
@@ -301,6 +301,8 @@ static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv,
                MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB);
        }
 
+       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
        return 0;
 }
 
index 58b1319..2805416 100644 (file)
@@ -80,6 +80,8 @@ static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
                         gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
        }
 
+       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
        return 0;
 }
 
index 37b1768..038a0f1 100644 (file)
@@ -136,6 +136,8 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
        MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
                 be32_to_cpu(enc_keyid.key->keyid));
 
+       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
        return 0;
 }
 
index 081f150..3b892ec 100644 (file)
@@ -419,7 +419,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
                err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
                                        &rq->wq_ctrl);
                if (err)
-                       return err;
+                       goto err_rq_wq_destroy;
 
                rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
 
@@ -470,7 +470,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
                err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
                                         &rq->wq_ctrl);
                if (err)
-                       return err;
+                       goto err_rq_wq_destroy;
 
                rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
 
@@ -3069,6 +3069,25 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
        priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
 }
 
+static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
+                                    enum mlx5_port_status state)
+{
+       struct mlx5_eswitch *esw = mdev->priv.eswitch;
+       int vport_admin_state;
+
+       mlx5_set_port_admin_status(mdev, state);
+
+       if (!MLX5_ESWITCH_MANAGER(mdev) ||  mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS)
+               return;
+
+       if (state == MLX5_PORT_UP)
+               vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+       else
+               vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN;
+
+       mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state);
+}
+
 int mlx5e_open_locked(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -3101,7 +3120,7 @@ int mlx5e_open(struct net_device *netdev)
        mutex_lock(&priv->state_lock);
        err = mlx5e_open_locked(netdev);
        if (!err)
-               mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
+               mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP);
        mutex_unlock(&priv->state_lock);
 
        return err;
@@ -3135,7 +3154,7 @@ int mlx5e_close(struct net_device *netdev)
                return -ENODEV;
 
        mutex_lock(&priv->state_lock);
-       mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
+       mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN);
        err = mlx5e_close_locked(netdev);
        mutex_unlock(&priv->state_lock);
 
@@ -5182,7 +5201,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 
        /* Marking the link as currently not needed by the Driver */
        if (!netif_running(netdev))
-               mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
+               mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN);
 
        mlx5e_set_netdev_mtu_boundaries(priv);
        mlx5e_set_dev_port_mtu(priv);
@@ -5390,6 +5409,8 @@ err_cleanup_tx:
        profile->cleanup_tx(priv);
 
 out:
+       set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+       cancel_work_sync(&priv->update_stats_work);
        return err;
 }
 
index 006807e..9519a61 100644 (file)
@@ -936,6 +936,7 @@ err_close_drop_rq:
 
 static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 {
+       mlx5e_ethtool_cleanup_steering(priv);
        rep_vport_rx_rule_destroy(priv);
        mlx5e_destroy_rep_root_ft(priv);
        mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
@@ -1080,6 +1081,8 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 
        mlx5e_rep_tc_enable(priv);
 
+       mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
+                                     0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
        mlx5_lag_add(mdev, netdev);
        priv->events_nb.notifier_call = uplink_rep_async_event;
        mlx5_notifier_register(mdev, &priv->events_nb);
index cc84121..fcedb5b 100644 (file)
@@ -2356,6 +2356,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                                 match.key->vlan_priority);
 
                        *match_level = MLX5_MATCH_L2;
+                       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
                }
        }
 
index 1116ab9..43005ca 100644 (file)
@@ -1608,7 +1608,7 @@ abort:
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
        }
-
+       esw_destroy_tsar(esw);
        return err;
 }
 
@@ -1653,8 +1653,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
        else if (esw->mode == MLX5_ESWITCH_OFFLOADS)
                esw_offloads_disable(esw);
 
-       esw_destroy_tsar(esw);
-
        old_mode = esw->mode;
        esw->mode = MLX5_ESWITCH_NONE;
 
@@ -1664,6 +1662,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
        }
+       esw_destroy_tsar(esw);
+
        if (clear_vf)
                mlx5_eswitch_clear_vf_vports_info(esw);
 }
@@ -1826,6 +1826,8 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
                                 u16 vport, int link_state)
 {
        struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+       int opmod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
+       int other_vport = 1;
        int err = 0;
 
        if (!ESW_ALLOWED(esw))
@@ -1833,15 +1835,17 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
        if (IS_ERR(evport))
                return PTR_ERR(evport);
 
+       if (vport == MLX5_VPORT_UPLINK) {
+               opmod = MLX5_VPORT_STATE_OP_MOD_UPLINK;
+               other_vport = 0;
+               vport = 0;
+       }
        mutex_lock(&esw->state_lock);
 
-       err = mlx5_modify_vport_admin_state(esw->dev,
-                                           MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-                                           vport, 1, link_state);
+       err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state);
        if (err) {
-               mlx5_core_warn(esw->dev,
-                              "Failed to set vport %d link state, err = %d",
-                              vport, err);
+               mlx5_core_warn(esw->dev, "Failed to set vport %d link state, opmod = %d, err = %d",
+                              vport, opmod, err);
                goto unlock;
        }
 
@@ -1883,8 +1887,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
        struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
        int err = 0;
 
-       if (!ESW_ALLOWED(esw))
-               return -EPERM;
        if (IS_ERR(evport))
                return PTR_ERR(evport);
        if (vlan > 4095 || qos > 7)
@@ -1912,6 +1914,9 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
        u8 set_flags = 0;
        int err;
 
+       if (!ESW_ALLOWED(esw))
+               return -EPERM;
+
        if (vlan || qos)
                set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
 
index a5175e9..5785596 100644 (file)
@@ -680,6 +680,8 @@ static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { r
 static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
 static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
 static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+static inline
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
 static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
 {
        return ERR_PTR(-EOPNOTSUPP);
index 060354b..ed75353 100644 (file)
@@ -236,6 +236,15 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
        return &esw->offloads.vport_reps[idx];
 }
 
+static void
+mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw,
+                                 struct mlx5_flow_spec *spec,
+                                 struct mlx5_esw_flow_attr *attr)
+{
+       if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
+           attr && attr->in_rep && attr->in_rep->vport == MLX5_VPORT_UPLINK)
+               spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
 
 static void
 mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
@@ -259,9 +268,6 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
                         mlx5_eswitch_get_vport_metadata_mask());
 
                spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
-               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
-               if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
-                       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
        } else {
                misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
                MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
@@ -279,10 +285,6 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
 
                spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
        }
-
-       if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
-           attr->in_rep->vport == MLX5_VPORT_UPLINK)
-               spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
 }
 
 struct mlx5_flow_handle *
@@ -396,6 +398,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                goto err_esw_get;
        }
 
+       mlx5_eswitch_set_rule_flow_source(esw, spec, attr);
+
        if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
                rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr,
                                                     &flow_act, dest, i);
@@ -462,6 +466,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
        i++;
 
        mlx5_eswitch_set_rule_source_port(esw, spec, attr);
+       mlx5_eswitch_set_rule_flow_source(esw, spec, attr);
 
        if (attr->outer_match_level != MLX5_MATCH_NONE)
                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
index 13e2fb7..2569bb6 100644 (file)
@@ -797,7 +797,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
        return ft;
 }
 
-/* If reverse if false then return the first flow table in next priority of
+/* If reverse is false then return the first flow table in next priority of
  * prio in the tree, else return the last flow table in the previous priority
  * of prio in the tree.
  */
@@ -829,34 +829,16 @@ static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
        return find_closest_ft(prio, true);
 }
 
-static struct fs_prio *find_fwd_ns_prio(struct mlx5_flow_root_namespace *root,
-                                       struct mlx5_flow_namespace *ns)
-{
-       struct mlx5_flow_namespace *root_ns = &root->ns;
-       struct fs_prio *iter_prio;
-       struct fs_prio *prio;
-
-       fs_get_obj(prio, ns->node.parent);
-       list_for_each_entry(iter_prio, &root_ns->node.children, node.list) {
-               if (iter_prio == prio &&
-                   !list_is_last(&prio->node.children, &iter_prio->node.list))
-                       return list_next_entry(iter_prio, node.list);
-       }
-       return NULL;
-}
-
 static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
                                                struct mlx5_flow_act *flow_act)
 {
-       struct mlx5_flow_root_namespace *root = find_root(&ft->node);
        struct fs_prio *prio;
+       bool next_ns;
 
-       if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS)
-               prio = find_fwd_ns_prio(root, ft->ns);
-       else
-               fs_get_obj(prio, ft->node.parent);
+       next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+       fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
 
-       return (prio) ? find_next_chained_ft(prio) : NULL;
+       return find_next_chained_ft(prio);
 }
 
 static int connect_fts_in_prio(struct mlx5_core_dev *dev,
index ef0706d..2d55b7c 100644 (file)
@@ -273,17 +273,17 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
        if (rq->extts.index >= clock->ptp_info.n_pins)
                return -EINVAL;
 
+       pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
+       if (pin < 0)
+               return -EBUSY;
+
        if (on) {
-               pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
-               if (pin < 0)
-                       return -EBUSY;
                pin_mode = MLX5_PIN_MODE_IN;
                pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
                field_select = MLX5_MTPPS_FS_PIN_MODE |
                               MLX5_MTPPS_FS_PATTERN |
                               MLX5_MTPPS_FS_ENABLE;
        } else {
-               pin = rq->extts.index;
                field_select = MLX5_MTPPS_FS_ENABLE;
        }
 
@@ -331,12 +331,12 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
        if (rq->perout.index >= clock->ptp_info.n_pins)
                return -EINVAL;
 
-       if (on) {
-               pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT,
-                                  rq->perout.index);
-               if (pin < 0)
-                       return -EBUSY;
+       pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT,
+                          rq->perout.index);
+       if (pin < 0)
+               return -EBUSY;
 
+       if (on) {
                pin_mode = MLX5_PIN_MODE_OUT;
                pattern = MLX5_OUT_PATTERN_PERIODIC;
                ts.tv_sec = rq->perout.period.sec;
@@ -362,7 +362,6 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
                               MLX5_MTPPS_FS_ENABLE |
                               MLX5_MTPPS_FS_TIME_STAMP;
        } else {
-               pin = rq->perout.index;
                field_select = MLX5_MTPPS_FS_ENABLE;
        }
 
@@ -409,10 +408,31 @@ static int mlx5_ptp_enable(struct ptp_clock_info *ptp,
        return 0;
 }
 
+enum {
+       MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0),
+       MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1),
+};
+
 static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
                           enum ptp_pin_function func, unsigned int chan)
 {
-       return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0;
+       struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+                                               ptp_info);
+
+       switch (func) {
+       case PTP_PF_NONE:
+               return 0;
+       case PTP_PF_EXTTS:
+               return !(clock->pps_info.pin_caps[pin] &
+                        MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN);
+       case PTP_PF_PEROUT:
+               return !(clock->pps_info.pin_caps[pin] &
+                        MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT);
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return -EOPNOTSUPP;
 }
 
 static const struct ptp_clock_info mlx5_ptp_clock_info = {
@@ -432,6 +452,38 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = {
        .verify         = NULL,
 };
 
+static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
+                                    u32 *mtpps, u32 mtpps_size)
+{
+       u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {};
+
+       MLX5_SET(mtpps_reg, in, pin, pin);
+
+       return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps,
+                                   mtpps_size, MLX5_REG_MTPPS, 0, 0);
+}
+
+static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
+{
+       struct mlx5_core_dev *mdev = clock->mdev;
+       u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {};
+       u8 mode;
+       int err;
+
+       err = mlx5_query_mtpps_pin_mode(mdev, pin, out, sizeof(out));
+       if (err || !MLX5_GET(mtpps_reg, out, enable))
+               return PTP_PF_NONE;
+
+       mode = MLX5_GET(mtpps_reg, out, pin_mode);
+
+       if (mode == MLX5_PIN_MODE_IN)
+               return PTP_PF_EXTTS;
+       else if (mode == MLX5_PIN_MODE_OUT)
+               return PTP_PF_PEROUT;
+
+       return PTP_PF_NONE;
+}
+
 static int mlx5_init_pin_config(struct mlx5_clock *clock)
 {
        int i;
@@ -451,8 +503,8 @@ static int mlx5_init_pin_config(struct mlx5_clock *clock)
                         sizeof(clock->ptp_info.pin_config[i].name),
                         "mlx5_pps%d", i);
                clock->ptp_info.pin_config[i].index = i;
-               clock->ptp_info.pin_config[i].func = PTP_PF_NONE;
-               clock->ptp_info.pin_config[i].chan = i;
+               clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i);
+               clock->ptp_info.pin_config[i].chan = 0;
        }
 
        return 0;
index d6d6fe6..71b6185 100644 (file)
@@ -1814,7 +1814,7 @@ static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core,
        err = mlxsw_emad_reg_access(mlxsw_core, reg, payload, type, trans,
                                    bulk_list, cb, cb_priv, tid);
        if (err) {
-               kfree(trans);
+               kfree_rcu(trans, rcu);
                return err;
        }
        return 0;
@@ -2051,11 +2051,13 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
                        break;
                }
        }
-       rcu_read_unlock();
-       if (!found)
+       if (!found) {
+               rcu_read_unlock();
                goto drop;
+       }
 
        rxl->func(skb, local_port, rxl_item->priv);
+       rcu_read_unlock();
        return;
 
 drop:
index fcb88d4..8ac987c 100644 (file)
@@ -5536,6 +5536,7 @@ enum mlxsw_reg_htgt_trap_group {
        MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_EXTERNAL_ROUTE,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
index 019ed50..0521e9d 100644 (file)
@@ -5001,15 +5001,6 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
 
 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
 {
-       /* Packets with link-local destination IP arriving to the router
-        * are trapped to the CPU, so no need to program specific routes
-        * for them. Only allow prefix routes (usually one fe80::/64) so
-        * that packets are trapped for the right reason.
-        */
-       if ((ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL) &&
-           (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)))
-               return true;
-
        /* Multicast routes aren't supported, so ignore them. Neighbour
         * Discovery packets are specifically trapped.
         */
@@ -8078,16 +8069,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
        mlxsw_sp->router = router;
        router->mlxsw_sp = mlxsw_sp;
 
-       router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
-       err = register_inetaddr_notifier(&router->inetaddr_nb);
-       if (err)
-               goto err_register_inetaddr_notifier;
-
-       router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
-       err = register_inet6addr_notifier(&router->inet6addr_nb);
-       if (err)
-               goto err_register_inet6addr_notifier;
-
        INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
        err = __mlxsw_sp_router_init(mlxsw_sp);
        if (err)
@@ -8128,12 +8109,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_neigh_init;
 
-       mlxsw_sp->router->netevent_nb.notifier_call =
-               mlxsw_sp_router_netevent_event;
-       err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
-       if (err)
-               goto err_register_netevent_notifier;
-
        err = mlxsw_sp_mp_hash_init(mlxsw_sp);
        if (err)
                goto err_mp_hash_init;
@@ -8142,6 +8117,22 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_dscp_init;
 
+       router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
+       err = register_inetaddr_notifier(&router->inetaddr_nb);
+       if (err)
+               goto err_register_inetaddr_notifier;
+
+       router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
+       err = register_inet6addr_notifier(&router->inet6addr_nb);
+       if (err)
+               goto err_register_inet6addr_notifier;
+
+       mlxsw_sp->router->netevent_nb.notifier_call =
+               mlxsw_sp_router_netevent_event;
+       err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
+       if (err)
+               goto err_register_netevent_notifier;
+
        mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
        err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
                                    &mlxsw_sp->router->fib_nb,
@@ -8152,10 +8143,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
        return 0;
 
 err_register_fib_notifier:
-err_dscp_init:
-err_mp_hash_init:
        unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
 err_register_netevent_notifier:
+       unregister_inet6addr_notifier(&router->inet6addr_nb);
+err_register_inet6addr_notifier:
+       unregister_inetaddr_notifier(&router->inetaddr_nb);
+err_register_inetaddr_notifier:
+       mlxsw_core_flush_owq();
+err_dscp_init:
+err_mp_hash_init:
        mlxsw_sp_neigh_fini(mlxsw_sp);
 err_neigh_init:
        mlxsw_sp_vrs_fini(mlxsw_sp);
@@ -8174,10 +8170,6 @@ err_ipips_init:
 err_rifs_init:
        __mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
-       unregister_inet6addr_notifier(&router->inet6addr_nb);
-err_register_inet6addr_notifier:
-       unregister_inetaddr_notifier(&router->inetaddr_nb);
-err_register_inetaddr_notifier:
        mutex_destroy(&mlxsw_sp->router->lock);
        kfree(mlxsw_sp->router);
        return err;
@@ -8188,6 +8180,9 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
        unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
                                &mlxsw_sp->router->fib_nb);
        unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
+       unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
+       unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
+       mlxsw_core_flush_owq();
        mlxsw_sp_neigh_fini(mlxsw_sp);
        mlxsw_sp_vrs_fini(mlxsw_sp);
        mlxsw_sp_mr_fini(mlxsw_sp);
@@ -8197,8 +8192,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
        mlxsw_sp_ipips_fini(mlxsw_sp);
        mlxsw_sp_rifs_fini(mlxsw_sp);
        __mlxsw_sp_router_fini(mlxsw_sp);
-       unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
-       unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
        mutex_destroy(&mlxsw_sp->router->lock);
        kfree(mlxsw_sp->router);
 }
index 157a42c..1e38dfe 100644 (file)
@@ -328,6 +328,9 @@ mlxsw_sp_trap_policer_items_arr[] = {
        {
                .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 128),
        },
+       {
+               .policer = MLXSW_SP_TRAP_POLICER(19, 1024, 512),
+       },
 };
 
 static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
@@ -421,6 +424,11 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
                .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
                .priority = 2,
        },
+       {
+               .group = DEVLINK_TRAP_GROUP_GENERIC(EXTERNAL_DELIVERY, 19),
+               .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_EXTERNAL_ROUTE,
+               .priority = 1,
+       },
        {
                .group = DEVLINK_TRAP_GROUP_GENERIC(IPV6, 15),
                .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6,
@@ -882,11 +890,11 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
                },
        },
        {
-               .trap = MLXSW_SP_TRAP_CONTROL(EXTERNAL_ROUTE, LOCAL_DELIVERY,
+               .trap = MLXSW_SP_TRAP_CONTROL(EXTERNAL_ROUTE, EXTERNAL_DELIVERY,
                                              TRAP),
                .listeners_arr = {
-                       MLXSW_SP_RXL_MARK(RTR_INGRESS0, IP2ME, TRAP_TO_CPU,
-                                         false),
+                       MLXSW_SP_RXL_MARK(RTR_INGRESS0, EXTERNAL_ROUTE,
+                                         TRAP_TO_CPU, false),
                },
        },
        {
index 9cfe1fd..f17da67 100644 (file)
@@ -748,21 +748,21 @@ void ocelot_get_txtstamp(struct ocelot *ocelot)
 
                spin_unlock_irqrestore(&port->tx_skbs.lock, flags);
 
-               /* Next ts */
-               ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT);
+               /* Get the h/w timestamp */
+               ocelot_get_hwtimestamp(ocelot, &ts);
 
                if (unlikely(!skb_match))
                        continue;
 
-               /* Get the h/w timestamp */
-               ocelot_get_hwtimestamp(ocelot, &ts);
-
                /* Set the timestamp into the skb */
                memset(&shhwtstamps, 0, sizeof(shhwtstamps));
                shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
                skb_tstamp_tx(skb_match, &shhwtstamps);
 
                dev_kfree_skb_any(skb_match);
+
+               /* Next ts */
+               ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT);
        }
 }
 EXPORT_SYMBOL(ocelot_get_txtstamp);
index d2708a5..4075f5e 100644 (file)
@@ -1299,19 +1299,21 @@ static int nixge_probe(struct platform_device *pdev)
        netif_napi_add(ndev, &priv->napi, nixge_poll, NAPI_POLL_WEIGHT);
        err = nixge_of_get_resources(pdev);
        if (err)
-               return err;
+               goto free_netdev;
        __nixge_hw_set_mac_address(ndev);
 
        priv->tx_irq = platform_get_irq_byname(pdev, "tx");
        if (priv->tx_irq < 0) {
                netdev_err(ndev, "could not find 'tx' irq");
-               return priv->tx_irq;
+               err = priv->tx_irq;
+               goto free_netdev;
        }
 
        priv->rx_irq = platform_get_irq_byname(pdev, "rx");
        if (priv->rx_irq < 0) {
                netdev_err(ndev, "could not find 'rx' irq");
-               return priv->rx_irq;
+               err = priv->rx_irq;
+               goto free_netdev;
        }
 
        priv->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
index 5fd31ba..e55d415 100644 (file)
@@ -2001,7 +2001,7 @@ int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg)
                netif_device_detach(lif->netdev);
                err = ionic_stop(lif->netdev);
                if (err)
-                       return err;
+                       goto reset_out;
        }
 
        if (cb)
@@ -2011,6 +2011,8 @@ int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg)
                err = ionic_open(lif->netdev);
                netif_device_attach(lif->netdev);
        }
+
+reset_out:
        mutex_unlock(&lif->queue_lock);
 
        return err;
index 5f123a8..d2fdb54 100644 (file)
@@ -2261,12 +2261,14 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs,
 
        minor = get_free_serial_index();
        if (minor < 0)
-               goto exit;
+               goto exit2;
 
        /* register our minor number */
        serial->parent->dev = tty_port_register_device_attr(&serial->port,
                        tty_drv, minor, &serial->parent->interface->dev,
                        serial->parent, hso_serial_dev_groups);
+       if (IS_ERR(serial->parent->dev))
+               goto exit2;
 
        /* fill in specific data for later use */
        serial->minor = minor;
@@ -2311,6 +2313,7 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs,
        return 0;
 exit:
        hso_serial_tty_unregister(serial);
+exit2:
        hso_serial_common_free(serial);
        return -1;
 }
index eccbf4c..442507f 100644 (file)
@@ -377,10 +377,6 @@ struct lan78xx_net {
        struct tasklet_struct   bh;
        struct delayed_work     wq;
 
-       struct usb_host_endpoint *ep_blkin;
-       struct usb_host_endpoint *ep_blkout;
-       struct usb_host_endpoint *ep_intr;
-
        int                     msg_enable;
 
        struct urb              *urb_intr;
@@ -2860,78 +2856,12 @@ lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
        return NETDEV_TX_OK;
 }
 
-static int
-lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf)
-{
-       int tmp;
-       struct usb_host_interface *alt = NULL;
-       struct usb_host_endpoint *in = NULL, *out = NULL;
-       struct usb_host_endpoint *status = NULL;
-
-       for (tmp = 0; tmp < intf->num_altsetting; tmp++) {
-               unsigned ep;
-
-               in = NULL;
-               out = NULL;
-               status = NULL;
-               alt = intf->altsetting + tmp;
-
-               for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) {
-                       struct usb_host_endpoint *e;
-                       int intr = 0;
-
-                       e = alt->endpoint + ep;
-                       switch (e->desc.bmAttributes) {
-                       case USB_ENDPOINT_XFER_INT:
-                               if (!usb_endpoint_dir_in(&e->desc))
-                                       continue;
-                               intr = 1;
-                               /* FALLTHROUGH */
-                       case USB_ENDPOINT_XFER_BULK:
-                               break;
-                       default:
-                               continue;
-                       }
-                       if (usb_endpoint_dir_in(&e->desc)) {
-                               if (!intr && !in)
-                                       in = e;
-                               else if (intr && !status)
-                                       status = e;
-                       } else {
-                               if (!out)
-                                       out = e;
-                       }
-               }
-               if (in && out)
-                       break;
-       }
-       if (!alt || !in || !out)
-               return -EINVAL;
-
-       dev->pipe_in = usb_rcvbulkpipe(dev->udev,
-                                      in->desc.bEndpointAddress &
-                                      USB_ENDPOINT_NUMBER_MASK);
-       dev->pipe_out = usb_sndbulkpipe(dev->udev,
-                                       out->desc.bEndpointAddress &
-                                       USB_ENDPOINT_NUMBER_MASK);
-       dev->ep_intr = status;
-
-       return 0;
-}
-
 static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
 {
        struct lan78xx_priv *pdata = NULL;
        int ret;
        int i;
 
-       ret = lan78xx_get_endpoints(dev, intf);
-       if (ret) {
-               netdev_warn(dev->net, "lan78xx_get_endpoints failed: %d\n",
-                           ret);
-               return ret;
-       }
-
        dev->data[0] = (unsigned long)kzalloc(sizeof(*pdata), GFP_KERNEL);
 
        pdata = (struct lan78xx_priv *)(dev->data[0]);
@@ -3700,6 +3630,7 @@ static void lan78xx_stat_monitor(struct timer_list *t)
 static int lan78xx_probe(struct usb_interface *intf,
                         const struct usb_device_id *id)
 {
+       struct usb_host_endpoint *ep_blkin, *ep_blkout, *ep_intr;
        struct lan78xx_net *dev;
        struct net_device *netdev;
        struct usb_device *udev;
@@ -3748,6 +3679,34 @@ static int lan78xx_probe(struct usb_interface *intf,
 
        mutex_init(&dev->stats.access_lock);
 
+       if (intf->cur_altsetting->desc.bNumEndpoints < 3) {
+               ret = -ENODEV;
+               goto out2;
+       }
+
+       dev->pipe_in = usb_rcvbulkpipe(udev, BULK_IN_PIPE);
+       ep_blkin = usb_pipe_endpoint(udev, dev->pipe_in);
+       if (!ep_blkin || !usb_endpoint_is_bulk_in(&ep_blkin->desc)) {
+               ret = -ENODEV;
+               goto out2;
+       }
+
+       dev->pipe_out = usb_sndbulkpipe(udev, BULK_OUT_PIPE);
+       ep_blkout = usb_pipe_endpoint(udev, dev->pipe_out);
+       if (!ep_blkout || !usb_endpoint_is_bulk_out(&ep_blkout->desc)) {
+               ret = -ENODEV;
+               goto out2;
+       }
+
+       ep_intr = &intf->cur_altsetting->endpoint[2];
+       if (!usb_endpoint_is_int_in(&ep_intr->desc)) {
+               ret = -ENODEV;
+               goto out2;
+       }
+
+       dev->pipe_intr = usb_rcvintpipe(dev->udev,
+                                       usb_endpoint_num(&ep_intr->desc));
+
        ret = lan78xx_bind(dev, intf);
        if (ret < 0)
                goto out2;
@@ -3759,18 +3718,7 @@ static int lan78xx_probe(struct usb_interface *intf,
        netdev->max_mtu = MAX_SINGLE_PACKET_SIZE;
        netif_set_gso_max_size(netdev, MAX_SINGLE_PACKET_SIZE - MAX_HEADER);
 
-       dev->ep_blkin = (intf->cur_altsetting)->endpoint + 0;
-       dev->ep_blkout = (intf->cur_altsetting)->endpoint + 1;
-       dev->ep_intr = (intf->cur_altsetting)->endpoint + 2;
-
-       dev->pipe_in = usb_rcvbulkpipe(udev, BULK_IN_PIPE);
-       dev->pipe_out = usb_sndbulkpipe(udev, BULK_OUT_PIPE);
-
-       dev->pipe_intr = usb_rcvintpipe(dev->udev,
-                                       dev->ep_intr->desc.bEndpointAddress &
-                                       USB_ENDPOINT_NUMBER_MASK);
-       period = dev->ep_intr->desc.bInterval;
-
+       period = ep_intr->desc.bInterval;
        maxp = usb_maxpacket(dev->udev, dev->pipe_intr, 0);
        buf = kmalloc(maxp, GFP_KERNEL);
        if (buf) {
@@ -3783,6 +3731,7 @@ static int lan78xx_probe(struct usb_interface *intf,
                        usb_fill_int_urb(dev->urb_intr, dev->udev,
                                         dev->pipe_intr, buf, maxp,
                                         intr_complete, dev, period);
+                       dev->urb_intr->transfer_flags |= URB_FREE_BUFFER;
                }
        }
 
index 89d85dc..a7c3939 100644 (file)
@@ -1376,6 +1376,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
        for (h = 0; h < FDB_HASH_SIZE; ++h) {
                struct vxlan_fdb *f;
 
+               rcu_read_lock();
                hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
                        struct vxlan_rdst *rd;
 
@@ -1387,8 +1388,10 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGH,
                                                     NLM_F_MULTI, NULL);
-                               if (err < 0)
+                               if (err < 0) {
+                                       rcu_read_unlock();
                                        goto out;
+                               }
 skip_nh:
                                *idx += 1;
                                continue;
@@ -1403,12 +1406,15 @@ skip_nh:
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGH,
                                                     NLM_F_MULTI, rd);
-                               if (err < 0)
+                               if (err < 0) {
+                                       rcu_read_unlock();
                                        goto out;
+                               }
 skip:
                                *idx += 1;
                        }
                }
+               rcu_read_unlock();
        }
 out:
        return err;
@@ -3070,8 +3076,10 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
                        if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP)))
                                continue;
                        /* the all_zeros_mac entry is deleted at vxlan_uninit */
-                       if (!is_zero_ether_addr(f->eth_addr))
-                               vxlan_fdb_destroy(vxlan, f, true, true);
+                       if (is_zero_ether_addr(f->eth_addr) &&
+                           f->vni == vxlan->cfg.vni)
+                               continue;
+                       vxlan_fdb_destroy(vxlan, f, true, true);
                }
                spin_unlock_bh(&vxlan->hash_lock[h]);
        }
index 39030a3..1f71838 100644 (file)
@@ -162,7 +162,7 @@ static int nsblk_do_bvec(struct nd_namespace_blk *nsblk,
        return err;
 }
 
-static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t nd_blk_submit_bio(struct bio *bio)
 {
        struct bio_integrity_payload *bip;
        struct nd_namespace_blk *nsblk = bio->bi_disk->private_data;
@@ -225,6 +225,7 @@ static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
 
 static const struct block_device_operations nd_blk_fops = {
        .owner = THIS_MODULE,
+       .submit_bio =  nd_blk_submit_bio,
        .revalidate_disk = nvdimm_revalidate_disk,
 };
 
@@ -250,7 +251,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
        available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
 
-       q = blk_alloc_queue(nd_blk_make_request, NUMA_NO_NODE);
+       q = blk_alloc_queue(NUMA_NO_NODE);
        if (!q)
                return -ENOMEM;
        if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
index 48e9d16..412d21d 100644 (file)
@@ -1439,7 +1439,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
        return ret;
 }
 
-static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t btt_submit_bio(struct bio *bio)
 {
        struct bio_integrity_payload *bip = bio_integrity(bio);
        struct btt *btt = bio->bi_disk->private_data;
@@ -1512,6 +1512,7 @@ static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
 
 static const struct block_device_operations btt_fops = {
        .owner =                THIS_MODULE,
+       .submit_bio =           btt_submit_bio,
        .rw_page =              btt_rw_page,
        .getgeo =               btt_getgeo,
        .revalidate_disk =      nvdimm_revalidate_disk,
@@ -1523,7 +1524,7 @@ static int btt_blk_init(struct btt *btt)
        struct nd_namespace_common *ndns = nd_btt->ndns;
 
        /* create a new disk and request queue for btt */
-       btt->btt_queue = blk_alloc_queue(btt_make_request, NUMA_NO_NODE);
+       btt->btt_queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!btt->btt_queue)
                return -ENOMEM;
 
index d25e66f..94790e6 100644 (file)
@@ -189,7 +189,7 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem,
        return rc;
 }
 
-static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t pmem_submit_bio(struct bio *bio)
 {
        int ret = 0;
        blk_status_t rc = 0;
@@ -281,6 +281,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
 
 static const struct block_device_operations pmem_fops = {
        .owner =                THIS_MODULE,
+       .submit_bio =           pmem_submit_bio,
        .rw_page =              pmem_rw_page,
        .revalidate_disk =      nvdimm_revalidate_disk,
 };
@@ -423,7 +424,7 @@ static int pmem_attach_disk(struct device *dev,
                return -EBUSY;
        }
 
-       q = blk_alloc_queue(pmem_make_request, dev_to_node(dev));
+       q = blk_alloc_queue(dev_to_node(dev));
        if (!q)
                return -ENOMEM;
 
index add0401..6bdcdd9 100644 (file)
@@ -304,7 +304,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
                return true;
 
        nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
-       blk_mq_force_complete_rq(req);
+       blk_mq_complete_request(req);
        return true;
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);
@@ -1102,6 +1102,9 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
        int pos;
        int len;
 
+       if (ctrl->quirks & NVME_QUIRK_NO_NS_DESC_LIST)
+               return 0;
+
        c.identify.opcode = nvme_admin_identify;
        c.identify.nsid = cpu_to_le32(nsid);
        c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
@@ -1115,18 +1118,6 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
        if (status) {
                dev_warn(ctrl->device,
                        "Identify Descriptors failed (%d)\n", status);
-                /*
-                 * Don't treat non-retryable errors as fatal, as we potentially
-                 * already have a NGUID or EUI-64.  If we failed with DNR set,
-                 * we want to silently ignore the error as we can still
-                 * identify the device, but if the status has DNR set, we want
-                 * to propagate the error back specifically for the disk
-                 * revalidation flow to make sure we don't abandon the
-                 * device just because of a temporal retry-able error (such
-                 * as path of transport errors).
-                 */
-               if (status > 0 && (status & NVME_SC_DNR))
-                       status = 0;
                goto free_data;
        }
 
@@ -2184,6 +2175,7 @@ static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode)
 
 const struct block_device_operations nvme_ns_head_ops = {
        .owner          = THIS_MODULE,
+       .submit_bio     = nvme_ns_head_submit_bio,
        .open           = nvme_ns_head_open,
        .release        = nvme_ns_head_release,
        .ioctl          = nvme_ioctl,
index e999a8c..6aa30bb 100644 (file)
@@ -227,6 +227,7 @@ static DECLARE_COMPLETION(nvme_fc_unload_proceed);
  */
 static struct device *fc_udev_device;
 
+static void nvme_fc_complete_rq(struct request *rq);
 
 /* *********************** FC-NVME Port Management ************************ */
 
@@ -2033,7 +2034,8 @@ done:
        }
 
        __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
-       nvme_end_request(rq, status, result);
+       if (!nvme_end_request(rq, status, result))
+               nvme_fc_complete_rq(rq);
 
 check_error:
        if (terminate_assoc)
index 6650947..5a37a59 100644 (file)
@@ -291,8 +291,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
        return false;
 }
 
-static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
-               struct bio *bio)
+blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
 {
        struct nvme_ns_head *head = bio->bi_disk->private_data;
        struct device *dev = disk_to_dev(head->disk);
@@ -301,12 +300,11 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
        int srcu_idx;
 
        /*
-        * The namespace might be going away and the bio might
-        * be moved to a different queue via blk_steal_bios(),
-        * so we need to use the bio_split pool from the original
-        * queue to allocate the bvecs from.
+        * The namespace might be going away and the bio might be moved to a
+        * different queue via blk_steal_bios(), so we need to use the bio_split
+        * pool from the original queue to allocate the bvecs from.
         */
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
        srcu_idx = srcu_read_lock(&head->srcu);
        ns = nvme_find_path(head);
@@ -316,7 +314,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
                trace_block_bio_remap(bio->bi_disk->queue, bio,
                                      disk_devt(ns->head->disk),
                                      bio->bi_iter.bi_sector);
-               ret = direct_make_request(bio);
+               ret = submit_bio_noacct(bio);
        } else if (nvme_available_path(head)) {
                dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
 
@@ -353,7 +351,7 @@ static void nvme_requeue_work(struct work_struct *work)
                 * path.
                 */
                bio->bi_disk = head->disk;
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
        }
 }
 
@@ -375,7 +373,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
        if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
                return 0;
 
-       q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node);
+       q = blk_alloc_queue(ctrl->numa_node);
        if (!q)
                goto out;
        blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
index 1de3f9b..9c5b82a 100644 (file)
@@ -129,6 +129,13 @@ enum nvme_quirks {
         * Don't change the value of the temperature threshold feature
         */
        NVME_QUIRK_NO_TEMP_THRESH_CHANGE        = (1 << 14),
+
+       /*
+        * The controller doesn't handle the Identify Namespace
+        * Identification Descriptor list subcommand despite claiming
+        * NVMe 1.3 compliance.
+        */
+       NVME_QUIRK_NO_NS_DESC_LIST              = (1 << 15),
 };
 
 /*
@@ -474,7 +481,7 @@ static inline u32 nvme_bytes_to_numd(size_t len)
        return (len >> 2) - 1;
 }
 
-static inline void nvme_end_request(struct request *req, __le16 status,
+static inline bool nvme_end_request(struct request *req, __le16 status,
                union nvme_result result)
 {
        struct nvme_request *rq = nvme_req(req);
@@ -483,7 +490,9 @@ static inline void nvme_end_request(struct request *req, __le16 status,
        rq->result = result;
        /* inject error when permitted by fault injection framework */
        nvme_should_fail(req);
-       blk_mq_complete_request(req);
+       if (unlikely(blk_should_fake_timeout(req->q)))
+               return true;
+       return blk_mq_complete_request_remote(req);
 }
 
 static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
@@ -586,6 +595,7 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
 struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+blk_qc_t nvme_ns_head_submit_bio(struct bio *bio);
 
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 {
index b1d18f0..0c85680 100644 (file)
@@ -963,7 +963,8 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 
        req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
        trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
-       nvme_end_request(req, cqe->status, cqe->result);
+       if (!nvme_end_request(req, cqe->status, cqe->result))
+               nvme_pci_complete_rq(req);
 }
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
@@ -3099,6 +3100,8 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
                .driver_data = NVME_QUIRK_IDENTIFY_CNS |
                                NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+       { PCI_DEVICE(0x126f, 0x2263),   /* Silicon Motion unidentified */
+               .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
        { PCI_DEVICE(0x1bb1, 0x0100),   /* Seagate Nytro Flash Storage */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
@@ -3122,6 +3125,8 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_DEVICE(0x1cc1, 0x8201),   /* ADATA SX8200PNP 512GB */
                .driver_data = NVME_QUIRK_NO_DEEPEST_PS |
                                NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+       { PCI_DEVICE(0x1c5c, 0x1504),   /* SK Hynix PC400 */
+               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
                .driver_data = NVME_QUIRK_SINGLE_VECTOR },
index 13506a8..e881f87 100644 (file)
@@ -149,6 +149,7 @@ MODULE_PARM_DESC(register_always,
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                struct rdma_cm_event *event);
 static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void nvme_rdma_complete_rq(struct request *rq);
 
 static const struct blk_mq_ops nvme_rdma_mq_ops;
 static const struct blk_mq_ops nvme_rdma_admin_mq_ops;
@@ -1149,6 +1150,16 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
        queue_work(nvme_reset_wq, &ctrl->err_work);
 }
 
+static void nvme_rdma_end_request(struct nvme_rdma_request *req)
+{
+       struct request *rq = blk_mq_rq_from_pdu(req);
+
+       if (!refcount_dec_and_test(&req->ref))
+               return;
+       if (!nvme_end_request(rq, req->status, req->result))
+               nvme_rdma_complete_rq(rq);
+}
+
 static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
                const char *op)
 {
@@ -1173,16 +1184,11 @@ static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 {
        struct nvme_rdma_request *req =
                container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
-       struct request *rq = blk_mq_rq_from_pdu(req);
 
-       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+       if (unlikely(wc->status != IB_WC_SUCCESS))
                nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
-               return;
-       }
-
-       if (refcount_dec_and_test(&req->ref))
-               nvme_end_request(rq, req->status, req->result);
-
+       else
+               nvme_rdma_end_request(req);
 }
 
 static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
@@ -1547,15 +1553,11 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
                container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
        struct nvme_rdma_request *req =
                container_of(qe, struct nvme_rdma_request, sqe);
-       struct request *rq = blk_mq_rq_from_pdu(req);
 
-       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+       if (unlikely(wc->status != IB_WC_SUCCESS))
                nvme_rdma_wr_error(cq, wc, "SEND");
-               return;
-       }
-
-       if (refcount_dec_and_test(&req->ref))
-               nvme_end_request(rq, req->status, req->result);
+       else
+               nvme_rdma_end_request(req);
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
@@ -1697,8 +1699,7 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                return;
        }
 
-       if (refcount_dec_and_test(&req->ref))
-               nvme_end_request(rq, req->status, req->result);
+       nvme_rdma_end_request(req);
 }
 
 static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
index 79ef2b8..472f900 100644 (file)
@@ -464,7 +464,8 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
                return -EINVAL;
        }
 
-       nvme_end_request(rq, cqe->status, cqe->result);
+       if (!nvme_end_request(rq, cqe->status, cqe->result))
+               nvme_complete_rq(rq);
        queue->nr_cqe++;
 
        return 0;
@@ -654,7 +655,8 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status)
 {
        union nvme_result res = {};
 
-       nvme_end_request(rq, cpu_to_le16(status << 1), res);
+       if (!nvme_end_request(rq, cpu_to_le16(status << 1), res))
+               nvme_complete_rq(rq);
 }
 
 static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
@@ -1382,6 +1384,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
        if (nctrl->opts->tos >= 0)
                ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
 
+       /* Set 10 seconds timeout for icresp recvmsg */
+       queue->sock->sk->sk_rcvtimeo = 10 * HZ;
+
        queue->sock->sk->sk_allocation = GFP_ATOMIC;
        nvme_tcp_set_queue_io_cpu(queue);
        queue->request = NULL;
index 6e2f623..6816507 100644 (file)
@@ -467,7 +467,7 @@ static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
                return -EINVAL;
        }
 
-       if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
+       if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
                pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
                       ns->device_path);
                return -EINVAL;
index 6344e73..8a0d4fe 100644 (file)
@@ -116,7 +116,8 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
                        return;
                }
 
-               nvme_end_request(rq, cqe->status, cqe->result);
+               if (!nvme_end_request(rq, cqe->status, cqe->result))
+                       nvme_loop_complete_rq(rq);
        }
 }
 
index ae03b12..ea44fea 100644 (file)
@@ -2201,15 +2201,15 @@ int of_find_last_cache_level(unsigned int cpu)
 }
 
 /**
- * of_map_rid - Translate a requester ID through a downstream mapping.
+ * of_map_id - Translate an ID through a downstream mapping.
  * @np: root complex device node.
- * @rid: device requester ID to map.
+ * @id: device ID to map.
  * @map_name: property name of the map to use.
  * @map_mask_name: optional property name of the mask to use.
  * @target: optional pointer to a target device node.
  * @id_out: optional pointer to receive the translated ID.
  *
- * Given a device requester ID, look up the appropriate implementation-defined
+ * Given a device ID, look up the appropriate implementation-defined
  * platform ID and/or the target device which receives transactions on that
  * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
  * @id_out may be NULL if only the other is required. If @target points to
@@ -2219,11 +2219,11 @@ int of_find_last_cache_level(unsigned int cpu)
  *
  * Return: 0 on success or a standard error code on failure.
  */
-int of_map_rid(struct device_node *np, u32 rid,
+int of_map_id(struct device_node *np, u32 id,
               const char *map_name, const char *map_mask_name,
               struct device_node **target, u32 *id_out)
 {
-       u32 map_mask, masked_rid;
+       u32 map_mask, masked_id;
        int map_len;
        const __be32 *map = NULL;
 
@@ -2235,7 +2235,7 @@ int of_map_rid(struct device_node *np, u32 rid,
                if (target)
                        return -ENODEV;
                /* Otherwise, no map implies no translation */
-               *id_out = rid;
+               *id_out = id;
                return 0;
        }
 
@@ -2255,22 +2255,22 @@ int of_map_rid(struct device_node *np, u32 rid,
        if (map_mask_name)
                of_property_read_u32(np, map_mask_name, &map_mask);
 
-       masked_rid = map_mask & rid;
+       masked_id = map_mask & id;
        for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
                struct device_node *phandle_node;
-               u32 rid_base = be32_to_cpup(map + 0);
+               u32 id_base = be32_to_cpup(map + 0);
                u32 phandle = be32_to_cpup(map + 1);
                u32 out_base = be32_to_cpup(map + 2);
-               u32 rid_len = be32_to_cpup(map + 3);
+               u32 id_len = be32_to_cpup(map + 3);
 
-               if (rid_base & ~map_mask) {
-                       pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
+               if (id_base & ~map_mask) {
+                       pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores id-base (0x%x)\n",
                                np, map_name, map_name,
-                               map_mask, rid_base);
+                               map_mask, id_base);
                        return -EFAULT;
                }
 
-               if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
+               if (masked_id < id_base || masked_id >= id_base + id_len)
                        continue;
 
                phandle_node = of_find_node_by_phandle(phandle);
@@ -2288,20 +2288,20 @@ int of_map_rid(struct device_node *np, u32 rid,
                }
 
                if (id_out)
-                       *id_out = masked_rid - rid_base + out_base;
+                       *id_out = masked_id - id_base + out_base;
 
-               pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
-                       np, map_name, map_mask, rid_base, out_base,
-                       rid_len, rid, masked_rid - rid_base + out_base);
+               pr_debug("%pOF: %s, using mask %08x, id-base: %08x, out-base: %08x, length: %08x, id: %08x -> %08x\n",
+                       np, map_name, map_mask, id_base, out_base,
+                       id_len, id, masked_id - id_base + out_base);
                return 0;
        }
 
-       pr_info("%pOF: no %s translation for rid 0x%x on %pOF\n", np, map_name,
-               rid, target && *target ? *target : NULL);
+       pr_info("%pOF: no %s translation for id 0x%x on %pOF\n", np, map_name,
+               id, target && *target ? *target : NULL);
 
        /* Bypasses translation */
        if (id_out)
-               *id_out = rid;
+               *id_out = id;
        return 0;
 }
-EXPORT_SYMBOL_GPL(of_map_rid);
+EXPORT_SYMBOL_GPL(of_map_id);
index 27203bf..b439c1e 100644 (file)
@@ -78,6 +78,7 @@ int of_device_add(struct platform_device *ofdev)
  * @np:                Pointer to OF node having DMA configuration
  * @force_dma:  Whether device is to be set up by of_dma_configure() even if
  *             DMA capability is not explicitly described by firmware.
+ * @id:                Optional const pointer value input id
  *
  * Try to get device's DMA configuration from DT and update it
  * accordingly.
@@ -86,7 +87,8 @@ int of_device_add(struct platform_device *ofdev)
  * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events
  * to fix up DMA configuration.
  */
-int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
+int of_dma_configure_id(struct device *dev, struct device_node *np,
+                       bool force_dma, const u32 *id)
 {
        u64 dma_addr, paddr, size = 0;
        int ret;
@@ -160,7 +162,7 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
        dev_dbg(dev, "device is%sdma coherent\n",
                coherent ? " " : " not ");
 
-       iommu = of_iommu_configure(dev, np);
+       iommu = of_iommu_configure(dev, np, id);
        if (PTR_ERR(iommu) == -EPROBE_DEFER)
                return -EPROBE_DEFER;
 
@@ -171,7 +173,7 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(of_dma_configure);
+EXPORT_SYMBOL_GPL(of_dma_configure_id);
 
 int of_device_register(struct platform_device *pdev)
 {
index a296eaf..25d17b8 100644 (file)
@@ -576,55 +576,57 @@ err:
        }
 }
 
-static u32 __of_msi_map_rid(struct device *dev, struct device_node **np,
-                           u32 rid_in)
+static u32 __of_msi_map_id(struct device *dev, struct device_node **np,
+                           u32 id_in)
 {
        struct device *parent_dev;
-       u32 rid_out = rid_in;
+       u32 id_out = id_in;
 
        /*
         * Walk up the device parent links looking for one with a
         * "msi-map" property.
         */
        for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent)
-               if (!of_map_rid(parent_dev->of_node, rid_in, "msi-map",
-                               "msi-map-mask", np, &rid_out))
+               if (!of_map_id(parent_dev->of_node, id_in, "msi-map",
+                               "msi-map-mask", np, &id_out))
                        break;
-       return rid_out;
+       return id_out;
 }
 
 /**
- * of_msi_map_rid - Map a MSI requester ID for a device.
+ * of_msi_map_id - Map a MSI ID for a device.
  * @dev: device for which the mapping is to be done.
  * @msi_np: device node of the expected msi controller.
- * @rid_in: unmapped MSI requester ID for the device.
+ * @id_in: unmapped MSI ID for the device.
  *
  * Walk up the device hierarchy looking for devices with a "msi-map"
- * property.  If found, apply the mapping to @rid_in.
+ * property.  If found, apply the mapping to @id_in.
  *
- * Returns the mapped MSI requester ID.
+ * Returns the mapped MSI ID.
  */
-u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in)
+u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in)
 {
-       return __of_msi_map_rid(dev, &msi_np, rid_in);
+       return __of_msi_map_id(dev, &msi_np, id_in);
 }
 
 /**
  * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain
  * @dev: device for which the mapping is to be done.
- * @rid: Requester ID for the device.
+ * @id: Device ID.
+ * @bus_token: Bus token
  *
  * Walk up the device hierarchy looking for devices with a "msi-map"
  * property.
  *
  * Returns: the MSI domain for this device (or NULL on failure)
  */
-struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid)
+struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id,
+                                               u32 bus_token)
 {
        struct device_node *np = NULL;
 
-       __of_msi_map_rid(dev, &np, rid);
-       return irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
+       __of_msi_map_id(dev, &np, id);
+       return irq_find_matching_host(np, bus_token);
 }
 
 /**
index 9a64cf9..ebec0a6 100644 (file)
@@ -560,6 +560,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
        if (!vmd->bus) {
                pci_free_resource_list(&resources);
                irq_domain_remove(vmd->irq_domain);
+               irq_domain_free_fwnode(fn);
                return -ENODEV;
        }
 
@@ -673,6 +674,7 @@ static void vmd_cleanup_srcu(struct vmd_dev *vmd)
 static void vmd_remove(struct pci_dev *dev)
 {
        struct vmd_dev *vmd = pci_get_drvdata(dev);
+       struct fwnode_handle *fn = vmd->irq_domain->fwnode;
 
        sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
        pci_stop_root_bus(vmd->bus);
@@ -680,6 +682,7 @@ static void vmd_remove(struct pci_dev *dev)
        vmd_cleanup_srcu(vmd);
        vmd_detach_resources(vmd);
        irq_domain_remove(vmd->irq_domain);
+       irq_domain_free_fwnode(fn);
 }
 
 #ifdef CONFIG_PM_SLEEP
index 6b43a54..19aeadb 100644 (file)
@@ -1535,8 +1535,8 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev)
        pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
 
        of_node = irq_domain_get_of_node(domain);
-       rid = of_node ? of_msi_map_rid(&pdev->dev, of_node, rid) :
-                       iort_msi_map_rid(&pdev->dev, rid);
+       rid = of_node ? of_msi_map_id(&pdev->dev, of_node, rid) :
+                       iort_msi_map_id(&pdev->dev, rid);
 
        return rid;
 }
@@ -1556,9 +1556,10 @@ struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
        u32 rid = pci_dev_id(pdev);
 
        pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
-       dom = of_msi_map_get_device_domain(&pdev->dev, rid);
+       dom = of_msi_map_get_device_domain(&pdev->dev, rid, DOMAIN_BUS_PCI_MSI);
        if (!dom)
-               dom = iort_get_device_domain(&pdev->dev, rid);
+               dom = iort_get_device_domain(&pdev->dev, rid,
+                                            DOMAIN_BUS_PCI_MSI);
        return dom;
 }
 #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
index da6510a..449466f 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/isolation.h>
 #include <linux/cpu.h>
 #include <linux/pm_runtime.h>
 #include <linux/suspend.h>
@@ -333,6 +334,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
                          const struct pci_device_id *id)
 {
        int error, node, cpu;
+       int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
        struct drv_dev_and_id ddi = { drv, dev, id };
 
        /*
@@ -353,7 +355,8 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
            pci_physfn_is_probed(dev))
                cpu = nr_cpu_ids;
        else
-               cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+               cpu = cpumask_any_and(cpumask_of_node(node),
+                                     housekeeping_cpumask(hk_flags));
 
        if (cpu < nr_cpu_ids)
                error = work_on_cpu(cpu, local_pci_probe, &ddi);
index 812bfc3..2ea61ab 100644 (file)
@@ -2330,6 +2330,19 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f1, quirk_disable_aspm_l0s);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f4, quirk_disable_aspm_l0s);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1508, quirk_disable_aspm_l0s);
 
+static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev)
+{
+       pci_info(dev, "Disabling ASPM L0s/L1\n");
+       pci_disable_link_state(dev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1);
+}
+
+/*
+ * ASM1083/1085 PCIe-PCI bridge devices cause AER timeout errors on the
+ * upstream PCIe root port when ASPM is enabled. At least L0s mode is affected;
+ * disable both L0s and L1 for now to be safe.
+ */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1);
+
 /*
  * Some Pericom PCIe-to-PCI bridges in reverse mode need the PCIe Retrain
  * Link bit cleared after starting the link retrain process to allow this
index 4cdb35d..5274f7f 100644 (file)
@@ -756,8 +756,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
                .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
        };
 
-       res_0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       smmu_pmu->reg_base = devm_ioremap_resource(dev, res_0);
+       smmu_pmu->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res_0);
        if (IS_ERR(smmu_pmu->reg_base))
                return PTR_ERR(smmu_pmu->reg_base);
 
index ff1ee15..f8ff30c 100644 (file)
@@ -7,6 +7,8 @@ config PINCTRL_MSM
        select PINCONF
        select GENERIC_PINCONF
        select GPIOLIB_IRQCHIP
+       select IRQ_DOMAIN_HIERARCHY
+       select IRQ_FASTEOI_HIERARCHY_HANDLERS
 
 config PINCTRL_APQ8064
        tristate "Qualcomm APQ8064 pin controller driver"
index 83b7d64..c322f30 100644 (file)
@@ -832,6 +832,52 @@ static void msm_gpio_irq_unmask(struct irq_data *d)
        msm_gpio_irq_clear_unmask(d, false);
 }
 
+/**
+ * msm_gpio_update_dual_edge_parent() - Prime next edge for IRQs handled by parent.
+ * @d: The irq data.
+ *
+ * This is much like msm_gpio_update_dual_edge_pos() but for IRQs that are
+ * normally handled by the parent irqchip.  The logic here is slightly
+ * different due to what's easy to do with our parent, but in principle it's
+ * the same.
+ */
+static void msm_gpio_update_dual_edge_parent(struct irq_data *d)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
+       const struct msm_pingroup *g = &pctrl->soc->groups[d->hwirq];
+       int loop_limit = 100;
+       unsigned int val;
+       unsigned int type;
+
+       /* Read the value and make a guess about what edge we need to catch */
+       val = msm_readl_io(pctrl, g) & BIT(g->in_bit);
+       type = val ? IRQ_TYPE_EDGE_FALLING : IRQ_TYPE_EDGE_RISING;
+
+       do {
+               /* Set the parent to catch the next edge */
+               irq_chip_set_type_parent(d, type);
+
+               /*
+                * Possibly the line changed between when we last read "val"
+                * (and decided what edge we needed) and when we set the edge.
+                * If the value didn't change (or changed and then changed
+                * back) then we're done.
+                */
+               val = msm_readl_io(pctrl, g) & BIT(g->in_bit);
+               if (type == IRQ_TYPE_EDGE_RISING) {
+                       if (!val)
+                               return;
+                       type = IRQ_TYPE_EDGE_FALLING;
+               } else if (type == IRQ_TYPE_EDGE_FALLING) {
+                       if (val)
+                               return;
+                       type = IRQ_TYPE_EDGE_RISING;
+               }
+       } while (loop_limit-- > 0);
+       dev_warn_once(pctrl->dev, "dual-edge irq failed to stabilize\n");
+}
+
 static void msm_gpio_irq_ack(struct irq_data *d)
 {
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -840,8 +886,11 @@ static void msm_gpio_irq_ack(struct irq_data *d)
        unsigned long flags;
        u32 val;
 
-       if (test_bit(d->hwirq, pctrl->skip_wake_irqs))
+       if (test_bit(d->hwirq, pctrl->skip_wake_irqs)) {
+               if (test_bit(d->hwirq, pctrl->dual_edge_irqs))
+                       msm_gpio_update_dual_edge_parent(d);
                return;
+       }
 
        g = &pctrl->soc->groups[d->hwirq];
 
@@ -860,6 +909,17 @@ static void msm_gpio_irq_ack(struct irq_data *d)
        raw_spin_unlock_irqrestore(&pctrl->lock, flags);
 }
 
+static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d,
+                                                      unsigned int type)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
+
+       return type == IRQ_TYPE_EDGE_BOTH &&
+              pctrl->soc->wakeirq_dual_edge_errata && d->parent_data &&
+              test_bit(d->hwirq, pctrl->skip_wake_irqs);
+}
+
 static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
 {
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -868,11 +928,21 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
        unsigned long flags;
        u32 val;
 
+       if (msm_gpio_needs_dual_edge_parent_workaround(d, type)) {
+               set_bit(d->hwirq, pctrl->dual_edge_irqs);
+               irq_set_handler_locked(d, handle_fasteoi_ack_irq);
+               msm_gpio_update_dual_edge_parent(d);
+               return 0;
+       }
+
        if (d->parent_data)
                irq_chip_set_type_parent(d, type);
 
-       if (test_bit(d->hwirq, pctrl->skip_wake_irqs))
+       if (test_bit(d->hwirq, pctrl->skip_wake_irqs)) {
+               clear_bit(d->hwirq, pctrl->dual_edge_irqs);
+               irq_set_handler_locked(d, handle_fasteoi_irq);
                return 0;
+       }
 
        g = &pctrl->soc->groups[d->hwirq];
 
index 9452da1..7486fe0 100644 (file)
@@ -113,6 +113,9 @@ struct msm_gpio_wakeirq_map {
  * @pull_no_keeper: The SoC does not support keeper bias.
  * @wakeirq_map:    The map of wakeup capable GPIOs and the pin at PDC/MPM
  * @nwakeirq_map:   The number of entries in @wakeirq_map
+ * @wakeirq_dual_edge_errata: If true then GPIOs using the wakeirq_map need
+ *                            to be aware that their parent can't handle dual
+ *                            edge interrupts.
  */
 struct msm_pinctrl_soc_data {
        const struct pinctrl_pin_desc *pins;
@@ -128,6 +131,7 @@ struct msm_pinctrl_soc_data {
        const int *reserved_gpios;
        const struct msm_gpio_wakeirq_map *wakeirq_map;
        unsigned int nwakeirq_map;
+       bool wakeirq_dual_edge_errata;
 };
 
 extern const struct dev_pm_ops msm_pinctrl_dev_pm_ops;
index 1b6465a..1d9acad 100644 (file)
@@ -1147,6 +1147,7 @@ static const struct msm_pinctrl_soc_data sc7180_pinctrl = {
        .ntiles = ARRAY_SIZE(sc7180_tiles),
        .wakeirq_map = sc7180_pdc_map,
        .nwakeirq_map = ARRAY_SIZE(sc7180_pdc_map),
+       .wakeirq_dual_edge_errata = true,
 };
 
 static int sc7180_pinctrl_probe(struct platform_device *pdev)
index cb8d739..7dbcf69 100644 (file)
@@ -370,15 +370,6 @@ config PWM_PCA9685
          To compile this driver as a module, choose M here: the module
          will be called pwm-pca9685.
 
-config PWM_PUV3
-       tristate "PKUnity NetBook-0916 PWM support"
-       depends on ARCH_PUV3
-       help
-         Generic PWM framework driver for PKUnity NetBook-0916.
-
-         To compile this driver as a module, choose M here: the module
-         will be called pwm-puv3.
-
 config PWM_PXA
        tristate "PXA PWM support"
        depends on ARCH_PXA || COMPILE_TEST
index a59c710..2c2ba0a 100644 (file)
@@ -34,7 +34,6 @@ obj-$(CONFIG_PWM_MTK_DISP)    += pwm-mtk-disp.o
 obj-$(CONFIG_PWM_MXS)          += pwm-mxs.o
 obj-$(CONFIG_PWM_OMAP_DMTIMER) += pwm-omap-dmtimer.o
 obj-$(CONFIG_PWM_PCA9685)      += pwm-pca9685.o
-obj-$(CONFIG_PWM_PUV3)         += pwm-puv3.o
 obj-$(CONFIG_PWM_PXA)          += pwm-pxa.o
 obj-$(CONFIG_PWM_RCAR)         += pwm-rcar.o
 obj-$(CONFIG_PWM_RENESAS_TPU)  += pwm-renesas-tpu.o
diff --git a/drivers/pwm/pwm-puv3.c b/drivers/pwm/pwm-puv3.c
deleted file mode 100644 (file)
index 9d0bd87..0000000
+++ /dev/null
@@ -1,150 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/unicore32/kernel/pwm.c
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/pwm.h>
-
-#include <asm/div64.h>
-#include <mach/hardware.h>
-
-struct puv3_pwm_chip {
-       struct pwm_chip chip;
-       void __iomem *base;
-       struct clk *clk;
-};
-
-static inline struct puv3_pwm_chip *to_puv3(struct pwm_chip *chip)
-{
-       return container_of(chip, struct puv3_pwm_chip, chip);
-}
-
-/*
- * period_ns = 10^9 * (PRESCALE + 1) * (PV + 1) / PWM_CLK_RATE
- * duty_ns   = 10^9 * (PRESCALE + 1) * DC / PWM_CLK_RATE
- */
-static int puv3_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-                          int duty_ns, int period_ns)
-{
-       unsigned long period_cycles, prescale, pv, dc;
-       struct puv3_pwm_chip *puv3 = to_puv3(chip);
-       unsigned long long c;
-
-       c = clk_get_rate(puv3->clk);
-       c = c * period_ns;
-       do_div(c, 1000000000);
-       period_cycles = c;
-
-       if (period_cycles < 1)
-               period_cycles = 1;
-
-       prescale = (period_cycles - 1) / 1024;
-       pv = period_cycles / (prescale + 1) - 1;
-
-       if (prescale > 63)
-               return -EINVAL;
-
-       if (duty_ns == period_ns)
-               dc = OST_PWMDCCR_FDCYCLE;
-       else
-               dc = (pv + 1) * duty_ns / period_ns;
-
-       /*
-        * NOTE: the clock to PWM has to be enabled first
-        * before writing to the registers
-        */
-       clk_prepare_enable(puv3->clk);
-
-       writel(prescale, puv3->base + OST_PWM_PWCR);
-       writel(pv - dc, puv3->base + OST_PWM_DCCR);
-       writel(pv, puv3->base + OST_PWM_PCR);
-
-       clk_disable_unprepare(puv3->clk);
-
-       return 0;
-}
-
-static int puv3_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-       struct puv3_pwm_chip *puv3 = to_puv3(chip);
-
-       return clk_prepare_enable(puv3->clk);
-}
-
-static void puv3_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-       struct puv3_pwm_chip *puv3 = to_puv3(chip);
-
-       clk_disable_unprepare(puv3->clk);
-}
-
-static const struct pwm_ops puv3_pwm_ops = {
-       .config = puv3_pwm_config,
-       .enable = puv3_pwm_enable,
-       .disable = puv3_pwm_disable,
-       .owner = THIS_MODULE,
-};
-
-static int pwm_probe(struct platform_device *pdev)
-{
-       struct puv3_pwm_chip *puv3;
-       struct resource *r;
-       int ret;
-
-       puv3 = devm_kzalloc(&pdev->dev, sizeof(*puv3), GFP_KERNEL);
-       if (!puv3)
-               return -ENOMEM;
-
-       puv3->clk = devm_clk_get(&pdev->dev, "OST_CLK");
-       if (IS_ERR(puv3->clk))
-               return PTR_ERR(puv3->clk);
-
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       puv3->base = devm_ioremap_resource(&pdev->dev, r);
-       if (IS_ERR(puv3->base))
-               return PTR_ERR(puv3->base);
-
-       puv3->chip.dev = &pdev->dev;
-       puv3->chip.ops = &puv3_pwm_ops;
-       puv3->chip.base = -1;
-       puv3->chip.npwm = 1;
-
-       ret = pwmchip_add(&puv3->chip);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
-               return ret;
-       }
-
-       platform_set_drvdata(pdev, puv3);
-       return 0;
-}
-
-static int pwm_remove(struct platform_device *pdev)
-{
-       struct puv3_pwm_chip *puv3 = platform_get_drvdata(pdev);
-
-       return pwmchip_remove(&puv3->chip);
-}
-
-static struct platform_driver puv3_pwm_driver = {
-       .driver = {
-               .name = "PKUnity-v3-PWM",
-       },
-       .probe = pwm_probe,
-       .remove = pwm_remove,
-};
-module_platform_driver(puv3_pwm_driver);
-
-MODULE_LICENSE("GPL v2");
index b54d87d..f3b8e6d 100644 (file)
@@ -1729,15 +1729,6 @@ config RTC_DRV_TEGRA
          This drive can also be built as a module. If so, the module
          will be called rtc-tegra.
 
-config RTC_DRV_PUV3
-       tristate "PKUnity v3 RTC support"
-       depends on ARCH_PUV3
-       help
-         This enables support for the RTC in the PKUnity-v3 SoCs.
-
-         This drive can also be built as a module. If so, the module
-         will be called rtc-puv3.
-
 config RTC_DRV_LOONGSON1
        tristate "loongson1 RTC support"
        depends on MACH_LOONGSON32
index 0721752..880e08a 100644 (file)
@@ -128,7 +128,6 @@ obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o
 obj-$(CONFIG_RTC_DRV_PL031)    += rtc-pl031.o
 obj-$(CONFIG_RTC_DRV_PM8XXX)   += rtc-pm8xxx.o
 obj-$(CONFIG_RTC_DRV_PS3)      += rtc-ps3.o
-obj-$(CONFIG_RTC_DRV_PUV3)     += rtc-puv3.o
 obj-$(CONFIG_RTC_DRV_PXA)      += rtc-pxa.o
 obj-$(CONFIG_RTC_DRV_R7301)    += rtc-r7301.o
 obj-$(CONFIG_RTC_DRV_R9701)    += rtc-r9701.o
diff --git a/drivers/rtc/rtc-puv3.c b/drivers/rtc/rtc-puv3.c
deleted file mode 100644 (file)
index 954b88d..0000000
+++ /dev/null
@@ -1,286 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * RTC driver code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/rtc.h>
-#include <linux/bcd.h>
-#include <linux/clk.h>
-#include <linux/log2.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/io.h>
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-
-static struct resource *puv3_rtc_mem;
-
-static int puv3_rtc_alarmno = IRQ_RTCAlarm;
-static int puv3_rtc_tickno  = IRQ_RTC;
-
-static DEFINE_SPINLOCK(puv3_rtc_pie_lock);
-
-/* IRQ Handlers */
-static irqreturn_t puv3_rtc_alarmirq(int irq, void *id)
-{
-       struct rtc_device *rdev = id;
-
-       writel(readl(RTC_RTSR) | RTC_RTSR_AL, RTC_RTSR);
-       rtc_update_irq(rdev, 1, RTC_AF | RTC_IRQF);
-       return IRQ_HANDLED;
-}
-
-static irqreturn_t puv3_rtc_tickirq(int irq, void *id)
-{
-       struct rtc_device *rdev = id;
-
-       writel(readl(RTC_RTSR) | RTC_RTSR_HZ, RTC_RTSR);
-       rtc_update_irq(rdev, 1, RTC_PF | RTC_IRQF);
-       return IRQ_HANDLED;
-}
-
-/* Update control registers */
-static void puv3_rtc_setaie(struct device *dev, int to)
-{
-       unsigned int tmp;
-
-       dev_dbg(dev, "%s: aie=%d\n", __func__, to);
-
-       tmp = readl(RTC_RTSR) & ~RTC_RTSR_ALE;
-
-       if (to)
-               tmp |= RTC_RTSR_ALE;
-
-       writel(tmp, RTC_RTSR);
-}
-
-static int puv3_rtc_setpie(struct device *dev, int enabled)
-{
-       unsigned int tmp;
-
-       dev_dbg(dev, "%s: pie=%d\n", __func__, enabled);
-
-       spin_lock_irq(&puv3_rtc_pie_lock);
-       tmp = readl(RTC_RTSR) & ~RTC_RTSR_HZE;
-
-       if (enabled)
-               tmp |= RTC_RTSR_HZE;
-
-       writel(tmp, RTC_RTSR);
-       spin_unlock_irq(&puv3_rtc_pie_lock);
-
-       return 0;
-}
-
-/* Time read/write */
-static int puv3_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
-{
-       rtc_time64_to_tm(readl(RTC_RCNR), rtc_tm);
-
-       dev_dbg(dev, "read time %ptRr\n", rtc_tm);
-
-       return 0;
-}
-
-static int puv3_rtc_settime(struct device *dev, struct rtc_time *tm)
-{
-       dev_dbg(dev, "set time %ptRr\n", tm);
-
-       writel(rtc_tm_to_time64(tm), RTC_RCNR);
-
-       return 0;
-}
-
-static int puv3_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
-{
-       struct rtc_time *alm_tm = &alrm->time;
-
-       rtc_time64_to_tm(readl(RTC_RTAR), alm_tm);
-
-       alrm->enabled = readl(RTC_RTSR) & RTC_RTSR_ALE;
-
-       dev_dbg(dev, "read alarm: %d, %ptRr\n", alrm->enabled, alm_tm);
-
-       return 0;
-}
-
-static int puv3_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
-{
-       struct rtc_time *tm = &alrm->time;
-
-       dev_dbg(dev, "set alarm: %d, %ptRr\n", alrm->enabled, tm);
-
-       writel(rtc_tm_to_time64(tm), RTC_RTAR);
-
-       puv3_rtc_setaie(dev, alrm->enabled);
-
-       if (alrm->enabled)
-               enable_irq_wake(puv3_rtc_alarmno);
-       else
-               disable_irq_wake(puv3_rtc_alarmno);
-
-       return 0;
-}
-
-static int puv3_rtc_proc(struct device *dev, struct seq_file *seq)
-{
-       seq_printf(seq, "periodic_IRQ\t: %s\n",
-                    (readl(RTC_RTSR) & RTC_RTSR_HZE) ? "yes" : "no");
-       return 0;
-}
-
-static const struct rtc_class_ops puv3_rtcops = {
-       .read_time      = puv3_rtc_gettime,
-       .set_time       = puv3_rtc_settime,
-       .read_alarm     = puv3_rtc_getalarm,
-       .set_alarm      = puv3_rtc_setalarm,
-       .proc           = puv3_rtc_proc,
-};
-
-static void puv3_rtc_enable(struct device *dev, int en)
-{
-       if (!en) {
-               writel(readl(RTC_RTSR) & ~RTC_RTSR_HZE, RTC_RTSR);
-       } else {
-               /* re-enable the device, and check it is ok */
-               if ((readl(RTC_RTSR) & RTC_RTSR_HZE) == 0) {
-                       dev_info(dev, "rtc disabled, re-enabling\n");
-                       writel(readl(RTC_RTSR) | RTC_RTSR_HZE, RTC_RTSR);
-               }
-       }
-}
-
-static int puv3_rtc_remove(struct platform_device *dev)
-{
-       puv3_rtc_setpie(&dev->dev, 0);
-       puv3_rtc_setaie(&dev->dev, 0);
-
-       release_resource(puv3_rtc_mem);
-       kfree(puv3_rtc_mem);
-
-       return 0;
-}
-
-static int puv3_rtc_probe(struct platform_device *pdev)
-{
-       struct rtc_device *rtc;
-       struct resource *res;
-       int ret;
-
-       dev_dbg(&pdev->dev, "%s: probe=%p\n", __func__, pdev);
-
-       /* find the IRQs */
-       puv3_rtc_tickno = platform_get_irq(pdev, 1);
-       if (puv3_rtc_tickno < 0)
-               return -ENOENT;
-
-       puv3_rtc_alarmno = platform_get_irq(pdev, 0);
-       if (puv3_rtc_alarmno < 0)
-               return -ENOENT;
-
-       dev_dbg(&pdev->dev, "PKUnity_rtc: tick irq %d, alarm irq %d\n",
-                puv3_rtc_tickno, puv3_rtc_alarmno);
-
-       rtc = devm_rtc_allocate_device(&pdev->dev);
-       if (IS_ERR(rtc))
-               return PTR_ERR(rtc);
-
-       ret = devm_request_irq(&pdev->dev, puv3_rtc_alarmno, puv3_rtc_alarmirq,
-                              0, "pkunity-rtc alarm", rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "IRQ%d error %d\n", puv3_rtc_alarmno, ret);
-               return ret;
-       }
-
-       ret = devm_request_irq(&pdev->dev, puv3_rtc_tickno, puv3_rtc_tickirq,
-                              0, "pkunity-rtc tick", rtc);
-       if (ret) {
-               dev_err(&pdev->dev, "IRQ%d error %d\n", puv3_rtc_tickno, ret);
-               return ret;
-       }
-
-       /* get the memory region */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (res == NULL) {
-               dev_err(&pdev->dev, "failed to get memory region resource\n");
-               return -ENOENT;
-       }
-
-       puv3_rtc_mem = request_mem_region(res->start, resource_size(res),
-                                         pdev->name);
-
-       if (puv3_rtc_mem == NULL) {
-               dev_err(&pdev->dev, "failed to reserve memory region\n");
-               ret = -ENOENT;
-               goto err_nores;
-       }
-
-       puv3_rtc_enable(&pdev->dev, 1);
-
-       /* register RTC and exit */
-       rtc->ops = &puv3_rtcops;
-       rtc->range_max = U32_MAX;
-       ret = rtc_register_device(rtc);
-       if (ret)
-               goto err_nortc;
-
-       /* platform setup code should have handled this; sigh */
-       if (!device_can_wakeup(&pdev->dev))
-               device_init_wakeup(&pdev->dev, 1);
-
-       platform_set_drvdata(pdev, rtc);
-       return 0;
-
- err_nortc:
-       puv3_rtc_enable(&pdev->dev, 0);
-       release_resource(puv3_rtc_mem);
-
- err_nores:
-       return ret;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int ticnt_save;
-
-static int puv3_rtc_suspend(struct device *dev)
-{
-       /* save RTAR for anyone using periodic interrupts */
-       ticnt_save = readl(RTC_RTAR);
-       puv3_rtc_enable(dev, 0);
-       return 0;
-}
-
-static int puv3_rtc_resume(struct device *dev)
-{
-       puv3_rtc_enable(dev, 1);
-       writel(ticnt_save, RTC_RTAR);
-       return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(puv3_rtc_pm_ops, puv3_rtc_suspend, puv3_rtc_resume);
-
-static struct platform_driver puv3_rtc_driver = {
-       .probe          = puv3_rtc_probe,
-       .remove         = puv3_rtc_remove,
-       .driver         = {
-               .name   = "PKUnity-v3-RTC",
-               .pm     = &puv3_rtc_pm_ops,
-       }
-};
-
-module_platform_driver(puv3_rtc_driver);
-
-MODULE_DESCRIPTION("RTC Driver for the PKUnity v3 chip");
-MODULE_AUTHOR("Hu Dongliang");
-MODULE_LICENSE("GPL v2");
index cf87eb2..eb17fea 100644 (file)
@@ -2802,7 +2802,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
                        blk_update_request(req, BLK_STS_OK,
                                           blk_rq_bytes(req) - proc_bytes);
                        blk_mq_requeue_request(req, true);
-               } else {
+               } else if (likely(!blk_should_fake_timeout(req->q))) {
                        blk_mq_complete_request(req);
                }
        }
index 384edff..299e77e 100644 (file)
@@ -31,8 +31,7 @@
 
 static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static void dcssblk_release(struct gendisk *disk, fmode_t mode);
-static blk_qc_t dcssblk_make_request(struct request_queue *q,
-                                               struct bio *bio);
+static blk_qc_t dcssblk_submit_bio(struct bio *bio);
 static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
                long nr_pages, void **kaddr, pfn_t *pfn);
 
@@ -41,6 +40,7 @@ static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
 static int dcssblk_major;
 static const struct block_device_operations dcssblk_devops = {
        .owner          = THIS_MODULE,
+       .submit_bio     = dcssblk_submit_bio,
        .open           = dcssblk_open,
        .release        = dcssblk_release,
 };
@@ -651,8 +651,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
        }
        dev_info->gd->major = dcssblk_major;
        dev_info->gd->fops = &dcssblk_devops;
-       dev_info->dcssblk_queue =
-               blk_alloc_queue(dcssblk_make_request, NUMA_NO_NODE);
+       dev_info->dcssblk_queue = blk_alloc_queue(NUMA_NO_NODE);
        dev_info->gd->queue = dev_info->dcssblk_queue;
        dev_info->gd->private_data = dev_info;
        blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
@@ -833,7 +832,6 @@ dcssblk_open(struct block_device *bdev, fmode_t mode)
                goto out;
        }
        atomic_inc(&dev_info->use_count);
-       bdev->bd_block_size = 4096;
        rc = 0;
 out:
        return rc;
@@ -868,7 +866,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode)
 }
 
 static blk_qc_t
-dcssblk_make_request(struct request_queue *q, struct bio *bio)
+dcssblk_submit_bio(struct bio *bio)
 {
        struct dcssblk_dev_info *dev_info;
        struct bio_vec bvec;
@@ -878,7 +876,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
        unsigned long source_addr;
        unsigned long bytes_done;
 
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
        bytes_done = 0;
        dev_info = bio->bi_disk->private_data;
index e018893..a4f6f2e 100644 (file)
@@ -256,7 +256,8 @@ static void scm_request_finish(struct scm_request *scmrq)
        for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
                error = blk_mq_rq_to_pdu(scmrq->request[i]);
                *error = scmrq->error;
-               blk_mq_complete_request(scmrq->request[i]);
+               if (likely(!blk_should_fake_timeout(scmrq->request[i]->q)))
+                       blk_mq_complete_request(scmrq->request[i]);
        }
 
        atomic_dec(&bdev->queued_reqs);
index 45a04da..c2536f7 100644 (file)
@@ -182,7 +182,7 @@ static unsigned long xpram_highest_page_index(void)
 /*
  * Block device make request function.
  */
-static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t xpram_submit_bio(struct bio *bio)
 {
        xpram_device_t *xdev = bio->bi_disk->private_data;
        struct bio_vec bvec;
@@ -191,7 +191,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
        unsigned long page_addr;
        unsigned long bytes;
 
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
 
        if ((bio->bi_iter.bi_sector & 7) != 0 ||
            (bio->bi_iter.bi_size & 4095) != 0)
@@ -250,6 +250,7 @@ static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 static const struct block_device_operations xpram_devops =
 {
        .owner  = THIS_MODULE,
+       .submit_bio = xpram_submit_bio,
        .getgeo = xpram_getgeo,
 };
 
@@ -343,8 +344,7 @@ static int __init xpram_setup_blkdev(void)
                xpram_disks[i] = alloc_disk(1);
                if (!xpram_disks[i])
                        goto out;
-               xpram_queues[i] = blk_alloc_queue(xpram_make_request,
-                               NUMA_NO_NODE);
+               xpram_queues[i] = blk_alloc_queue(NUMA_NO_NODE);
                if (!xpram_queues[i]) {
                        put_disk(xpram_disks[i]);
                        goto out;
index 98d7fc1..aec996d 100644 (file)
@@ -556,8 +556,9 @@ tty3270_scroll_backward(struct kbd_data *kbd)
  * Pass input line to tty.
  */
 static void
-tty3270_read_tasklet(struct raw3270_request *rrq)
+tty3270_read_tasklet(unsigned long data)
 {
+       struct raw3270_request *rrq = (struct raw3270_request *)data;
        static char kreset_data = TW_KR;
        struct tty3270 *tp = container_of(rrq->view, struct tty3270, view);
        char *input;
@@ -652,8 +653,9 @@ tty3270_issue_read(struct tty3270 *tp, int lock)
  * Hang up the tty
  */
 static void
-tty3270_hangup_tasklet(struct tty3270 *tp)
+tty3270_hangup_tasklet(unsigned long data)
 {
+       struct tty3270 *tp = (struct tty3270 *)data;
        tty_port_tty_hangup(&tp->port, true);
        raw3270_put_view(&tp->view);
 }
@@ -752,11 +754,9 @@ tty3270_alloc_view(void)
 
        tty_port_init(&tp->port);
        timer_setup(&tp->timer, tty3270_update, 0);
-       tasklet_init(&tp->readlet,
-                    (void (*)(unsigned long)) tty3270_read_tasklet,
+       tasklet_init(&tp->readlet, tty3270_read_tasklet,
                     (unsigned long) tp->read);
-       tasklet_init(&tp->hanglet,
-                    (void (*)(unsigned long)) tty3270_hangup_tasklet,
+       tasklet_init(&tp->hanglet, tty3270_hangup_tasklet,
                     (unsigned long) tp);
        INIT_WORK(&tp->resize_work, tty3270_resize_work);
 
index 08f8124..d29f1b7 100644 (file)
@@ -1,8 +1,7 @@
 // SPDX-License-Identifier: GPL-1.0+
 /*
  * zcore module to export memory content and register sets for creating system
- * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same
- * dump format as s390 standalone dumps.
+ * dumps on SCSI disks (zfcpdump).
  *
  * For more information please refer to Documentation/s390/zfcpdump.rst
  *
@@ -16,7 +15,6 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
-#include <linux/memblock.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/ipl.h>
@@ -33,8 +31,6 @@
 
 #define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x)
 
-#define CHUNK_INFO_SIZE        34 /* 2 16-byte char, each followed by blank */
-
 enum arch_id {
        ARCH_S390       = 0,
        ARCH_S390X      = 1,
@@ -48,7 +44,6 @@ struct ipib_info {
 static struct debug_info *zcore_dbf;
 static int hsa_available;
 static struct dentry *zcore_dir;
-static struct dentry *zcore_memmap_file;
 static struct dentry *zcore_reipl_file;
 static struct dentry *zcore_hsa_file;
 static struct ipl_parameter_block *zcore_ipl_block;
@@ -139,46 +134,6 @@ static void release_hsa(void)
        hsa_available = 0;
 }
 
-static ssize_t zcore_memmap_read(struct file *filp, char __user *buf,
-                                size_t count, loff_t *ppos)
-{
-       return simple_read_from_buffer(buf, count, ppos, filp->private_data,
-                                      memblock.memory.cnt * CHUNK_INFO_SIZE);
-}
-
-static int zcore_memmap_open(struct inode *inode, struct file *filp)
-{
-       struct memblock_region *reg;
-       char *buf;
-       int i = 0;
-
-       buf = kcalloc(memblock.memory.cnt, CHUNK_INFO_SIZE, GFP_KERNEL);
-       if (!buf) {
-               return -ENOMEM;
-       }
-       for_each_memblock(memory, reg) {
-               sprintf(buf + (i++ * CHUNK_INFO_SIZE), "%016llx %016llx ",
-                       (unsigned long long) reg->base,
-                       (unsigned long long) reg->size);
-       }
-       filp->private_data = buf;
-       return nonseekable_open(inode, filp);
-}
-
-static int zcore_memmap_release(struct inode *inode, struct file *filp)
-{
-       kfree(filp->private_data);
-       return 0;
-}
-
-static const struct file_operations zcore_memmap_fops = {
-       .owner          = THIS_MODULE,
-       .read           = zcore_memmap_read,
-       .open           = zcore_memmap_open,
-       .release        = zcore_memmap_release,
-       .llseek         = no_llseek,
-};
-
 static ssize_t zcore_reipl_write(struct file *filp, const char __user *buf,
                                 size_t count, loff_t *ppos)
 {
@@ -335,17 +290,11 @@ static int __init zcore_init(void)
                rc = -ENOMEM;
                goto fail;
        }
-       zcore_memmap_file = debugfs_create_file("memmap", S_IRUSR, zcore_dir,
-                                               NULL, &zcore_memmap_fops);
-       if (!zcore_memmap_file) {
-               rc = -ENOMEM;
-               goto fail_dir;
-       }
        zcore_reipl_file = debugfs_create_file("reipl", S_IRUSR, zcore_dir,
                                                NULL, &zcore_reipl_fops);
        if (!zcore_reipl_file) {
                rc = -ENOMEM;
-               goto fail_memmap_file;
+               goto fail_dir;
        }
        zcore_hsa_file = debugfs_create_file("hsa", S_IRUSR|S_IWUSR, zcore_dir,
                                             NULL, &zcore_hsa_fops);
@@ -357,8 +306,6 @@ static int __init zcore_init(void)
 
 fail_reipl_file:
        debugfs_remove(zcore_reipl_file);
-fail_memmap_file:
-       debugfs_remove(zcore_memmap_file);
 fail_dir:
        debugfs_remove(zcore_dir);
 fail:
index bb1c840..cd2df4f 100644 (file)
@@ -15,7 +15,6 @@
 #define QDIO_BUSY_BIT_PATIENCE         (100 << 12)     /* 100 microseconds */
 #define QDIO_BUSY_BIT_RETRY_DELAY      10              /* 10 milliseconds */
 #define QDIO_BUSY_BIT_RETRIES          1000            /* = 10s retry time */
-#define QDIO_INPUT_THRESHOLD           (500 << 12)     /* 500 microseconds */
 
 enum qdio_irq_states {
        QDIO_IRQ_STATE_INACTIVE,
@@ -166,11 +165,7 @@ struct qdio_dev_perf_stat {
 } ____cacheline_aligned;
 
 struct qdio_queue_perf_stat {
-       /*
-        * Sorted into order-2 buckets: 1, 2-3, 4-7, ... 64-127, 128.
-        * Since max. 127 SBALs are scanned reuse entry for 128 as queue full
-        * aka 127 SBALs found.
-        */
+       /* Sorted into order-2 buckets: 1, 2-3, 4-7, ... 64-127, 128. */
        unsigned int nr_sbals[8];
        unsigned int nr_sbal_error;
        unsigned int nr_sbal_nop;
@@ -185,8 +180,6 @@ struct qdio_input_q {
        /* Batch of SBALs that we processed while polling the queue: */
        unsigned int batch_start;
        unsigned int batch_count;
-       /* last time of noticing incoming data */
-       u64 timestamp;
 };
 
 struct qdio_output_q {
index da95c92..863d17c 100644 (file)
@@ -165,7 +165,7 @@ static int qstat_show(struct seq_file *m, void *v)
        }
 
        seq_printf(m, "\n1          2..        4..        8..        "
-                  "16..       32..       64..       127\n");
+                  "16..       32..       64..       128\n");
        for (i = 0; i < ARRAY_SIZE(q->q_stats.nr_sbals); i++)
                seq_printf(m, "%-10u ", q->q_stats.nr_sbals[i]);
        seq_printf(m, "\nError      NOP        Total\n%-10u %-10u %-10u\n\n",
index 0c919a1..4fab8bb 100644 (file)
@@ -413,15 +413,8 @@ static inline void qdio_stop_polling(struct qdio_q *q)
 
 static inline void account_sbals(struct qdio_q *q, unsigned int count)
 {
-       int pos;
-
        q->q_stats.nr_sbal_total += count;
-       if (count == QDIO_MAX_BUFFERS_MASK) {
-               q->q_stats.nr_sbals[7]++;
-               return;
-       }
-       pos = ilog2(count);
-       q->q_stats.nr_sbals[pos]++;
+       q->q_stats.nr_sbals[ilog2(count)]++;
 }
 
 static void process_buffer_error(struct qdio_q *q, unsigned int start,
@@ -464,11 +457,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start)
 
        q->timestamp = get_tod_clock_fast();
 
-       /*
-        * Don't check 128 buffers, as otherwise qdio_inbound_q_moved
-        * would return 0.
-        */
-       count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK);
+       count = atomic_read(&q->nr_buf_used);
        if (!count)
                return 0;
 
@@ -521,14 +510,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start)
 
 static int qdio_inbound_q_moved(struct qdio_q *q, unsigned int start)
 {
-       int count;
-
-       count = get_inbound_buffer_frontier(q, start);
-
-       if (count && !is_thinint_irq(q->irq_ptr) && MACHINE_IS_LPAR)
-               q->u.in.timestamp = get_tod_clock();
-
-       return count;
+       return get_inbound_buffer_frontier(q, start);
 }
 
 static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start)
@@ -546,22 +528,7 @@ static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start)
                /* more work coming */
                return 0;
 
-       if (is_thinint_irq(q->irq_ptr))
-               return 1;
-
-       /* don't poll under z/VM */
-       if (MACHINE_IS_VM)
-               return 1;
-
-       /*
-        * At this point we know, that inbound first_to_check
-        * has (probably) not moved (see qdio_inbound_processing).
-        */
-       if (get_tod_clock_fast() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) {
-               DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in done:%02x", start);
-               return 1;
-       } else
-               return 0;
+       return 1;
 }
 
 static inline void qdio_handle_aobs(struct qdio_q *q, int start, int count)
index e71ca4a..f218a0b 100644 (file)
@@ -73,8 +73,7 @@ EXPORT_SYMBOL(ap_perms);
 DEFINE_MUTEX(ap_perms_mutex);
 EXPORT_SYMBOL(ap_perms_mutex);
 
-static struct ap_config_info *ap_configuration;
-static bool initialised;
+static struct ap_config_info *ap_qci_info;
 
 /*
  * AP bus related debug feature things.
@@ -105,8 +104,10 @@ static struct hrtimer ap_poll_timer;
  */
 static unsigned long long poll_timeout = 250000;
 
-/* Maximum domain id */
-static int ap_max_domain_id;
+/* Maximum domain id, if not given via qci */
+static int ap_max_domain_id = 15;
+/* Maximum adapter id, if not given via qci */
+static int ap_max_adapter_id = 63;
 
 static struct bus_type ap_bus_type;
 
@@ -154,12 +155,12 @@ static int ap_interrupts_available(void)
 }
 
 /**
- * ap_configuration_available(): Test if AP configuration
- * information is available.
+ * ap_qci_available(): Test if AP configuration
+ * information can be queried via QCI subfunction.
  *
- * Returns 1 if AP configuration information is available.
+ * Returns 1 if subfunction PQAP(QCI) is available.
  */
-static int ap_configuration_available(void)
+static int ap_qci_available(void)
 {
        return test_facility(12);
 }
@@ -182,22 +183,22 @@ static int ap_apft_available(void)
  */
 static inline int ap_qact_available(void)
 {
-       if (ap_configuration)
-               return ap_configuration->qact;
+       if (ap_qci_info)
+               return ap_qci_info->qact;
        return 0;
 }
 
 /*
- * ap_query_configuration(): Fetch cryptographic config info
+ * ap_fetch_qci_info(): Fetch cryptographic config info
  *
  * Returns the ap configuration info fetched via PQAP(QCI).
  * On success 0 is returned, on failure a negative errno
  * is returned, e.g. if the PQAP(QCI) instruction is not
  * available, the return value will be -EOPNOTSUPP.
  */
-static inline int ap_query_configuration(struct ap_config_info *info)
+static inline int ap_fetch_qci_info(struct ap_config_info *info)
 {
-       if (!ap_configuration_available())
+       if (!ap_qci_available())
                return -EOPNOTSUPP;
        if (!info)
                return -EINVAL;
@@ -205,21 +206,40 @@ static inline int ap_query_configuration(struct ap_config_info *info)
 }
 
 /**
- * ap_init_configuration(): Allocate and query configuration array.
+ * ap_init_qci_info(): Allocate and query qci config info.
+ * Also updates the static variables ap_max_domain_id
+ * and ap_max_adapter_id if this info is available.
+ *
  */
-static void ap_init_configuration(void)
+static void __init ap_init_qci_info(void)
 {
-       if (!ap_configuration_available())
+       if (!ap_qci_available()) {
+               AP_DBF(DBF_INFO, "%s QCI not supported\n", __func__);
                return;
+       }
 
-       ap_configuration = kzalloc(sizeof(*ap_configuration), GFP_KERNEL);
-       if (!ap_configuration)
+       ap_qci_info = kzalloc(sizeof(*ap_qci_info), GFP_KERNEL);
+       if (!ap_qci_info)
                return;
-       if (ap_query_configuration(ap_configuration) != 0) {
-               kfree(ap_configuration);
-               ap_configuration = NULL;
+       if (ap_fetch_qci_info(ap_qci_info) != 0) {
+               kfree(ap_qci_info);
+               ap_qci_info = NULL;
                return;
        }
+       AP_DBF(DBF_INFO, "%s successful fetched initial qci info\n", __func__);
+
+       if (ap_qci_info->apxa) {
+               if (ap_qci_info->Na) {
+                       ap_max_adapter_id = ap_qci_info->Na;
+                       AP_DBF(DBF_INFO, "%s new ap_max_adapter_id is %d\n",
+                              __func__, ap_max_adapter_id);
+               }
+               if (ap_qci_info->Nd) {
+                       ap_max_domain_id = ap_qci_info->Nd;
+                       AP_DBF(DBF_INFO, "%s new ap_max_domain_id is %d\n",
+                              __func__, ap_max_domain_id);
+               }
+       }
 }
 
 /*
@@ -233,7 +253,6 @@ static inline int ap_test_config(unsigned int *field, unsigned int nr)
 
 /*
  * ap_test_config_card_id(): Test, whether an AP card ID is configured.
- * @id AP card ID
  *
  * Returns 0 if the card is not configured
  *        1 if the card is configured or
@@ -241,16 +260,16 @@ static inline int ap_test_config(unsigned int *field, unsigned int nr)
  */
 static inline int ap_test_config_card_id(unsigned int id)
 {
-       if (!ap_configuration)  /* QCI not supported */
-               /* only ids 0...3F may be probed */
-               return id < 0x40 ? 1 : 0;
-       return ap_test_config(ap_configuration->apm, id);
+       if (id > ap_max_adapter_id)
+               return 0;
+       if (ap_qci_info)
+               return ap_test_config(ap_qci_info->apm, id);
+       return 1;
 }
 
 /*
  * ap_test_config_usage_domain(): Test, whether an AP usage domain
  * is configured.
- * @domain AP usage domain ID
  *
  * Returns 0 if the usage domain is not configured
  *        1 if the usage domain is configured or
@@ -258,9 +277,11 @@ static inline int ap_test_config_card_id(unsigned int id)
  */
 int ap_test_config_usage_domain(unsigned int domain)
 {
-       if (!ap_configuration)  /* QCI not supported */
-               return domain < 16;
-       return ap_test_config(ap_configuration->aqm, domain);
+       if (domain > ap_max_domain_id)
+               return 0;
+       if (ap_qci_info)
+               return ap_test_config(ap_qci_info->aqm, domain);
+       return 1;
 }
 EXPORT_SYMBOL(ap_test_config_usage_domain);
 
@@ -274,43 +295,44 @@ EXPORT_SYMBOL(ap_test_config_usage_domain);
  */
 int ap_test_config_ctrl_domain(unsigned int domain)
 {
-       if (!ap_configuration)  /* QCI not supported */
+       if (!ap_qci_info || domain > ap_max_domain_id)
                return 0;
-       return ap_test_config(ap_configuration->adm, domain);
+       return ap_test_config(ap_qci_info->adm, domain);
 }
 EXPORT_SYMBOL(ap_test_config_ctrl_domain);
 
-/**
- * ap_query_queue(): Check if an AP queue is available.
- * @qid: The AP queue number
- * @queue_depth: Pointer to queue depth value
- * @device_type: Pointer to device type value
- * @facilities: Pointer to facility indicator
+/*
+ * ap_queue_info(): Check and get AP queue info.
+ * Returns true if TAPQ succeeded and the info is filled or
+ * false otherwise.
  */
-static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type,
-                         unsigned int *facilities)
+static bool ap_queue_info(ap_qid_t qid, int *q_type,
+                         unsigned int *q_fac, int *q_depth)
 {
        struct ap_queue_status status;
-       unsigned long info;
-       int nd;
+       unsigned long info = 0;
 
-       if (!ap_test_config_card_id(AP_QID_CARD(qid)))
-               return -ENODEV;
+       /* make sure we don't run into a specification exception */
+       if (AP_QID_CARD(qid) > ap_max_adapter_id ||
+           AP_QID_QUEUE(qid) > ap_max_domain_id)
+               return false;
 
+       /* call TAPQ on this APQN */
        status = ap_test_queue(qid, ap_apft_available(), &info);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
-               *queue_depth = (int)(info & 0xff);
-               *device_type = (int)((info >> 24) & 0xff);
-               *facilities = (unsigned int)(info >> 32);
-               /* Update maximum domain id */
-               nd = (info >> 16) & 0xff;
-               /* if N bit is available, z13 and newer */
-               if ((info & (1UL << 57)) && nd > 0)
-                       ap_max_domain_id = nd;
-               else /* older machine types */
-                       ap_max_domain_id = 15;
-               switch (*device_type) {
+       case AP_RESPONSE_RESET_IN_PROGRESS:
+               /*
+                * According to the architecture in all these cases the
+                * info should be filled. All bits 0 is not possible as
+                * there is at least one of the mode bits set.
+                */
+               if (WARN_ON_ONCE(!info))
+                       return false;
+               *q_type = (int)((info >> 24) & 0xff);
+               *q_fac = (unsigned int)(info >> 32);
+               *q_depth = (int)(info & 0xff);
+               switch (*q_type) {
                        /* For CEX2 and CEX3 the available functions
                         * are not reflected by the facilities bits.
                         * Instead it is coded into the type. So here
@@ -318,37 +340,31 @@ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type,
                         */
                case AP_DEVICE_TYPE_CEX2A:
                case AP_DEVICE_TYPE_CEX3A:
-                       *facilities |= 0x08000000;
+                       *q_fac |= 0x08000000;
                        break;
                case AP_DEVICE_TYPE_CEX2C:
                case AP_DEVICE_TYPE_CEX3C:
-                       *facilities |= 0x10000000;
+                       *q_fac |= 0x10000000;
                        break;
                default:
                        break;
                }
-               return 0;
-       case AP_RESPONSE_Q_NOT_AVAIL:
-       case AP_RESPONSE_DECONFIGURED:
-       case AP_RESPONSE_CHECKSTOPPED:
-       case AP_RESPONSE_INVALID_ADDRESS:
-               return -ENODEV;
-       case AP_RESPONSE_RESET_IN_PROGRESS:
-       case AP_RESPONSE_OTHERWISE_CHANGED:
-       case AP_RESPONSE_BUSY:
-               return -EBUSY;
+               return true;
        default:
-               BUG();
+               /*
+                * A response code which indicates that no info is available.
+                */
+               return false;
        }
 }
 
-void ap_wait(enum ap_wait wait)
+void ap_wait(enum ap_sm_wait wait)
 {
        ktime_t hr_time;
 
        switch (wait) {
-       case AP_WAIT_AGAIN:
-       case AP_WAIT_INTERRUPT:
+       case AP_SM_WAIT_AGAIN:
+       case AP_SM_WAIT_INTERRUPT:
                if (ap_using_interrupts())
                        break;
                if (ap_poll_kthread) {
@@ -356,7 +372,7 @@ void ap_wait(enum ap_wait wait)
                        break;
                }
                fallthrough;
-       case AP_WAIT_TIMEOUT:
+       case AP_SM_WAIT_TIMEOUT:
                spin_lock_bh(&ap_poll_timer_lock);
                if (!hrtimer_is_queued(&ap_poll_timer)) {
                        hr_time = poll_timeout;
@@ -365,7 +381,7 @@ void ap_wait(enum ap_wait wait)
                }
                spin_unlock_bh(&ap_poll_timer_lock);
                break;
-       case AP_WAIT_NONE:
+       case AP_SM_WAIT_NONE:
        default:
                break;
        }
@@ -382,7 +398,7 @@ void ap_request_timeout(struct timer_list *t)
        struct ap_queue *aq = from_timer(aq, t, timeout);
 
        spin_lock_bh(&aq->lock);
-       ap_wait(ap_sm_event(aq, AP_EVENT_TIMEOUT));
+       ap_wait(ap_sm_event(aq, AP_SM_EVENT_TIMEOUT));
        spin_unlock_bh(&aq->lock);
 }
 
@@ -418,7 +434,7 @@ static void ap_tasklet_fn(unsigned long dummy)
 {
        int bkt;
        struct ap_queue *aq;
-       enum ap_wait wait = AP_WAIT_NONE;
+       enum ap_sm_wait wait = AP_SM_WAIT_NONE;
 
        /* Reset the indicator if interrupts are used. Thus new interrupts can
         * be received. Doing it in the beginning of the tasklet is therefore
@@ -430,7 +446,7 @@ static void ap_tasklet_fn(unsigned long dummy)
        spin_lock_bh(&ap_queues_lock);
        hash_for_each(ap_queues, bkt, aq, hnode) {
                spin_lock_bh(&aq->lock);
-               wait = min(wait, ap_sm_event_loop(aq, AP_EVENT_POLL));
+               wait = min(wait, ap_sm_event_loop(aq, AP_SM_EVENT_POLL));
                spin_unlock_bh(&aq->lock);
        }
        spin_unlock_bh(&ap_queues_lock);
@@ -751,9 +767,6 @@ int ap_driver_register(struct ap_driver *ap_drv, struct module *owner,
 {
        struct device_driver *drv = &ap_drv->driver;
 
-       if (!initialised)
-               return -ENODEV;
-
        drv->bus = &ap_bus_type;
        drv->probe = ap_device_probe;
        drv->remove = ap_device_remove;
@@ -929,11 +942,12 @@ static ssize_t ap_domain_store(struct bus_type *bus,
            domain < 0 || domain > ap_max_domain_id ||
            !test_bit_inv(domain, ap_perms.aqm))
                return -EINVAL;
+
        spin_lock_bh(&ap_domain_lock);
        ap_domain_index = domain;
        spin_unlock_bh(&ap_domain_lock);
 
-       AP_DBF(DBF_DEBUG, "stored new default domain=%d\n", domain);
+       AP_DBF(DBF_INFO, "stored new default domain=%d\n", domain);
 
        return count;
 }
@@ -942,45 +956,45 @@ static BUS_ATTR_RW(ap_domain);
 
 static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf)
 {
-       if (!ap_configuration)  /* QCI not supported */
+       if (!ap_qci_info)       /* QCI not supported */
                return scnprintf(buf, PAGE_SIZE, "not supported\n");
 
        return scnprintf(buf, PAGE_SIZE,
                         "0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
-                        ap_configuration->adm[0], ap_configuration->adm[1],
-                        ap_configuration->adm[2], ap_configuration->adm[3],
-                        ap_configuration->adm[4], ap_configuration->adm[5],
-                        ap_configuration->adm[6], ap_configuration->adm[7]);
+                        ap_qci_info->adm[0], ap_qci_info->adm[1],
+                        ap_qci_info->adm[2], ap_qci_info->adm[3],
+                        ap_qci_info->adm[4], ap_qci_info->adm[5],
+                        ap_qci_info->adm[6], ap_qci_info->adm[7]);
 }
 
 static BUS_ATTR_RO(ap_control_domain_mask);
 
 static ssize_t ap_usage_domain_mask_show(struct bus_type *bus, char *buf)
 {
-       if (!ap_configuration)  /* QCI not supported */
+       if (!ap_qci_info)       /* QCI not supported */
                return scnprintf(buf, PAGE_SIZE, "not supported\n");
 
        return scnprintf(buf, PAGE_SIZE,
                         "0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
-                        ap_configuration->aqm[0], ap_configuration->aqm[1],
-                        ap_configuration->aqm[2], ap_configuration->aqm[3],
-                        ap_configuration->aqm[4], ap_configuration->aqm[5],
-                        ap_configuration->aqm[6], ap_configuration->aqm[7]);
+                        ap_qci_info->aqm[0], ap_qci_info->aqm[1],
+                        ap_qci_info->aqm[2], ap_qci_info->aqm[3],
+                        ap_qci_info->aqm[4], ap_qci_info->aqm[5],
+                        ap_qci_info->aqm[6], ap_qci_info->aqm[7]);
 }
 
 static BUS_ATTR_RO(ap_usage_domain_mask);
 
 static ssize_t ap_adapter_mask_show(struct bus_type *bus, char *buf)
 {
-       if (!ap_configuration)  /* QCI not supported */
+       if (!ap_qci_info)       /* QCI not supported */
                return scnprintf(buf, PAGE_SIZE, "not supported\n");
 
        return scnprintf(buf, PAGE_SIZE,
                         "0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
-                        ap_configuration->apm[0], ap_configuration->apm[1],
-                        ap_configuration->apm[2], ap_configuration->apm[3],
-                        ap_configuration->apm[4], ap_configuration->apm[5],
-                        ap_configuration->apm[6], ap_configuration->apm[7]);
+                        ap_qci_info->apm[0], ap_qci_info->apm[1],
+                        ap_qci_info->apm[2], ap_qci_info->apm[3],
+                        ap_qci_info->apm[4], ap_qci_info->apm[5],
+                        ap_qci_info->apm[6], ap_qci_info->apm[7]);
 }
 
 static BUS_ATTR_RO(ap_adapter_mask);
@@ -1066,17 +1080,18 @@ static BUS_ATTR_RW(poll_timeout);
 
 static ssize_t ap_max_domain_id_show(struct bus_type *bus, char *buf)
 {
-       int max_domain_id;
-
-       if (ap_configuration)
-               max_domain_id = ap_max_domain_id ? : -1;
-       else
-               max_domain_id = 15;
-       return scnprintf(buf, PAGE_SIZE, "%d\n", max_domain_id);
+       return scnprintf(buf, PAGE_SIZE, "%d\n", ap_max_domain_id);
 }
 
 static BUS_ATTR_RO(ap_max_domain_id);
 
+static ssize_t ap_max_adapter_id_show(struct bus_type *bus, char *buf)
+{
+       return scnprintf(buf, PAGE_SIZE, "%d\n", ap_max_adapter_id);
+}
+
+static BUS_ATTR_RO(ap_max_adapter_id);
+
 static ssize_t apmask_show(struct bus_type *bus, char *buf)
 {
        int rc;
@@ -1149,6 +1164,7 @@ static struct bus_attribute *const ap_bus_attrs[] = {
        &bus_attr_ap_interrupts,
        &bus_attr_poll_timeout,
        &bus_attr_ap_max_domain_id,
+       &bus_attr_ap_max_adapter_id,
        &bus_attr_apmask,
        &bus_attr_aqmask,
        NULL,
@@ -1160,47 +1176,42 @@ static struct bus_attribute *const ap_bus_attrs[] = {
  */
 static void ap_select_domain(void)
 {
-       int count, max_count, best_domain;
        struct ap_queue_status status;
-       int i, j;
+       int card, dom;
 
        /*
-        * We want to use a single domain. Either the one specified with
-        * the "domain=" parameter or the domain with the maximum number
-        * of devices.
+        * Choose the default domain. Either the one specified with
+        * the "domain=" parameter or the first domain with at least
+        * one valid APQN.
         */
        spin_lock_bh(&ap_domain_lock);
        if (ap_domain_index >= 0) {
                /* Domain has already been selected. */
-               spin_unlock_bh(&ap_domain_lock);
-               return;
+               goto out;
        }
-       best_domain = -1;
-       max_count = 0;
-       for (i = 0; i < AP_DOMAINS; i++) {
-               if (!ap_test_config_usage_domain(i) ||
-                   !test_bit_inv(i, ap_perms.aqm))
+       for (dom = 0; dom <= ap_max_domain_id; dom++) {
+               if (!ap_test_config_usage_domain(dom) ||
+                   !test_bit_inv(dom, ap_perms.aqm))
                        continue;
-               count = 0;
-               for (j = 0; j < AP_DEVICES; j++) {
-                       if (!ap_test_config_card_id(j))
+               for (card = 0; card <= ap_max_adapter_id; card++) {
+                       if (!ap_test_config_card_id(card) ||
+                           !test_bit_inv(card, ap_perms.apm))
                                continue;
-                       status = ap_test_queue(AP_MKQID(j, i),
+                       status = ap_test_queue(AP_MKQID(card, dom),
                                               ap_apft_available(),
                                               NULL);
-                       if (status.response_code != AP_RESPONSE_NORMAL)
-                               continue;
-                       count++;
-               }
-               if (count > max_count) {
-                       max_count = count;
-                       best_domain = i;
+                       if (status.response_code == AP_RESPONSE_NORMAL)
+                               break;
                }
+               if (card <= ap_max_adapter_id)
+                       break;
        }
-       if (best_domain >= 0) {
-               ap_domain_index = best_domain;
-               AP_DBF(DBF_DEBUG, "new ap_domain_index=%d\n", ap_domain_index);
+       if (dom <= ap_max_domain_id) {
+               ap_domain_index = dom;
+               AP_DBF(DBF_DEBUG, "%s new default domain is %d\n",
+                      __func__, ap_domain_index);
        }
+out:
        spin_unlock_bh(&ap_domain_lock);
 }
 
@@ -1279,12 +1290,13 @@ static int __match_queue_device_with_queue_id(struct device *dev, const void *da
  */
 static void _ap_scan_bus_adapter(int id)
 {
+       bool broken;
        ap_qid_t qid;
        unsigned int func;
        struct ap_card *ac;
        struct device *dev;
        struct ap_queue *aq;
-       int rc, dom, depth, type, comp_type, borked;
+       int rc, dom, depth, type, comp_type;
 
        /* check if there is a card device registered with this id */
        dev = bus_find_device(&ap_bus_type, NULL,
@@ -1312,23 +1324,23 @@ static void _ap_scan_bus_adapter(int id)
                /* find the first valid queue */
                for (dom = 0; dom < AP_DOMAINS; dom++) {
                        qid = AP_MKQID(id, dom);
-                       if (ap_query_queue(qid, &depth, &type, &func) == 0)
+                       if (ap_queue_info(qid, &type, &func, &depth))
                                break;
                }
-               borked = 0;
+               broken = false;
                if (dom >= AP_DOMAINS) {
                        /* no accessible queue on this card */
-                       borked = 1;
+                       broken = true;
                } else if (ac->raw_hwtype != type) {
                        /* card type has changed */
                        AP_DBF(DBF_INFO, "card=%02x type changed.\n", id);
-                       borked = 1;
+                       broken = true;
                } else if (ac->functions != func) {
                        /* card functions have changed */
                        AP_DBF(DBF_INFO, "card=%02x functions changed.\n", id);
-                       borked = 1;
+                       broken = true;
                }
-               if (borked) {
+               if (broken) {
                        /* unregister card device and associated queues */
                        bus_for_each_dev(&ap_bus_type, NULL,
                                         (void *)(long) id,
@@ -1364,16 +1376,14 @@ static void _ap_scan_bus_adapter(int id)
                        continue;
                }
                /* try to fetch infos about this queue */
-               rc = ap_query_queue(qid, &depth, &type, &func);
+               broken = !ap_queue_info(qid, &type, &func, &depth);
                if (dev) {
-                       if (rc == -ENODEV)
-                               borked = 1;
-                       else {
+                       if (!broken) {
                                spin_lock_bh(&aq->lock);
-                               borked = aq->state == AP_STATE_BORKED;
+                               broken = aq->sm_state == AP_SM_STATE_BORKED;
                                spin_unlock_bh(&aq->lock);
                        }
-                       if (borked) {
+                       if (broken) {
                                /* Remove broken device */
                                AP_DBF(DBF_DEBUG,
                                       "removing broken queue=%02x.%04x\n",
@@ -1383,7 +1393,7 @@ static void _ap_scan_bus_adapter(int id)
                        put_device(dev);
                        continue;
                }
-               if (rc)
+               if (broken)
                        continue;
                /* a new queue device is needed, check out comp type */
                comp_type = ap_get_compatible_type(qid, type, func);
@@ -1435,11 +1445,11 @@ static void ap_scan_bus(struct work_struct *unused)
 {
        int id;
 
-       AP_DBF(DBF_DEBUG, "%s running\n", __func__);
-
-       ap_query_configuration(ap_configuration);
+       ap_fetch_qci_info(ap_qci_info);
        ap_select_domain();
 
+       AP_DBF(DBF_DEBUG, "%s running\n", __func__);
+
        /* loop over all possible adapters */
        for (id = 0; id < AP_DEVICES; id++)
                _ap_scan_bus_adapter(id);
@@ -1505,7 +1515,6 @@ static void __init ap_perms_init(void)
  */
 static int __init ap_module_init(void)
 {
-       int max_domain_id;
        int rc, i;
 
        rc = ap_debug_init();
@@ -1524,14 +1533,10 @@ static int __init ap_module_init(void)
        ap_perms_init();
 
        /* Get AP configuration data if available */
-       ap_init_configuration();
-
-       if (ap_configuration)
-               max_domain_id =
-                       ap_max_domain_id ? ap_max_domain_id : AP_DOMAINS - 1;
-       else
-               max_domain_id = 15;
-       if (ap_domain_index < -1 || ap_domain_index > max_domain_id ||
+       ap_init_qci_info();
+
+       /* check default domain setting */
+       if (ap_domain_index < -1 || ap_domain_index > ap_max_domain_id ||
            (ap_domain_index >= 0 &&
             !test_bit_inv(ap_domain_index, ap_perms.aqm))) {
                pr_warn("%d is not a valid cryptographic domain\n",
@@ -1539,6 +1544,7 @@ static int __init ap_module_init(void)
                ap_domain_index = -1;
        }
 
+       /* enable interrupts if available */
        if (ap_interrupts_available()) {
                rc = register_adapter_interrupt(&ap_airq);
                ap_airq_flag = (rc == 0);
@@ -1581,7 +1587,6 @@ static int __init ap_module_init(void)
        }
 
        queue_work(system_long_wq, &ap_scan_work);
-       initialised = true;
 
        return 0;
 
@@ -1595,7 +1600,7 @@ out_bus:
 out:
        if (ap_using_interrupts())
                unregister_adapter_interrupt(&ap_airq);
-       kfree(ap_configuration);
+       kfree(ap_qci_info);
        return rc;
 }
 device_initcall(ap_module_init);
index 053cc34..1a1d5e3 100644 (file)
@@ -83,39 +83,39 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_INTR_ENABLED                1       /* AP interrupt enabled */
 
 /*
- * AP device states
+ * AP queue state machine states
  */
-enum ap_state {
-       AP_STATE_RESET_START,
-       AP_STATE_RESET_WAIT,
-       AP_STATE_SETIRQ_WAIT,
-       AP_STATE_IDLE,
-       AP_STATE_WORKING,
-       AP_STATE_QUEUE_FULL,
-       AP_STATE_REMOVE,        /* about to be removed from driver */
-       AP_STATE_UNBOUND,       /* momentary not bound to a driver */
-       AP_STATE_BORKED,        /* broken */
-       NR_AP_STATES
+enum ap_sm_state {
+       AP_SM_STATE_RESET_START,
+       AP_SM_STATE_RESET_WAIT,
+       AP_SM_STATE_SETIRQ_WAIT,
+       AP_SM_STATE_IDLE,
+       AP_SM_STATE_WORKING,
+       AP_SM_STATE_QUEUE_FULL,
+       AP_SM_STATE_REMOVE,     /* about to be removed from driver */
+       AP_SM_STATE_UNBOUND,    /* momentarily not bound to a driver */
+       AP_SM_STATE_BORKED,     /* broken */
+       NR_AP_SM_STATES
 };
 
 /*
- * AP device events
+ * AP queue state machine events
  */
-enum ap_event {
-       AP_EVENT_POLL,
-       AP_EVENT_TIMEOUT,
-       NR_AP_EVENTS
+enum ap_sm_event {
+       AP_SM_EVENT_POLL,
+       AP_SM_EVENT_TIMEOUT,
+       NR_AP_SM_EVENTS
 };
 
 /*
- * AP wait behaviour
+ * AP queue state wait behaviour
  */
-enum ap_wait {
-       AP_WAIT_AGAIN,          /* retry immediately */
-       AP_WAIT_TIMEOUT,        /* wait for timeout */
-       AP_WAIT_INTERRUPT,      /* wait for thin interrupt (if available) */
-       AP_WAIT_NONE,           /* no wait */
-       NR_AP_WAIT
+enum ap_sm_wait {
+       AP_SM_WAIT_AGAIN,       /* retry immediately */
+       AP_SM_WAIT_TIMEOUT,     /* wait for timeout */
+       AP_SM_WAIT_INTERRUPT,   /* wait for thin interrupt (if available) */
+       AP_SM_WAIT_NONE,        /* no wait */
+       NR_AP_SM_WAIT
 };
 
 struct ap_device;
@@ -172,7 +172,7 @@ struct ap_queue {
        ap_qid_t qid;                   /* AP queue id. */
        int interrupt;                  /* indicate if interrupts are enabled */
        int queue_count;                /* # messages currently on AP queue. */
-       enum ap_state state;            /* State of the AP device. */
+       enum ap_sm_state sm_state;      /* ap queue state machine state */
        int pendingq_count;             /* # requests on pendingq list. */
        int requestq_count;             /* # requests on requestq list. */
        u64 total_request_count;        /* # requests ever for this AP device.*/
@@ -185,22 +185,23 @@ struct ap_queue {
 
 #define to_ap_queue(x) container_of((x), struct ap_queue, ap_dev.device)
 
-typedef enum ap_wait (ap_func_t)(struct ap_queue *queue);
+typedef enum ap_sm_wait (ap_func_t)(struct ap_queue *queue);
 
 struct ap_message {
        struct list_head list;          /* Request queueing. */
        unsigned long long psmid;       /* Message id. */
-       void *message;                  /* Pointer to message buffer. */
-       size_t length;                  /* Message length. */
+       void *msg;                      /* Pointer to message buffer. */
+       unsigned int len;               /* Message length. */
+       u32 flags;                      /* Flags, see AP_MSG_FLAG_xxx */
        int rc;                         /* Return code for this message */
-
        void *private;                  /* ap driver private pointer. */
-       unsigned int special:1;         /* Used for special commands. */
        /* receive is called from tasklet context */
        void (*receive)(struct ap_queue *, struct ap_message *,
                        struct ap_message *);
 };
 
+#define AP_MSG_FLAG_SPECIAL  (1 << 16) /* flag msg as 'special' with NQAP */
+
 /**
  * ap_init_message() - Initialize ap_message.
  * Initialize a message before using. Otherwise this might result in
@@ -218,7 +219,7 @@ static inline void ap_init_message(struct ap_message *ap_msg)
  */
 static inline void ap_release_message(struct ap_message *ap_msg)
 {
-       kzfree(ap_msg->message);
+       kzfree(ap_msg->msg);
        kzfree(ap_msg->private);
 }
 
@@ -230,15 +231,15 @@ static inline void ap_release_message(struct ap_message *ap_msg)
 int ap_send(ap_qid_t, unsigned long long, void *, size_t);
 int ap_recv(ap_qid_t, unsigned long long *, void *, size_t);
 
-enum ap_wait ap_sm_event(struct ap_queue *aq, enum ap_event event);
-enum ap_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_event event);
+enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event);
+enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event);
 
 void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg);
 void ap_cancel_message(struct ap_queue *aq, struct ap_message *ap_msg);
 void ap_flush_queue(struct ap_queue *aq);
 
 void *ap_airq_ptr(void);
-void ap_wait(enum ap_wait wait);
+void ap_wait(enum ap_sm_wait wait);
 void ap_request_timeout(struct timer_list *t);
 void ap_bus_force_rescan(void);
 
index 73b077d..688ebeb 100644 (file)
@@ -69,9 +69,9 @@ static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind)
  */
 static inline struct ap_queue_status
 __ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length,
-         unsigned int special)
+         int special)
 {
-       if (special == 1)
+       if (special)
                qid |= 0x400000UL;
        return ap_nqap(qid, psmid, msg, length);
 }
@@ -119,9 +119,9 @@ EXPORT_SYMBOL(ap_recv);
 
 /* State machine definitions and helpers */
 
-static enum ap_wait ap_sm_nop(struct ap_queue *aq)
+static enum ap_sm_wait ap_sm_nop(struct ap_queue *aq)
 {
-       return AP_WAIT_NONE;
+       return AP_SM_WAIT_NONE;
 }
 
 /**
@@ -129,7 +129,7 @@ static enum ap_wait ap_sm_nop(struct ap_queue *aq)
  *     not change the state of the device.
  * @aq: pointer to the AP queue
  *
- * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ * Returns AP_SM_WAIT_NONE, AP_SM_WAIT_AGAIN, or AP_SM_WAIT_INTERRUPT
  */
 static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
 {
@@ -137,7 +137,7 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
        struct ap_message *ap_msg;
 
        status = ap_dqap(aq->qid, &aq->reply->psmid,
-                        aq->reply->message, aq->reply->length);
+                        aq->reply->msg, aq->reply->len);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
                aq->queue_count--;
@@ -172,31 +172,31 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
  * ap_sm_read(): Receive pending reply messages from an AP queue.
  * @aq: pointer to the AP queue
  *
- * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ * Returns AP_SM_WAIT_NONE, AP_SM_WAIT_AGAIN, or AP_SM_WAIT_INTERRUPT
  */
-static enum ap_wait ap_sm_read(struct ap_queue *aq)
+static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
 {
        struct ap_queue_status status;
 
        if (!aq->reply)
-               return AP_WAIT_NONE;
+               return AP_SM_WAIT_NONE;
        status = ap_sm_recv(aq);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
                if (aq->queue_count > 0) {
-                       aq->state = AP_STATE_WORKING;
-                       return AP_WAIT_AGAIN;
+                       aq->sm_state = AP_SM_STATE_WORKING;
+                       return AP_SM_WAIT_AGAIN;
                }
-               aq->state = AP_STATE_IDLE;
-               return AP_WAIT_NONE;
+               aq->sm_state = AP_SM_STATE_IDLE;
+               return AP_SM_WAIT_NONE;
        case AP_RESPONSE_NO_PENDING_REPLY:
                if (aq->queue_count > 0)
-                       return AP_WAIT_INTERRUPT;
-               aq->state = AP_STATE_IDLE;
-               return AP_WAIT_NONE;
+                       return AP_SM_WAIT_INTERRUPT;
+               aq->sm_state = AP_SM_STATE_IDLE;
+               return AP_SM_WAIT_NONE;
        default:
-               aq->state = AP_STATE_BORKED;
-               return AP_WAIT_NONE;
+               aq->sm_state = AP_SM_STATE_BORKED;
+               return AP_SM_WAIT_NONE;
        }
 }
 
@@ -204,19 +204,20 @@ static enum ap_wait ap_sm_read(struct ap_queue *aq)
  * ap_sm_write(): Send messages from the request queue to an AP queue.
  * @aq: pointer to the AP queue
  *
- * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ * Returns AP_SM_WAIT_NONE, AP_SM_WAIT_AGAIN, or AP_SM_WAIT_INTERRUPT
  */
-static enum ap_wait ap_sm_write(struct ap_queue *aq)
+static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
 {
        struct ap_queue_status status;
        struct ap_message *ap_msg;
 
        if (aq->requestq_count <= 0)
-               return AP_WAIT_NONE;
+               return AP_SM_WAIT_NONE;
        /* Start the next request on the queue. */
        ap_msg = list_entry(aq->requestq.next, struct ap_message, list);
        status = __ap_send(aq->qid, ap_msg->psmid,
-                          ap_msg->message, ap_msg->length, ap_msg->special);
+                          ap_msg->msg, ap_msg->len,
+                          ap_msg->flags & AP_MSG_FLAG_SPECIAL);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
                aq->queue_count++;
@@ -226,26 +227,26 @@ static enum ap_wait ap_sm_write(struct ap_queue *aq)
                aq->requestq_count--;
                aq->pendingq_count++;
                if (aq->queue_count < aq->card->queue_depth) {
-                       aq->state = AP_STATE_WORKING;
-                       return AP_WAIT_AGAIN;
+                       aq->sm_state = AP_SM_STATE_WORKING;
+                       return AP_SM_WAIT_AGAIN;
                }
                fallthrough;
        case AP_RESPONSE_Q_FULL:
-               aq->state = AP_STATE_QUEUE_FULL;
-               return AP_WAIT_INTERRUPT;
+               aq->sm_state = AP_SM_STATE_QUEUE_FULL;
+               return AP_SM_WAIT_INTERRUPT;
        case AP_RESPONSE_RESET_IN_PROGRESS:
-               aq->state = AP_STATE_RESET_WAIT;
-               return AP_WAIT_TIMEOUT;
+               aq->sm_state = AP_SM_STATE_RESET_WAIT;
+               return AP_SM_WAIT_TIMEOUT;
        case AP_RESPONSE_MESSAGE_TOO_BIG:
        case AP_RESPONSE_REQ_FAC_NOT_INST:
                list_del_init(&ap_msg->list);
                aq->requestq_count--;
                ap_msg->rc = -EINVAL;
                ap_msg->receive(aq, ap_msg, NULL);
-               return AP_WAIT_AGAIN;
+               return AP_SM_WAIT_AGAIN;
        default:
-               aq->state = AP_STATE_BORKED;
-               return AP_WAIT_NONE;
+               aq->sm_state = AP_SM_STATE_BORKED;
+               return AP_SM_WAIT_NONE;
        }
 }
 
@@ -253,9 +254,9 @@ static enum ap_wait ap_sm_write(struct ap_queue *aq)
  * ap_sm_read_write(): Send and receive messages to/from an AP queue.
  * @aq: pointer to the AP queue
  *
- * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ * Returns AP_SM_WAIT_NONE, AP_SM_WAIT_AGAIN, or AP_SM_WAIT_INTERRUPT
  */
-static enum ap_wait ap_sm_read_write(struct ap_queue *aq)
+static enum ap_sm_wait ap_sm_read_write(struct ap_queue *aq)
 {
        return min(ap_sm_read(aq), ap_sm_write(aq));
 }
@@ -266,7 +267,7 @@ static enum ap_wait ap_sm_read_write(struct ap_queue *aq)
  *
  * Submit the Reset command to an AP queue.
  */
-static enum ap_wait ap_sm_reset(struct ap_queue *aq)
+static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq)
 {
        struct ap_queue_status status;
 
@@ -274,17 +275,17 @@ static enum ap_wait ap_sm_reset(struct ap_queue *aq)
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
        case AP_RESPONSE_RESET_IN_PROGRESS:
-               aq->state = AP_STATE_RESET_WAIT;
+               aq->sm_state = AP_SM_STATE_RESET_WAIT;
                aq->interrupt = AP_INTR_DISABLED;
-               return AP_WAIT_TIMEOUT;
+               return AP_SM_WAIT_TIMEOUT;
        case AP_RESPONSE_BUSY:
-               return AP_WAIT_TIMEOUT;
+               return AP_SM_WAIT_TIMEOUT;
        case AP_RESPONSE_Q_NOT_AVAIL:
        case AP_RESPONSE_DECONFIGURED:
        case AP_RESPONSE_CHECKSTOPPED:
        default:
-               aq->state = AP_STATE_BORKED;
-               return AP_WAIT_NONE;
+               aq->sm_state = AP_SM_STATE_BORKED;
+               return AP_SM_WAIT_NONE;
        }
 }
 
@@ -294,7 +295,7 @@ static enum ap_wait ap_sm_reset(struct ap_queue *aq)
  *
  * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0.
  */
-static enum ap_wait ap_sm_reset_wait(struct ap_queue *aq)
+static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq)
 {
        struct ap_queue_status status;
        void *lsi_ptr;
@@ -310,20 +311,20 @@ static enum ap_wait ap_sm_reset_wait(struct ap_queue *aq)
        case AP_RESPONSE_NORMAL:
                lsi_ptr = ap_airq_ptr();
                if (lsi_ptr && ap_queue_enable_interruption(aq, lsi_ptr) == 0)
-                       aq->state = AP_STATE_SETIRQ_WAIT;
+                       aq->sm_state = AP_SM_STATE_SETIRQ_WAIT;
                else
-                       aq->state = (aq->queue_count > 0) ?
-                               AP_STATE_WORKING : AP_STATE_IDLE;
-               return AP_WAIT_AGAIN;
+                       aq->sm_state = (aq->queue_count > 0) ?
+                               AP_SM_STATE_WORKING : AP_SM_STATE_IDLE;
+               return AP_SM_WAIT_AGAIN;
        case AP_RESPONSE_BUSY:
        case AP_RESPONSE_RESET_IN_PROGRESS:
-               return AP_WAIT_TIMEOUT;
+               return AP_SM_WAIT_TIMEOUT;
        case AP_RESPONSE_Q_NOT_AVAIL:
        case AP_RESPONSE_DECONFIGURED:
        case AP_RESPONSE_CHECKSTOPPED:
        default:
-               aq->state = AP_STATE_BORKED;
-               return AP_WAIT_NONE;
+               aq->sm_state = AP_SM_STATE_BORKED;
+               return AP_SM_WAIT_NONE;
        }
 }
 
@@ -333,7 +334,7 @@ static enum ap_wait ap_sm_reset_wait(struct ap_queue *aq)
  *
  * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0.
  */
-static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq)
+static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq)
 {
        struct ap_queue_status status;
 
@@ -347,75 +348,75 @@ static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq)
        if (status.irq_enabled == 1) {
                /* Irqs are now enabled */
                aq->interrupt = AP_INTR_ENABLED;
-               aq->state = (aq->queue_count > 0) ?
-                       AP_STATE_WORKING : AP_STATE_IDLE;
+               aq->sm_state = (aq->queue_count > 0) ?
+                       AP_SM_STATE_WORKING : AP_SM_STATE_IDLE;
        }
 
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
                if (aq->queue_count > 0)
-                       return AP_WAIT_AGAIN;
+                       return AP_SM_WAIT_AGAIN;
                fallthrough;
        case AP_RESPONSE_NO_PENDING_REPLY:
-               return AP_WAIT_TIMEOUT;
+               return AP_SM_WAIT_TIMEOUT;
        default:
-               aq->state = AP_STATE_BORKED;
-               return AP_WAIT_NONE;
+               aq->sm_state = AP_SM_STATE_BORKED;
+               return AP_SM_WAIT_NONE;
        }
 }
 
 /*
  * AP state machine jump table
  */
-static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = {
-       [AP_STATE_RESET_START] = {
-               [AP_EVENT_POLL] = ap_sm_reset,
-               [AP_EVENT_TIMEOUT] = ap_sm_nop,
+static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = {
+       [AP_SM_STATE_RESET_START] = {
+               [AP_SM_EVENT_POLL] = ap_sm_reset,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
        },
-       [AP_STATE_RESET_WAIT] = {
-               [AP_EVENT_POLL] = ap_sm_reset_wait,
-               [AP_EVENT_TIMEOUT] = ap_sm_nop,
+       [AP_SM_STATE_RESET_WAIT] = {
+               [AP_SM_EVENT_POLL] = ap_sm_reset_wait,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
        },
-       [AP_STATE_SETIRQ_WAIT] = {
-               [AP_EVENT_POLL] = ap_sm_setirq_wait,
-               [AP_EVENT_TIMEOUT] = ap_sm_nop,
+       [AP_SM_STATE_SETIRQ_WAIT] = {
+               [AP_SM_EVENT_POLL] = ap_sm_setirq_wait,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
        },
-       [AP_STATE_IDLE] = {
-               [AP_EVENT_POLL] = ap_sm_write,
-               [AP_EVENT_TIMEOUT] = ap_sm_nop,
+       [AP_SM_STATE_IDLE] = {
+               [AP_SM_EVENT_POLL] = ap_sm_write,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
        },
-       [AP_STATE_WORKING] = {
-               [AP_EVENT_POLL] = ap_sm_read_write,
-               [AP_EVENT_TIMEOUT] = ap_sm_reset,
+       [AP_SM_STATE_WORKING] = {
+               [AP_SM_EVENT_POLL] = ap_sm_read_write,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_reset,
        },
-       [AP_STATE_QUEUE_FULL] = {
-               [AP_EVENT_POLL] = ap_sm_read,
-               [AP_EVENT_TIMEOUT] = ap_sm_reset,
+       [AP_SM_STATE_QUEUE_FULL] = {
+               [AP_SM_EVENT_POLL] = ap_sm_read,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_reset,
        },
-       [AP_STATE_REMOVE] = {
-               [AP_EVENT_POLL] = ap_sm_nop,
-               [AP_EVENT_TIMEOUT] = ap_sm_nop,
+       [AP_SM_STATE_REMOVE] = {
+               [AP_SM_EVENT_POLL] = ap_sm_nop,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
        },
-       [AP_STATE_UNBOUND] = {
-               [AP_EVENT_POLL] = ap_sm_nop,
-               [AP_EVENT_TIMEOUT] = ap_sm_nop,
+       [AP_SM_STATE_UNBOUND] = {
+               [AP_SM_EVENT_POLL] = ap_sm_nop,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
        },
-       [AP_STATE_BORKED] = {
-               [AP_EVENT_POLL] = ap_sm_nop,
-               [AP_EVENT_TIMEOUT] = ap_sm_nop,
+       [AP_SM_STATE_BORKED] = {
+               [AP_SM_EVENT_POLL] = ap_sm_nop,
+               [AP_SM_EVENT_TIMEOUT] = ap_sm_nop,
        },
 };
 
-enum ap_wait ap_sm_event(struct ap_queue *aq, enum ap_event event)
+enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event)
 {
-       return ap_jumptable[aq->state][event](aq);
+       return ap_jumptable[aq->sm_state][event](aq);
 }
 
-enum ap_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_event event)
+enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event)
 {
-       enum ap_wait wait;
+       enum ap_sm_wait wait;
 
-       while ((wait = ap_sm_event(aq, event)) == AP_WAIT_AGAIN)
+       while ((wait = ap_sm_event(aq, event)) == AP_SM_WAIT_AGAIN)
                ;
        return wait;
 }
@@ -486,13 +487,13 @@ static ssize_t reset_show(struct device *dev,
        int rc = 0;
 
        spin_lock_bh(&aq->lock);
-       switch (aq->state) {
-       case AP_STATE_RESET_START:
-       case AP_STATE_RESET_WAIT:
+       switch (aq->sm_state) {
+       case AP_SM_STATE_RESET_START:
+       case AP_SM_STATE_RESET_WAIT:
                rc = scnprintf(buf, PAGE_SIZE, "Reset in progress.\n");
                break;
-       case AP_STATE_WORKING:
-       case AP_STATE_QUEUE_FULL:
+       case AP_SM_STATE_WORKING:
+       case AP_SM_STATE_QUEUE_FULL:
                rc = scnprintf(buf, PAGE_SIZE, "Reset Timer armed.\n");
                break;
        default:
@@ -510,8 +511,8 @@ static ssize_t reset_store(struct device *dev,
 
        spin_lock_bh(&aq->lock);
        __ap_flush_queue(aq);
-       aq->state = AP_STATE_RESET_START;
-       ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
+       aq->sm_state = AP_SM_STATE_RESET_START;
+       ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL));
        spin_unlock_bh(&aq->lock);
 
        AP_DBF(DBF_INFO, "reset queue=%02x.%04x triggered by user\n",
@@ -529,7 +530,7 @@ static ssize_t interrupt_show(struct device *dev,
        int rc = 0;
 
        spin_lock_bh(&aq->lock);
-       if (aq->state == AP_STATE_SETIRQ_WAIT)
+       if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT)
                rc = scnprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
        else if (aq->interrupt == AP_INTR_ENABLED)
                rc = scnprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
@@ -586,7 +587,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
        aq->ap_dev.device.type = &ap_queue_type;
        aq->ap_dev.device_type = device_type;
        aq->qid = qid;
-       aq->state = AP_STATE_UNBOUND;
+       aq->sm_state = AP_SM_STATE_UNBOUND;
        aq->interrupt = AP_INTR_DISABLED;
        spin_lock_init(&aq->lock);
        INIT_LIST_HEAD(&aq->pendingq);
@@ -601,7 +602,7 @@ void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *reply)
        aq->reply = reply;
 
        spin_lock_bh(&aq->lock);
-       ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
+       ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL));
        spin_unlock_bh(&aq->lock);
 }
 EXPORT_SYMBOL(ap_queue_init_reply);
@@ -625,7 +626,7 @@ void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg)
        aq->total_request_count++;
        atomic64_inc(&aq->card->total_request_count);
        /* Send/receive as many request from the queue as possible. */
-       ap_wait(ap_sm_event_loop(aq, AP_EVENT_POLL));
+       ap_wait(ap_sm_event_loop(aq, AP_SM_EVENT_POLL));
        spin_unlock_bh(&aq->lock);
 }
 EXPORT_SYMBOL(ap_queue_message);
@@ -698,7 +699,7 @@ void ap_queue_prepare_remove(struct ap_queue *aq)
        /* flush queue */
        __ap_flush_queue(aq);
        /* set REMOVE state to prevent new messages are queued in */
-       aq->state = AP_STATE_REMOVE;
+       aq->sm_state = AP_SM_STATE_REMOVE;
        spin_unlock_bh(&aq->lock);
        del_timer_sync(&aq->timeout);
 }
@@ -707,22 +708,22 @@ void ap_queue_remove(struct ap_queue *aq)
 {
        /*
         * all messages have been flushed and the state is
-        * AP_STATE_REMOVE. Now reset with zero which also
+        * AP_SM_STATE_REMOVE. Now reset with zero which also
         * clears the irq registration and move the state
-        * to AP_STATE_UNBOUND to signal that this queue
+        * to AP_SM_STATE_UNBOUND to signal that this queue
         * is not used by any driver currently.
         */
        spin_lock_bh(&aq->lock);
        ap_zapq(aq->qid);
-       aq->state = AP_STATE_UNBOUND;
+       aq->sm_state = AP_SM_STATE_UNBOUND;
        spin_unlock_bh(&aq->lock);
 }
 
 void ap_queue_init_state(struct ap_queue *aq)
 {
        spin_lock_bh(&aq->lock);
-       aq->state = AP_STATE_RESET_START;
-       ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
+       aq->sm_state = AP_SM_STATE_RESET_START;
+       ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL));
        spin_unlock_bh(&aq->lock);
 }
 EXPORT_SYMBOL(ap_queue_init_state);
index 74e63ec..d5880f5 100644 (file)
@@ -1603,8 +1603,8 @@ static ssize_t pkey_ccacipher_aes_attr_read(enum pkey_key_size keybits,
                if (rc == 0)
                        break;
        }
-               if (rc)
-                       return rc;
+       if (rc)
+               return rc;
 
        if (is_xts) {
                keysize = CCACIPHERTOKENSIZE;
index 56a405d..4dbbfd8 100644 (file)
@@ -634,7 +634,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
 {
        struct zcrypt_card *zc, *pref_zc;
        struct zcrypt_queue *zq, *pref_zq;
-       unsigned int weight, pref_weight;
+       unsigned int weight = 0, pref_weight = 0;
        unsigned int func_code;
        int qid = 0, rc = -ENODEV;
        struct module *mod;
@@ -718,7 +718,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
 {
        struct zcrypt_card *zc, *pref_zc;
        struct zcrypt_queue *zq, *pref_zq;
-       unsigned int weight, pref_weight;
+       unsigned int weight = 0, pref_weight = 0;
        unsigned int func_code;
        int qid = 0, rc = -ENODEV;
        struct module *mod;
@@ -803,7 +803,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
        struct zcrypt_card *zc, *pref_zc;
        struct zcrypt_queue *zq, *pref_zq;
        struct ap_message ap_msg;
-       unsigned int weight, pref_weight;
+       unsigned int weight = 0, pref_weight = 0;
        unsigned int func_code;
        unsigned short *domain, tdom;
        int qid = 0, rc = -ENODEV;
@@ -822,7 +822,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
         * domain but a control only domain, use the default domain as target.
         */
        tdom = *domain;
-       if (tdom >= 0 && tdom < AP_DOMAINS &&
+       if (tdom < AP_DOMAINS &&
            !ap_test_config_usage_domain(tdom) &&
            ap_test_config_ctrl_domain(tdom) &&
            ap_domain_index >= 0)
@@ -931,7 +931,7 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms,
        struct zcrypt_queue *zq, *pref_zq;
        struct ep11_target_dev *targets;
        unsigned short target_num;
-       unsigned int weight, pref_weight;
+       unsigned int weight = 0, pref_weight = 0;
        unsigned int func_code;
        struct ap_message ap_msg;
        int qid = 0, rc = -ENODEV;
@@ -1040,7 +1040,7 @@ static long zcrypt_rng(char *buffer)
 {
        struct zcrypt_card *zc, *pref_zc;
        struct zcrypt_queue *zq, *pref_zq;
-       unsigned int weight, pref_weight;
+       unsigned int weight = 0, pref_weight = 0;
        unsigned int func_code;
        struct ap_message ap_msg;
        unsigned int domain;
@@ -1298,99 +1298,119 @@ static int zcrypt_requestq_count(void)
        return requestq_count;
 }
 
-static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
-                                 unsigned long arg)
+static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg)
 {
        int rc;
-       struct ap_perms *perms =
-               (struct ap_perms *) filp->private_data;
+       struct ica_rsa_modexpo mex;
+       struct ica_rsa_modexpo __user *umex = (void __user *) arg;
 
-       rc = zcrypt_check_ioctl(perms, cmd);
-       if (rc)
-               return rc;
-
-       switch (cmd) {
-       case ICARSAMODEXPO: {
-               struct ica_rsa_modexpo __user *umex = (void __user *) arg;
-               struct ica_rsa_modexpo mex;
-
-               if (copy_from_user(&mex, umex, sizeof(mex)))
-                       return -EFAULT;
+       if (copy_from_user(&mex, umex, sizeof(mex)))
+               return -EFAULT;
+       do {
+               rc = zcrypt_rsa_modexpo(perms, &mex);
+       } while (rc == -EAGAIN);
+       /* on failure: retry once again after a requested rescan */
+       if ((rc == -ENODEV) && (zcrypt_process_rescan()))
                do {
                        rc = zcrypt_rsa_modexpo(perms, &mex);
                } while (rc == -EAGAIN);
-               /* on failure: retry once again after a requested rescan */
-               if ((rc == -ENODEV) && (zcrypt_process_rescan()))
-                       do {
-                               rc = zcrypt_rsa_modexpo(perms, &mex);
-                       } while (rc == -EAGAIN);
-               if (rc) {
-                       ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d\n", rc);
-                       return rc;
-               }
-               return put_user(mex.outputdatalength, &umex->outputdatalength);
+       if (rc) {
+               ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d\n", rc);
+               return rc;
        }
-       case ICARSACRT: {
-               struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg;
-               struct ica_rsa_modexpo_crt crt;
+       return put_user(mex.outputdatalength, &umex->outputdatalength);
+}
 
-               if (copy_from_user(&crt, ucrt, sizeof(crt)))
-                       return -EFAULT;
+static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg)
+{
+       int rc;
+       struct ica_rsa_modexpo_crt crt;
+       struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg;
+
+       if (copy_from_user(&crt, ucrt, sizeof(crt)))
+               return -EFAULT;
+       do {
+               rc = zcrypt_rsa_crt(perms, &crt);
+       } while (rc == -EAGAIN);
+       /* on failure: retry once again after a requested rescan */
+       if ((rc == -ENODEV) && (zcrypt_process_rescan()))
                do {
                        rc = zcrypt_rsa_crt(perms, &crt);
                } while (rc == -EAGAIN);
-               /* on failure: retry once again after a requested rescan */
-               if ((rc == -ENODEV) && (zcrypt_process_rescan()))
-                       do {
-                               rc = zcrypt_rsa_crt(perms, &crt);
-                       } while (rc == -EAGAIN);
-               if (rc) {
-                       ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d\n", rc);
-                       return rc;
-               }
-               return put_user(crt.outputdatalength, &ucrt->outputdatalength);
+       if (rc) {
+               ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d\n", rc);
+               return rc;
        }
-       case ZSECSENDCPRB: {
-               struct ica_xcRB __user *uxcRB = (void __user *) arg;
-               struct ica_xcRB xcRB;
+       return put_user(crt.outputdatalength, &ucrt->outputdatalength);
+}
 
-               if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB)))
-                       return -EFAULT;
+static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
+{
+       int rc;
+       struct ica_xcRB xcRB;
+       struct ica_xcRB __user *uxcRB = (void __user *) arg;
+
+       if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB)))
+               return -EFAULT;
+       do {
+               rc = _zcrypt_send_cprb(perms, &xcRB);
+       } while (rc == -EAGAIN);
+       /* on failure: retry once again after a requested rescan */
+       if ((rc == -ENODEV) && (zcrypt_process_rescan()))
                do {
                        rc = _zcrypt_send_cprb(perms, &xcRB);
                } while (rc == -EAGAIN);
-               /* on failure: retry once again after a requested rescan */
-               if ((rc == -ENODEV) && (zcrypt_process_rescan()))
-                       do {
-                               rc = _zcrypt_send_cprb(perms, &xcRB);
-                       } while (rc == -EAGAIN);
-               if (rc)
-                       ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n",
-                                  rc, xcRB.status);
-               if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB)))
-                       return -EFAULT;
-               return rc;
-       }
-       case ZSENDEP11CPRB: {
-               struct ep11_urb __user *uxcrb = (void __user *)arg;
-               struct ep11_urb xcrb;
+       if (rc)
+               ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n",
+                          rc, xcRB.status);
+       if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB)))
+               return -EFAULT;
+       return rc;
+}
 
-               if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb)))
-                       return -EFAULT;
+static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg)
+{
+       int rc;
+       struct ep11_urb xcrb;
+       struct ep11_urb __user *uxcrb = (void __user *)arg;
+
+       if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb)))
+               return -EFAULT;
+       do {
+               rc = _zcrypt_send_ep11_cprb(perms, &xcrb);
+       } while (rc == -EAGAIN);
+       /* on failure: retry once again after a requested rescan */
+       if ((rc == -ENODEV) && (zcrypt_process_rescan()))
                do {
                        rc = _zcrypt_send_ep11_cprb(perms, &xcrb);
                } while (rc == -EAGAIN);
-               /* on failure: retry once again after a requested rescan */
-               if ((rc == -ENODEV) && (zcrypt_process_rescan()))
-                       do {
-                               rc = _zcrypt_send_ep11_cprb(perms, &xcrb);
-                       } while (rc == -EAGAIN);
-               if (rc)
-                       ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc);
-               if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
-                       return -EFAULT;
+       if (rc)
+               ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc);
+       if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
+               return -EFAULT;
+       return rc;
+}
+
+static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
+                                 unsigned long arg)
+{
+       int rc;
+       struct ap_perms *perms =
+               (struct ap_perms *) filp->private_data;
+
+       rc = zcrypt_check_ioctl(perms, cmd);
+       if (rc)
                return rc;
-       }
+
+       switch (cmd) {
+       case ICARSAMODEXPO:
+               return icarsamodexpo_ioctl(perms, arg);
+       case ICARSACRT:
+               return icarsacrt_ioctl(perms, arg);
+       case ZSECSENDCPRB:
+               return zsecsendcprb_ioctl(perms, arg);
+       case ZSENDEP11CPRB:
+               return zsendep11cprb_ioctl(perms, arg);
        case ZCRYPT_DEVICE_STATUS: {
                struct zcrypt_device_status_ext *device_status;
                size_t total_size = MAX_ZDEV_ENTRIES_EXT
index 1b83539..3f5b613 100644 (file)
@@ -205,9 +205,9 @@ static int alloc_and_prep_cprbmem(size_t paramblen,
        preqcblk->rpl_msgbl = cprbplusparamblen;
        if (paramblen) {
                preqcblk->req_parmb =
-                       ((u8 *) preqcblk) + sizeof(struct CPRBX);
+                       ((u8 __user *) preqcblk) + sizeof(struct CPRBX);
                preqcblk->rpl_parmb =
-                       ((u8 *) prepcblk) + sizeof(struct CPRBX);
+                       ((u8 __user *) prepcblk) + sizeof(struct CPRBX);
        }
 
        *pcprbmem = cprbmem;
@@ -274,7 +274,7 @@ int cca_genseckey(u16 cardnr, u16 domain,
 {
        int i, rc, keysize;
        int seckeysize;
-       u8 *mem;
+       u8 *mem, *ptr;
        struct CPRBX *preqcblk, *prepcblk;
        struct ica_xcRB xcrb;
        struct kgreqparm {
@@ -320,7 +320,7 @@ int cca_genseckey(u16 cardnr, u16 domain,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with KG request */
-       preqparm = (struct kgreqparm *) preqcblk->req_parmb;
+       preqparm = (struct kgreqparm __force *) preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "KG", 2);
        preqparm->rule_array_len = sizeof(preqparm->rule_array_len);
        preqparm->lv1.len = sizeof(struct lv1);
@@ -377,8 +377,9 @@ int cca_genseckey(u16 cardnr, u16 domain,
        }
 
        /* process response cprb param block */
-       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepparm = (struct kgrepparm *) prepcblk->rpl_parmb;
+       ptr =  ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *) ptr;
+       prepparm = (struct kgrepparm *) ptr;
 
        /* check length of the returned secure key token */
        seckeysize = prepparm->lv3.keyblock.toklen
@@ -415,7 +416,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
                   const u8 *clrkey, u8 seckey[SECKEYBLOBSIZE])
 {
        int rc, keysize, seckeysize;
-       u8 *mem;
+       u8 *mem, *ptr;
        struct CPRBX *preqcblk, *prepcblk;
        struct ica_xcRB xcrb;
        struct cmreqparm {
@@ -460,7 +461,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with CM request */
-       preqparm = (struct cmreqparm *) preqcblk->req_parmb;
+       preqparm = (struct cmreqparm __force *) preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "CM", 2);
        memcpy(preqparm->rule_array, "AES     ", 8);
        preqparm->rule_array_len =
@@ -514,8 +515,9 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize,
        }
 
        /* process response cprb param block */
-       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepparm = (struct cmrepparm *) prepcblk->rpl_parmb;
+       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *) ptr;
+       prepparm = (struct cmrepparm *) ptr;
 
        /* check length of the returned secure key token */
        seckeysize = prepparm->lv3.keyblock.toklen
@@ -554,7 +556,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
                    u8 *protkey, u32 *protkeylen, u32 *protkeytype)
 {
        int rc;
-       u8 *mem;
+       u8 *mem, *ptr;
        struct CPRBX *preqcblk, *prepcblk;
        struct ica_xcRB xcrb;
        struct uskreqparm {
@@ -605,7 +607,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with USK request */
-       preqparm = (struct uskreqparm *) preqcblk->req_parmb;
+       preqparm = (struct uskreqparm __force *) preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "US", 2);
        preqparm->rule_array_len = sizeof(preqparm->rule_array_len);
        preqparm->lv1.len = sizeof(struct lv1);
@@ -646,8 +648,9 @@ int cca_sec2protkey(u16 cardnr, u16 domain,
        }
 
        /* process response cprb param block */
-       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepparm = (struct uskrepparm *) prepcblk->rpl_parmb;
+       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *) ptr;
+       prepparm = (struct uskrepparm *) ptr;
 
        /* check the returned keyblock */
        if (prepparm->lv3.ckb.version != 0x01 &&
@@ -714,7 +717,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
                     u8 *keybuf, size_t *keybufsize)
 {
        int rc;
-       u8 *mem;
+       u8 *mem, *ptr;
        struct CPRBX *preqcblk, *prepcblk;
        struct ica_xcRB xcrb;
        struct gkreqparm {
@@ -796,7 +799,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
        preqcblk->req_parml = sizeof(struct gkreqparm);
 
        /* prepare request param block with GK request */
-       preqparm = (struct gkreqparm *) preqcblk->req_parmb;
+       preqparm = (struct gkreqparm __force *) preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "GK", 2);
        preqparm->rule_array_len =  sizeof(uint16_t) + 2 * 8;
        memcpy(preqparm->rule_array, "AES     OP      ", 2*8);
@@ -867,8 +870,9 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
        }
 
        /* process response cprb param block */
-       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepparm = (struct gkrepparm *) prepcblk->rpl_parmb;
+       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *) ptr;
+       prepparm = (struct gkrepparm *) ptr;
 
        /* do some plausibility checks on the key block */
        if (prepparm->kb.len < 120 + 5 * sizeof(uint16_t) ||
@@ -917,7 +921,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
                           int *key_token_size)
 {
        int rc, n;
-       u8 *mem;
+       u8 *mem, *ptr;
        struct CPRBX *preqcblk, *prepcblk;
        struct ica_xcRB xcrb;
        struct rule_array_block {
@@ -974,7 +978,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
        preqcblk->req_parml = 0;
 
        /* prepare request param block with IP request */
-       preq_ra_block = (struct rule_array_block *) preqcblk->req_parmb;
+       preq_ra_block = (struct rule_array_block __force *) preqcblk->req_parmb;
        memcpy(preq_ra_block->subfunc_code, "IP", 2);
        preq_ra_block->rule_array_len =  sizeof(uint16_t) + 2 * 8;
        memcpy(preq_ra_block->rule_array, rule_array_1, 8);
@@ -987,7 +991,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
        }
 
        /* prepare vud block */
-       preq_vud_block = (struct vud_block *)
+       preq_vud_block = (struct vud_block __force *)
                (preqcblk->req_parmb + preqcblk->req_parml);
        n = complete ? 0 : (clr_key_bit_size + 7) / 8;
        preq_vud_block->len = sizeof(struct vud_block) + n;
@@ -1001,7 +1005,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
        preqcblk->req_parml += preq_vud_block->len;
 
        /* prepare key block */
-       preq_key_block = (struct key_block *)
+       preq_key_block = (struct key_block __force *)
                (preqcblk->req_parmb + preqcblk->req_parml);
        n = *key_token_size;
        preq_key_block->len = sizeof(struct key_block) + n;
@@ -1034,8 +1038,9 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain,
        }
 
        /* process response cprb param block */
-       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepparm = (struct iprepparm *) prepcblk->rpl_parmb;
+       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *) ptr;
+       prepparm = (struct iprepparm *) ptr;
 
        /* do some plausibility checks on the key block */
        if (prepparm->kb.len < 120 + 3 * sizeof(uint16_t) ||
@@ -1151,7 +1156,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
                       u8 *protkey, u32 *protkeylen, u32 *protkeytype)
 {
        int rc;
-       u8 *mem;
+       u8 *mem, *ptr;
        struct CPRBX *preqcblk, *prepcblk;
        struct ica_xcRB xcrb;
        struct aureqparm {
@@ -1208,7 +1213,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with AU request */
-       preqparm = (struct aureqparm *) preqcblk->req_parmb;
+       preqparm = (struct aureqparm __force *) preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "AU", 2);
        preqparm->rule_array_len =
                sizeof(preqparm->rule_array_len)
@@ -1257,8 +1262,9 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey,
        }
 
        /* process response cprb param block */
-       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepparm = (struct aurepparm *) prepcblk->rpl_parmb;
+       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *) ptr;
+       prepparm = (struct aurepparm *) ptr;
 
        /* check the returned keyblock */
        if (prepparm->vud.ckb.version != 0x01 &&
@@ -1347,7 +1353,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
        preqcblk->domain = domain;
 
        /* fill request cprb param block with FQ request */
-       preqparm = (struct fqreqparm *) preqcblk->req_parmb;
+       preqparm = (struct fqreqparm __force *) preqcblk->req_parmb;
        memcpy(preqparm->subfunc_code, "FQ", 2);
        memcpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array));
        preqparm->rule_array_len =
@@ -1378,8 +1384,9 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain,
        }
 
        /* process response cprb param block */
-       prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
-       prepparm = (struct fqrepparm *) prepcblk->rpl_parmb;
+       ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX);
+       prepcblk->rpl_parmb = (u8 __user *) ptr;
+       prepparm = (struct fqrepparm *) ptr;
        ptr = prepparm->lvdata;
 
        /* check and possibly copy reply rule array */
index 2664401..f00127a 100644 (file)
@@ -25,6 +25,7 @@
 #include "zcrypt_msgtype6.h"
 #include "zcrypt_cex2c.h"
 #include "zcrypt_cca_key.h"
+#include "zcrypt_ccamisc.h"
 
 #define CEX2C_MIN_MOD_SIZE      16     /*  128 bits    */
 #define CEX2C_MAX_MOD_SIZE     256     /* 2048 bits    */
@@ -58,6 +59,91 @@ static struct ap_device_id zcrypt_cex2c_queue_ids[] = {
 
 MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_queue_ids);
 
+/*
+ * CCA card additional device attributes
+ */
+static ssize_t cca_serialnr_show(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       struct cca_info ci;
+       struct ap_card *ac = to_ap_card(dev);
+       struct zcrypt_card *zc = ac->private;
+
+       memset(&ci, 0, sizeof(ci));
+
+       if (ap_domain_index >= 0)
+               cca_get_info(ac->id, ap_domain_index, &ci, zc->online);
+
+       return scnprintf(buf, PAGE_SIZE, "%s\n", ci.serial);
+}
+
+static struct device_attribute dev_attr_cca_serialnr =
+       __ATTR(serialnr, 0444, cca_serialnr_show, NULL);
+
+static struct attribute *cca_card_attrs[] = {
+       &dev_attr_cca_serialnr.attr,
+       NULL,
+};
+
+static const struct attribute_group cca_card_attr_grp = {
+       .attrs = cca_card_attrs,
+};
+
+ /*
+  * CCA queue additional device attributes
+  */
+static ssize_t cca_mkvps_show(struct device *dev,
+                             struct device_attribute *attr,
+                             char *buf)
+{
+       int n = 0;
+       struct cca_info ci;
+       struct zcrypt_queue *zq = to_ap_queue(dev)->private;
+       static const char * const cao_state[] = { "invalid", "valid" };
+       static const char * const new_state[] = { "empty", "partial", "full" };
+
+       memset(&ci, 0, sizeof(ci));
+
+       cca_get_info(AP_QID_CARD(zq->queue->qid),
+                    AP_QID_QUEUE(zq->queue->qid),
+                    &ci, zq->online);
+
+       if (ci.new_mk_state >= '1' && ci.new_mk_state <= '3')
+               n = scnprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n",
+                             new_state[ci.new_mk_state - '1'], ci.new_mkvp);
+       else
+               n = scnprintf(buf, PAGE_SIZE, "AES NEW: - -\n");
+
+       if (ci.cur_mk_state >= '1' && ci.cur_mk_state <= '2')
+               n += scnprintf(buf + n, PAGE_SIZE - n,
+                              "AES CUR: %s 0x%016llx\n",
+                              cao_state[ci.cur_mk_state - '1'], ci.cur_mkvp);
+       else
+               n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: - -\n");
+
+       if (ci.old_mk_state >= '1' && ci.old_mk_state <= '2')
+               n += scnprintf(buf + n, PAGE_SIZE - n,
+                              "AES OLD: %s 0x%016llx\n",
+                              cao_state[ci.old_mk_state - '1'], ci.old_mkvp);
+       else
+               n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: - -\n");
+
+       return n;
+}
+
+static struct device_attribute dev_attr_cca_mkvps =
+       __ATTR(mkvps, 0444, cca_mkvps_show, NULL);
+
+static struct attribute *cca_queue_attrs[] = {
+       &dev_attr_cca_mkvps.attr,
+       NULL,
+};
+
+static const struct attribute_group cca_queue_attr_grp = {
+       .attrs = cca_queue_attrs,
+};
+
 /**
  * Large random number detection function. Its sends a message to a CEX2C/CEX3C
  * card to find out if large random numbers are supported.
@@ -87,24 +173,23 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq)
        int rc, i;
 
        ap_init_message(&ap_msg);
-       ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
-       if (!ap_msg.message)
+       ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL);
+       if (!ap_msg.msg)
                return -ENOMEM;
 
        rng_type6CPRB_msgX(&ap_msg, 4, &domain);
 
-       msg = ap_msg.message;
+       msg = ap_msg.msg;
        msg->cprbx.domain = AP_QID_QUEUE(aq->qid);
 
-       rc = ap_send(aq->qid, 0x0102030405060708ULL, ap_msg.message,
-                    ap_msg.length);
+       rc = ap_send(aq->qid, 0x0102030405060708ULL, ap_msg.msg, ap_msg.len);
        if (rc)
                goto out_free;
 
        /* Wait for the test message to complete. */
        for (i = 0; i < 2 * HZ; i++) {
                msleep(1000 / HZ);
-               rc = ap_recv(aq->qid, &psmid, ap_msg.message, 4096);
+               rc = ap_recv(aq->qid, &psmid, ap_msg.msg, 4096);
                if (rc == 0 && psmid == 0x0102030405060708ULL)
                        break;
        }
@@ -115,13 +200,13 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq)
                goto out_free;
        }
 
-       reply = ap_msg.message;
+       reply = ap_msg.msg;
        if (reply->cprbx.ccp_rtcode == 0 && reply->cprbx.ccp_rscode == 0)
                rc = 1;
        else
                rc = 0;
 out_free:
-       free_page((unsigned long) ap_msg.message);
+       free_page((unsigned long) ap_msg.msg);
        return rc;
 }
 
@@ -179,6 +264,17 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
        if (rc) {
                ac->private = NULL;
                zcrypt_card_free(zc);
+               return rc;
+       }
+
+       if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) {
+               rc = sysfs_create_group(&ap_dev->device.kobj,
+                                       &cca_card_attr_grp);
+               if (rc) {
+                       zcrypt_card_unregister(zc);
+                       ac->private = NULL;
+                       zcrypt_card_free(zc);
+               }
        }
 
        return rc;
@@ -190,8 +286,11 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
  */
 static void zcrypt_cex2c_card_remove(struct ap_device *ap_dev)
 {
+       struct ap_card *ac = to_ap_card(&ap_dev->device);
        struct zcrypt_card *zc = to_ap_card(&ap_dev->device)->private;
 
+       if (ap_test_bit(&ac->functions, AP_FUNC_COPRO))
+               sysfs_remove_group(&ap_dev->device.kobj, &cca_card_attr_grp);
        if (zc)
                zcrypt_card_unregister(zc);
 }
@@ -240,7 +339,19 @@ static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev)
        if (rc) {
                aq->private = NULL;
                zcrypt_queue_free(zq);
+               return rc;
+       }
+
+       if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) {
+               rc = sysfs_create_group(&ap_dev->device.kobj,
+                                       &cca_queue_attr_grp);
+               if (rc) {
+                       zcrypt_queue_unregister(zq);
+                       aq->private = NULL;
+                       zcrypt_queue_free(zq);
+               }
        }
+
        return rc;
 }
 
@@ -253,6 +364,8 @@ static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev)
        struct ap_queue *aq = to_ap_queue(&ap_dev->device);
        struct zcrypt_queue *zq = aq->private;
 
+       if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO))
+               sysfs_remove_group(&ap_dev->device.kobj, &cca_queue_attr_grp);
        if (zq)
                zcrypt_queue_unregister(zq);
 }
index cdaa834..dc20d98 100644 (file)
@@ -250,7 +250,7 @@ static ssize_t ep11_card_op_modes_show(struct device *dev,
        ep11_get_card_info(ac->id, &ci, zc->online);
 
        for (i = 0; ep11_op_modes[i].mode_txt; i++) {
-               if (ci.op_mode & (1 << ep11_op_modes[i].mode_bit)) {
+               if (ci.op_mode & (1ULL << ep11_op_modes[i].mode_bit)) {
                        if (n > 0)
                                buf[n++] = ' ';
                        n += scnprintf(buf + n, PAGE_SIZE - n,
@@ -345,7 +345,7 @@ static ssize_t ep11_queue_op_modes_show(struct device *dev,
                                     &di);
 
        for (i = 0; ep11_op_modes[i].mode_txt; i++) {
-               if (di.op_mode & (1 << ep11_op_modes[i].mode_bit)) {
+               if (di.op_mode & (1ULL << ep11_op_modes[i].mode_bit)) {
                        if (n > 0)
                                buf[n++] = ' ';
                        n += scnprintf(buf + n, PAGE_SIZE - n,
@@ -529,22 +529,27 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
        if (rc) {
                ac->private = NULL;
                zcrypt_card_free(zc);
-               goto out;
+               return rc;
        }
 
        if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) {
                rc = sysfs_create_group(&ap_dev->device.kobj,
                                        &cca_card_attr_grp);
-               if (rc)
+               if (rc) {
                        zcrypt_card_unregister(zc);
+                       ac->private = NULL;
+                       zcrypt_card_free(zc);
+               }
        } else if (ap_test_bit(&ac->functions, AP_FUNC_EP11)) {
                rc = sysfs_create_group(&ap_dev->device.kobj,
                                        &ep11_card_attr_grp);
-               if (rc)
+               if (rc) {
                        zcrypt_card_unregister(zc);
+                       ac->private = NULL;
+                       zcrypt_card_free(zc);
+               }
        }
 
-out:
        return rc;
 }
 
@@ -617,22 +622,27 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
        if (rc) {
                aq->private = NULL;
                zcrypt_queue_free(zq);
-               goto out;
+               return rc;
        }
 
        if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) {
                rc = sysfs_create_group(&ap_dev->device.kobj,
                                        &cca_queue_attr_grp);
-               if (rc)
+               if (rc) {
                        zcrypt_queue_unregister(zq);
+                       aq->private = NULL;
+                       zcrypt_queue_free(zq);
+               }
        } else if (ap_test_bit(&aq->card->functions, AP_FUNC_EP11)) {
                rc = sysfs_create_group(&ap_dev->device.kobj,
                                        &ep11_queue_attr_grp);
-               if (rc)
+               if (rc) {
                        zcrypt_queue_unregister(zq);
+                       aq->private = NULL;
+                       zcrypt_queue_free(zq);
+               }
        }
 
-out:
        return rc;
 }
 
index 4f4dd9d..54a04f8 100644 (file)
@@ -80,7 +80,7 @@ struct error_hdr {
 static inline int convert_error(struct zcrypt_queue *zq,
                                struct ap_message *reply)
 {
-       struct error_hdr *ehdr = reply->message;
+       struct error_hdr *ehdr = reply->msg;
        int card = AP_QID_CARD(zq->queue->qid);
        int queue = AP_QID_QUEUE(zq->queue->qid);
 
@@ -127,7 +127,7 @@ static inline int convert_error(struct zcrypt_queue *zq,
                        struct {
                                struct type86_hdr hdr;
                                struct type86_fmt2_ext fmt2;
-                       } __packed * head = reply->message;
+                       } __packed * head = reply->msg;
                        unsigned int apfs = *((u32 *)head->fmt2.apfs);
 
                        ZCRYPT_DBF(DBF_ERR,
index fc4295b..7aedc33 100644 (file)
@@ -207,10 +207,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq,
        mod_len = mex->inputdatalength;
 
        if (mod_len <= 128) {
-               struct type50_meb1_msg *meb1 = ap_msg->message;
+               struct type50_meb1_msg *meb1 = ap_msg->msg;
 
                memset(meb1, 0, sizeof(*meb1));
-               ap_msg->length = sizeof(*meb1);
+               ap_msg->len = sizeof(*meb1);
                meb1->header.msg_type_code = TYPE50_TYPE_CODE;
                meb1->header.msg_len = sizeof(*meb1);
                meb1->keyblock_type = TYPE50_MEB1_FMT;
@@ -218,10 +218,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq,
                exp = meb1->exponent + sizeof(meb1->exponent) - mod_len;
                inp = meb1->message + sizeof(meb1->message) - mod_len;
        } else if (mod_len <= 256) {
-               struct type50_meb2_msg *meb2 = ap_msg->message;
+               struct type50_meb2_msg *meb2 = ap_msg->msg;
 
                memset(meb2, 0, sizeof(*meb2));
-               ap_msg->length = sizeof(*meb2);
+               ap_msg->len = sizeof(*meb2);
                meb2->header.msg_type_code = TYPE50_TYPE_CODE;
                meb2->header.msg_len = sizeof(*meb2);
                meb2->keyblock_type = TYPE50_MEB2_FMT;
@@ -229,10 +229,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq,
                exp = meb2->exponent + sizeof(meb2->exponent) - mod_len;
                inp = meb2->message + sizeof(meb2->message) - mod_len;
        } else if (mod_len <= 512) {
-               struct type50_meb3_msg *meb3 = ap_msg->message;
+               struct type50_meb3_msg *meb3 = ap_msg->msg;
 
                memset(meb3, 0, sizeof(*meb3));
-               ap_msg->length = sizeof(*meb3);
+               ap_msg->len = sizeof(*meb3);
                meb3->header.msg_type_code = TYPE50_TYPE_CODE;
                meb3->header.msg_len = sizeof(*meb3);
                meb3->keyblock_type = TYPE50_MEB3_FMT;
@@ -275,10 +275,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq,
         * 512 byte modulus (4k keys).
         */
        if (mod_len <= 128) {           /* up to 1024 bit key size */
-               struct type50_crb1_msg *crb1 = ap_msg->message;
+               struct type50_crb1_msg *crb1 = ap_msg->msg;
 
                memset(crb1, 0, sizeof(*crb1));
-               ap_msg->length = sizeof(*crb1);
+               ap_msg->len = sizeof(*crb1);
                crb1->header.msg_type_code = TYPE50_TYPE_CODE;
                crb1->header.msg_len = sizeof(*crb1);
                crb1->keyblock_type = TYPE50_CRB1_FMT;
@@ -289,10 +289,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq,
                u = crb1->u + sizeof(crb1->u) - short_len;
                inp = crb1->message + sizeof(crb1->message) - mod_len;
        } else if (mod_len <= 256) {    /* up to 2048 bit key size */
-               struct type50_crb2_msg *crb2 = ap_msg->message;
+               struct type50_crb2_msg *crb2 = ap_msg->msg;
 
                memset(crb2, 0, sizeof(*crb2));
-               ap_msg->length = sizeof(*crb2);
+               ap_msg->len = sizeof(*crb2);
                crb2->header.msg_type_code = TYPE50_TYPE_CODE;
                crb2->header.msg_len = sizeof(*crb2);
                crb2->keyblock_type = TYPE50_CRB2_FMT;
@@ -304,10 +304,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq,
                inp = crb2->message + sizeof(crb2->message) - mod_len;
        } else if ((mod_len <= 512) &&  /* up to 4096 bit key size */
                   (zq->zcard->max_mod_size == CEX3A_MAX_MOD_SIZE)) {
-               struct type50_crb3_msg *crb3 = ap_msg->message;
+               struct type50_crb3_msg *crb3 = ap_msg->msg;
 
                memset(crb3, 0, sizeof(*crb3));
-               ap_msg->length = sizeof(*crb3);
+               ap_msg->len = sizeof(*crb3);
                crb3->header.msg_type_code = TYPE50_TYPE_CODE;
                crb3->header.msg_len = sizeof(*crb3);
                crb3->keyblock_type = TYPE50_CRB3_FMT;
@@ -350,7 +350,7 @@ static int convert_type80(struct zcrypt_queue *zq,
                          char __user *outputdata,
                          unsigned int outputdatalength)
 {
-       struct type80_hdr *t80h = reply->message;
+       struct type80_hdr *t80h = reply->msg;
        unsigned char *data;
 
        if (t80h->len < sizeof(*t80h) + outputdatalength) {
@@ -370,7 +370,7 @@ static int convert_type80(struct zcrypt_queue *zq,
                BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE);
        else
                BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE);
-       data = reply->message + t80h->len - outputdatalength;
+       data = reply->msg + t80h->len - outputdatalength;
        if (copy_to_user(outputdata, data, outputdatalength))
                return -EFAULT;
        return 0;
@@ -382,7 +382,7 @@ static int convert_response(struct zcrypt_queue *zq,
                            unsigned int outputdatalength)
 {
        /* Response type byte is the second byte in the response. */
-       unsigned char rtype = ((unsigned char *) reply->message)[1];
+       unsigned char rtype = ((unsigned char *) reply->msg)[1];
 
        switch (rtype) {
        case TYPE82_RSP_CODE:
@@ -422,22 +422,20 @@ static void zcrypt_cex2a_receive(struct ap_queue *aq,
                .reply_code = REP82_ERROR_MACHINE_FAILURE,
        };
        struct type80_hdr *t80h;
-       int length;
+       int len;
 
        /* Copy the reply message to the request message buffer. */
        if (!reply)
                goto out;       /* ap_msg->rc indicates the error */
-       t80h = reply->message;
+       t80h = reply->msg;
        if (t80h->type == TYPE80_RSP_CODE) {
                if (aq->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A)
-                       length = min_t(int,
-                                      CEX2A_MAX_RESPONSE_SIZE, t80h->len);
+                       len = min_t(int, CEX2A_MAX_RESPONSE_SIZE, t80h->len);
                else
-                       length = min_t(int,
-                                      CEX3A_MAX_RESPONSE_SIZE, t80h->len);
-               memcpy(msg->message, reply->message, length);
+                       len = min_t(int, CEX3A_MAX_RESPONSE_SIZE, t80h->len);
+               memcpy(msg->msg, reply->msg, len);
        } else
-               memcpy(msg->message, reply->message, sizeof(error_reply));
+               memcpy(msg->msg, reply->msg, sizeof(error_reply));
 out:
        complete((struct completion *) msg->private);
 }
@@ -460,12 +458,10 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
 
        ap_init_message(&ap_msg);
        if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
-               ap_msg.message = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE,
-                                        GFP_KERNEL);
+               ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL);
        else
-               ap_msg.message = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE,
-                                        GFP_KERNEL);
-       if (!ap_msg.message)
+               ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL);
+       if (!ap_msg.msg)
                return -ENOMEM;
        ap_msg.receive = zcrypt_cex2a_receive;
        ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
@@ -486,7 +482,7 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
                /* Signal pending. */
                ap_cancel_message(zq->queue, &ap_msg);
 out_free:
-       kfree(ap_msg.message);
+       kfree(ap_msg.msg);
        return rc;
 }
 
@@ -506,12 +502,10 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
 
        ap_init_message(&ap_msg);
        if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
-               ap_msg.message = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE,
-                                        GFP_KERNEL);
+               ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL);
        else
-               ap_msg.message = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE,
-                                        GFP_KERNEL);
-       if (!ap_msg.message)
+               ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL);
+       if (!ap_msg.msg)
                return -ENOMEM;
        ap_msg.receive = zcrypt_cex2a_receive;
        ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
@@ -532,7 +526,7 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
                /* Signal pending. */
                ap_cancel_message(zq->queue, &ap_msg);
 out_free:
-       kfree(ap_msg.message);
+       kfree(ap_msg.msg);
        return rc;
 }
 
index fd1cbb2..d77991c 100644 (file)
@@ -266,7 +266,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
                struct function_and_rules_block fr;
                unsigned short length;
                char text[0];
-       } __packed * msg = ap_msg->message;
+       } __packed * msg = ap_msg->msg;
        int size;
 
        /*
@@ -301,7 +301,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
 
        msg->cprbx.req_parml = size - sizeof(msg->hdr) - sizeof(msg->cprbx);
 
-       ap_msg->length = size;
+       ap_msg->len = size;
        return 0;
 }
 
@@ -336,7 +336,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq,
                struct function_and_rules_block fr;
                unsigned short length;
                char text[0];
-       } __packed * msg = ap_msg->message;
+       } __packed * msg = ap_msg->msg;
        int size;
 
        /*
@@ -370,7 +370,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq,
 
        msg->fr = static_pkd_fnr;
 
-       ap_msg->length = size;
+       ap_msg->len = size;
        return 0;
 }
 
@@ -400,11 +400,11 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
        struct {
                struct type6_hdr hdr;
                struct CPRBX cprbx;
-       } __packed * msg = ap_msg->message;
+       } __packed * msg = ap_msg->msg;
 
        int rcblen = CEIL4(xcRB->request_control_blk_length);
        int replylen, req_sumlen, resp_sumlen;
-       char *req_data = ap_msg->message + sizeof(struct type6_hdr) + rcblen;
+       char *req_data = ap_msg->msg + sizeof(struct type6_hdr) + rcblen;
        char *function_code;
 
        if (CEIL4(xcRB->request_control_blk_length) <
@@ -412,10 +412,10 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
                return -EINVAL; /* overflow after alignment*/
 
        /* length checks */
-       ap_msg->length = sizeof(struct type6_hdr) +
+       ap_msg->len = sizeof(struct type6_hdr) +
                CEIL4(xcRB->request_control_blk_length) +
                xcRB->request_data_length;
-       if (ap_msg->length > MSGTYPE06_MAX_MSG_SIZE)
+       if (ap_msg->len > MSGTYPE06_MAX_MSG_SIZE)
                return -EINVAL;
 
        /*
@@ -480,9 +480,7 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg,
 
        if (memcmp(function_code, "US", 2) == 0
            || memcmp(function_code, "AU", 2) == 0)
-               ap_msg->special = 1;
-       else
-               ap_msg->special = 0;
+               ap_msg->flags |= AP_MSG_FLAG_SPECIAL;
 
        /* copy data block */
        if (xcRB->request_data_length &&
@@ -512,7 +510,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
                struct ep11_cprb cprbx;
                unsigned char   pld_tag;        /* fixed value 0x30 */
                unsigned char   pld_lenfmt;     /* payload length format */
-       } __packed * msg = ap_msg->message;
+       } __packed * msg = ap_msg->msg;
 
        struct pld_hdr {
                unsigned char   func_tag;       /* fixed value 0x4 */
@@ -527,7 +525,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
                return -EINVAL; /* overflow after alignment*/
 
        /* length checks */
-       ap_msg->length = sizeof(struct type6_hdr) + xcRB->req_len;
+       ap_msg->len = sizeof(struct type6_hdr) + xcRB->req_len;
        if (CEIL4(xcRB->req_len) > MSGTYPE06_MAX_MSG_SIZE -
                                   (sizeof(struct type6_hdr)))
                return -EINVAL;
@@ -569,7 +567,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
 
        /* enable special processing based on the cprbs flags special bit */
        if (msg->cprbx.flags & 0x20)
-               ap_msg->special = 1;
+               ap_msg->flags |= AP_MSG_FLAG_SPECIAL;
 
        return 0;
 }
@@ -639,7 +637,7 @@ static int convert_type86_ica(struct zcrypt_queue *zq,
                0x35, 0x9D, 0xD3, 0xD3, 0xA7, 0x9D, 0x5D, 0x41,
                0x6F, 0x65, 0x1B, 0xCF, 0xA9, 0x87, 0x91, 0x09
        };
-       struct type86x_reply *msg = reply->message;
+       struct type86x_reply *msg = reply->msg;
        unsigned short service_rc, service_rs;
        unsigned int reply_len, pad_len;
        char *data;
@@ -713,8 +711,8 @@ static int convert_type86_xcrb(struct zcrypt_queue *zq,
                               struct ap_message *reply,
                               struct ica_xcRB *xcRB)
 {
-       struct type86_fmt2_msg *msg = reply->message;
-       char *data = reply->message;
+       struct type86_fmt2_msg *msg = reply->msg;
+       char *data = reply->msg;
 
        /* Copy CPRB to user */
        if (copy_to_user(xcRB->reply_control_blk_addr,
@@ -744,8 +742,8 @@ static int convert_type86_ep11_xcrb(struct zcrypt_queue *zq,
                                    struct ap_message *reply,
                                    struct ep11_urb *xcRB)
 {
-       struct type86_fmt2_msg *msg = reply->message;
-       char *data = reply->message;
+       struct type86_fmt2_msg *msg = reply->msg;
+       char *data = reply->msg;
 
        if (xcRB->resp_len < msg->fmt2.count1)
                return -EINVAL;
@@ -766,8 +764,8 @@ static int convert_type86_rng(struct zcrypt_queue *zq,
                struct type86_hdr hdr;
                struct type86_fmt2_ext fmt2;
                struct CPRBX cprbx;
-       } __packed * msg = reply->message;
-       char *data = reply->message;
+       } __packed * msg = reply->msg;
+       char *data = reply->msg;
 
        if (msg->cprbx.ccp_rtcode != 0 || msg->cprbx.ccp_rscode != 0)
                return -EINVAL;
@@ -780,7 +778,7 @@ static int convert_response_ica(struct zcrypt_queue *zq,
                            char __user *outputdata,
                            unsigned int outputdatalength)
 {
-       struct type86x_reply *msg = reply->message;
+       struct type86x_reply *msg = reply->msg;
 
        switch (msg->hdr.type) {
        case TYPE82_RSP_CODE:
@@ -820,7 +818,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq,
                            struct ap_message *reply,
                            struct ica_xcRB *xcRB)
 {
-       struct type86x_reply *msg = reply->message;
+       struct type86x_reply *msg = reply->msg;
 
        switch (msg->hdr.type) {
        case TYPE82_RSP_CODE:
@@ -853,7 +851,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq,
 static int convert_response_ep11_xcrb(struct zcrypt_queue *zq,
        struct ap_message *reply, struct ep11_urb *xcRB)
 {
-       struct type86_ep11_reply *msg = reply->message;
+       struct type86_ep11_reply *msg = reply->msg;
 
        switch (msg->hdr.type) {
        case TYPE82_RSP_CODE:
@@ -883,7 +881,7 @@ static int convert_response_rng(struct zcrypt_queue *zq,
                                 struct ap_message *reply,
                                 char *data)
 {
-       struct type86x_reply *msg = reply->message;
+       struct type86x_reply *msg = reply->msg;
 
        switch (msg->hdr.type) {
        case TYPE82_RSP_CODE:
@@ -928,32 +926,30 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq,
        struct response_type *resp_type =
                (struct response_type *) msg->private;
        struct type86x_reply *t86r;
-       int length;
+       int len;
 
        /* Copy the reply message to the request message buffer. */
        if (!reply)
                goto out;       /* ap_msg->rc indicates the error */
-       t86r = reply->message;
+       t86r = reply->msg;
        if (t86r->hdr.type == TYPE86_RSP_CODE &&
                 t86r->cprbx.cprb_ver_id == 0x02) {
                switch (resp_type->type) {
                case CEXXC_RESPONSE_TYPE_ICA:
-                       length = sizeof(struct type86x_reply)
-                               + t86r->length - 2;
-                       length = min(CEXXC_MAX_ICA_RESPONSE_SIZE, length);
-                       memcpy(msg->message, reply->message, length);
+                       len = sizeof(struct type86x_reply) + t86r->length - 2;
+                       len = min_t(int, CEXXC_MAX_ICA_RESPONSE_SIZE, len);
+                       memcpy(msg->msg, reply->msg, len);
                        break;
                case CEXXC_RESPONSE_TYPE_XCRB:
-                       length = t86r->fmt2.offset2 + t86r->fmt2.count2;
-                       length = min(MSGTYPE06_MAX_MSG_SIZE, length);
-                       memcpy(msg->message, reply->message, length);
+                       len = t86r->fmt2.offset2 + t86r->fmt2.count2;
+                       len = min_t(int, MSGTYPE06_MAX_MSG_SIZE, len);
+                       memcpy(msg->msg, reply->msg, len);
                        break;
                default:
-                       memcpy(msg->message, &error_reply,
-                              sizeof(error_reply));
+                       memcpy(msg->msg, &error_reply, sizeof(error_reply));
                }
        } else
-               memcpy(msg->message, reply->message, sizeof(error_reply));
+               memcpy(msg->msg, reply->msg, sizeof(error_reply));
 out:
        complete(&(resp_type->work));
 }
@@ -977,25 +973,25 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq,
        struct response_type *resp_type =
                (struct response_type *)msg->private;
        struct type86_ep11_reply *t86r;
-       int length;
+       int len;
 
        /* Copy the reply message to the request message buffer. */
        if (!reply)
                goto out;       /* ap_msg->rc indicates the error */
-       t86r = reply->message;
+       t86r = reply->msg;
        if (t86r->hdr.type == TYPE86_RSP_CODE &&
            t86r->cprbx.cprb_ver_id == 0x04) {
                switch (resp_type->type) {
                case CEXXC_RESPONSE_TYPE_EP11:
-                       length = t86r->fmt2.offset1 + t86r->fmt2.count1;
-                       length = min(MSGTYPE06_MAX_MSG_SIZE, length);
-                       memcpy(msg->message, reply->message, length);
+                       len = t86r->fmt2.offset1 + t86r->fmt2.count1;
+                       len = min_t(int, MSGTYPE06_MAX_MSG_SIZE, len);
+                       memcpy(msg->msg, reply->msg, len);
                        break;
                default:
-                       memcpy(msg->message, &error_reply, sizeof(error_reply));
+                       memcpy(msg->msg, &error_reply, sizeof(error_reply));
                }
        } else {
-               memcpy(msg->message, reply->message, sizeof(error_reply));
+               memcpy(msg->msg, reply->msg, sizeof(error_reply));
        }
 out:
        complete(&(resp_type->work));
@@ -1020,8 +1016,8 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
        int rc;
 
        ap_init_message(&ap_msg);
-       ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
-       if (!ap_msg.message)
+       ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL);
+       if (!ap_msg.msg)
                return -ENOMEM;
        ap_msg.receive = zcrypt_msgtype6_receive;
        ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
@@ -1043,7 +1039,7 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq,
                /* Signal pending. */
                ap_cancel_message(zq->queue, &ap_msg);
 out_free:
-       free_page((unsigned long) ap_msg.message);
+       free_page((unsigned long) ap_msg.msg);
        return rc;
 }
 
@@ -1064,8 +1060,8 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
        int rc;
 
        ap_init_message(&ap_msg);
-       ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
-       if (!ap_msg.message)
+       ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL);
+       if (!ap_msg.msg)
                return -ENOMEM;
        ap_msg.receive = zcrypt_msgtype6_receive;
        ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
@@ -1088,7 +1084,7 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq,
                ap_cancel_message(zq->queue, &ap_msg);
        }
 out_free:
-       free_page((unsigned long) ap_msg.message);
+       free_page((unsigned long) ap_msg.msg);
        return rc;
 }
 
@@ -1107,8 +1103,8 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB,
                .type = CEXXC_RESPONSE_TYPE_XCRB,
        };
 
-       ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
-       if (!ap_msg->message)
+       ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
+       if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->receive = zcrypt_msgtype6_receive;
        ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
@@ -1162,8 +1158,8 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb,
                .type = CEXXC_RESPONSE_TYPE_EP11,
        };
 
-       ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
-       if (!ap_msg->message)
+       ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
+       if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->receive = zcrypt_msgtype6_receive_ep11;
        ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
@@ -1193,7 +1189,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_queue *zq,
                struct ep11_cprb cprbx;
                unsigned char   pld_tag;        /* fixed value 0x30 */
                unsigned char   pld_lenfmt;     /* payload length format */
-       } __packed * msg = ap_msg->message;
+       } __packed * msg = ap_msg->msg;
        struct pld_hdr {
                unsigned char   func_tag;       /* fixed value 0x4 */
                unsigned char   func_len;       /* fixed value 0x4 */
@@ -1256,8 +1252,8 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code,
                .type = CEXXC_RESPONSE_TYPE_XCRB,
        };
 
-       ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
-       if (!ap_msg->message)
+       ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
+       if (!ap_msg->msg)
                return -ENOMEM;
        ap_msg->receive = zcrypt_msgtype6_receive;
        ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
@@ -1290,7 +1286,7 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq,
                char rule[8];
                short int verb_length;
                short int key_length;
-       } __packed * msg = ap_msg->message;
+       } __packed * msg = ap_msg->msg;
        struct response_type *rtype = (struct response_type *)(ap_msg->private);
        int rc;
 
index 41a0df5..0de280a 100644 (file)
@@ -127,7 +127,7 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg,
                char rule[8];
                short int verb_length;
                short int key_length;
-       } __packed * msg = ap_msg->message;
+       } __packed * msg = ap_msg->msg;
        static struct type6_hdr static_type6_hdrX = {
                .type           = 0x06,
                .offset1        = 0x00000058,
@@ -154,7 +154,7 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg,
        memcpy(msg->rule, "RANDOM  ", 8);
        msg->verb_length = 0x02;
        msg->key_length = 0x02;
-       ap_msg->length = sizeof(*msg);
+       ap_msg->len = sizeof(*msg);
        *domain = (unsigned short)msg->cprbx.domain;
 }
 
index b7d9fa5..8bae6ad 100644 (file)
@@ -107,10 +107,10 @@ struct zcrypt_queue *zcrypt_queue_alloc(size_t max_response_size)
        zq = kzalloc(sizeof(struct zcrypt_queue), GFP_KERNEL);
        if (!zq)
                return NULL;
-       zq->reply.message = kmalloc(max_response_size, GFP_KERNEL);
-       if (!zq->reply.message)
+       zq->reply.msg = kmalloc(max_response_size, GFP_KERNEL);
+       if (!zq->reply.msg)
                goto out_free;
-       zq->reply.length = max_response_size;
+       zq->reply.len = max_response_size;
        INIT_LIST_HEAD(&zq->list);
        kref_init(&zq->refcount);
        return zq;
@@ -123,7 +123,7 @@ EXPORT_SYMBOL(zcrypt_queue_alloc);
 
 void zcrypt_queue_free(struct zcrypt_queue *zq)
 {
-       kfree(zq->reply.message);
+       kfree(zq->reply.msg);
        kfree(zq);
 }
 EXPORT_SYMBOL(zcrypt_queue_free);
index 06056e9..c866a4f 100644 (file)
@@ -1594,31 +1594,23 @@ static blk_status_t scsi_mq_prep_fn(struct request *req)
 
 static void scsi_mq_done(struct scsi_cmnd *cmd)
 {
+       if (unlikely(blk_should_fake_timeout(cmd->request->q)))
+               return;
        if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state)))
                return;
        trace_scsi_dispatch_cmd_done(cmd);
-
-       /*
-        * If the block layer didn't complete the request due to a timeout
-        * injection, scsi must clear its internal completed state so that the
-        * timeout handler will see it needs to escalate its own error
-        * recovery.
-        */
-       if (unlikely(!blk_mq_complete_request(cmd->request)))
-               clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
+       blk_mq_complete_request(cmd->request);
 }
 
-static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
+static void scsi_mq_put_budget(struct request_queue *q)
 {
-       struct request_queue *q = hctx->queue;
        struct scsi_device *sdev = q->queuedata;
 
        atomic_dec(&sdev->device_busy);
 }
 
-static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
+static bool scsi_mq_get_budget(struct request_queue *q)
 {
-       struct request_queue *q = hctx->queue;
        struct scsi_device *sdev = q->queuedata;
 
        return scsi_dev_queue_ready(q, sdev);
@@ -1685,7 +1677,7 @@ out_dec_target_busy:
        if (scsi_target(sdev)->can_queue > 0)
                atomic_dec(&scsi_target(sdev)->target_busy);
 out_put_budget:
-       scsi_mq_put_budget(hctx);
+       scsi_mq_put_budget(q);
        switch (ret) {
        case BLK_STS_OK:
                break;
index 7c95afa..a8e39b2 100644 (file)
@@ -403,7 +403,6 @@ static const struct sysrq_key_op sysrq_moom_op = {
        .enable_mask    = SYSRQ_ENABLE_SIGNAL,
 };
 
-#ifdef CONFIG_BLOCK
 static void sysrq_handle_thaw(int key)
 {
        emergency_thaw_all();
@@ -414,7 +413,6 @@ static const struct sysrq_key_op sysrq_thaw_op = {
        .action_msg     = "Emergency Thaw of all frozen filesystems",
        .enable_mask    = SYSRQ_ENABLE_SIGNAL,
 };
-#endif
 
 static void sysrq_handle_kill(int key)
 {
index 6fb4d7e..b22adf0 100644 (file)
@@ -1215,7 +1215,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                        continue;
                }
 
-               switch (v_req.type) {
+               switch (vhost32_to_cpu(vq, v_req.type)) {
                case VIRTIO_SCSI_T_TMF:
                        vc.req = &v_req.tmf;
                        vc.req_size = sizeof(struct virtio_scsi_ctrl_tmf_req);
index d7b8df3..74d135e 100644 (file)
@@ -2092,11 +2092,6 @@ static int get_indirect(struct vhost_virtqueue *vq,
                return ret;
        }
        iov_iter_init(&from, READ, vq->indirect, ret, len);
-
-       /* We will use the result as an address to read from, so most
-        * architectures only need a compiler barrier here. */
-       read_barrier_depends();
-
        count = len / sizeof desc;
        /* Buffers are chained via a 16 bit next field, so
         * we can have at most 2^16 of these. */
index 0f559ae..32a2698 100644 (file)
@@ -2198,17 +2198,6 @@ config FB_BROADSHEET
          and could also have been called by other names when coupled with
          a bridge adapter.
 
-config FB_PUV3_UNIGFX
-       tristate "PKUnity v3 Unigfx framebuffer support"
-       depends on FB && UNICORE32 && ARCH_PUV3
-       select FB_SYS_FILLRECT
-       select FB_SYS_COPYAREA
-       select FB_SYS_IMAGEBLIT
-       select FB_SYS_FOPS
-       help
-         Choose this option if you want to use the Unigfx device as a
-         framebuffer device. Without the support of PCI & AGP.
-
 config FB_HYPERV
        tristate "Microsoft Hyper-V Synthetic Video support"
        depends on FB && HYPERV
index aa63527..a0705b9 100644 (file)
@@ -116,7 +116,6 @@ obj-y                             += omap2/
 obj-$(CONFIG_XEN_FBDEV_FRONTEND)  += xen-fbfront.o
 obj-$(CONFIG_FB_CARMINE)          += carminefb.o
 obj-$(CONFIG_FB_MB862XX)         += mb862xx/
-obj-$(CONFIG_FB_PUV3_UNIGFX)      += fb-puv3.o
 obj-$(CONFIG_FB_HYPERV)                  += hyperv_fb.o
 obj-$(CONFIG_FB_OPENCORES)       += ocfb.o
 obj-$(CONFIG_FB_SM712)           += sm712fb.o
diff --git a/drivers/video/fbdev/fb-puv3.c b/drivers/video/fbdev/fb-puv3.c
deleted file mode 100644 (file)
index 030e85c..0000000
+++ /dev/null
@@ -1,836 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Frame Buffer Driver for PKUnity-v3 Unigfx
- * Code specific to PKUnity SoC and UniCore ISA
- *
- *     Maintained by GUAN Xue-tao <gxt@mprc.pku.edu.cn>
- *     Copyright (C) 2001-2010 Guan Xuetao
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/fb.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/mm.h>
-
-#include <linux/sizes.h>
-#include <mach/hardware.h>
-
-/* Platform_data reserved for unifb registers. */
-#define UNIFB_REGS_NUM         10
-/* RAM reserved for the frame buffer. */
-#define UNIFB_MEMSIZE          (SZ_4M)         /* 4 MB for 1024*768*32b */
-
-/*
- * cause UNIGFX don not have EDID
- * all the modes are organized as follow
- */
-static const struct fb_videomode unifb_modes[] = {
-       /* 0 640x480-60 VESA */
-       { "640x480@60",  60,  640, 480,  25175000,  48, 16, 34, 10,  96, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 1 640x480-75 VESA */
-       { "640x480@75",  75,  640, 480,  31500000, 120, 16, 18,  1,  64, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 2 800x600-60 VESA */
-       { "800x600@60",  60,  800, 600,  40000000,  88, 40, 26,  1, 128, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 3 800x600-75 VESA */
-       { "800x600@75",  75,  800, 600,  49500000, 160, 16, 23,  1,  80, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 4 1024x768-60 VESA */
-       { "1024x768@60", 60, 1024, 768,  65000000, 160, 24, 34,  3, 136, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 5 1024x768-75 VESA */
-       { "1024x768@75", 75, 1024, 768,  78750000, 176, 16, 30,  1,  96, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 6 1280x960-60 VESA */
-       { "1280x960@60", 60, 1280, 960, 108000000, 312, 96, 38,  1, 112, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 7 1440x900-60 VESA */
-       { "1440x900@60", 60, 1440, 900, 106500000, 232, 80, 30,  3, 152, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 8 FIXME 9 1024x600-60 VESA UNTESTED */
-       { "1024x600@60", 60, 1024, 600,  50650000, 160, 24, 26,  1, 136, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 9 FIXME 10 1024x600-75 VESA UNTESTED */
-       { "1024x600@75", 75, 1024, 600,  61500000, 176, 16, 23,  1,  96, 1,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-       /* 10 FIXME 11 1366x768-60 VESA UNTESTED */
-       { "1366x768@60", 60, 1366, 768,  85500000, 256, 58, 18,  1,  112, 3,
-         0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
-};
-
-static const struct fb_var_screeninfo unifb_default = {
-       .xres =         640,
-       .yres =         480,
-       .xres_virtual = 640,
-       .yres_virtual = 480,
-       .bits_per_pixel = 16,
-       .red =          { 11, 5, 0 },
-       .green =        { 5,  6, 0 },
-       .blue =         { 0,  5, 0 },
-       .activate =     FB_ACTIVATE_NOW,
-       .height =       -1,
-       .width =        -1,
-       .pixclock =     25175000,
-       .left_margin =  48,
-       .right_margin = 16,
-       .upper_margin = 33,
-       .lower_margin = 10,
-       .hsync_len =    96,
-       .vsync_len =    2,
-       .vmode =        FB_VMODE_NONINTERLACED,
-};
-
-static struct fb_fix_screeninfo unifb_fix = {
-       .id =           "UNIGFX FB",
-       .type =         FB_TYPE_PACKED_PIXELS,
-       .visual =       FB_VISUAL_TRUECOLOR,
-       .xpanstep =     1,
-       .ypanstep =     1,
-       .ywrapstep =    1,
-       .accel =        FB_ACCEL_NONE,
-};
-
-static void unifb_sync(struct fb_info *info)
-{
-       /* TODO: may, this can be replaced by interrupt */
-       int cnt;
-
-       for (cnt = 0; cnt < 0x10000000; cnt++) {
-               if (readl(UGE_COMMAND) & 0x1000000)
-                       return;
-       }
-
-       if (cnt > 0x8000000)
-               dev_warn(info->device, "Warning: UniGFX GE time out ...\n");
-}
-
-static void unifb_prim_fillrect(struct fb_info *info,
-                               const struct fb_fillrect *region)
-{
-       int awidth = region->width;
-       int aheight = region->height;
-       int m_iBpp = info->var.bits_per_pixel;
-       int screen_width = info->var.xres;
-       int src_sel = 1;        /* from fg_color */
-       int pat_sel = 1;
-       int src_x0 = 0;
-       int dst_x0 = region->dx;
-       int src_y0 = 0;
-       int dst_y0 = region->dy;
-       int rop_alpha_sel = 0;
-       int rop_alpha_code = 0xCC;
-       int x_dir = 1;
-       int y_dir = 1;
-       int alpha_r = 0;
-       int alpha_sel = 0;
-       int dst_pitch = screen_width * (m_iBpp / 8);
-       int dst_offset = dst_y0 * dst_pitch + dst_x0 * (m_iBpp / 8);
-       int src_pitch = screen_width * (m_iBpp / 8);
-       int src_offset = src_y0 * src_pitch + src_x0 * (m_iBpp / 8);
-       unsigned int command = 0;
-       int clip_region = 0;
-       int clip_en = 0;
-       int tp_en = 0;
-       int fg_color = 0;
-       int bottom = info->var.yres - 1;
-       int right = info->var.xres - 1;
-       int top = 0;
-
-       bottom = (bottom << 16) | right;
-       command = (rop_alpha_sel << 26) | (pat_sel << 18) | (src_sel << 16)
-               | (x_dir << 20) | (y_dir << 21) | (command << 24)
-               | (clip_region << 23) | (clip_en << 22) | (tp_en << 27);
-       src_pitch = (dst_pitch << 16) | src_pitch;
-       awidth = awidth | (aheight << 16);
-       alpha_r = ((rop_alpha_code & 0xff) << 8) | (alpha_r & 0xff)
-               | (alpha_sel << 16);
-       src_x0 = (src_x0 & 0x1fff) | ((src_y0 & 0x1fff) << 16);
-       dst_x0 = (dst_x0 & 0x1fff) | ((dst_y0 & 0x1fff) << 16);
-       fg_color = region->color;
-
-       unifb_sync(info);
-
-       writel(((u32 *)(info->pseudo_palette))[fg_color], UGE_FCOLOR);
-       writel(0, UGE_BCOLOR);
-       writel(src_pitch, UGE_PITCH);
-       writel(src_offset, UGE_SRCSTART);
-       writel(dst_offset, UGE_DSTSTART);
-       writel(awidth, UGE_WIDHEIGHT);
-       writel(top, UGE_CLIP0);
-       writel(bottom, UGE_CLIP1);
-       writel(alpha_r, UGE_ROPALPHA);
-       writel(src_x0, UGE_SRCXY);
-       writel(dst_x0, UGE_DSTXY);
-       writel(command, UGE_COMMAND);
-}
-
-static void unifb_fillrect(struct fb_info *info,
-               const struct fb_fillrect *region)
-{
-       struct fb_fillrect modded;
-       int vxres, vyres;
-
-       if (info->flags & FBINFO_HWACCEL_DISABLED) {
-               sys_fillrect(info, region);
-               return;
-       }
-
-       vxres = info->var.xres_virtual;
-       vyres = info->var.yres_virtual;
-
-       memcpy(&modded, region, sizeof(struct fb_fillrect));
-
-       if (!modded.width || !modded.height ||
-           modded.dx >= vxres || modded.dy >= vyres)
-               return;
-
-       if (modded.dx + modded.width > vxres)
-               modded.width = vxres - modded.dx;
-       if (modded.dy + modded.height > vyres)
-               modded.height = vyres - modded.dy;
-
-       unifb_prim_fillrect(info, &modded);
-}
-
-static void unifb_prim_copyarea(struct fb_info *info,
-                               const struct fb_copyarea *area)
-{
-       int awidth = area->width;
-       int aheight = area->height;
-       int m_iBpp = info->var.bits_per_pixel;
-       int screen_width = info->var.xres;
-       int src_sel = 2;        /* from mem */
-       int pat_sel = 0;
-       int src_x0 = area->sx;
-       int dst_x0 = area->dx;
-       int src_y0 = area->sy;
-       int dst_y0 = area->dy;
-
-       int rop_alpha_sel = 0;
-       int rop_alpha_code = 0xCC;
-       int x_dir = 1;
-       int y_dir = 1;
-
-       int alpha_r = 0;
-       int alpha_sel = 0;
-       int dst_pitch = screen_width * (m_iBpp / 8);
-       int dst_offset = dst_y0 * dst_pitch + dst_x0 * (m_iBpp / 8);
-       int src_pitch = screen_width * (m_iBpp / 8);
-       int src_offset = src_y0 * src_pitch + src_x0 * (m_iBpp / 8);
-       unsigned int command = 0;
-       int clip_region = 0;
-       int clip_en = 1;
-       int tp_en = 0;
-       int top = 0;
-       int bottom = info->var.yres;
-       int right = info->var.xres;
-       int fg_color = 0;
-       int bg_color = 0;
-
-       if (src_x0 < 0)
-               src_x0 = 0;
-       if (src_y0 < 0)
-               src_y0 = 0;
-
-       if (src_y0 - dst_y0 > 0) {
-               y_dir = 1;
-       } else {
-               y_dir = 0;
-               src_offset = (src_y0 + aheight) * src_pitch +
-                               src_x0 * (m_iBpp / 8);
-               dst_offset = (dst_y0 + aheight) * dst_pitch +
-                               dst_x0 * (m_iBpp / 8);
-               src_y0 += aheight;
-               dst_y0 += aheight;
-       }
-
-       command = (rop_alpha_sel << 26) | (pat_sel << 18) | (src_sel << 16) |
-               (x_dir << 20) | (y_dir << 21) | (command << 24) |
-               (clip_region << 23) | (clip_en << 22) | (tp_en << 27);
-       src_pitch = (dst_pitch << 16) | src_pitch;
-       awidth = awidth | (aheight << 16);
-       alpha_r = ((rop_alpha_code & 0xff) << 8) | (alpha_r & 0xff) |
-               (alpha_sel << 16);
-       src_x0 = (src_x0 & 0x1fff) | ((src_y0 & 0x1fff) << 16);
-       dst_x0 = (dst_x0 & 0x1fff) | ((dst_y0 & 0x1fff) << 16);
-       bottom = (bottom << 16) | right;
-
-       unifb_sync(info);
-
-       writel(src_pitch, UGE_PITCH);
-       writel(src_offset, UGE_SRCSTART);
-       writel(dst_offset, UGE_DSTSTART);
-       writel(awidth, UGE_WIDHEIGHT);
-       writel(top, UGE_CLIP0);
-       writel(bottom, UGE_CLIP1);
-       writel(bg_color, UGE_BCOLOR);
-       writel(fg_color, UGE_FCOLOR);
-       writel(alpha_r, UGE_ROPALPHA);
-       writel(src_x0, UGE_SRCXY);
-       writel(dst_x0, UGE_DSTXY);
-       writel(command, UGE_COMMAND);
-}
-
-static void unifb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
-{
-       struct fb_copyarea modded;
-       u32 vxres, vyres;
-       modded.sx = area->sx;
-       modded.sy = area->sy;
-       modded.dx = area->dx;
-       modded.dy = area->dy;
-       modded.width = area->width;
-       modded.height = area->height;
-
-       if (info->flags & FBINFO_HWACCEL_DISABLED) {
-               sys_copyarea(info, area);
-               return;
-       }
-
-       vxres = info->var.xres_virtual;
-       vyres = info->var.yres_virtual;
-
-       if (!modded.width || !modded.height ||
-           modded.sx >= vxres || modded.sy >= vyres ||
-           modded.dx >= vxres || modded.dy >= vyres)
-               return;
-
-       if (modded.sx + modded.width > vxres)
-               modded.width = vxres - modded.sx;
-       if (modded.dx + modded.width > vxres)
-               modded.width = vxres - modded.dx;
-       if (modded.sy + modded.height > vyres)
-               modded.height = vyres - modded.sy;
-       if (modded.dy + modded.height > vyres)
-               modded.height = vyres - modded.dy;
-
-       unifb_prim_copyarea(info, &modded);
-}
-
-static void unifb_imageblit(struct fb_info *info, const struct fb_image *image)
-{
-       sys_imageblit(info, image);
-}
-
-static u_long get_line_length(int xres_virtual, int bpp)
-{
-       u_long length;
-
-       length = xres_virtual * bpp;
-       length = (length + 31) & ~31;
-       length >>= 3;
-       return length;
-}
-
-/*
- *  Setting the video mode has been split into two parts.
- *  First part, xxxfb_check_var, must not write anything
- *  to hardware, it should only verify and adjust var.
- *  This means it doesn't alter par but it does use hardware
- *  data from it to check this var.
- */
-static int unifb_check_var(struct fb_var_screeninfo *var,
-                        struct fb_info *info)
-{
-       u_long line_length;
-
-       /*
-        *  FB_VMODE_CONUPDATE and FB_VMODE_SMOOTH_XPAN are equal!
-        *  as FB_VMODE_SMOOTH_XPAN is only used internally
-        */
-
-       if (var->vmode & FB_VMODE_CONUPDATE) {
-               var->vmode |= FB_VMODE_YWRAP;
-               var->xoffset = info->var.xoffset;
-               var->yoffset = info->var.yoffset;
-       }
-
-       /*
-        *  Some very basic checks
-        */
-       if (!var->xres)
-               var->xres = 1;
-       if (!var->yres)
-               var->yres = 1;
-       if (var->xres > var->xres_virtual)
-               var->xres_virtual = var->xres;
-       if (var->yres > var->yres_virtual)
-               var->yres_virtual = var->yres;
-       if (var->bits_per_pixel <= 1)
-               var->bits_per_pixel = 1;
-       else if (var->bits_per_pixel <= 8)
-               var->bits_per_pixel = 8;
-       else if (var->bits_per_pixel <= 16)
-               var->bits_per_pixel = 16;
-       else if (var->bits_per_pixel <= 24)
-               var->bits_per_pixel = 24;
-       else if (var->bits_per_pixel <= 32)
-               var->bits_per_pixel = 32;
-       else
-               return -EINVAL;
-
-       if (var->xres_virtual < var->xoffset + var->xres)
-               var->xres_virtual = var->xoffset + var->xres;
-       if (var->yres_virtual < var->yoffset + var->yres)
-               var->yres_virtual = var->yoffset + var->yres;
-
-       /*
-        *  Memory limit
-        */
-       line_length =
-           get_line_length(var->xres_virtual, var->bits_per_pixel);
-       if (line_length * var->yres_virtual > UNIFB_MEMSIZE)
-               return -ENOMEM;
-
-       /*
-        * Now that we checked it we alter var. The reason being is that the
-        * video mode passed in might not work but slight changes to it might
-        * make it work. This way we let the user know what is acceptable.
-        */
-       switch (var->bits_per_pixel) {
-       case 1:
-       case 8:
-               var->red.offset = 0;
-               var->red.length = 8;
-               var->green.offset = 0;
-               var->green.length = 8;
-               var->blue.offset = 0;
-               var->blue.length = 8;
-               var->transp.offset = 0;
-               var->transp.length = 0;
-               break;
-       case 16:                /* RGBA 5551 */
-               if (var->transp.length) {
-                       var->red.offset = 0;
-                       var->red.length = 5;
-                       var->green.offset = 5;
-                       var->green.length = 5;
-                       var->blue.offset = 10;
-                       var->blue.length = 5;
-                       var->transp.offset = 15;
-                       var->transp.length = 1;
-               } else {        /* RGB 565 */
-                       var->red.offset = 11;
-                       var->red.length = 5;
-                       var->green.offset = 5;
-                       var->green.length = 6;
-                       var->blue.offset = 0;
-                       var->blue.length = 5;
-                       var->transp.offset = 0;
-                       var->transp.length = 0;
-               }
-               break;
-       case 24:                /* RGB 888 */
-               var->red.offset = 0;
-               var->red.length = 8;
-               var->green.offset = 8;
-               var->green.length = 8;
-               var->blue.offset = 16;
-               var->blue.length = 8;
-               var->transp.offset = 0;
-               var->transp.length = 0;
-               break;
-       case 32:                /* RGBA 8888 */
-               var->red.offset = 16;
-               var->red.length = 8;
-               var->green.offset = 8;
-               var->green.length = 8;
-               var->blue.offset = 0;
-               var->blue.length = 8;
-               var->transp.offset = 24;
-               var->transp.length = 8;
-               break;
-       }
-       var->red.msb_right = 0;
-       var->green.msb_right = 0;
-       var->blue.msb_right = 0;
-       var->transp.msb_right = 0;
-
-       return 0;
-}
-
-/*
- * This routine actually sets the video mode. It's in here where we
- * the hardware state info->par and fix which can be affected by the
- * change in par. For this driver it doesn't do much.
- */
-static int unifb_set_par(struct fb_info *info)
-{
-       int hTotal, vTotal, hSyncStart, hSyncEnd, vSyncStart, vSyncEnd;
-       int format;
-
-#ifdef CONFIG_PUV3_PM
-       struct clk *clk_vga;
-       u32 pixclk = 0;
-       int i;
-
-       for (i = 0; i <= 10; i++) {
-               if    (info->var.xres         == unifb_modes[i].xres
-                   && info->var.yres         == unifb_modes[i].yres
-                   && info->var.upper_margin == unifb_modes[i].upper_margin
-                   && info->var.lower_margin == unifb_modes[i].lower_margin
-                   && info->var.left_margin  == unifb_modes[i].left_margin
-                   && info->var.right_margin == unifb_modes[i].right_margin
-                   && info->var.hsync_len    == unifb_modes[i].hsync_len
-                   && info->var.vsync_len    == unifb_modes[i].vsync_len) {
-                       pixclk = unifb_modes[i].pixclock;
-                       break;
-               }
-       }
-
-       /* set clock rate */
-       clk_vga = clk_get(info->device, "VGA_CLK");
-       if (clk_vga == ERR_PTR(-ENOENT))
-               return -ENOENT;
-
-       if (pixclk != 0) {
-               if (clk_set_rate(clk_vga, pixclk)) { /* set clock failed */
-                       info->fix = unifb_fix;
-                       info->var = unifb_default;
-                       if (clk_set_rate(clk_vga, unifb_default.pixclock))
-                               return -EINVAL;
-               }
-       }
-#endif
-
-       info->fix.line_length = get_line_length(info->var.xres_virtual,
-                                               info->var.bits_per_pixel);
-
-       hSyncStart = info->var.xres + info->var.right_margin;
-       hSyncEnd = hSyncStart + info->var.hsync_len;
-       hTotal = hSyncEnd + info->var.left_margin;
-
-       vSyncStart = info->var.yres + info->var.lower_margin;
-       vSyncEnd = vSyncStart + info->var.vsync_len;
-       vTotal = vSyncEnd + info->var.upper_margin;
-
-       switch (info->var.bits_per_pixel) {
-       case 8:
-               format = UDE_CFG_DST8;
-               break;
-       case 16:
-               format = UDE_CFG_DST16;
-               break;
-       case 24:
-               format = UDE_CFG_DST24;
-               break;
-       case 32:
-               format = UDE_CFG_DST32;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       writel(info->fix.smem_start, UDE_FSA);
-       writel(info->var.yres, UDE_LS);
-       writel(get_line_length(info->var.xres,
-                       info->var.bits_per_pixel) >> 3, UDE_PS);
-                       /* >> 3 for hardware required. */
-       writel((hTotal << 16) | (info->var.xres), UDE_HAT);
-       writel(((hTotal - 1) << 16) | (info->var.xres - 1), UDE_HBT);
-       writel(((hSyncEnd - 1) << 16) | (hSyncStart - 1), UDE_HST);
-       writel((vTotal << 16) | (info->var.yres), UDE_VAT);
-       writel(((vTotal - 1) << 16) | (info->var.yres - 1), UDE_VBT);
-       writel(((vSyncEnd - 1) << 16) | (vSyncStart - 1), UDE_VST);
-       writel(UDE_CFG_GDEN_ENABLE | UDE_CFG_TIMEUP_ENABLE
-                       | format | 0xC0000001, UDE_CFG);
-
-       return 0;
-}
-
-/*
- *  Set a single color register. The values supplied are already
- *  rounded down to the hardware's capabilities (according to the
- *  entries in the var structure). Return != 0 for invalid regno.
- */
-static int unifb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
-                        u_int transp, struct fb_info *info)
-{
-       if (regno >= 256)       /* no. of hw registers */
-               return 1;
-
-       /* grayscale works only partially under directcolor */
-       if (info->var.grayscale) {
-               /* grayscale = 0.30*R + 0.59*G + 0.11*B */
-               red = green = blue =
-                   (red * 77 + green * 151 + blue * 28) >> 8;
-       }
-
-#define CNVT_TOHW(val, width) ((((val)<<(width))+0x7FFF-(val))>>16)
-       switch (info->fix.visual) {
-       case FB_VISUAL_TRUECOLOR:
-       case FB_VISUAL_PSEUDOCOLOR:
-               red = CNVT_TOHW(red, info->var.red.length);
-               green = CNVT_TOHW(green, info->var.green.length);
-               blue = CNVT_TOHW(blue, info->var.blue.length);
-               transp = CNVT_TOHW(transp, info->var.transp.length);
-               break;
-       case FB_VISUAL_DIRECTCOLOR:
-               red = CNVT_TOHW(red, 8);        /* expect 8 bit DAC */
-               green = CNVT_TOHW(green, 8);
-               blue = CNVT_TOHW(blue, 8);
-               /* hey, there is bug in transp handling... */
-               transp = CNVT_TOHW(transp, 8);
-               break;
-       }
-#undef CNVT_TOHW
-       /* Truecolor has hardware independent palette */
-       if (info->fix.visual == FB_VISUAL_TRUECOLOR) {
-               u32 v;
-
-               if (regno >= 16)
-                       return 1;
-
-               v = (red << info->var.red.offset) |
-                   (green << info->var.green.offset) |
-                   (blue << info->var.blue.offset) |
-                   (transp << info->var.transp.offset);
-               switch (info->var.bits_per_pixel) {
-               case 8:
-                       break;
-               case 16:
-               case 24:
-               case 32:
-                       ((u32 *) (info->pseudo_palette))[regno] = v;
-                       break;
-               default:
-                       return 1;
-               }
-               return 0;
-       }
-       return 0;
-}
-
-/*
- *  Pan or Wrap the Display
- *
- *  This call looks only at xoffset, yoffset and the FB_VMODE_YWRAP flag
- */
-static int unifb_pan_display(struct fb_var_screeninfo *var,
-                          struct fb_info *info)
-{
-       if (var->vmode & FB_VMODE_YWRAP) {
-               if (var->yoffset < 0
-                   || var->yoffset >= info->var.yres_virtual
-                   || var->xoffset)
-                       return -EINVAL;
-       } else {
-               if (var->xoffset + info->var.xres > info->var.xres_virtual ||
-                   var->yoffset + info->var.yres > info->var.yres_virtual)
-                       return -EINVAL;
-       }
-       info->var.xoffset = var->xoffset;
-       info->var.yoffset = var->yoffset;
-       if (var->vmode & FB_VMODE_YWRAP)
-               info->var.vmode |= FB_VMODE_YWRAP;
-       else
-               info->var.vmode &= ~FB_VMODE_YWRAP;
-       return 0;
-}
-
-int unifb_mmap(struct fb_info *info,
-                   struct vm_area_struct *vma)
-{
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-       return vm_iomap_memory(vma, info->fix.smem_start, info->fix.smem_len);
-}
-
-static const struct fb_ops unifb_ops = {
-       .fb_read        = fb_sys_read,
-       .fb_write       = fb_sys_write,
-       .fb_check_var   = unifb_check_var,
-       .fb_set_par     = unifb_set_par,
-       .fb_setcolreg   = unifb_setcolreg,
-       .fb_pan_display = unifb_pan_display,
-       .fb_fillrect    = unifb_fillrect,
-       .fb_copyarea    = unifb_copyarea,
-       .fb_imageblit   = unifb_imageblit,
-       .fb_mmap        = unifb_mmap,
-};
-
-/*
- *  Initialisation
- */
-static int unifb_probe(struct platform_device *dev)
-{
-       struct fb_info *info;
-       u32 unifb_regs[UNIFB_REGS_NUM];
-       int retval = -ENOMEM;
-       struct resource *iomem;
-       void *videomemory;
-
-       videomemory = (void *)__get_free_pages(GFP_KERNEL | __GFP_COMP,
-                               get_order(UNIFB_MEMSIZE));
-       if (!videomemory)
-               goto err;
-
-       memset(videomemory, 0, UNIFB_MEMSIZE);
-
-       unifb_fix.smem_start = virt_to_phys(videomemory);
-       unifb_fix.smem_len = UNIFB_MEMSIZE;
-
-       iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
-       unifb_fix.mmio_start = iomem->start;
-
-       info = framebuffer_alloc(sizeof(u32)*256, &dev->dev);
-       if (!info)
-               goto err;
-
-       info->screen_base = (char __iomem *)videomemory;
-       info->fbops = &unifb_ops;
-
-       retval = fb_find_mode(&info->var, info, NULL,
-                             unifb_modes, 10, &unifb_modes[0], 16);
-
-       if (!retval || (retval == 4))
-               info->var = unifb_default;
-
-       info->fix = unifb_fix;
-       info->pseudo_palette = info->par;
-       info->par = NULL;
-       info->flags = FBINFO_FLAG_DEFAULT;
-#ifdef FB_ACCEL_PUV3_UNIGFX
-       info->fix.accel = FB_ACCEL_PUV3_UNIGFX;
-#endif
-
-       retval = fb_alloc_cmap(&info->cmap, 256, 0);
-       if (retval < 0)
-               goto err1;
-
-       retval = register_framebuffer(info);
-       if (retval < 0)
-               goto err2;
-       platform_set_drvdata(dev, info);
-       platform_device_add_data(dev, unifb_regs, sizeof(u32) * UNIFB_REGS_NUM);
-
-       fb_info(info, "Virtual frame buffer device, using %dM of video memory\n",
-               UNIFB_MEMSIZE >> 20);
-       return 0;
-err2:
-       fb_dealloc_cmap(&info->cmap);
-err1:
-       framebuffer_release(info);
-err:
-       return retval;
-}
-
-static int unifb_remove(struct platform_device *dev)
-{
-       struct fb_info *info = platform_get_drvdata(dev);
-
-       if (info) {
-               unregister_framebuffer(info);
-               fb_dealloc_cmap(&info->cmap);
-               framebuffer_release(info);
-       }
-       return 0;
-}
-
-#ifdef CONFIG_PM
-static int unifb_resume(struct platform_device *dev)
-{
-       int rc = 0;
-       u32 *unifb_regs = dev->dev.platform_data;
-
-       if (dev->dev.power.power_state.event == PM_EVENT_ON)
-               return 0;
-
-       console_lock();
-
-       if (dev->dev.power.power_state.event == PM_EVENT_SUSPEND) {
-               writel(unifb_regs[0], UDE_FSA);
-               writel(unifb_regs[1], UDE_LS);
-               writel(unifb_regs[2], UDE_PS);
-               writel(unifb_regs[3], UDE_HAT);
-               writel(unifb_regs[4], UDE_HBT);
-               writel(unifb_regs[5], UDE_HST);
-               writel(unifb_regs[6], UDE_VAT);
-               writel(unifb_regs[7], UDE_VBT);
-               writel(unifb_regs[8], UDE_VST);
-               writel(unifb_regs[9], UDE_CFG);
-       }
-       dev->dev.power.power_state = PMSG_ON;
-
-       console_unlock();
-
-       return rc;
-}
-
-static int unifb_suspend(struct platform_device *dev, pm_message_t mesg)
-{
-       u32 *unifb_regs = dev->dev.platform_data;
-
-       unifb_regs[0] = readl(UDE_FSA);
-       unifb_regs[1] = readl(UDE_LS);
-       unifb_regs[2] = readl(UDE_PS);
-       unifb_regs[3] = readl(UDE_HAT);
-       unifb_regs[4] = readl(UDE_HBT);
-       unifb_regs[5] = readl(UDE_HST);
-       unifb_regs[6] = readl(UDE_VAT);
-       unifb_regs[7] = readl(UDE_VBT);
-       unifb_regs[8] = readl(UDE_VST);
-       unifb_regs[9] = readl(UDE_CFG);
-
-       if (mesg.event == dev->dev.power.power_state.event)
-               return 0;
-
-       switch (mesg.event) {
-       case PM_EVENT_FREEZE:           /* about to take snapshot */
-       case PM_EVENT_PRETHAW:          /* before restoring snapshot */
-               goto done;
-       }
-
-       console_lock();
-
-       /* do nothing... */
-
-       console_unlock();
-
-done:
-       dev->dev.power.power_state = mesg;
-
-       return 0;
-}
-#else
-#define        unifb_resume    NULL
-#define unifb_suspend  NULL
-#endif
-
-static struct platform_driver unifb_driver = {
-       .probe   = unifb_probe,
-       .remove  = unifb_remove,
-       .resume  = unifb_resume,
-       .suspend = unifb_suspend,
-       .driver  = {
-               .name   = "PKUnity-v3-UNIGFX",
-       },
-};
-
-static int __init unifb_init(void)
-{
-#ifndef MODULE
-       if (fb_get_options("unifb", NULL))
-               return -ENODEV;
-#endif
-
-       return platform_driver_register(&unifb_driver);
-}
-
-module_init(unifb_init);
-
-static void __exit unifb_exit(void)
-{
-       platform_driver_unregister(&unifb_driver);
-}
-
-module_exit(unifb_exit);
-
-MODULE_LICENSE("GPL v2");
index 5809e5f..5c92e4a 100644 (file)
@@ -85,7 +85,7 @@ config VIRTIO_MEM
        depends on VIRTIO
        depends on MEMORY_HOTPLUG_SPARSE
        depends on MEMORY_HOTREMOVE
-       select CONTIG_ALLOC
+       depends on CONTIG_ALLOC
        help
         This driver provides access to virtio-mem paravirtualized memory
         devices, allowing to hotplug and hotunplug memory.
index 1f157d2..8be02f3 100644 (file)
@@ -578,10 +578,14 @@ static int init_vqs(struct virtio_balloon *vb)
 static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb)
 {
        if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID,
-                              &vb->config_read_bitmap))
+                              &vb->config_read_bitmap)) {
                virtio_cread(vb->vdev, struct virtio_balloon_config,
                             free_page_hint_cmd_id,
                             &vb->cmd_id_received_cache);
+               /* Legacy balloon config space is LE, unlike all other devices. */
+               if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
+                       vb->cmd_id_received_cache = le32_to_cpu((__force __le32)vb->cmd_id_received_cache);
+       }
 
        return vb->cmd_id_received_cache;
 }
@@ -974,6 +978,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
                /*
                 * Let the hypervisor know that we are expecting a
                 * specific value to be written back in balloon pages.
+                *
+                * If the PAGE_POISON value was larger than a byte we would
+                * need to byte swap poison_val here to guarantee it is
+                * little-endian. However for now it is a single byte so we
+                * can pass it as-is.
                 */
                if (!want_init_on_free())
                        memset(&poison_val, PAGE_POISON, sizeof(poison_val));
index a3cc8ec..d553bb5 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/statfs.h>
 #include <linux/user_namespace.h>
+#include <linux/blkdev.h>
 #include "adfs.h"
 #include "dir_f.h"
 #include "dir_fplus.h"
index a85817f..a26a0f9 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #include <linux/uio.h>
+#include <linux/blkdev.h>
 #include "affs.h"
 
 static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
index 64cdf4d..2482032 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/cred.h>
 #include <linux/exportfs.h>
 #include <linux/seq_file.h>
+#include <linux/blkdev.h>
 
 #include "befs.h"
 #include "btree.h"
index 0ae656e..8ae833e 100644 (file)
@@ -105,16 +105,7 @@ EXPORT_SYMBOL(invalidate_bdev);
 
 static void set_init_blocksize(struct block_device *bdev)
 {
-       unsigned bsize = bdev_logical_block_size(bdev);
-       loff_t size = i_size_read(bdev->bd_inode);
-
-       while (bsize < PAGE_SIZE) {
-               if (size & bsize)
-                       break;
-               bsize <<= 1;
-       }
-       bdev->bd_block_size = bsize;
-       bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+       bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev));
 }
 
 int set_blocksize(struct block_device *bdev, int size)
@@ -128,9 +119,8 @@ int set_blocksize(struct block_device *bdev, int size)
                return -EINVAL;
 
        /* Don't change the size if it is same as current */
-       if (bdev->bd_block_size != size) {
+       if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
                sync_blockdev(bdev);
-               bdev->bd_block_size = size;
                bdev->bd_inode->i_blkbits = blksize_bits(size);
                kill_bdev(bdev);
        }
@@ -703,12 +693,12 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
        if (!ops->rw_page || bdev_get_integrity(bdev))
                return result;
 
-       result = blk_queue_enter(bdev->bd_queue, 0);
+       result = blk_queue_enter(bdev->bd_disk->queue, 0);
        if (result)
                return result;
        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
                              REQ_OP_READ);
-       blk_queue_exit(bdev->bd_queue);
+       blk_queue_exit(bdev->bd_disk->queue);
        return result;
 }
 
@@ -739,7 +729,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 
        if (!ops->rw_page || bdev_get_integrity(bdev))
                return -EOPNOTSUPP;
-       result = blk_queue_enter(bdev->bd_queue, 0);
+       result = blk_queue_enter(bdev->bd_disk->queue, 0);
        if (result)
                return result;
 
@@ -752,7 +742,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
                clean_page_buffers(page);
                unlock_page(page);
        }
-       blk_queue_exit(bdev->bd_queue);
+       blk_queue_exit(bdev->bd_disk->queue);
        return result;
 }
 
@@ -783,7 +773,6 @@ static void init_once(void *foo)
 
        memset(bdev, 0, sizeof(*bdev));
        mutex_init(&bdev->bd_mutex);
-       INIT_LIST_HEAD(&bdev->bd_list);
 #ifdef CONFIG_SYSFS
        INIT_LIST_HEAD(&bdev->bd_holder_disks);
 #endif
@@ -799,9 +788,6 @@ static void bdev_evict_inode(struct inode *inode)
        truncate_inode_pages_final(&inode->i_data);
        invalidate_inode_buffers(inode); /* is it needed here? */
        clear_inode(inode);
-       spin_lock(&bdev_lock);
-       list_del_init(&bdev->bd_list);
-       spin_unlock(&bdev_lock);
        /* Detach inode from wb early as bdi_put() may free bdi->wb */
        inode_detach_wb(inode);
        if (bdev->bd_bdi != &noop_backing_dev_info) {
@@ -876,8 +862,6 @@ static int bdev_set(struct inode *inode, void *data)
        return 0;
 }
 
-static LIST_HEAD(all_bdevs);
-
 struct block_device *bdget(dev_t dev)
 {
        struct block_device *bdev;
@@ -895,7 +879,6 @@ struct block_device *bdget(dev_t dev)
                bdev->bd_contains = NULL;
                bdev->bd_super = NULL;
                bdev->bd_inode = inode;
-               bdev->bd_block_size = i_blocksize(inode);
                bdev->bd_part_count = 0;
                bdev->bd_invalidated = 0;
                inode->i_mode = S_IFBLK;
@@ -903,9 +886,6 @@ struct block_device *bdget(dev_t dev)
                inode->i_bdev = bdev;
                inode->i_data.a_ops = &def_blk_aops;
                mapping_set_gfp_mask(&inode->i_data, GFP_USER);
-               spin_lock(&bdev_lock);
-               list_add(&bdev->bd_list, &all_bdevs);
-               spin_unlock(&bdev_lock);
                unlock_new_inode(inode);
        }
        return bdev;
@@ -926,13 +906,14 @@ EXPORT_SYMBOL(bdgrab);
 
 long nr_blockdev_pages(void)
 {
-       struct block_device *bdev;
+       struct inode *inode;
        long ret = 0;
-       spin_lock(&bdev_lock);
-       list_for_each_entry(bdev, &all_bdevs, bd_list) {
-               ret += bdev->bd_inode->i_mapping->nrpages;
-       }
-       spin_unlock(&bdev_lock);
+
+       spin_lock(&blockdev_superblock->s_inode_list_lock);
+       list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
+               ret += inode->i_mapping->nrpages;
+       spin_unlock(&blockdev_superblock->s_inode_list_lock);
+
        return ret;
 }
 
@@ -1034,30 +1015,28 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
 }
 
 /**
- * bd_prepare_to_claim - prepare to claim a block device
+ * bd_prepare_to_claim - claim a block device
  * @bdev: block device of interest
  * @whole: the whole device containing @bdev, may equal @bdev
  * @holder: holder trying to claim @bdev
  *
- * Prepare to claim @bdev.  This function fails if @bdev is already
- * claimed by another holder and waits if another claiming is in
- * progress.  This function doesn't actually claim.  On successful
- * return, the caller has ownership of bd_claiming and bd_holder[s].
- *
- * CONTEXT:
- * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
- * it multiple times.
+ * Claim @bdev.  This function fails if @bdev is already claimed by another
+ * holder and waits if another claiming is in progress.  On successful
+ * return, the caller has ownership of bd_claiming and bd_holder[s].
  *
  * RETURNS:
  * 0 if @bdev can be claimed, -EBUSY otherwise.
  */
-static int bd_prepare_to_claim(struct block_device *bdev,
-                              struct block_device *whole, void *holder)
+int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
+               void *holder)
 {
 retry:
+       spin_lock(&bdev_lock);
        /* if someone else claimed, fail */
-       if (!bd_may_claim(bdev, whole, holder))
+       if (!bd_may_claim(bdev, whole, holder)) {
+               spin_unlock(&bdev_lock);
                return -EBUSY;
+       }
 
        /* if claiming is already in progress, wait for it to finish */
        if (whole->bd_claiming) {
@@ -1068,13 +1047,15 @@ retry:
                spin_unlock(&bdev_lock);
                schedule();
                finish_wait(wq, &wait);
-               spin_lock(&bdev_lock);
                goto retry;
        }
 
        /* yay, all mine */
+       whole->bd_claiming = holder;
+       spin_unlock(&bdev_lock);
        return 0;
 }
+EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
 
 static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
 {
@@ -1097,78 +1078,6 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
        return disk;
 }
 
-/**
- * bd_start_claiming - start claiming a block device
- * @bdev: block device of interest
- * @holder: holder trying to claim @bdev
- *
- * @bdev is about to be opened exclusively.  Check @bdev can be opened
- * exclusively and mark that an exclusive open is in progress.  Each
- * successful call to this function must be matched with a call to
- * either bd_finish_claiming() or bd_abort_claiming() (which do not
- * fail).
- *
- * This function is used to gain exclusive access to the block device
- * without actually causing other exclusive open attempts to fail. It
- * should be used when the open sequence itself requires exclusive
- * access but may subsequently fail.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * Pointer to the block device containing @bdev on success, ERR_PTR()
- * value on failure.
- */
-struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
-{
-       struct gendisk *disk;
-       struct block_device *whole;
-       int partno, err;
-
-       might_sleep();
-
-       /*
-        * @bdev might not have been initialized properly yet, look up
-        * and grab the outer block device the hard way.
-        */
-       disk = bdev_get_gendisk(bdev, &partno);
-       if (!disk)
-               return ERR_PTR(-ENXIO);
-
-       /*
-        * Normally, @bdev should equal what's returned from bdget_disk()
-        * if partno is 0; however, some drivers (floppy) use multiple
-        * bdev's for the same physical device and @bdev may be one of the
-        * aliases.  Keep @bdev if partno is 0.  This means claimer
-        * tracking is broken for those devices but it has always been that
-        * way.
-        */
-       if (partno)
-               whole = bdget_disk(disk, 0);
-       else
-               whole = bdgrab(bdev);
-
-       put_disk_and_module(disk);
-       if (!whole)
-               return ERR_PTR(-ENOMEM);
-
-       /* prepare to claim, if successful, mark claiming in progress */
-       spin_lock(&bdev_lock);
-
-       err = bd_prepare_to_claim(bdev, whole, holder);
-       if (err == 0) {
-               whole->bd_claiming = holder;
-               spin_unlock(&bdev_lock);
-               return whole;
-       } else {
-               spin_unlock(&bdev_lock);
-               bdput(whole);
-               return ERR_PTR(err);
-       }
-}
-EXPORT_SYMBOL(bd_start_claiming);
-
 static void bd_clear_claiming(struct block_device *whole, void *holder)
 {
        lockdep_assert_held(&bdev_lock);
@@ -1181,14 +1090,14 @@ static void bd_clear_claiming(struct block_device *whole, void *holder)
 /**
  * bd_finish_claiming - finish claiming of a block device
  * @bdev: block device of interest
- * @whole: whole block device (returned from bd_start_claiming())
+ * @whole: whole block device
  * @holder: holder that has claimed @bdev
  *
  * Finish exclusive open of a block device. Mark the device as exclusively
  * open by the holder and wake up all waiters for exclusive open to finish.
  */
-void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
-                       void *holder)
+static void bd_finish_claiming(struct block_device *bdev,
+               struct block_device *whole, void *holder)
 {
        spin_lock(&bdev_lock);
        BUG_ON(!bd_may_claim(bdev, whole, holder));
@@ -1203,12 +1112,11 @@ void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
        bd_clear_claiming(whole, holder);
        spin_unlock(&bdev_lock);
 }
-EXPORT_SYMBOL(bd_finish_claiming);
 
 /**
  * bd_abort_claiming - abort claiming of a block device
  * @bdev: block device of interest
- * @whole: whole block device (returned from bd_start_claiming())
+ * @whole: whole block device
  * @holder: holder that has claimed @bdev
  *
  * Abort claiming of a block device when the exclusive open failed. This can be
@@ -1367,26 +1275,6 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
 
-/**
- * flush_disk - invalidates all buffer-cache entries on a disk
- *
- * @bdev:      struct block device to be flushed
- * @kill_dirty: flag to guide handling of dirty inodes
- *
- * Invalidates all buffer-cache entries on a disk. It should be called
- * when a disk has been changed -- either by a media change or online
- * resize.
- */
-static void flush_disk(struct block_device *bdev, bool kill_dirty)
-{
-       if (__invalidate_device(bdev, kill_dirty)) {
-               printk(KERN_WARNING "VFS: busy inodes on changed media or "
-                      "resized disk %s\n",
-                      bdev->bd_disk ? bdev->bd_disk->disk_name : "");
-       }
-       bdev->bd_invalidated = 1;
-}
-
 /**
  * check_disk_size_change - checks for disk size change and adjusts bdev size.
  * @disk: struct gendisk to check
@@ -1411,8 +1299,9 @@ static void check_disk_size_change(struct gendisk *disk,
                               disk->disk_name, bdev_size, disk_size);
                }
                i_size_write(bdev->bd_inode, disk_size);
-               if (bdev_size > disk_size)
-                       flush_disk(bdev, false);
+               if (bdev_size > disk_size && __invalidate_device(bdev, false))
+                       pr_warn("VFS: busy inodes on resized disk %s\n",
+                               disk->disk_name);
        }
        bdev->bd_invalidated = 0;
 }
@@ -1471,7 +1360,10 @@ int check_disk_change(struct block_device *bdev)
        if (!(events & DISK_EVENT_MEDIA_CHANGE))
                return 0;
 
-       flush_disk(bdev, true);
+       if (__invalidate_device(bdev, true))
+               pr_warn("VFS: busy inodes on changed media %s\n",
+                       disk->disk_name);
+       bdev->bd_invalidated = 1;
        if (bdops->revalidate_disk)
                bdops->revalidate_disk(bdev->bd_disk);
        return 1;
@@ -1547,13 +1439,15 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
  *    mutex_lock_nested(whole->bd_mutex, 1)
  */
 
-static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
+static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
+               int for_part)
 {
+       struct block_device *whole = NULL, *claiming = NULL;
        struct gendisk *disk;
        int ret;
        int partno;
        int perm = 0;
-       bool first_open = false;
+       bool first_open = false, unblock_events = true, need_restart;
 
        if (mode & FMODE_READ)
                perm |= MAY_READ;
@@ -1569,18 +1463,36 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        }
 
  restart:
-
+       need_restart = false;
        ret = -ENXIO;
        disk = bdev_get_gendisk(bdev, &partno);
        if (!disk)
                goto out;
 
+       if (partno) {
+               whole = bdget_disk(disk, 0);
+               if (!whole) {
+                       ret = -ENOMEM;
+                       goto out_put_disk;
+               }
+       }
+
+       if (!for_part && (mode & FMODE_EXCL)) {
+               WARN_ON_ONCE(!holder);
+               if (whole)
+                       claiming = whole;
+               else
+                       claiming = bdev;
+               ret = bd_prepare_to_claim(bdev, claiming, holder);
+               if (ret)
+                       goto out_put_whole;
+       }
+
        disk_block_events(disk);
        mutex_lock_nested(&bdev->bd_mutex, for_part);
        if (!bdev->bd_openers) {
                first_open = true;
                bdev->bd_disk = disk;
-               bdev->bd_queue = disk->queue;
                bdev->bd_contains = bdev;
                bdev->bd_partno = partno;
 
@@ -1593,20 +1505,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        ret = 0;
                        if (disk->fops->open) {
                                ret = disk->fops->open(bdev, mode);
-                               if (ret == -ERESTARTSYS) {
-                                       /* Lost a race with 'disk' being
-                                        * deleted, try again.
-                                        * See md.c
-                                        */
-                                       disk_put_part(bdev->bd_part);
-                                       bdev->bd_part = NULL;
-                                       bdev->bd_disk = NULL;
-                                       bdev->bd_queue = NULL;
-                                       mutex_unlock(&bdev->bd_mutex);
-                                       disk_unblock_events(disk);
-                                       put_disk_and_module(disk);
-                                       goto restart;
-                               }
+                               /*
+                                * If we lost a race with 'disk' being deleted,
+                                * try again.  See md.c
+                                */
+                               if (ret == -ERESTARTSYS)
+                                       need_restart = true;
                        }
 
                        if (!ret) {
@@ -1627,18 +1531,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        if (ret)
                                goto out_clear;
                } else {
-                       struct block_device *whole;
-                       whole = bdget_disk(disk, 0);
-                       ret = -ENOMEM;
-                       if (!whole)
-                               goto out_clear;
                        BUG_ON(for_part);
-                       ret = __blkdev_get(whole, mode, 1);
-                       if (ret) {
-                               bdput(whole);
+                       ret = __blkdev_get(whole, mode, NULL, 1);
+                       if (ret)
                                goto out_clear;
-                       }
-                       bdev->bd_contains = whole;
+                       bdev->bd_contains = bdgrab(whole);
                        bdev->bd_part = disk_get_part(disk, partno);
                        if (!(disk->flags & GENHD_FL_UP) ||
                            !bdev->bd_part || !bdev->bd_part->nr_sects) {
@@ -1667,27 +1564,52 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
        bdev->bd_openers++;
        if (for_part)
                bdev->bd_part_count++;
+       if (claiming)
+               bd_finish_claiming(bdev, claiming, holder);
+
+       /*
+        * Block event polling for write claims if requested.  Any write holder
+        * makes the write_holder state stick until all are released.  This is
+        * good enough and tracking individual writeable reference is too
+        * fragile given the way @mode is used in blkdev_get/put().
+        */
+       if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+           (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
+               bdev->bd_write_holder = true;
+               unblock_events = false;
+       }
        mutex_unlock(&bdev->bd_mutex);
-       disk_unblock_events(disk);
+
+       if (unblock_events)
+               disk_unblock_events(disk);
+
        /* only one opener holds refs to the module and disk */
        if (!first_open)
                put_disk_and_module(disk);
+       if (whole)
+               bdput(whole);
        return 0;
 
  out_clear:
        disk_put_part(bdev->bd_part);
        bdev->bd_disk = NULL;
        bdev->bd_part = NULL;
-       bdev->bd_queue = NULL;
        if (bdev != bdev->bd_contains)
                __blkdev_put(bdev->bd_contains, mode, 1);
        bdev->bd_contains = NULL;
  out_unlock_bdev:
+       if (claiming)
+               bd_abort_claiming(bdev, claiming, holder);
        mutex_unlock(&bdev->bd_mutex);
        disk_unblock_events(disk);
+ out_put_whole:
+       if (whole)
+               bdput(whole);
+ out_put_disk:
        put_disk_and_module(disk);
+       if (need_restart)
+               goto restart;
  out:
-
        return ret;
 }
 
@@ -1712,50 +1634,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  */
 int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 {
-       struct block_device *whole = NULL;
        int res;
 
-       WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
-
-       if ((mode & FMODE_EXCL) && holder) {
-               whole = bd_start_claiming(bdev, holder);
-               if (IS_ERR(whole)) {
-                       bdput(bdev);
-                       return PTR_ERR(whole);
-               }
-       }
-
-       res = __blkdev_get(bdev, mode, 0);
-
-       if (whole) {
-               struct gendisk *disk = whole->bd_disk;
-
-               /* finish claiming */
-               mutex_lock(&bdev->bd_mutex);
-               if (!res)
-                       bd_finish_claiming(bdev, whole, holder);
-               else
-                       bd_abort_claiming(bdev, whole, holder);
-               /*
-                * Block event polling for write claims if requested.  Any
-                * write holder makes the write_holder state stick until
-                * all are released.  This is good enough and tracking
-                * individual writeable reference is too fragile given the
-                * way @mode is used in blkdev_get/put().
-                */
-               if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
-                   (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
-                       bdev->bd_write_holder = true;
-                       disk_block_events(disk);
-               }
-
-               mutex_unlock(&bdev->bd_mutex);
-               bdput(whole);
-       }
-
+       res =__blkdev_get(bdev, mode, holder, 0);
        if (res)
                bdput(bdev);
-
        return res;
 }
 EXPORT_SYMBOL(blkdev_get);
@@ -1851,7 +1734,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
         */
        filp->f_flags |= O_LARGEFILE;
 
-       filp->f_mode |= FMODE_NOWAIT;
+       filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 
        if (filp->f_flags & O_NDELAY)
                filp->f_mode |= FMODE_NDELAY;
index c037ef5..613920c 100644 (file)
@@ -65,11 +65,8 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
        spin_lock(&fs_info->balance_lock);
        target = get_restripe_target(fs_info, flags);
        if (target) {
-               /* Pick target profile only if it's already available */
-               if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
-                       spin_unlock(&fs_info->balance_lock);
-                       return extended_to_chunk(target);
-               }
+               spin_unlock(&fs_info->balance_lock);
+               return extended_to_chunk(target);
        }
        spin_unlock(&fs_info->balance_lock);
 
@@ -118,12 +115,12 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
 
 void btrfs_get_block_group(struct btrfs_block_group *cache)
 {
-       atomic_inc(&cache->count);
+       refcount_inc(&cache->refs);
 }
 
 void btrfs_put_block_group(struct btrfs_block_group *cache)
 {
-       if (atomic_dec_and_test(&cache->count)) {
+       if (refcount_dec_and_test(&cache->refs)) {
                WARN_ON(cache->pinned > 0);
                WARN_ON(cache->reserved > 0);
 
@@ -1111,7 +1108,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        if (ret < 0)
                goto out;
 
-       mutex_lock(&fs_info->chunk_mutex);
        spin_lock(&block_group->lock);
        block_group->removed = 1;
        /*
@@ -1143,8 +1139,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        remove_em = (atomic_read(&block_group->frozen) == 0);
        spin_unlock(&block_group->lock);
 
-       mutex_unlock(&fs_info->chunk_mutex);
-
        if (remove_em) {
                struct extent_map_tree *em_tree;
 
@@ -1532,21 +1526,70 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
        spin_unlock(&fs_info->unused_bgs_lock);
 }
 
+static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
+                          struct btrfs_path *path)
+{
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+       struct btrfs_block_group_item bg;
+       struct extent_buffer *leaf;
+       int slot;
+       u64 flags;
+       int ret = 0;
+
+       slot = path->slots[0];
+       leaf = path->nodes[0];
+
+       em_tree = &fs_info->mapping_tree;
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, key->objectid, key->offset);
+       read_unlock(&em_tree->lock);
+       if (!em) {
+               btrfs_err(fs_info,
+                         "logical %llu len %llu found bg but no related chunk",
+                         key->objectid, key->offset);
+               return -ENOENT;
+       }
+
+       if (em->start != key->objectid || em->len != key->offset) {
+               btrfs_err(fs_info,
+                       "block group %llu len %llu mismatch with chunk %llu len %llu",
+                       key->objectid, key->offset, em->start, em->len);
+               ret = -EUCLEAN;
+               goto out_free_em;
+       }
+
+       read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
+                          sizeof(bg));
+       flags = btrfs_stack_block_group_flags(&bg) &
+               BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+       if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+               btrfs_err(fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+                         key->objectid, key->offset, flags,
+                         (BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type));
+               ret = -EUCLEAN;
+       }
+
+out_free_em:
+       free_extent_map(em);
+       return ret;
+}
+
 static int find_first_block_group(struct btrfs_fs_info *fs_info,
                                  struct btrfs_path *path,
                                  struct btrfs_key *key)
 {
        struct btrfs_root *root = fs_info->extent_root;
-       int ret = 0;
+       int ret;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
-       struct btrfs_block_group_item bg;
-       u64 flags;
        int slot;
 
        ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
        if (ret < 0)
-               goto out;
+               return ret;
 
        while (1) {
                slot = path->slots[0];
@@ -1563,49 +1606,10 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
 
                if (found_key.objectid >= key->objectid &&
                    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
-                       struct extent_map_tree *em_tree;
-                       struct extent_map *em;
-
-                       em_tree = &root->fs_info->mapping_tree;
-                       read_lock(&em_tree->lock);
-                       em = lookup_extent_mapping(em_tree, found_key.objectid,
-                                                  found_key.offset);
-                       read_unlock(&em_tree->lock);
-                       if (!em) {
-                               btrfs_err(fs_info,
-                       "logical %llu len %llu found bg but no related chunk",
-                                         found_key.objectid, found_key.offset);
-                               ret = -ENOENT;
-                       } else if (em->start != found_key.objectid ||
-                                  em->len != found_key.offset) {
-                               btrfs_err(fs_info,
-               "block group %llu len %llu mismatch with chunk %llu len %llu",
-                                         found_key.objectid, found_key.offset,
-                                         em->start, em->len);
-                               ret = -EUCLEAN;
-                       } else {
-                               read_extent_buffer(leaf, &bg,
-                                       btrfs_item_ptr_offset(leaf, slot),
-                                       sizeof(bg));
-                               flags = btrfs_stack_block_group_flags(&bg) &
-                                       BTRFS_BLOCK_GROUP_TYPE_MASK;
-
-                               if (flags != (em->map_lookup->type &
-                                             BTRFS_BLOCK_GROUP_TYPE_MASK)) {
-                                       btrfs_err(fs_info,
-"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
-                                               found_key.objectid,
-                                               found_key.offset, flags,
-                                               (BTRFS_BLOCK_GROUP_TYPE_MASK &
-                                                em->map_lookup->type));
-                                       ret = -EUCLEAN;
-                               } else {
-                                       ret = 0;
-                               }
-                       }
-                       free_extent_map(em);
-                       goto out;
+                       ret = read_bg_from_eb(fs_info, &found_key, path);
+                       break;
                }
+
                path->slots[0]++;
        }
 out:
@@ -1657,19 +1661,12 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
                return -EIO;
 
        map = em->map_lookup;
-       data_stripe_length = em->len;
+       data_stripe_length = em->orig_block_len;
        io_stripe_size = map->stripe_len;
 
-       if (map->type & BTRFS_BLOCK_GROUP_RAID10)
-               data_stripe_length = div_u64(data_stripe_length,
-                                            map->num_stripes / map->sub_stripes);
-       else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
-               data_stripe_length = div_u64(data_stripe_length, map->num_stripes);
-       else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-               data_stripe_length = div_u64(data_stripe_length,
-                                            nr_data_stripes(map));
+       /* For RAID5/6 adjust to a full IO stripe length */
+       if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
                io_stripe_size = map->stripe_len * nr_data_stripes(map);
-       }
 
        buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
        if (!buf) {
@@ -1748,25 +1745,12 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
                        return ret;
 
                while (nr--) {
-                       u64 start, len;
-
-                       if (logical[nr] > cache->start + cache->length)
-                               continue;
-
-                       if (logical[nr] + stripe_len <= cache->start)
-                               continue;
-
-                       start = logical[nr];
-                       if (start < cache->start) {
-                               start = cache->start;
-                               len = (logical[nr] + stripe_len) - start;
-                       } else {
-                               len = min_t(u64, stripe_len,
-                                           cache->start + cache->length - start);
-                       }
+                       u64 len = min_t(u64, stripe_len,
+                               cache->start + cache->length - logical[nr]);
 
                        cache->bytes_super += len;
-                       ret = btrfs_add_excluded_extent(fs_info, start, len);
+                       ret = btrfs_add_excluded_extent(fs_info, logical[nr],
+                                                       len);
                        if (ret) {
                                kfree(logical);
                                return ret;
@@ -1818,7 +1802,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 
        cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
 
-       atomic_set(&cache->count, 1);
+       refcount_set(&cache->refs, 1);
        spin_lock_init(&cache->lock);
        init_rwsem(&cache->data_rwsem);
        INIT_LIST_HEAD(&cache->list);
@@ -2207,54 +2191,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
        return 0;
 }
 
-static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
-{
-       u64 num_devices;
-       u64 stripped;
-
-       /*
-        * if restripe for this chunk_type is on pick target profile and
-        * return, otherwise do the usual balance
-        */
-       stripped = get_restripe_target(fs_info, flags);
-       if (stripped)
-               return extended_to_chunk(stripped);
-
-       num_devices = fs_info->fs_devices->rw_devices;
-
-       stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
-               BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
-
-       if (num_devices == 1) {
-               stripped |= BTRFS_BLOCK_GROUP_DUP;
-               stripped = flags & ~stripped;
-
-               /* turn raid0 into single device chunks */
-               if (flags & BTRFS_BLOCK_GROUP_RAID0)
-                       return stripped;
-
-               /* turn mirroring into duplication */
-               if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
-                            BTRFS_BLOCK_GROUP_RAID10))
-                       return stripped | BTRFS_BLOCK_GROUP_DUP;
-       } else {
-               /* they already had raid on here, just return */
-               if (flags & stripped)
-                       return flags;
-
-               stripped |= BTRFS_BLOCK_GROUP_DUP;
-               stripped = flags & ~stripped;
-
-               /* switch duplicated blocks with raid1 */
-               if (flags & BTRFS_BLOCK_GROUP_DUP)
-                       return stripped | BTRFS_BLOCK_GROUP_RAID1;
-
-               /* this is drive concat, leave it alone */
-       }
-
-       return flags;
-}
-
 /*
  * Mark one block group RO, can be called several times for the same block
  * group.
@@ -2300,7 +2236,7 @@ again:
                 * If we are changing raid levels, try to allocate a
                 * corresponding block group with the new raid level.
                 */
-               alloc_flags = update_block_group_flags(fs_info, cache->flags);
+               alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
                if (alloc_flags != cache->flags) {
                        ret = btrfs_chunk_alloc(trans, alloc_flags,
                                                CHUNK_ALLOC_FORCE);
@@ -2327,7 +2263,7 @@ again:
        ret = inc_block_group_ro(cache, 0);
 out:
        if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
-               alloc_flags = update_block_group_flags(fs_info, cache->flags);
+               alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
                mutex_lock(&fs_info->chunk_mutex);
                check_system_chunk(trans, alloc_flags);
                mutex_unlock(&fs_info->chunk_mutex);
@@ -2521,7 +2457,8 @@ again:
        num_pages *= 16;
        num_pages *= PAGE_SIZE;
 
-       ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
+       ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
+                                         num_pages);
        if (ret)
                goto out_put;
 
@@ -3392,7 +3329,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                ASSERT(list_empty(&block_group->dirty_list));
                ASSERT(list_empty(&block_group->io_list));
                ASSERT(list_empty(&block_group->bg_list));
-               ASSERT(atomic_read(&block_group->count) == 1);
+               ASSERT(refcount_read(&block_group->refs) == 1);
                btrfs_put_block_group(block_group);
 
                spin_lock(&info->block_group_cache_lock);
@@ -3447,7 +3384,6 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
        spin_unlock(&block_group->lock);
 
        if (cleanup) {
-               mutex_lock(&fs_info->chunk_mutex);
                em_tree = &fs_info->mapping_tree;
                write_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, block_group->start,
@@ -3455,7 +3391,6 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
                BUG_ON(!em); /* logic error, can't happen */
                remove_extent_mapping(em_tree, em);
                write_unlock(&em_tree->lock);
-               mutex_unlock(&fs_info->chunk_mutex);
 
                /* once for us and once for the tree */
                free_extent_map(em);
index b6ee70a..adfd758 100644 (file)
@@ -114,8 +114,7 @@ struct btrfs_block_group {
        /* For block groups in the same raid type */
        struct list_head list;
 
-       /* Usage count */
-       atomic_t count;
+       refcount_t refs;
 
        /*
         * List of struct btrfs_free_clusters for this block group.
index e7d7095..c47b6c6 100644 (file)
@@ -151,6 +151,17 @@ struct btrfs_inode {
         */
        u64 last_unlink_trans;
 
+       /*
+        * The id/generation of the last transaction where this inode was
+        * either the source or the destination of a clone/dedupe operation.
+        * Used when logging an inode to know if there are shared extents that
+        * need special care when logging checksum items, to avoid duplicate
+        * checksum items in a log (which can lead to a corruption where we end
+        * up with missing checksum ranges after log replay).
+        * Protected by the vfs inode lock.
+        */
+       u64 last_reflink_trans;
+
        /*
         * Number of bytes outstanding that are going to need csums.  This is
         * used in ENOSPC accounting.
index 32e11a2..81a8c87 100644 (file)
@@ -631,10 +631,8 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
        int pass;
 
        selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
-       if (NULL == selected_super) {
-               pr_info("btrfsic: error, kmalloc failed!\n");
+       if (!selected_super)
                return -ENOMEM;
-       }
 
        list_for_each_entry(device, dev_head, dev_list) {
                int i;
@@ -795,7 +793,6 @@ static int btrfsic_process_superblock_dev_mirror(
        if (NULL == superblock_tmp) {
                superblock_tmp = btrfsic_block_alloc();
                if (NULL == superblock_tmp) {
-                       pr_info("btrfsic: error, kmalloc failed!\n");
                        ret = -1;
                        goto out;
                }
@@ -921,9 +918,7 @@ static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
        struct btrfsic_stack_frame *sf;
 
        sf = kzalloc(sizeof(*sf), GFP_NOFS);
-       if (NULL == sf)
-               pr_info("btrfsic: alloc memory failed!\n");
-       else
+       if (sf)
                sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
        return sf;
 }
@@ -1313,7 +1308,6 @@ static int btrfsic_create_link_to_next_block(
        if (NULL == l) {
                l = btrfsic_block_link_alloc();
                if (NULL == l) {
-                       pr_info("btrfsic: error, kmalloc failed!\n");
                        btrfsic_release_block_ctx(next_block_ctx);
                        *next_blockp = NULL;
                        return -1;
@@ -1470,7 +1464,6 @@ static int btrfsic_handle_extent_data(
                                        mirror_num,
                                        &block_was_created);
                        if (NULL == next_block) {
-                               pr_info("btrfsic: error, kmalloc failed!\n");
                                btrfsic_release_block_ctx(&next_block_ctx);
                                return -1;
                        }
@@ -2013,7 +2006,6 @@ again:
 
                block = btrfsic_block_alloc();
                if (NULL == block) {
-                       pr_info("btrfsic: error, kmalloc failed!\n");
                        btrfsic_release_block_ctx(&block_ctx);
                        goto continue_loop;
                }
@@ -2234,7 +2226,6 @@ static int btrfsic_process_written_superblock(
                                        mirror_num,
                                        &was_created);
                        if (NULL == next_block) {
-                               pr_info("btrfsic: error, kmalloc failed!\n");
                                btrfsic_release_block_ctx(&tmp_next_block_ctx);
                                return -1;
                        }
@@ -2542,10 +2533,8 @@ static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
                                                &state->block_link_hashtable);
        if (NULL == l) {
                l = btrfsic_block_link_alloc();
-               if (NULL == l) {
-                       pr_info("btrfsic: error, kmalloc failed!\n");
+               if (!l)
                        return NULL;
-               }
 
                l->block_ref_to = next_block;
                l->block_ref_from = from_block;
@@ -2589,10 +2578,9 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
                struct btrfsic_dev_state *dev_state;
 
                block = btrfsic_block_alloc();
-               if (NULL == block) {
-                       pr_info("btrfsic: error, kmalloc failed!\n");
+               if (!block)
                        return NULL;
-               }
+
                dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev);
                if (NULL == dev_state) {
                        pr_info("btrfsic: error, lookup dev_state failed!\n");
@@ -2797,10 +2785,8 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
                return -1;
        }
        state = kvzalloc(sizeof(*state), GFP_KERNEL);
-       if (!state) {
-               pr_info("btrfs check-integrity: allocation failed!\n");
+       if (!state)
                return -ENOMEM;
-       }
 
        if (!btrfsic_is_initialized) {
                mutex_init(&btrfsic_mutex);
@@ -2829,7 +2815,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 
                ds = btrfsic_dev_state_alloc();
                if (NULL == ds) {
-                       pr_info("btrfs check-integrity: kmalloc() failed!\n");
                        mutex_unlock(&btrfsic_mutex);
                        return -ENOMEM;
                }
index c6e6486..1ab56a7 100644 (file)
@@ -172,18 +172,17 @@ static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
                (DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
 }
 
-static int check_compressed_csum(struct btrfs_inode *inode,
-                                struct compressed_bio *cb,
+static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
                                 u64 disk_start)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
-       int ret;
        struct page *page;
        unsigned long i;
        char *kaddr;
        u8 csum[BTRFS_CSUM_SIZE];
+       struct compressed_bio *cb = bio->bi_private;
        u8 *cb_sum = cb->sums;
 
        if (inode->flags & BTRFS_INODE_NODATASUM)
@@ -201,15 +200,15 @@ static int check_compressed_csum(struct btrfs_inode *inode,
                if (memcmp(&csum, cb_sum, csum_size)) {
                        btrfs_print_data_csum_error(inode, disk_start,
                                        csum, cb_sum, cb->mirror_num);
-                       ret = -EIO;
-                       goto fail;
+                       if (btrfs_io_bio(bio)->device)
+                               btrfs_dev_stat_inc_and_print(
+                                       btrfs_io_bio(bio)->device,
+                                       BTRFS_DEV_STAT_CORRUPTION_ERRS);
+                       return -EIO;
                }
                cb_sum += csum_size;
-
        }
-       ret = 0;
-fail:
-       return ret;
+       return 0;
 }
 
 /* when we finish reading compressed pages from the disk, we
@@ -244,7 +243,6 @@ static void end_compressed_bio_read(struct bio *bio)
         * Record the correct mirror_num in cb->orig_bio so that
         * read-repair can work properly.
         */
-       ASSERT(btrfs_io_bio(cb->orig_bio));
        btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
        cb->mirror_num = mirror;
 
@@ -256,7 +254,7 @@ static void end_compressed_bio_read(struct bio *bio)
                goto csum_failed;
 
        inode = cb->inode;
-       ret = check_compressed_csum(BTRFS_I(inode), cb,
+       ret = check_compressed_csum(BTRFS_I(inode), bio,
                                    (u64)bio->bi_iter.bi_sector << 9);
        if (ret)
                goto csum_failed;
@@ -405,7 +403,7 @@ out:
  * This also checksums the file bytes and gets things ready for
  * the end io hooks.
  */
-blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                 unsigned long len, u64 disk_start,
                                 unsigned long compressed_len,
                                 struct page **compressed_pages,
@@ -413,7 +411,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
                                 unsigned int write_flags,
                                 struct cgroup_subsys_state *blkcg_css)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct bio *bio = NULL;
        struct compressed_bio *cb;
        unsigned long bytes_left;
@@ -421,7 +419,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
        struct page *page;
        u64 first_byte = disk_start;
        blk_status_t ret;
-       int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+       int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
 
        WARN_ON(!PAGE_ALIGNED(start));
        cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -429,7 +427,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
                return BLK_STS_RESOURCE;
        refcount_set(&cb->pending_bios, 0);
        cb->errors = 0;
-       cb->inode = inode;
+       cb->inode = &inode->vfs_inode;
        cb->start = start;
        cb->len = len;
        cb->mirror_num = 0;
@@ -455,7 +453,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
                int submit = 0;
 
                page = compressed_pages[pg_index];
-               page->mapping = inode->i_mapping;
+               page->mapping = inode->vfs_inode.i_mapping;
                if (bio->bi_iter.bi_size)
                        submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
                                                          0);
index 284a3ad..9f3dbe3 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/sizes.h>
 
+struct btrfs_inode;
+
 /*
  * We want to make sure that amount of RAM required to uncompress an extent is
  * reasonable, so we limit the total size in ram of a compressed extent to
@@ -88,7 +90,7 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
                              unsigned long total_out, u64 disk_start,
                              struct bio *bio);
 
-blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                  unsigned long len, u64 disk_start,
                                  unsigned long compressed_len,
                                  struct page **compressed_pages,
index 82ab6e5..70e49d8 100644 (file)
@@ -1501,6 +1501,22 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
        return 0;
 }
 
+#ifdef __LITTLE_ENDIAN
+
+/*
+ * Compare two keys. On little-endian the disk order is the same as the CPU
+ * order, so we can avoid the conversion.
+ */
+static int comp_keys(const struct btrfs_disk_key *disk_key,
+                    const struct btrfs_key *k2)
+{
+       const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
+
+       return btrfs_comp_cpu_keys(k1, k2);
+}
+
+#else
+
 /*
  * compare two keys in a memcmp fashion
  */
@@ -1513,6 +1529,7 @@ static int comp_keys(const struct btrfs_disk_key *disk,
 
        return btrfs_comp_cpu_keys(&k1, k2);
 }
+#endif
 
 /*
  * same as comp_keys only with two btrfs_key's
index d404cce..9c7e466 100644 (file)
@@ -545,11 +545,6 @@ enum {
         * (device replace, resize, device add/delete, balance)
         */
        BTRFS_FS_EXCL_OP,
-       /*
-        * To info transaction_kthread we need an immediate commit so it
-        * doesn't need to wait for commit_interval
-        */
-       BTRFS_FS_NEED_ASYNC_COMMIT,
        /*
         * Indicate that balance has been set up from the ioctl and is in the
         * main phase. The fs_info::balance_ctl is initialized.
@@ -779,6 +774,7 @@ struct btrfs_fs_info {
        u32 thread_pool_size;
 
        struct kobject *space_info_kobj;
+       struct kobject *qgroups_kobj;
 
        u64 total_pinned;
 
@@ -1011,6 +1007,8 @@ enum {
        BTRFS_ROOT_DEAD_TREE,
        /* The root has a log tree. Used only for subvolume roots. */
        BTRFS_ROOT_HAS_LOG_TREE,
+       /* Qgroup flushing is in progress */
+       BTRFS_ROOT_QGROUP_FLUSHING,
 };
 
 /*
@@ -1059,8 +1057,10 @@ struct btrfs_root {
        wait_queue_head_t log_writer_wait;
        wait_queue_head_t log_commit_wait[2];
        struct list_head log_ctxs[2];
+       /* Used only for log trees of subvolumes, not for the log root tree */
        atomic_t log_writers;
        atomic_t log_commit[2];
+       /* Used only for log trees of subvolumes, not for the log root tree */
        atomic_t log_batch;
        int log_transid;
        /* No matter the commit succeeds or not*/
@@ -1075,7 +1075,6 @@ struct btrfs_root {
 
        u64 highest_objectid;
 
-       u64 defrag_trans_start;
        struct btrfs_key defrag_progress;
        struct btrfs_key defrag_max;
 
@@ -1162,6 +1161,7 @@ struct btrfs_root {
        spinlock_t qgroup_meta_rsv_lock;
        u64 qgroup_meta_rsv_pertrans;
        u64 qgroup_meta_rsv_prealloc;
+       wait_queue_head_t qgroup_flush_wait;
 
        /* Number of active swapfiles */
        atomic_t nr_swapfiles;
@@ -1277,18 +1277,18 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
                                         BTRFS_MOUNT_##opt)
 
 #define btrfs_set_and_info(fs_info, opt, fmt, args...)                 \
-{                                                                      \
+do {                                                                   \
        if (!btrfs_test_opt(fs_info, opt))                              \
                btrfs_info(fs_info, fmt, ##args);                       \
        btrfs_set_opt(fs_info->mount_opt, opt);                         \
-}
+} while (0)
 
 #define btrfs_clear_and_info(fs_info, opt, fmt, args...)               \
-{                                                                      \
+do {                                                                   \
        if (btrfs_test_opt(fs_info, opt))                               \
                btrfs_info(fs_info, fmt, ##args);                       \
        btrfs_clear_opt(fs_info->mount_opt, opt);                       \
-}
+} while (0)
 
 /*
  * Requests for changes that need to be done during transaction commit.
@@ -1895,6 +1895,52 @@ BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
 BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
 BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
 
+#ifdef __LITTLE_ENDIAN
+
+/*
+ * Optimized helpers for little-endian architectures where CPU and on-disk
+ * structures have the same endianness and we can skip conversions.
+ */
+
+static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu_key,
+                                        const struct btrfs_disk_key *disk_key)
+{
+       memcpy(cpu_key, disk_key, sizeof(struct btrfs_key));
+}
+
+static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk_key,
+                                        const struct btrfs_key *cpu_key)
+{
+       memcpy(disk_key, cpu_key, sizeof(struct btrfs_key));
+}
+
+static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb,
+                                        struct btrfs_key *cpu_key, int nr)
+{
+       struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key;
+
+       btrfs_node_key(eb, disk_key, nr);
+}
+
+static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb,
+                                        struct btrfs_key *cpu_key, int nr)
+{
+       struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key;
+
+       btrfs_item_key(eb, disk_key, nr);
+}
+
+static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb,
+                                            const struct btrfs_dir_item *item,
+                                            struct btrfs_key *cpu_key)
+{
+       struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key;
+
+       btrfs_dir_item_key(eb, item, disk_key);
+}
+
+#else
+
 static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
                                         const struct btrfs_disk_key *disk)
 {
@@ -1936,6 +1982,8 @@ static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb,
        btrfs_disk_key_to_cpu(key, &disk_key);
 }
 
+#endif
+
 /* struct btrfs_header */
 BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
 BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
@@ -2232,7 +2280,8 @@ static inline unsigned int leaf_data_end(const struct extent_buffer *leaf)
 }
 
 /* struct btrfs_file_extent_item */
-BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item,
+                        type, 8);
 BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
                         struct btrfs_file_extent_item, disk_bytenr, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset,
@@ -2241,6 +2290,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
                         struct btrfs_file_extent_item, generation, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
                         struct btrfs_file_extent_item, num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes,
+                        struct btrfs_file_extent_item, ram_bytes, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes,
                         struct btrfs_file_extent_item, disk_num_bytes, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
@@ -2257,6 +2308,7 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
        return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize;
 }
 
+BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
 BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
                   disk_bytenr, 64);
 BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
@@ -2508,16 +2560,46 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 
+/*
+ * Different levels for flushing space when doing space reservations.
+ *
+ * The higher the level, the more methods we try to reclaim space.
+ */
 enum btrfs_reserve_flush_enum {
        /* If we are in the transaction, we can't flush anything.*/
        BTRFS_RESERVE_NO_FLUSH,
+
        /*
-        * Flushing delalloc may cause deadlock somewhere, in this
-        * case, use FLUSH LIMIT
+        * Flush space by:
+        * - Running delayed inode items
+        * - Allocating a new chunk
         */
        BTRFS_RESERVE_FLUSH_LIMIT,
+
+       /*
+        * Flush space by:
+        * - Running delayed inode items
+        * - Running delayed refs
+        * - Running delalloc and waiting for ordered extents
+        * - Allocating a new chunk
+        */
        BTRFS_RESERVE_FLUSH_EVICT,
+
+       /*
+        * Flush space by above mentioned methods and by:
+        * - Running delayed iputs
+        * - Committing transaction
+        *
+        * Can be interrupted by a fatal signal.
+        */
        BTRFS_RESERVE_FLUSH_ALL,
+
+       /*
+        * Pretty much the same as FLUSH_ALL, but can also steal space from
+        * global rsv.
+        *
+        * Can be interrupted by a fatal signal.
+        */
        BTRFS_RESERVE_FLUSH_ALL_STEAL,
 };
 
@@ -2831,8 +2913,8 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_ordered_sum *sums);
-blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
-                      u64 file_start, int contig);
+blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
+                               u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                             struct list_head *list, int search_commit);
 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
@@ -2875,7 +2957,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
 int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr);
-int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
+int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                              unsigned int extra_bits,
                              struct extent_state **cached_state);
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
@@ -2928,7 +3010,7 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
                                    struct btrfs_trans_handle *trans, int mode,
                                    u64 start, u64 num_bytes, u64 min_size,
                                    loff_t actual_len, u64 *alloc_hint);
-int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
+int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
                u64 start, u64 end, int *page_started, unsigned long *nr_written,
                struct writeback_control *wbc);
 int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
@@ -2962,7 +3044,7 @@ void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
                             int skip_pinned);
 extern const struct file_operations btrfs_file_operations;
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
-                        struct btrfs_root *root, struct inode *inode,
+                        struct btrfs_root *root, struct btrfs_inode *inode,
                         struct btrfs_path *path, u64 start, u64 end,
                         u64 *drop_end, int drop_cache,
                         int replace_extent,
@@ -2978,10 +3060,13 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                              struct btrfs_inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
-int btrfs_dirty_pages(struct inode *inode, struct page **pages,
+int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
                      size_t num_pages, loff_t pos, size_t write_bytes,
                      struct extent_state **cached);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
+int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
+                          size_t *write_bytes);
+void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
@@ -3194,7 +3279,7 @@ do {                                                              \
        /* Report first abort since mount */                    \
        if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,     \
                        &((trans)->fs_info->fs_state))) {       \
-               if ((errno) != -EIO) {                          \
+               if ((errno) != -EIO && (errno) != -EROFS) {             \
                        WARN(1, KERN_DEBUG                              \
                        "BTRFS: Transaction aborted (error %d)\n",      \
                        (errno));                                       \
@@ -3378,7 +3463,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root);
 int btrfs_recover_relocation(struct btrfs_root *root);
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
+int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
 int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct extent_buffer *buf,
                          struct extent_buffer *cow);
index 1245739..0e354e9 100644 (file)
@@ -237,10 +237,10 @@ commit_trans:
        return 0;
 }
 
-int btrfs_check_data_free_space(struct inode *inode,
+int btrfs_check_data_free_space(struct btrfs_inode *inode,
                        struct extent_changeset **reserved, u64 start, u64 len)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        int ret;
 
        /* align the range */
@@ -248,14 +248,14 @@ int btrfs_check_data_free_space(struct inode *inode,
              round_down(start, fs_info->sectorsize);
        start = round_down(start, fs_info->sectorsize);
 
-       ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
+       ret = btrfs_alloc_data_chunk_ondemand(inode, len);
        if (ret < 0)
                return ret;
 
        /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
        ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
        if (ret < 0)
-               btrfs_free_reserved_data_space_noquota(inode, start, len);
+               btrfs_free_reserved_data_space_noquota(fs_info, len);
        else
                ret = 0;
        return ret;
@@ -269,16 +269,12 @@ int btrfs_check_data_free_space(struct inode *inode,
  * which we can't sleep and is sure it won't affect qgroup reserved space.
  * Like clear_bit_hook().
  */
-void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
+void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
                                            u64 len)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_space_info *data_sinfo;
 
-       /* Make sure the range is aligned to sectorsize */
-       len = round_up(start + len, fs_info->sectorsize) -
-             round_down(start, fs_info->sectorsize);
-       start = round_down(start, fs_info->sectorsize);
+       ASSERT(IS_ALIGNED(len, fs_info->sectorsize));
 
        data_sinfo = fs_info->data_sinfo;
        spin_lock(&data_sinfo->lock);
@@ -293,17 +289,17 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
  * This one will handle the per-inode data rsv map for accurate reserved
  * space framework.
  */
-void btrfs_free_reserved_data_space(struct inode *inode,
+void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
                        struct extent_changeset *reserved, u64 start, u64 len)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
 
        /* Make sure the range is aligned to sectorsize */
-       len = round_up(start + len, root->fs_info->sectorsize) -
-             round_down(start, root->fs_info->sectorsize);
-       start = round_down(start, root->fs_info->sectorsize);
+       len = round_up(start + len, fs_info->sectorsize) -
+             round_down(start, fs_info->sectorsize);
+       start = round_down(start, fs_info->sectorsize);
 
-       btrfs_free_reserved_data_space_noquota(inode, start, len);
+       btrfs_free_reserved_data_space_noquota(fs_info, len);
        btrfs_qgroup_free_data(inode, reserved, start, len);
 }
 
@@ -557,7 +553,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
  * Return 0 for success
  * Return <0 for error(-ENOSPC or -EQUOT)
  */
-int btrfs_delalloc_reserve_space(struct inode *inode,
+int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
                        struct extent_changeset **reserved, u64 start, u64 len)
 {
        int ret;
@@ -565,7 +561,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
        ret = btrfs_check_data_free_space(inode, reserved, start, len);
        if (ret < 0)
                return ret;
-       ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
+       ret = btrfs_delalloc_reserve_metadata(inode, len);
        if (ret < 0)
                btrfs_free_reserved_data_space(inode, *reserved, start, len);
        return ret;
@@ -583,10 +579,10 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
  * list if there are no delalloc bytes left.
  * Also it will handle the qgroup reserved space.
  */
-void btrfs_delalloc_release_space(struct inode *inode,
+void btrfs_delalloc_release_space(struct btrfs_inode *inode,
                                  struct extent_changeset *reserved,
                                  u64 start, u64 len, bool qgroup_free)
 {
-       btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
+       btrfs_delalloc_release_metadata(inode, len, qgroup_free);
        btrfs_free_reserved_data_space(inode, reserved, start, len);
 }
index 54466fb..28bf5c3 100644 (file)
@@ -6,18 +6,18 @@
 struct extent_changeset;
 
 int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
-int btrfs_check_data_free_space(struct inode *inode,
+int btrfs_check_data_free_space(struct btrfs_inode *inode,
                        struct extent_changeset **reserved, u64 start, u64 len);
-void btrfs_free_reserved_data_space(struct inode *inode,
+void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
                        struct extent_changeset *reserved, u64 start, u64 len);
-void btrfs_delalloc_release_space(struct inode *inode,
+void btrfs_delalloc_release_space(struct btrfs_inode *inode,
                                  struct extent_changeset *reserved,
                                  u64 start, u64 len, bool qgroup_free);
-void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
+void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
                                            u64 len);
 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
                                     bool qgroup_free);
-int btrfs_delalloc_reserve_space(struct inode *inode,
+int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
                        struct extent_changeset **reserved, u64 start, u64 len);
 
 #endif /* BTRFS_DELALLOC_SPACE_H */
index b1a1480..9ae25f6 100644 (file)
@@ -1116,6 +1116,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
        mutex_init(&root->log_mutex);
        mutex_init(&root->ordered_extent_mutex);
        mutex_init(&root->delalloc_mutex);
+       init_waitqueue_head(&root->qgroup_flush_wait);
        init_waitqueue_head(&root->log_writer_wait);
        init_waitqueue_head(&root->log_commit_wait[0]);
        init_waitqueue_head(&root->log_commit_wait[1]);
@@ -1141,10 +1142,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
        memset(&root->root_key, 0, sizeof(root->root_key));
        memset(&root->root_item, 0, sizeof(root->root_item));
        memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
-       if (!dummy)
-               root->defrag_trans_start = fs_info->generation;
-       else
-               root->defrag_trans_start = 0;
        root->root_key.objectid = objectid;
        root->anon_dev = 0;
 
@@ -1395,7 +1392,12 @@ alloc_fail:
        goto out;
 }
 
-static int btrfs_init_fs_root(struct btrfs_root *root)
+/*
+ * Initialize subvolume root in-memory structure
+ *
+ * @anon_dev:  anonymous device to attach to the root, if zero, allocate new
+ */
+static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
 {
        int ret;
        unsigned int nofs_flag;
@@ -1428,9 +1430,20 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
        spin_lock_init(&root->ino_cache_lock);
        init_waitqueue_head(&root->ino_cache_wait);
 
-       ret = get_anon_bdev(&root->anon_dev);
-       if (ret)
-               goto fail;
+       /*
+        * Don't assign anonymous block device to roots that are not exposed to
+        * userspace, the id pool is limited to 1M
+        */
+       if (is_fstree(root->root_key.objectid) &&
+           btrfs_root_refs(&root->root_item) > 0) {
+               if (!anon_dev) {
+                       ret = get_anon_bdev(&root->anon_dev);
+                       if (ret)
+                               goto fail;
+               } else {
+                       root->anon_dev = anon_dev;
+               }
+       }
 
        mutex_lock(&root->objectid_mutex);
        ret = btrfs_find_highest_objectid(root,
@@ -1534,8 +1547,27 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
 }
 
 
-struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
-                                    u64 objectid, bool check_ref)
+/*
+ * Get an in-memory reference of a root structure.
+ *
+ * For essential trees like root/extent tree, we grab it from fs_info directly.
+ * For subvolume trees, we check the cached filesystem roots first. If not
+ * found, then read it from disk and add it to cached fs roots.
+ *
+ * Caller should release the root by calling btrfs_put_root() after the usage.
+ *
+ * NOTE: Reloc and log trees can't be read by this function as they share the
+ *      same root objectid.
+ *
+ * @objectid:  root id
+ * @anon_dev:  preallocated anonymous block device number for new roots,
+ *             pass 0 for new allocation.
+ * @check_ref: whether to check root item references. If true, return -ENOENT
+ *             for orphan roots
+ */
+static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
+                                            u64 objectid, dev_t anon_dev,
+                                            bool check_ref)
 {
        struct btrfs_root *root;
        struct btrfs_path *path;
@@ -1564,6 +1596,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
 again:
        root = btrfs_lookup_fs_root(fs_info, objectid);
        if (root) {
+               /* Shouldn't get preallocated anon_dev for cached roots */
+               ASSERT(!anon_dev);
                if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
                        btrfs_put_root(root);
                        return ERR_PTR(-ENOENT);
@@ -1583,7 +1617,7 @@ again:
                goto fail;
        }
 
-       ret = btrfs_init_fs_root(root);
+       ret = btrfs_init_fs_root(root, anon_dev);
        if (ret)
                goto fail;
 
@@ -1616,25 +1650,31 @@ fail:
        return ERR_PTR(ret);
 }
 
-static int btrfs_congested_fn(void *congested_data, int bdi_bits)
+/*
+ * Get in-memory reference of a root structure
+ *
+ * @objectid:  tree objectid
+ * @check_ref: if set, verify that the tree exists and the item has at least
+ *             one reference
+ */
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+                                    u64 objectid, bool check_ref)
 {
-       struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
-       int ret = 0;
-       struct btrfs_device *device;
-       struct backing_dev_info *bdi;
+       return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+}
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) {
-               if (!device->bdev)
-                       continue;
-               bdi = device->bdev->bd_bdi;
-               if (bdi_congested(bdi, bdi_bits)) {
-                       ret = 1;
-                       break;
-               }
-       }
-       rcu_read_unlock();
-       return ret;
+/*
+ * Get in-memory reference of a root structure, created as new, optionally pass
+ * the anonymous block device id
+ *
+ * @objectid:  tree objectid
+ * @anon_dev:  if zero, allocate a new anonymous block device, otherwise use
+ *             the parameter value
+ */
+struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
+                                        u64 objectid, dev_t anon_dev)
+{
+       return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
 }
 
 /*
@@ -1749,7 +1789,6 @@ static int transaction_kthread(void *arg)
 
                now = ktime_get_seconds();
                if (cur->state < TRANS_STATE_COMMIT_START &&
-                   !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
                    (now < cur->start_time ||
                     now - cur->start_time < fs_info->commit_interval)) {
                        spin_unlock(&fs_info->trans_lock);
@@ -2001,8 +2040,7 @@ void btrfs_put_root(struct btrfs_root *root)
                if (root->anon_dev)
                        free_anon_bdev(root->anon_dev);
                btrfs_drew_lock_destroy(&root->snapshot_lock);
-               free_extent_buffer(root->node);
-               free_extent_buffer(root->commit_root);
+               free_root_extent_buffers(root);
                kfree(root->free_ino_ctl);
                kfree(root->free_ino_pinned);
 #ifdef CONFIG_BTRFS_DEBUG
@@ -3053,8 +3091,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
                goto fail_sb_buffer;
        }
 
-       sb->s_bdi->congested_fn = btrfs_congested_fn;
-       sb->s_bdi->congested_data = fs_info;
        sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
        sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
        sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
@@ -4058,6 +4094,11 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
        ASSERT(list_empty(&fs_info->delayed_iputs));
        set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
 
+       if (btrfs_check_quota_leak(fs_info)) {
+               WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+               btrfs_err(fs_info, "qgroup reserved space leaked");
+       }
+
        btrfs_free_qgroup_config(fs_info);
        ASSERT(list_empty(&fs_info->delalloc_roots));
 
index bf43245..00dc39d 100644 (file)
@@ -67,6 +67,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
 
 struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
                                     u64 objectid, bool check_ref);
+struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
+                                        u64 objectid, dev_t anon_dev);
 
 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
index b656145..f39d47a 100644 (file)
@@ -233,14 +233,11 @@ bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
                               struct extent_state **cached_state);
 
 /* This should be reworked in the future and put elsewhere. */
-int get_state_failrec(struct extent_io_tree *tree, u64 start,
-                     struct io_failure_record **failrec);
+struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start);
 int set_state_failrec(struct extent_io_tree *tree, u64 start,
                      struct io_failure_record *failrec);
 void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
                u64 end);
-int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
-                               struct io_failure_record **failrec_ret);
 int free_io_failure(struct extent_io_tree *failure_tree,
                    struct extent_io_tree *io_tree,
                    struct io_failure_record *rec);
index c0bc35f..61ede33 100644 (file)
@@ -5298,7 +5298,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
                goto out;
        }
 
-       trans = btrfs_start_transaction(tree_root, 0);
+       /*
+        * Use join to avoid potential EINTR from transaction start. See
+        * wait_reserve_ticket and the whole reservation callchain.
+        */
+       if (for_reloc)
+               trans = btrfs_join_transaction(tree_root);
+       else
+               trans = btrfs_start_transaction(tree_root, 0);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto out_free;
@@ -5466,6 +5473,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
                }
        }
 
+       /*
+        * This subvolume is going to be completely dropped, and won't be
+        * recorded as dirty roots, thus pertrans meta rsv will not be freed at
+        * commit transaction time.  So free it here manually.
+        */
+       btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
+       btrfs_qgroup_free_meta_all_pertrans(root);
+
        if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
                btrfs_add_dropped_root(trans, root);
        else
index 60278e5..6def411 100644 (file)
@@ -2018,15 +2018,14 @@ out:
        return err;
 }
 
-void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                                  struct page *locked_page,
                                  unsigned clear_bits,
                                  unsigned long page_ops)
 {
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
-                        NULL);
+       clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL);
 
-       __process_pages_contig(inode->i_mapping, locked_page,
+       __process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
                               start >> PAGE_SHIFT, end >> PAGE_SHIFT,
                               page_ops, NULL);
 }
@@ -2123,12 +2122,11 @@ out:
        return ret;
 }
 
-int get_state_failrec(struct extent_io_tree *tree, u64 start,
-                     struct io_failure_record **failrec)
+struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start)
 {
        struct rb_node *node;
        struct extent_state *state;
-       int ret = 0;
+       struct io_failure_record *failrec;
 
        spin_lock(&tree->lock);
        /*
@@ -2137,18 +2135,19 @@ int get_state_failrec(struct extent_io_tree *tree, u64 start,
         */
        node = tree_search(tree, start);
        if (!node) {
-               ret = -ENOENT;
+               failrec = ERR_PTR(-ENOENT);
                goto out;
        }
        state = rb_entry(node, struct extent_state, rb_node);
        if (state->start != start) {
-               ret = -ENOENT;
+               failrec = ERR_PTR(-ENOENT);
                goto out;
        }
-       *failrec = state->failrec;
+
+       failrec = state->failrec;
 out:
        spin_unlock(&tree->lock);
-       return ret;
+       return failrec;
 }
 
 /*
@@ -2378,8 +2377,8 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
        if (!ret)
                return 0;
 
-       ret = get_state_failrec(failure_tree, start, &failrec);
-       if (ret)
+       failrec = get_state_failrec(failure_tree, start);
+       if (IS_ERR(failrec))
                return 0;
 
        BUG_ON(!failrec->this_mirror);
@@ -2451,8 +2450,8 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
        spin_unlock(&failure_tree->lock);
 }
 
-int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
-               struct io_failure_record **failrec_ret)
+static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
+                                                            u64 start, u64 end)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct io_failure_record *failrec;
@@ -2463,65 +2462,8 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
        int ret;
        u64 logical;
 
-       ret = get_state_failrec(failure_tree, start, &failrec);
-       if (ret) {
-               failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
-               if (!failrec)
-                       return -ENOMEM;
-
-               failrec->start = start;
-               failrec->len = end - start + 1;
-               failrec->this_mirror = 0;
-               failrec->bio_flags = 0;
-               failrec->in_validation = 0;
-
-               read_lock(&em_tree->lock);
-               em = lookup_extent_mapping(em_tree, start, failrec->len);
-               if (!em) {
-                       read_unlock(&em_tree->lock);
-                       kfree(failrec);
-                       return -EIO;
-               }
-
-               if (em->start > start || em->start + em->len <= start) {
-                       free_extent_map(em);
-                       em = NULL;
-               }
-               read_unlock(&em_tree->lock);
-               if (!em) {
-                       kfree(failrec);
-                       return -EIO;
-               }
-
-               logical = start - em->start;
-               logical = em->block_start + logical;
-               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
-                       logical = em->block_start;
-                       failrec->bio_flags = EXTENT_BIO_COMPRESSED;
-                       extent_set_compress_type(&failrec->bio_flags,
-                                                em->compress_type);
-               }
-
-               btrfs_debug(fs_info,
-                       "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
-                       logical, start, failrec->len);
-
-               failrec->logical = logical;
-               free_extent_map(em);
-
-               /* set the bits in the private failure tree */
-               ret = set_extent_bits(failure_tree, start, end,
-                                       EXTENT_LOCKED | EXTENT_DIRTY);
-               if (ret >= 0)
-                       ret = set_state_failrec(failure_tree, start, failrec);
-               /* set the bits in the inode's tree */
-               if (ret >= 0)
-                       ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
-               if (ret < 0) {
-                       kfree(failrec);
-                       return ret;
-               }
-       } else {
+       failrec = get_state_failrec(failure_tree, start);
+       if (!IS_ERR(failrec)) {
                btrfs_debug(fs_info,
                        "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
                        failrec->logical, failrec->start, failrec->len,
@@ -2531,11 +2473,66 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                 * (e.g. with a list for failed_mirror) to make
                 * clean_io_failure() clean all those errors at once.
                 */
+
+               return failrec;
        }
 
-       *failrec_ret = failrec;
+       failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
+       if (!failrec)
+               return ERR_PTR(-ENOMEM);
 
-       return 0;
+       failrec->start = start;
+       failrec->len = end - start + 1;
+       failrec->this_mirror = 0;
+       failrec->bio_flags = 0;
+       failrec->in_validation = 0;
+
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, start, failrec->len);
+       if (!em) {
+               read_unlock(&em_tree->lock);
+               kfree(failrec);
+               return ERR_PTR(-EIO);
+       }
+
+       if (em->start > start || em->start + em->len <= start) {
+               free_extent_map(em);
+               em = NULL;
+       }
+       read_unlock(&em_tree->lock);
+       if (!em) {
+               kfree(failrec);
+               return ERR_PTR(-EIO);
+       }
+
+       logical = start - em->start;
+       logical = em->block_start + logical;
+       if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+               logical = em->block_start;
+               failrec->bio_flags = EXTENT_BIO_COMPRESSED;
+               extent_set_compress_type(&failrec->bio_flags, em->compress_type);
+       }
+
+       btrfs_debug(fs_info,
+                   "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
+                   logical, start, failrec->len);
+
+       failrec->logical = logical;
+       free_extent_map(em);
+
+       /* Set the bits in the private failure tree; stop at the first error */
+       ret = set_extent_bits(failure_tree, start, end,
+                             EXTENT_LOCKED | EXTENT_DIRTY);
+       if (ret >= 0)
+               ret = set_state_failrec(failure_tree, start, failrec);
+       if (ret >= 0)   /* Set the bits in the inode's tree */
+               ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
+       if (ret < 0) {  /* any step failed: drop the new record, report it */
+               kfree(failrec);
+               return ERR_PTR(ret);
+       }
+
+       return failrec;
 }
 
 static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
@@ -2660,16 +2657,15 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
        struct bio *repair_bio;
        struct btrfs_io_bio *repair_io_bio;
        blk_status_t status;
-       int ret;
 
        btrfs_debug(fs_info,
                   "repair read error: read error at %llu", start);
 
        BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
-       ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
-       if (ret)
-               return errno_to_blk_status(ret);
+       failrec = btrfs_get_io_failure_record(inode, start, end);
+       if (IS_ERR(failrec))
+               return errno_to_blk_status(PTR_ERR(failrec));
 
        need_validation = btrfs_io_needs_validation(inode, failed_bio);
 
@@ -3420,7 +3416,7 @@ static void update_nr_written(struct writeback_control *wbc,
  * This returns 0 if all went well (page still locked)
  * This returns < 0 if there were errors (page still locked)
  */
-static noinline_for_stack int writepage_delalloc(struct inode *inode,
+static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                struct page *page, struct writeback_control *wbc,
                u64 delalloc_start, unsigned long *nr_written)
 {
@@ -3433,7 +3429,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
 
 
        while (delalloc_end < page_end) {
-               found = find_lock_delalloc_range(inode, page,
+               found = find_lock_delalloc_range(&inode->vfs_inode, page,
                                               &delalloc_start,
                                               &delalloc_end);
                if (!found) {
@@ -3450,8 +3446,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
                         * started, so we don't want to return > 0 unless
                         * things are going well.
                         */
-                       ret = ret < 0 ? ret : -EIO;
-                       goto done;
+                       return ret < 0 ? ret : -EIO;
                }
                /*
                 * delalloc_end is already one less than the total length, so
@@ -3483,10 +3478,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
                return 1;
        }
 
-       ret = 0;
-
-done:
-       return ret;
+       return 0;
 }
 
 /*
@@ -3497,7 +3489,7 @@ done:
  * 0 if all went well (page still locked)
  * < 0 if there were errors (page still locked)
  */
-static noinline_for_stack int __extent_writepage_io(struct inode *inode,
+static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
                                 struct page *page,
                                 struct writeback_control *wbc,
                                 struct extent_page_data *epd,
@@ -3505,7 +3497,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                                 unsigned long nr_written,
                                 int *nr_ret)
 {
-       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+       struct extent_io_tree *tree = &inode->io_tree;
        u64 start = page_offset(page);
        u64 page_end = start + PAGE_SIZE - 1;
        u64 end;
@@ -3537,7 +3529,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
        update_nr_written(wbc, nr_written + 1);
 
        end = page_end;
-       blocksize = inode->i_sb->s_blocksize;
+       blocksize = inode->vfs_inode.i_sb->s_blocksize;
 
        while (cur <= end) {
                u64 em_end;
@@ -3548,8 +3540,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                                                             page_end, 1);
                        break;
                }
-               em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur,
-                                     end - cur + 1);
+               em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
                if (IS_ERR_OR_NULL(em)) {
                        SetPageError(page);
                        ret = PTR_ERR_OR_ZERO(em);
@@ -3586,7 +3577,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 
                btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
                if (!PageWriteback(page)) {
-                       btrfs_err(BTRFS_I(inode)->root->fs_info,
+                       btrfs_err(inode->root->fs_info,
                                   "page %lu not writeback, cur %llu end %llu",
                               page->index, cur, end);
                }
@@ -3659,15 +3650,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        set_page_extent_mapped(page);
 
        if (!epd->extent_locked) {
-               ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
+               ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
+                                        &nr_written);
                if (ret == 1)
                        return 0;
                if (ret)
                        goto done;
        }
 
-       ret = __extent_writepage_io(inode, page, wbc, epd,
-                                   i_size, nr_written, &nr);
+       ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size,
+                                   nr_written, &nr);
        if (ret == 1)
                return 0;
 
@@ -4127,7 +4119,7 @@ retry:
        if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                ret = flush_write_bio(&epd);
        } else {
-               ret = -EUCLEAN;
+               ret = -EROFS;
                end_write_bio(&epd, ret);
        }
        return ret;
@@ -4489,6 +4481,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
            page->mapping->host->i_size > SZ_16M) {
                u64 len;
                while (start <= end) {
+                       struct btrfs_fs_info *fs_info;
+                       u64 cur_gen;
+
                        len = end - start + 1;
                        write_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
@@ -4502,20 +4497,52 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
                                free_extent_map(em);
                                break;
                        }
-                       if (!test_range_bit(tree, em->start,
-                                           extent_map_end(em) - 1,
-                                           EXTENT_LOCKED, 0, NULL)) {
-                               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                                       &btrfs_inode->runtime_flags);
-                               remove_extent_mapping(map, em);
-                               /* once for the rb tree */
-                               free_extent_map(em);
-                       }
+                       if (test_range_bit(tree, em->start,
+                                          extent_map_end(em) - 1,
+                                          EXTENT_LOCKED, 0, NULL))
+                               goto next;
+                       /*
+                        * If it's not in the list of modified extents, used
+                        * by a fast fsync, we can remove it. If it's being
+                        * logged we can safely remove it since fsync took an
+                        * extra reference on the em.
+                        */
+                       if (list_empty(&em->list) ||
+                           test_bit(EXTENT_FLAG_LOGGING, &em->flags))
+                               goto remove_em;
+                       /*
+                        * If it's in the list of modified extents, remove it
+                        * only if its generation is older than the current one,
+                        * in which case we don't need it for a fast fsync.
+                        * Otherwise don't remove it, we could be racing with an
+                        * ongoing fast fsync that could miss the new extent.
+                        */
+                       fs_info = btrfs_inode->root->fs_info;
+                       spin_lock(&fs_info->trans_lock);
+                       cur_gen = fs_info->generation;
+                       spin_unlock(&fs_info->trans_lock);
+                       if (em->generation >= cur_gen)
+                               goto next;
+remove_em:
+                       /*
+                        * We only remove extent maps that are not in the list of
+                        * modified extents or that are in the list but with a
+                        * generation lower than the current generation, so there
+                        * is no need to set the full fsync flag on the inode (it
+                        * hurts the fsync performance for workloads with a data
+                        * size that exceeds or is close to the system's memory).
+                        */
+                       remove_extent_mapping(map, em);
+                       /* once for the rb tree */
+                       free_extent_map(em);
+next:
                        start = extent_map_end(em);
                        write_unlock(&map->lock);
 
                        /* once for us */
                        free_extent_map(em);
+
+                       cond_resched(); /* Allow large-extent preemption. */
                }
        }
        return try_release_extent_state(tree, page, mask);
@@ -4670,7 +4697,7 @@ static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
 }
 
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-               __u64 start, __u64 len)
+                 u64 start, u64 len)
 {
        int ret = 0;
        u64 off = start;
index 87f60a4..00a88f2 100644 (file)
@@ -204,7 +204,7 @@ int btree_write_cache_pages(struct address_space *mapping,
                            struct writeback_control *wbc);
 void extent_readahead(struct readahead_control *rac);
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-               __u64 start, __u64 len);
+                 u64 start, u64 len);
 void set_page_extent_mapped(struct page *page);
 
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
@@ -277,7 +277,7 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb);
 int extent_buffer_under_io(const struct extent_buffer *eb);
 void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
 void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
-void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                                  struct page *locked_page,
                                  unsigned bits_to_clear,
                                  unsigned long page_ops);
index 706a312..7d5ec71 100644 (file)
@@ -522,10 +522,10 @@ fail:
  *              means this bio can contain potentially discontiguous bio vecs
  *              so the logical offset of each should be calculated separately.
  */
-blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
                       u64 file_start, int contig)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        struct btrfs_ordered_sum *sums;
        struct btrfs_ordered_extent *ordered = NULL;
index b0d2c97..bb824c7 100644 (file)
@@ -500,18 +500,18 @@ next:
  * this also makes the decision about creating an inline extent vs
  * doing real data extents, marking pages dirty and delalloc as required.
  */
-int btrfs_dirty_pages(struct inode *inode, struct page **pages,
+int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
                      size_t num_pages, loff_t pos, size_t write_bytes,
                      struct extent_state **cached)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        int err = 0;
        int i;
        u64 num_bytes;
        u64 start_pos;
        u64 end_of_last_block;
        u64 end_pos = pos + write_bytes;
-       loff_t isize = i_size_read(inode);
+       loff_t isize = i_size_read(&inode->vfs_inode);
        unsigned int extra_bits = 0;
 
        start_pos = pos & ~((u64) fs_info->sectorsize - 1);
@@ -524,13 +524,13 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
         * The pages may have already been dirty, clear out old accounting so
         * we can set things up properly
         */
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block,
+       clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block,
                         EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                         0, 0, cached);
 
-       if (!btrfs_is_free_space_inode(BTRFS_I(inode))) {
+       if (!btrfs_is_free_space_inode(inode)) {
                if (start_pos >= isize &&
-                   !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) {
+                   !(inode->flags & BTRFS_INODE_PREALLOC)) {
                        /*
                         * There can't be any extents following eof in this case
                         * so just set the delalloc new bit for the range
@@ -538,8 +538,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
                         */
                        extra_bits |= EXTENT_DELALLOC_NEW;
                } else {
-                       err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode),
-                                                           start_pos,
+                       err = btrfs_find_new_delalloc_bytes(inode, start_pos,
                                                            num_bytes, cached);
                        if (err)
                                return err;
@@ -564,7 +563,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
         * at this time.
         */
        if (end_pos > isize)
-               i_size_write(inode, end_pos);
+               i_size_write(&inode->vfs_inode, end_pos);
        return 0;
 }
 
@@ -731,7 +730,7 @@ next:
  * is deleted from the tree.
  */
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
-                        struct btrfs_root *root, struct inode *inode,
+                        struct btrfs_root *root, struct btrfs_inode *inode,
                         struct btrfs_path *path, u64 start, u64 end,
                         u64 *drop_end, int drop_cache,
                         int replace_extent,
@@ -744,7 +743,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
        struct btrfs_ref ref = { 0 };
        struct btrfs_key key;
        struct btrfs_key new_key;
-       u64 ino = btrfs_ino(BTRFS_I(inode));
+       struct inode *vfs_inode = &inode->vfs_inode;
+       u64 ino = btrfs_ino(inode);
        u64 search_start = start;
        u64 disk_bytenr = 0;
        u64 num_bytes = 0;
@@ -762,9 +762,9 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int leafs_visited = 0;
 
        if (drop_cache)
-               btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0);
+               btrfs_drop_extent_cache(inode, start, end - 1, 0);
 
-       if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
+       if (start >= inode->disk_i_size && !replace_extent)
                modify_tree = 0;
 
        update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
@@ -935,7 +935,7 @@ next_slot:
                                                        extent_end - end);
                        btrfs_mark_buffer_dirty(leaf);
                        if (update_refs && disk_bytenr > 0)
-                               inode_sub_bytes(inode, end - key.offset);
+                               inode_sub_bytes(vfs_inode, end - key.offset);
                        break;
                }
 
@@ -955,7 +955,7 @@ next_slot:
                                                        start - key.offset);
                        btrfs_mark_buffer_dirty(leaf);
                        if (update_refs && disk_bytenr > 0)
-                               inode_sub_bytes(inode, extent_end - start);
+                               inode_sub_bytes(vfs_inode, extent_end - start);
                        if (end == extent_end)
                                break;
 
@@ -979,7 +979,7 @@ delete_extent_item:
 
                        if (update_refs &&
                            extent_type == BTRFS_FILE_EXTENT_INLINE) {
-                               inode_sub_bytes(inode,
+                               inode_sub_bytes(vfs_inode,
                                                extent_end - key.offset);
                                extent_end = ALIGN(extent_end,
                                                   fs_info->sectorsize);
@@ -993,7 +993,7 @@ delete_extent_item:
                                                key.offset - extent_offset);
                                ret = btrfs_free_extent(trans, &ref);
                                BUG_ON(ret); /* -ENOMEM */
-                               inode_sub_bytes(inode,
+                               inode_sub_bytes(vfs_inode,
                                                extent_end - key.offset);
                        }
 
@@ -1082,8 +1082,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
-                                  drop_cache, 0, 0, NULL);
+       ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start,
+                                  end, NULL, drop_cache, 0, 0, NULL);
        btrfs_free_path(path);
        return ret;
 }
@@ -1532,8 +1532,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
        return ret;
 }
 
-static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
-                                   size_t *write_bytes, bool nowait)
+static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
+                          size_t *write_bytes, bool nowait)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_root *root = inode->root;
@@ -1541,6 +1541,9 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
        u64 num_bytes;
        int ret;
 
+       if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
+               return 0;
+
        if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
                return -EAGAIN;
 
@@ -1583,6 +1586,42 @@ out_unlock:
        return ret;
 }
 
+static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
+                             size_t *write_bytes)
+{
+       return check_can_nocow(inode, pos, write_bytes, true);
+}
+
+/*
+ * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
+ *
+ * @pos:        File offset
+ * @write_bytes: The length to write, will be updated to the nocow writeable
+ *              range
+ *
+ * This function will flush ordered extents in the range to ensure proper
+ * nocow checks.
+ *
+ * Return:
+ * >0          and update @write_bytes if we can do nocow write
+ *  0          if we can't do nocow write
+ * -EAGAIN     if we can't get the needed lock or there are ordered extents
+ *             for the (nowait == true) case
+ * <0          if other error happened
+ *
+ * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
+ */
+int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
+                          size_t *write_bytes)
+{
+       return check_can_nocow(inode, pos, write_bytes, false);
+}
+
+void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
+{
+       btrfs_drew_write_unlock(&inode->root->snapshot_lock);
+}
+
 static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                               struct iov_iter *i)
 {
@@ -1590,7 +1629,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
        loff_t pos = iocb->ki_pos;
        struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
        struct page **pages = NULL;
        struct extent_changeset *data_reserved = NULL;
        u64 release_bytes = 0;
@@ -1643,13 +1681,12 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                fs_info->sectorsize);
 
                extent_changeset_release(data_reserved);
-               ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
+               ret = btrfs_check_data_free_space(BTRFS_I(inode),
+                                                 &data_reserved, pos,
                                                  write_bytes);
                if (ret < 0) {
-                       if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
-                                                     BTRFS_INODE_PREALLOC)) &&
-                           check_can_nocow(BTRFS_I(inode), pos,
-                                           &write_bytes, false) > 0) {
+                       if (btrfs_check_nocow_lock(BTRFS_I(inode), pos,
+                                                  &write_bytes) > 0) {
                                /*
                                 * For nodata cow case, no need to reserve
                                 * data space.
@@ -1674,11 +1711,11 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                reserve_bytes);
                if (ret) {
                        if (!only_release_metadata)
-                               btrfs_free_reserved_data_space(inode,
+                               btrfs_free_reserved_data_space(BTRFS_I(inode),
                                                data_reserved, pos,
                                                write_bytes);
                        else
-                               btrfs_drew_write_unlock(&root->snapshot_lock);
+                               btrfs_check_nocow_unlock(BTRFS_I(inode));
                        break;
                }
 
@@ -1748,7 +1785,7 @@ again:
                                __pos = round_down(pos,
                                                   fs_info->sectorsize) +
                                        (dirty_pages << PAGE_SHIFT);
-                               btrfs_delalloc_release_space(inode,
+                               btrfs_delalloc_release_space(BTRFS_I(inode),
                                                data_reserved, __pos,
                                                release_bytes, true);
                        }
@@ -1758,8 +1795,9 @@ again:
                                        fs_info->sectorsize);
 
                if (copied > 0)
-                       ret = btrfs_dirty_pages(inode, pages, dirty_pages,
-                                               pos, copied, &cached_state);
+                       ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
+                                               dirty_pages, pos, copied,
+                                               &cached_state);
 
                /*
                 * If we have not locked the extent range, because the range's
@@ -1782,7 +1820,7 @@ again:
 
                release_bytes = 0;
                if (only_release_metadata)
-                       btrfs_drew_write_unlock(&root->snapshot_lock);
+                       btrfs_check_nocow_unlock(BTRFS_I(inode));
 
                if (only_release_metadata && copied > 0) {
                        lockstart = round_down(pos,
@@ -1800,8 +1838,6 @@ again:
                cond_resched();
 
                balance_dirty_pages_ratelimited(inode->i_mapping);
-               if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1)
-                       btrfs_btree_balance_dirty(fs_info);
 
                pos += copied;
                num_written += copied;
@@ -1811,11 +1847,12 @@ again:
 
        if (release_bytes) {
                if (only_release_metadata) {
-                       btrfs_drew_write_unlock(&root->snapshot_lock);
+                       btrfs_check_nocow_unlock(BTRFS_I(inode));
                        btrfs_delalloc_release_metadata(BTRFS_I(inode),
                                        release_bytes, true);
                } else {
-                       btrfs_delalloc_release_space(inode, data_reserved,
+                       btrfs_delalloc_release_space(BTRFS_I(inode),
+                                       data_reserved,
                                        round_down(pos, fs_info->sectorsize),
                                        release_bytes, true);
                }
@@ -1926,10 +1963,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
                 * We will allocate space in case nodatacow is not set,
                 * so bail
                 */
-               if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
-                                             BTRFS_INODE_PREALLOC)) ||
-                   check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes,
-                                   true) <= 0) {
+               if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes)
+                   <= 0) {
                        inode_unlock(inode);
                        return -EAGAIN;
                }
@@ -2598,7 +2633,7 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
 
        cur_offset = start;
        while (cur_offset < end) {
-               ret = __btrfs_drop_extents(trans, root, inode, path,
+               ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path,
                                           cur_offset, end + 1, &drop_end,
                                           1, 0, 0, NULL);
                if (ret != -ENOSPC) {
@@ -3176,14 +3211,14 @@ reserve_space:
                if (ret < 0)
                        goto out;
                space_reserved = true;
-               ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
-                                               alloc_start, bytes_to_reserve);
-               if (ret)
-                       goto out;
                ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
                                                  &cached_state);
                if (ret)
                        goto out;
+               ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
+                                               alloc_start, bytes_to_reserve);
+               if (ret)
+                       goto out;
                ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
                                                alloc_end - alloc_start,
                                                i_blocksize(inode),
@@ -3199,7 +3234,7 @@ reserve_space:
        ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
  out:
        if (ret && space_reserved)
-               btrfs_free_reserved_data_space(inode, data_reserved,
+               btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
                                               alloc_start, bytes_to_reserve);
        extent_changeset_free(data_reserved);
 
@@ -3350,8 +3385,9 @@ static long btrfs_fallocate(struct file *file, int mode,
                                free_extent_map(em);
                                break;
                        }
-                       ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
-                                       cur_offset, last_byte - cur_offset);
+                       ret = btrfs_qgroup_reserve_data(BTRFS_I(inode),
+                                       &data_reserved, cur_offset,
+                                       last_byte - cur_offset);
                        if (ret < 0) {
                                cur_offset = last_byte;
                                free_extent_map(em);
@@ -3363,8 +3399,9 @@ static long btrfs_fallocate(struct file *file, int mode,
                         * range, free reserved data space first, otherwise
                         * it'll result in false ENOSPC error.
                         */
-                       btrfs_free_reserved_data_space(inode, data_reserved,
-                                       cur_offset, last_byte - cur_offset);
+                       btrfs_free_reserved_data_space(BTRFS_I(inode),
+                               data_reserved, cur_offset,
+                               last_byte - cur_offset);
                }
                free_extent_map(em);
                cur_offset = last_byte;
@@ -3381,7 +3418,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                                        range->len, i_blocksize(inode),
                                        offset + len, &alloc_hint);
                else
-                       btrfs_free_reserved_data_space(inode,
+                       btrfs_free_reserved_data_space(BTRFS_I(inode),
                                        data_reserved, range->start,
                                        range->len);
                list_del(&range->list);
@@ -3402,7 +3439,7 @@ out:
        inode_unlock(inode);
        /* Let go of our reservation. */
        if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
-               btrfs_free_reserved_data_space(inode, data_reserved,
+               btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
                                cur_offset, alloc_end - cur_offset);
        extent_changeset_free(data_reserved);
        return ret;
@@ -3500,7 +3537,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
 
 static int btrfs_file_open(struct inode *inode, struct file *filp)
 {
-       filp->f_mode |= FMODE_NOWAIT;
+       filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
        return generic_file_open(inode, filp);
 }
 
index 55955bd..6d961e1 100644 (file)
@@ -1334,8 +1334,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        io_ctl_zero_remaining_pages(io_ctl);
 
        /* Everything is written out, now we dirty the pages in the file. */
-       ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
-                               i_size_read(inode), &cached_state);
+       ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
+                               io_ctl->num_pages, 0, i_size_read(inode),
+                               &cached_state);
        if (ret)
                goto out_nospc;
 
@@ -2703,8 +2704,7 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
  * pointed to by the cluster, someone else raced in and freed the
  * cluster already.  In that case, we just return without changing anything
  */
-static int
-__btrfs_return_cluster_to_free_space(
+static void __btrfs_return_cluster_to_free_space(
                             struct btrfs_block_group *block_group,
                             struct btrfs_free_cluster *cluster)
 {
@@ -2756,7 +2756,6 @@ __btrfs_return_cluster_to_free_space(
 out:
        spin_unlock(&cluster->lock);
        btrfs_put_block_group(block_group);
-       return 0;
 }
 
 static void __btrfs_remove_free_space_cache_locked(
@@ -2907,12 +2906,11 @@ out:
  * Otherwise, it'll get a reference on the block group pointed to by the
  * cluster and remove the cluster from it.
  */
-int btrfs_return_cluster_to_free_space(
+void btrfs_return_cluster_to_free_space(
                               struct btrfs_block_group *block_group,
                               struct btrfs_free_cluster *cluster)
 {
        struct btrfs_free_space_ctl *ctl;
-       int ret;
 
        /* first, get a safe pointer to the block group */
        spin_lock(&cluster->lock);
@@ -2920,28 +2918,27 @@ int btrfs_return_cluster_to_free_space(
                block_group = cluster->block_group;
                if (!block_group) {
                        spin_unlock(&cluster->lock);
-                       return 0;
+                       return;
                }
        } else if (cluster->block_group != block_group) {
                /* someone else has already freed it don't redo their work */
                spin_unlock(&cluster->lock);
-               return 0;
+               return;
        }
-       atomic_inc(&block_group->count);
+       btrfs_get_block_group(block_group);
        spin_unlock(&cluster->lock);
 
        ctl = block_group->free_space_ctl;
 
        /* now return any extents the cluster had on it */
        spin_lock(&ctl->tree_lock);
-       ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
+       __btrfs_return_cluster_to_free_space(block_group, cluster);
        spin_unlock(&ctl->tree_lock);
 
        btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
 
        /* finally drop our ref */
        btrfs_put_block_group(block_group);
-       return ret;
 }
 
 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
@@ -3358,7 +3355,7 @@ int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
                list_del_init(&entry->list);
 
        if (!ret) {
-               atomic_inc(&block_group->count);
+               btrfs_get_block_group(block_group);
                list_add_tail(&cluster->block_group_list,
                              &block_group->cluster_list);
                cluster->block_group = block_group;
index 2e0a807..e3d5e0a 100644 (file)
@@ -136,7 +136,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
                             struct btrfs_free_cluster *cluster, u64 bytes,
                             u64 min_start, u64 *max_extent_size);
-int btrfs_return_cluster_to_free_space(
+void btrfs_return_cluster_to_free_space(
                               struct btrfs_block_group *block_group,
                               struct btrfs_free_cluster *cluster);
 int btrfs_trim_block_group(struct btrfs_block_group *block_group,
index 6009e0e..76d2e43 100644 (file)
@@ -495,7 +495,8 @@ again:
        /* Just to make sure we have enough space */
        prealloc += 8 * PAGE_SIZE;
 
-       ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc);
+       ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, 0,
+                                          prealloc);
        if (ret)
                goto out_put;
 
index 6862cd7..611b341 100644 (file)
@@ -80,17 +80,17 @@ struct kmem_cache *btrfs_free_space_bitmap_cachep;
 static int btrfs_setsize(struct inode *inode, struct iattr *attr);
 static int btrfs_truncate(struct inode *inode, bool skip_writeback);
 static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
-static noinline int cow_file_range(struct inode *inode,
+static noinline int cow_file_range(struct btrfs_inode *inode,
                                   struct page *locked_page,
                                   u64 start, u64 end, int *page_started,
                                   unsigned long *nr_written, int unlock);
-static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
-                                      u64 orig_start, u64 block_start,
+static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
+                                      u64 len, u64 orig_start, u64 block_start,
                                       u64 block_len, u64 orig_block_len,
                                       u64 ram_bytes, int compress_type,
                                       int type);
 
-static void __endio_write_update_ordered(struct inode *inode,
+static void __endio_write_update_ordered(struct btrfs_inode *inode,
                                         const u64 offset, const u64 bytes,
                                         const bool uptodate);
 
@@ -104,7 +104,7 @@ static void __endio_write_update_ordered(struct inode *inode,
  * to be released, which we want to happen only when finishing the ordered
  * extent (btrfs_finish_ordered_io()).
  */
-static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
+static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
                                                 struct page *locked_page,
                                                 u64 offset, u64 bytes)
 {
@@ -116,7 +116,7 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
        struct page *page;
 
        while (index <= end_index) {
-               page = find_get_page(inode->i_mapping, index);
+               page = find_get_page(inode->vfs_inode.i_mapping, index);
                index++;
                if (!page)
                        continue;
@@ -274,15 +274,15 @@ fail:
  * does the checks required to make sure the data is small enough
  * to fit as an inline extent.
  */
-static noinline int cow_file_range_inline(struct inode *inode, u64 start,
+static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
                                          u64 end, size_t compressed_size,
                                          int compress_type,
                                          struct page **compressed_pages)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_root *root = inode->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_trans_handle *trans;
-       u64 isize = i_size_read(inode);
+       u64 isize = i_size_read(&inode->vfs_inode);
        u64 actual_end = min(end + 1, isize);
        u64 inline_len = actual_end - start;
        u64 aligned_end = ALIGN(end, fs_info->sectorsize);
@@ -314,7 +314,7 @@ static noinline int cow_file_range_inline(struct inode *inode, u64 start,
                btrfs_free_path(path);
                return PTR_ERR(trans);
        }
-       trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+       trans->block_rsv = &inode->block_rsv;
 
        if (compressed_size && compressed_pages)
                extent_item_size = btrfs_file_extent_calc_inline_size(
@@ -323,9 +323,9 @@ static noinline int cow_file_range_inline(struct inode *inode, u64 start,
                extent_item_size = btrfs_file_extent_calc_inline_size(
                    inline_len);
 
-       ret = __btrfs_drop_extents(trans, root, inode, path,
-                                  start, aligned_end, NULL,
-                                  1, 1, extent_item_size, &extent_inserted);
+       ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end,
+                                  NULL, 1, 1, extent_item_size,
+                                  &extent_inserted);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out;
@@ -334,7 +334,7 @@ static noinline int cow_file_range_inline(struct inode *inode, u64 start,
        if (isize > actual_end)
                inline_len = min_t(u64, isize, actual_end);
        ret = insert_inline_extent(trans, path, extent_inserted,
-                                  root, inode, start,
+                                  root, &inode->vfs_inode, start,
                                   inline_len, compressed_size,
                                   compress_type, compressed_pages);
        if (ret && ret != -ENOSPC) {
@@ -345,8 +345,8 @@ static noinline int cow_file_range_inline(struct inode *inode, u64 start,
                goto out;
        }
 
-       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
-       btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
+       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+       btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
 out:
        /*
         * Don't forget to free the reserved space, as for inlined extent
@@ -412,10 +412,10 @@ static noinline int add_async_extent(struct async_chunk *cow,
 /*
  * Check if the inode has flags compatible with compression
  */
-static inline bool inode_can_compress(struct inode *inode)
+static inline bool inode_can_compress(struct btrfs_inode *inode)
 {
-       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
-           BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+       if (inode->flags & BTRFS_INODE_NODATACOW ||
+           inode->flags & BTRFS_INODE_NODATASUM)
                return false;
        return true;
 }
@@ -424,29 +424,30 @@ static inline bool inode_can_compress(struct inode *inode)
  * Check if the inode needs to be submitted to compression, based on mount
  * options, defragmentation, properties or heuristics.
  */
-static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
+static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
+                                     u64 end)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
 
        if (!inode_can_compress(inode)) {
                WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
                        KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
-                       btrfs_ino(BTRFS_I(inode)));
+                       btrfs_ino(inode));
                return 0;
        }
        /* force compress */
        if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
                return 1;
        /* defrag ioctl */
-       if (BTRFS_I(inode)->defrag_compress)
+       if (inode->defrag_compress)
                return 1;
        /* bad compression ratios */
-       if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
+       if (inode->flags & BTRFS_INODE_NOCOMPRESS)
                return 0;
        if (btrfs_test_opt(fs_info, COMPRESS) ||
-           BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
-           BTRFS_I(inode)->prop_compress)
-               return btrfs_compress_heuristic(inode, start, end);
+           inode->flags & BTRFS_INODE_COMPRESS ||
+           inode->prop_compress)
+               return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
        return 0;
 }
 
@@ -552,7 +553,7 @@ again:
         * inode has not been flagged as nocompress.  This flag can
         * change at any time if we discover bad compression ratios.
         */
-       if (inode_need_compress(inode, start, end)) {
+       if (inode_need_compress(BTRFS_I(inode), start, end)) {
                WARN_ON(pages);
                pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
                if (!pages) {
@@ -616,11 +617,12 @@ cont:
                        /* we didn't compress the entire range, try
                         * to make an uncompressed inline extent.
                         */
-                       ret = cow_file_range_inline(inode, start, end, 0,
-                                                   BTRFS_COMPRESS_NONE, NULL);
+                       ret = cow_file_range_inline(BTRFS_I(inode), start, end,
+                                                   0, BTRFS_COMPRESS_NONE,
+                                                   NULL);
                } else {
                        /* try making a compressed inline extent */
-                       ret = cow_file_range_inline(inode, start, end,
+                       ret = cow_file_range_inline(BTRFS_I(inode), start, end,
                                                    total_compressed,
                                                    compress_type, pages);
                }
@@ -642,7 +644,8 @@ cont:
                         * our outstanding extent for clearing delalloc for this
                         * range.
                         */
-                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                       extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
+                                                    NULL,
                                                     clear_flags,
                                                     PAGE_UNLOCK |
                                                     PAGE_CLEAR_DIRTY |
@@ -762,14 +765,14 @@ static void free_async_extent_pages(struct async_extent *async_extent)
  */
 static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 {
-       struct inode *inode = async_chunk->inode;
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct async_extent *async_extent;
        u64 alloc_hint = 0;
        struct btrfs_key ins;
        struct extent_map *em;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct btrfs_root *root = inode->root;
+       struct extent_io_tree *io_tree = &inode->io_tree;
        int ret = 0;
 
 again:
@@ -802,7 +805,7 @@ retry:
                         * all those pages down to the drive.
                         */
                        if (!page_started && !ret)
-                               extent_write_locked_range(inode,
+                               extent_write_locked_range(&inode->vfs_inode,
                                                  async_extent->start,
                                                  async_extent->start +
                                                  async_extent->ram_size - 1,
@@ -832,7 +835,7 @@ retry:
                                 * will not submit these pages down to lower
                                 * layers.
                                 */
-                               extent_range_redirty_for_io(inode,
+                               extent_range_redirty_for_io(&inode->vfs_inode,
                                                async_extent->start,
                                                async_extent->start +
                                                async_extent->ram_size - 1);
@@ -867,8 +870,7 @@ retry:
                                                BTRFS_ORDERED_COMPRESSED,
                                                async_extent->compress_type);
                if (ret) {
-                       btrfs_drop_extent_cache(BTRFS_I(inode),
-                                               async_extent->start,
+                       btrfs_drop_extent_cache(inode, async_extent->start,
                                                async_extent->start +
                                                async_extent->ram_size - 1, 0);
                        goto out_free_reserve;
@@ -884,8 +886,7 @@ retry:
                                NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
                                PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
                                PAGE_SET_WRITEBACK);
-               if (btrfs_submit_compressed_write(inode,
-                                   async_extent->start,
+               if (btrfs_submit_compressed_write(inode, async_extent->start,
                                    async_extent->ram_size,
                                    ins.objectid,
                                    ins.offset, async_extent->pages,
@@ -896,12 +897,11 @@ retry:
                        const u64 start = async_extent->start;
                        const u64 end = start + async_extent->ram_size - 1;
 
-                       p->mapping = inode->i_mapping;
+                       p->mapping = inode->vfs_inode.i_mapping;
                        btrfs_writepage_endio_finish_ordered(p, start, end, 0);
 
                        p->mapping = NULL;
-                       extent_clear_unlock_delalloc(inode, start, end,
-                                                    NULL, 0,
+                       extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
                                                     PAGE_END_WRITEBACK |
                                                     PAGE_SET_ERROR);
                        free_async_extent_pages(async_extent);
@@ -929,10 +929,10 @@ out_free:
        goto again;
 }
 
-static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
+static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
                                      u64 num_bytes)
 {
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map_tree *em_tree = &inode->extent_tree;
        struct extent_map *em;
        u64 alloc_hint = 0;
 
@@ -974,13 +974,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
  * required to start IO on it.  It may be clean and already done with
  * IO when we return.
  */
-static noinline int cow_file_range(struct inode *inode,
+static noinline int cow_file_range(struct btrfs_inode *inode,
                                   struct page *locked_page,
                                   u64 start, u64 end, int *page_started,
                                   unsigned long *nr_written, int unlock)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        u64 alloc_hint = 0;
        u64 num_bytes;
        unsigned long ram_size;
@@ -994,7 +994,7 @@ static noinline int cow_file_range(struct inode *inode,
        bool extent_reserved = false;
        int ret = 0;
 
-       if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+       if (btrfs_is_free_space_inode(inode)) {
                WARN_ON_ONCE(1);
                ret = -EINVAL;
                goto out_unlock;
@@ -1004,7 +1004,7 @@ static noinline int cow_file_range(struct inode *inode,
        num_bytes = max(blocksize,  num_bytes);
        ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
 
-       inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
+       inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
 
        if (start == 0) {
                /* lets try to make an inline extent */
@@ -1033,8 +1033,7 @@ static noinline int cow_file_range(struct inode *inode,
        }
 
        alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
-       btrfs_drop_extent_cache(BTRFS_I(inode), start,
-                       start + num_bytes - 1, 0);
+       btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 
        /*
         * Relocation relies on the relocated extents to have exactly the same
@@ -1098,7 +1097,7 @@ static noinline int cow_file_range(struct inode *inode,
                         * skip current ordered extent.
                         */
                        if (ret)
-                               btrfs_drop_extent_cache(BTRFS_I(inode), start,
+                               btrfs_drop_extent_cache(inode, start,
                                                start + ram_size - 1, 0);
                }
 
@@ -1114,8 +1113,7 @@ static noinline int cow_file_range(struct inode *inode,
                page_ops = unlock ? PAGE_UNLOCK : 0;
                page_ops |= PAGE_SET_PRIVATE2;
 
-               extent_clear_unlock_delalloc(inode, start,
-                                            start + ram_size - 1,
+               extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
                                             locked_page,
                                             EXTENT_LOCKED | EXTENT_DELALLOC,
                                             page_ops);
@@ -1139,7 +1137,7 @@ out:
        return ret;
 
 out_drop_extent_cache:
-       btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0);
+       btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
 out_reserve:
        btrfs_dec_block_group_reservations(fs_info, ins.objectid);
        btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
@@ -1236,13 +1234,13 @@ static noinline void async_cow_free(struct btrfs_work *work)
                kvfree(async_chunk->pending);
 }
 
-static int cow_file_range_async(struct inode *inode,
+static int cow_file_range_async(struct btrfs_inode *inode,
                                struct writeback_control *wbc,
                                struct page *locked_page,
                                u64 start, u64 end, int *page_started,
                                unsigned long *nr_written)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
        struct async_cow *ctx;
        struct async_chunk *async_chunk;
@@ -1254,9 +1252,9 @@ static int cow_file_range_async(struct inode *inode,
        unsigned nofs_flag;
        const unsigned int write_flags = wbc_to_write_flags(wbc);
 
-       unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
+       unlock_extent(&inode->io_tree, start, end);
 
-       if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+       if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
            !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
                num_chunks = 1;
                should_compress = false;
@@ -1294,9 +1292,9 @@ static int cow_file_range_async(struct inode *inode,
                 * igrab is called higher up in the call chain, take only the
                 * lightweight reference for the callback lifetime
                 */
-               ihold(inode);
+               ihold(&inode->vfs_inode);
                async_chunk[i].pending = &ctx->num_chunks;
-               async_chunk[i].inode = inode;
+               async_chunk[i].inode = &inode->vfs_inode;
                async_chunk[i].start = start;
                async_chunk[i].end = cur_end;
                async_chunk[i].write_flags = write_flags;
@@ -1373,15 +1371,15 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
        return 1;
 }
 
-static int fallback_to_cow(struct inode *inode, struct page *locked_page,
+static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
                           const u64 start, const u64 end,
                           int *page_started, unsigned long *nr_written)
 {
-       const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
-       const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+       const bool is_space_ino = btrfs_is_free_space_inode(inode);
+       const bool is_reloc_ino = (inode->root->root_key.objectid ==
                                   BTRFS_DATA_RELOC_TREE_OBJECTID);
        const u64 range_bytes = end + 1 - start;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_io_tree *io_tree = &inode->io_tree;
        u64 range_start = start;
        u64 count;
 
@@ -1421,7 +1419,7 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
                                 EXTENT_NORESERVE, 0);
        if (count > 0 || is_space_ino || is_reloc_ino) {
                u64 bytes = count;
-               struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+               struct btrfs_fs_info *fs_info = inode->root->fs_info;
                struct btrfs_space_info *sinfo = fs_info->data_sinfo;
 
                if (is_space_ino || is_reloc_ino)
@@ -1447,21 +1445,21 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
  * If no cow copies or snapshots exist, we write directly to the existing
  * blocks on disk
  */
-static noinline int run_delalloc_nocow(struct inode *inode,
+static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
                                       struct page *locked_page,
                                       const u64 start, const u64 end,
                                       int *page_started, int force,
                                       unsigned long *nr_written)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       struct btrfs_root *root = inode->root;
        struct btrfs_path *path;
        u64 cow_start = (u64)-1;
        u64 cur_offset = start;
        int ret;
        bool check_prev = true;
-       const bool freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
-       u64 ino = btrfs_ino(BTRFS_I(inode));
+       const bool freespace_inode = btrfs_is_free_space_inode(inode);
+       u64 ino = btrfs_ino(inode);
        bool nocow = false;
        u64 disk_bytenr = 0;
 
@@ -1687,8 +1685,8 @@ out_check:
                 * NOCOW, following one which needs to be COW'ed
                 */
                if (cow_start != (u64)-1) {
-                       ret = fallback_to_cow(inode, locked_page, cow_start,
-                                             found_key.offset - 1,
+                       ret = fallback_to_cow(inode, locked_page,
+                                             cow_start, found_key.offset - 1,
                                              page_started, nr_written);
                        if (ret)
                                goto error;
@@ -1716,8 +1714,7 @@ out_check:
                                                       num_bytes,
                                                       BTRFS_ORDERED_PREALLOC);
                        if (ret) {
-                               btrfs_drop_extent_cache(BTRFS_I(inode),
-                                                       cur_offset,
+                               btrfs_drop_extent_cache(inode, cur_offset,
                                                        cur_offset + num_bytes - 1,
                                                        0);
                                goto error;
@@ -1793,11 +1790,11 @@ error:
        return ret;
 }
 
-static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
+static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end)
 {
 
-       if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
-           !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
+       if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
+           !(inode->flags & BTRFS_INODE_PREALLOC))
                return 0;
 
        /*
@@ -1805,9 +1802,8 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
         * if is not zero, it means the file is defragging.
         * Force cow if given extent needs to be defragged.
         */
-       if (BTRFS_I(inode)->defrag_bytes &&
-           test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
-                          EXTENT_DEFRAG, 0, NULL))
+       if (inode->defrag_bytes &&
+           test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL))
                return 1;
 
        return 0;
@@ -1817,26 +1813,25 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
  * Function to process delayed allocation (create CoW) for ranges which are
  * being touched for the first time.
  */
-int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
+int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
                u64 start, u64 end, int *page_started, unsigned long *nr_written,
                struct writeback_control *wbc)
 {
        int ret;
        int force_cow = need_force_cow(inode, start, end);
 
-       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
+       if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 1, nr_written);
-       } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
+       } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 0, nr_written);
        } else if (!inode_can_compress(inode) ||
                   !inode_need_compress(inode, start, end)) {
                ret = cow_file_range(inode, locked_page, start, end,
-                                     page_started, nr_written, 1);
+                                    page_started, nr_written, 1);
        } else {
-               set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-                       &BTRFS_I(inode)->runtime_flags);
+               set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
                ret = cow_file_range_async(inode, wbc, locked_page, start, end,
                                           page_started, nr_written);
        }
@@ -2085,9 +2080,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
                    do_list && !(state->state & EXTENT_NORESERVE) &&
                    (*bits & EXTENT_CLEAR_DATA_RESV))
-                       btrfs_free_reserved_data_space_noquota(
-                                       &inode->vfs_inode,
-                                       state->start, len);
+                       btrfs_free_reserved_data_space_noquota(fs_info, len);
 
                percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
                                         fs_info->delalloc_batch);
@@ -2163,7 +2156,7 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
        struct inode *inode = private_data;
        blk_status_t ret = 0;
 
-       ret = btrfs_csum_one_bio(inode, bio, 0, 0);
+       ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
        BUG_ON(ret); /* -ENOMEM */
        return 0;
 }
@@ -2228,7 +2221,7 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
                                          0, inode, btrfs_submit_bio_start);
                goto out;
        } else if (!skip_sum) {
-               ret = btrfs_csum_one_bio(inode, bio, 0, 0);
+               ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
                if (ret)
                        goto out;
        }
@@ -2265,13 +2258,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
+int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                              unsigned int extra_bits,
                              struct extent_state **cached_state)
 {
        WARN_ON(PAGE_ALIGNED(end));
-       return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
-                                  extra_bits, cached_state);
+       return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
+                                  cached_state);
 }
 
 /* see btrfs_writepage_start_hook for details on why this is required */
@@ -2288,7 +2281,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
        struct extent_state *cached_state = NULL;
        struct extent_changeset *data_reserved = NULL;
        struct page *page;
-       struct inode *inode;
+       struct btrfs_inode *inode;
        u64 page_start;
        u64 page_end;
        int ret = 0;
@@ -2296,7 +2289,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 
        fixup = container_of(work, struct btrfs_writepage_fixup, work);
        page = fixup->page;
-       inode = fixup->inode;
+       inode = BTRFS_I(fixup->inode);
        page_start = page_offset(page);
        page_end = page_offset(page) + PAGE_SIZE - 1;
 
@@ -2333,8 +2326,7 @@ again:
                 *    when the page was already properly dealt with.
                 */
                if (!ret) {
-                       btrfs_delalloc_release_extents(BTRFS_I(inode),
-                                                      PAGE_SIZE);
+                       btrfs_delalloc_release_extents(inode, PAGE_SIZE);
                        btrfs_delalloc_release_space(inode, data_reserved,
                                                     page_start, PAGE_SIZE,
                                                     true);
@@ -2350,20 +2342,18 @@ again:
        if (ret)
                goto out_page;
 
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                        &cached_state);
+       lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
 
        /* already ordered? We're done */
        if (PagePrivate2(page))
                goto out_reserved;
 
-       ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
-                                       PAGE_SIZE);
+       ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
        if (ordered) {
-               unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
-                                    page_end, &cached_state);
+               unlock_extent_cached(&inode->io_tree, page_start, page_end,
+                                    &cached_state);
                unlock_page(page);
-               btrfs_start_ordered_extent(inode, ordered, 1);
+               btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
                btrfs_put_ordered_extent(ordered);
                goto again;
        }
@@ -2383,11 +2373,11 @@ again:
        BUG_ON(!PageDirty(page));
        free_delalloc_space = false;
 out_reserved:
-       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+       btrfs_delalloc_release_extents(inode, PAGE_SIZE);
        if (free_delalloc_space)
                btrfs_delalloc_release_space(inode, data_reserved, page_start,
                                             PAGE_SIZE, true);
-       unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
+       unlock_extent_cached(&inode->io_tree, page_start, page_end,
                             &cached_state);
 out_page:
        if (ret) {
@@ -2410,7 +2400,7 @@ out_page:
         * that could need flushing space. Recursing back to fixup worker would
         * deadlock.
         */
-       btrfs_add_delayed_iput(inode);
+       btrfs_add_delayed_iput(&inode->vfs_inode);
 }
 
 /*
@@ -2466,18 +2456,18 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
 }
 
 static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
-                                      struct inode *inode, u64 file_pos,
-                                      u64 disk_bytenr, u64 disk_num_bytes,
-                                      u64 num_bytes, u64 ram_bytes,
-                                      u8 compression, u8 encryption,
-                                      u16 other_encoding, int extent_type)
+                                      struct btrfs_inode *inode, u64 file_pos,
+                                      struct btrfs_file_extent_item *stack_fi,
+                                      u64 qgroup_reserved)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_file_extent_item *fi;
+       struct btrfs_root *root = inode->root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key ins;
-       u64 qg_released;
+       u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
+       u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
+       u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
+       u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
        int extent_inserted = 0;
        int ret;
 
@@ -2496,60 +2486,42 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
         */
        ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
                                   file_pos + num_bytes, NULL, 0,
-                                  1, sizeof(*fi), &extent_inserted);
+                                  1, sizeof(*stack_fi), &extent_inserted);
        if (ret)
                goto out;
 
        if (!extent_inserted) {
-               ins.objectid = btrfs_ino(BTRFS_I(inode));
+               ins.objectid = btrfs_ino(inode);
                ins.offset = file_pos;
                ins.type = BTRFS_EXTENT_DATA_KEY;
 
                path->leave_spinning = 1;
                ret = btrfs_insert_empty_item(trans, root, path, &ins,
-                                             sizeof(*fi));
+                                             sizeof(*stack_fi));
                if (ret)
                        goto out;
        }
        leaf = path->nodes[0];
-       fi = btrfs_item_ptr(leaf, path->slots[0],
-                           struct btrfs_file_extent_item);
-       btrfs_set_file_extent_generation(leaf, fi, trans->transid);
-       btrfs_set_file_extent_type(leaf, fi, extent_type);
-       btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
-       btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
-       btrfs_set_file_extent_offset(leaf, fi, 0);
-       btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
-       btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
-       btrfs_set_file_extent_compression(leaf, fi, compression);
-       btrfs_set_file_extent_encryption(leaf, fi, encryption);
-       btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
+       btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
+       write_extent_buffer(leaf, stack_fi,
+                       btrfs_item_ptr_offset(leaf, path->slots[0]),
+                       sizeof(struct btrfs_file_extent_item));
 
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);
 
-       inode_add_bytes(inode, num_bytes);
+       inode_add_bytes(&inode->vfs_inode, num_bytes);
 
        ins.objectid = disk_bytenr;
        ins.offset = disk_num_bytes;
        ins.type = BTRFS_EXTENT_ITEM_KEY;
 
-       ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), file_pos,
-                                               ram_bytes);
+       ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
        if (ret)
                goto out;
 
-       /*
-        * Release the reserved range from inode dirty range map, as it is
-        * already moved into delayed_ref_head
-        */
-       ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
-       if (ret < 0)
-               goto out;
-       qg_released = ret;
-       ret = btrfs_alloc_reserved_file_extent(trans, root,
-                                              btrfs_ino(BTRFS_I(inode)),
-                                              file_pos, qg_released, &ins);
+       ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
+                                              file_pos, qgroup_reserved, &ins);
 out:
        btrfs_free_path(path);
 
@@ -2571,7 +2543,33 @@ static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
        btrfs_put_block_group(cache);
 }
 
-/* as ordered data IO finishes, this gets called so we can finish
+static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
+                                            struct inode *inode,
+                                            struct btrfs_ordered_extent *oe)
+{
+       struct btrfs_file_extent_item stack_fi;
+       u64 logical_len;
+
+       memset(&stack_fi, 0, sizeof(stack_fi));
+       btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
+       btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
+       btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
+                                                  oe->disk_num_bytes);
+       if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
+               logical_len = oe->truncated_len;
+       else
+               logical_len = oe->num_bytes;
+       btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
+       btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
+       btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
+       /* Encryption and other encoding are reserved and all 0 */
+
+       return insert_reserved_file_extent(trans, BTRFS_I(inode), oe->file_offset,
+                                          &stack_fi, oe->qgroup_rsv);
+}
+
+/*
+ * As ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
  */
@@ -2622,13 +2620,6 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
 
-               /*
-                * For mwrite(mmap + memset to write) case, we still reserve
-                * space for NOCOW range.
-                * As NOCOW won't cause a new delayed ref, just free the space
-                */
-               btrfs_qgroup_free_data(inode, NULL, start,
-                                      ordered_extent->num_bytes);
                btrfs_inode_safe_disk_i_size_write(inode, 0);
                if (freespace_inode)
                        trans = btrfs_join_transaction_spacecache(root);
@@ -2665,20 +2656,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                compress_type = ordered_extent->compress_type;
        if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
                BUG_ON(compress_type);
-               btrfs_qgroup_free_data(inode, NULL, start,
-                                      ordered_extent->num_bytes);
                ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
                                                ordered_extent->file_offset,
                                                ordered_extent->file_offset +
                                                logical_len);
        } else {
                BUG_ON(root == fs_info->tree_root);
-               ret = insert_reserved_file_extent(trans, inode, start,
-                                               ordered_extent->disk_bytenr,
-                                               ordered_extent->disk_num_bytes,
-                                               logical_len, logical_len,
-                                               compress_type, 0, 0,
-                                               BTRFS_FILE_EXTENT_REG);
+               ret = insert_ordered_extent_file_extent(trans, inode,
+                                                       ordered_extent);
                if (!ret) {
                        clear_reserved_extent = false;
                        btrfs_release_delalloc_bytes(fs_info,
@@ -2830,6 +2815,9 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
 zeroit:
        btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
                                    io_bio->mirror_num);
+       if (io_bio->device)
+               btrfs_dev_stat_inc_and_print(io_bio->device,
+                                            BTRFS_DEV_STAT_CORRUPTION_ERRS);
        memset(kaddr + pgoff, 1, len);
        flush_dcache_page(page);
        kunmap_atomic(kaddr);
@@ -3348,6 +3336,14 @@ cache_index:
         */
        BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
 
+       /*
+        * Same logic as for last_unlink_trans. We don't persist the generation
+        * of the last transaction where this inode was used for a reflink
+        * operation, so after eviction and reloading the inode we must be
+        * pessimistic and assume the last transaction that modified the inode.
+        */
+       BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
+
        path->slots[0]++;
        if (inode->i_nlink != 1 ||
            path->slots[0] >= btrfs_header_nritems(leaf))
@@ -3496,7 +3492,7 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
 
        fill_inode_item(trans, leaf, inode_item, inode);
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_set_inode_last_trans(trans, inode);
+       btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
        ret = 0;
 failed:
        btrfs_free_path(path);
@@ -3526,7 +3522,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
 
                ret = btrfs_delayed_update_inode(trans, root, inode);
                if (!ret)
-                       btrfs_set_inode_last_trans(trans, inode);
+                       btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
                return ret;
        }
 
@@ -4041,6 +4037,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
                }
        }
 
+       free_anon_bdev(dest->anon_dev);
+       dest->anon_dev = 0;
 out_end_trans:
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
@@ -4511,11 +4509,13 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
        struct extent_state *cached_state = NULL;
        struct extent_changeset *data_reserved = NULL;
        char *kaddr;
+       bool only_release_metadata = false;
        u32 blocksize = fs_info->sectorsize;
        pgoff_t index = from >> PAGE_SHIFT;
        unsigned offset = from & (blocksize - 1);
        struct page *page;
        gfp_t mask = btrfs_alloc_write_mask(mapping);
+       size_t write_bytes = blocksize;
        int ret = 0;
        u64 block_start;
        u64 block_end;
@@ -4527,15 +4527,28 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
        block_start = round_down(from, blocksize);
        block_end = block_start + blocksize - 1;
 
-       ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
-                                          block_start, blocksize);
-       if (ret)
+       ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
+                                         block_start, blocksize);
+       if (ret < 0) {
+               if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start,
+                                          &write_bytes) > 0) {
+                       /* For nocow case, no need to reserve data space */
+                       only_release_metadata = true;
+               } else {
+                       goto out;
+               }
+       }
+       ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
+       if (ret < 0) {
+               if (!only_release_metadata)
+                       btrfs_free_reserved_data_space(BTRFS_I(inode),
+                                       data_reserved, block_start, blocksize);
                goto out;
-
+       }
 again:
        page = find_or_create_page(mapping, index, mask);
        if (!page) {
-               btrfs_delalloc_release_space(inode, data_reserved,
+               btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
                                             block_start, blocksize, true);
                btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
                ret = -ENOMEM;
@@ -4560,7 +4573,7 @@ again:
        lock_extent_bits(io_tree, block_start, block_end, &cached_state);
        set_page_extent_mapped(page);
 
-       ordered = btrfs_lookup_ordered_extent(inode, block_start);
+       ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start);
        if (ordered) {
                unlock_extent_cached(io_tree, block_start, block_end,
                                     &cached_state);
@@ -4575,7 +4588,7 @@ again:
                         EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                         0, 0, &cached_state);
 
-       ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
+       ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0,
                                        &cached_state);
        if (ret) {
                unlock_extent_cached(io_tree, block_start, block_end,
@@ -4600,14 +4613,26 @@ again:
        set_page_dirty(page);
        unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
 
+       if (only_release_metadata)
+               set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
+                               block_end, EXTENT_NORESERVE, NULL, NULL,
+                               GFP_NOFS);
+
 out_unlock:
-       if (ret)
-               btrfs_delalloc_release_space(inode, data_reserved, block_start,
-                                            blocksize, true);
+       if (ret) {
+               if (only_release_metadata)
+                       btrfs_delalloc_release_metadata(BTRFS_I(inode),
+                                       blocksize, true);
+               else
+                       btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
+                                       block_start, blocksize, true);
+       }
        btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
        unlock_page(page);
        put_page(page);
 out:
+       if (only_release_metadata)
+               btrfs_check_nocow_unlock(BTRFS_I(inode));
        extent_changeset_free(data_reserved);
        return ret;
 }
@@ -4965,7 +4990,8 @@ static void evict_inode_truncate_pages(struct inode *inode)
                 * Note, end is the bytenr of last byte, so we need + 1 here.
                 */
                if (state_flags & EXTENT_DELALLOC)
-                       btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
+                       btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
+                                              end - start + 1);
 
                clear_extent_bit(io_tree, start, end,
                                 EXTENT_LOCKED | EXTENT_DELALLOC |
@@ -6040,7 +6066,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        inode_tree_add(inode);
 
        trace_btrfs_inode_new(inode);
-       btrfs_set_inode_last_trans(trans, inode);
+       btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
 
        btrfs_update_root_times(trans, root);
 
@@ -6849,7 +6875,7 @@ out:
        return em;
 }
 
-static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
+static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
                                                  const u64 start,
                                                  const u64 len,
                                                  const u64 orig_start,
@@ -6863,21 +6889,19 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
        int ret;
 
        if (type != BTRFS_ORDERED_NOCOW) {
-               em = create_io_em(inode, start, len, orig_start,
-                                 block_start, block_len, orig_block_len,
-                                 ram_bytes,
+               em = create_io_em(inode, start, len, orig_start, block_start,
+                                 block_len, orig_block_len, ram_bytes,
                                  BTRFS_COMPRESS_NONE, /* compress_type */
                                  type);
                if (IS_ERR(em))
                        goto out;
        }
-       ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
-                                          len, block_len, type);
+       ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len,
+                                          block_len, type);
        if (ret) {
                if (em) {
                        free_extent_map(em);
-                       btrfs_drop_extent_cache(BTRFS_I(inode), start,
-                                               start + len - 1, 0);
+                       btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
                }
                em = ERR_PTR(ret);
        }
@@ -6886,11 +6910,11 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
        return em;
 }
 
-static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
                                                  u64 start, u64 len)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_map *em;
        struct btrfs_key ins;
        u64 alloc_hint;
@@ -6907,15 +6931,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                                     ins.offset, BTRFS_ORDERED_REGULAR);
        btrfs_dec_block_group_reservations(fs_info, ins.objectid);
        if (IS_ERR(em))
-               btrfs_free_reserved_extent(fs_info, ins.objectid,
-                                          ins.offset, 1);
+               btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
+                                          1);
 
        return em;
 }
 
 /*
- * returns 1 when the nocow is safe, < 1 on error, 0 if the
- * block must be cow'd
+ * Check if we can do nocow write into the range [@offset, @offset + @len)
+ *
+ * @offset:    File offset
+ * @len:       The length to write, will be updated to the nocow writeable
+ *             range
+ * @orig_start:        (optional) Return the original file offset of the file extent
+ * @orig_block_len: (optional) Return the original on-disk length of the file extent
+ * @ram_bytes: (optional) Return the ram_bytes of the file extent
+ *
+ * This function will flush ordered extents in the range to ensure proper
+ * nocow checks for (nowait == false) case.
+ *
+ * Return:
+ * >0  and update @len if we can do nocow write
+ *  0  if we can't do nocow write
+ * <0  if error happened
+ *
+ * NOTE: This only checks the file extents, caller is responsible to wait for
+ *      any ordered extents.
  */
 noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *orig_start, u64 *orig_block_len,
@@ -7142,8 +7183,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 }
 
 /* The callers of this must take lock_extent() */
-static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
-                                      u64 orig_start, u64 block_start,
+static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
+                                      u64 len, u64 orig_start, u64 block_start,
                                       u64 block_len, u64 orig_block_len,
                                       u64 ram_bytes, int compress_type,
                                       int type)
@@ -7157,7 +7198,7 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
               type == BTRFS_ORDERED_NOCOW ||
               type == BTRFS_ORDERED_REGULAR);
 
-       em_tree = &BTRFS_I(inode)->extent_tree;
+       em_tree = &inode->extent_tree;
        em = alloc_extent_map();
        if (!em)
                return ERR_PTR(-ENOMEM);
@@ -7179,8 +7220,8 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
        }
 
        do {
-               btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
-                               em->start + em->len - 1, 0);
+               btrfs_drop_extent_cache(inode, em->start,
+                                       em->start + em->len - 1, 0);
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em, 1);
                write_unlock(&em_tree->lock);
@@ -7259,7 +7300,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
                    btrfs_inc_nocow_writers(fs_info, block_start)) {
                        struct extent_map *em2;
 
-                       em2 = btrfs_create_dio_extent(inode, start, len,
+                       em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
                                                      orig_start, block_start,
                                                      len, orig_block_len,
                                                      ram_bytes, type);
@@ -7278,8 +7319,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
                         * use the existing or preallocated extent, so does not
                         * need to adjust btrfs_space_info's bytes_may_use.
                         */
-                       btrfs_free_reserved_data_space_noquota(inode, start,
-                                                              len);
+                       btrfs_free_reserved_data_space_noquota(fs_info, len);
                        goto skip_cow;
                }
        }
@@ -7287,7 +7327,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
        /* this will cow the extent */
        len = bh_result->b_size;
        free_extent_map(em);
-       *map = em = btrfs_new_extent_direct(inode, start, len);
+       *map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
        if (IS_ERR(em)) {
                ret = PTR_ERR(em);
                goto out;
@@ -7438,7 +7478,8 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
                return;
 
        if (bio_op(dip->dio_bio) == REQ_OP_WRITE) {
-               __endio_write_update_ordered(dip->inode, dip->logical_offset,
+               __endio_write_update_ordered(BTRFS_I(dip->inode),
+                                            dip->logical_offset,
                                             dip->bytes,
                                             !dip->dio_bio->bi_status);
        } else {
@@ -7524,18 +7565,18 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
        return err;
 }
 
-static void __endio_write_update_ordered(struct inode *inode,
+static void __endio_write_update_ordered(struct btrfs_inode *inode,
                                         const u64 offset, const u64 bytes,
                                         const bool uptodate)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_ordered_extent *ordered = NULL;
        struct btrfs_workqueue *wq;
        u64 ordered_offset = offset;
        u64 ordered_bytes = bytes;
        u64 last_offset;
 
-       if (btrfs_is_free_space_inode(BTRFS_I(inode)))
+       if (btrfs_is_free_space_inode(inode))
                wq = fs_info->endio_freespace_worker;
        else
                wq = fs_info->endio_write_workers;
@@ -7543,9 +7584,9 @@ static void __endio_write_update_ordered(struct inode *inode,
        while (ordered_offset < offset + bytes) {
                last_offset = ordered_offset;
                if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
-                                                          &ordered_offset,
-                                                          ordered_bytes,
-                                                          uptodate)) {
+                                                        &ordered_offset,
+                                                        ordered_bytes,
+                                                        uptodate)) {
                        btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
                                        NULL);
                        btrfs_queue_work(wq, &ordered->work);
@@ -7572,7 +7613,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
 {
        struct inode *inode = private_data;
        blk_status_t ret;
-       ret = btrfs_csum_one_bio(inode, bio, offset, 1);
+       ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
        BUG_ON(ret); /* -ENOMEM */
        return 0;
 }
@@ -7633,7 +7674,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
                 * If we aren't doing async submit, calculate the csum of the
                 * bio now.
                 */
-               ret = btrfs_csum_one_bio(inode, bio, file_offset, 1);
+               ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1);
                if (ret)
                        goto err;
        } else {
@@ -7883,7 +7924,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                        inode_unlock(inode);
                        relock = true;
                }
-               ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+               ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
                                                   offset, count);
                if (ret)
                        goto out;
@@ -7915,8 +7956,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                current->journal_info = NULL;
                if (ret < 0 && ret != -EIOCBQUEUED) {
                        if (dio_data.reserve)
-                               btrfs_delalloc_release_space(inode, data_reserved,
-                                       offset, dio_data.reserve, true);
+                               btrfs_delalloc_release_space(BTRFS_I(inode),
+                                       data_reserved, offset, dio_data.reserve,
+                                       true);
                        /*
                         * On error we might have left some ordered extents
                         * without submitting corresponding bios for them, so
@@ -7925,13 +7967,13 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                         */
                        if (dio_data.unsubmitted_oe_range_start <
                            dio_data.unsubmitted_oe_range_end)
-                               __endio_write_update_ordered(inode,
+                               __endio_write_update_ordered(BTRFS_I(inode),
                                        dio_data.unsubmitted_oe_range_start,
                                        dio_data.unsubmitted_oe_range_end -
                                        dio_data.unsubmitted_oe_range_start,
                                        false);
                } else if (ret >= 0 && (size_t)ret < count)
-                       btrfs_delalloc_release_space(inode, data_reserved,
+                       btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
                                        offset, count - (size_t)ret, true);
                btrfs_delalloc_release_extents(BTRFS_I(inode), count);
        }
@@ -7946,7 +7988,7 @@ out:
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-               __u64 start, __u64 len)
+                       u64 start, u64 len)
 {
        int     ret;
 
@@ -8133,7 +8175,7 @@ again:
         *    bit of its io_tree, and free the qgroup reserved data space.
         *    Since the IO will never happen for this page.
         */
-       btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
+       btrfs_qgroup_free_data(BTRFS_I(inode), NULL, page_start, PAGE_SIZE);
        if (!inode_evicting) {
                clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
                                 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
@@ -8197,8 +8239,8 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
         * end up waiting indefinitely to get a lock on the page currently
         * being processed by btrfs_page_mkwrite() function.
         */
-       ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
-                                          reserved_space);
+       ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
+                                           page_start, reserved_space);
        if (!ret2) {
                ret2 = file_update_time(vmf->vma->vm_file);
                reserved = 1;
@@ -8245,9 +8287,9 @@ again:
                                          fs_info->sectorsize);
                if (reserved_space < PAGE_SIZE) {
                        end = page_start + reserved_space - 1;
-                       btrfs_delalloc_release_space(inode, data_reserved,
-                                       page_start, PAGE_SIZE - reserved_space,
-                                       true);
+                       btrfs_delalloc_release_space(BTRFS_I(inode),
+                                       data_reserved, page_start,
+                                       PAGE_SIZE - reserved_space, true);
                }
        }
 
@@ -8262,7 +8304,7 @@ again:
                          EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
                          EXTENT_DEFRAG, 0, 0, &cached_state);
 
-       ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
+       ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
                                        &cached_state);
        if (ret2) {
                unlock_extent_cached(io_tree, page_start, page_end,
@@ -8302,7 +8344,7 @@ out_unlock:
        unlock_page(page);
 out:
        btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
-       btrfs_delalloc_release_space(inode, data_reserved, page_start,
+       btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
                                     reserved_space, (ret != 0));
 out_noreserve:
        sb_end_pagefault(inode->i_sb);
@@ -8516,6 +8558,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->index_cnt = (u64)-1;
        ei->dir_index = 0;
        ei->last_unlink_trans = 0;
+       ei->last_reflink_trans = 0;
        ei->last_log_commit = 0;
 
        spin_lock_init(&ei->lock);
@@ -8602,7 +8645,7 @@ void btrfs_destroy_inode(struct inode *inode)
                        btrfs_put_ordered_extent(ordered);
                }
        }
-       btrfs_qgroup_check_reserved_leak(inode);
+       btrfs_qgroup_check_reserved_leak(BTRFS_I(inode));
        inode_tree_del(inode);
        btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
        btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1);
@@ -9584,6 +9627,31 @@ out_unlock:
        return err;
 }
 
+static int insert_prealloc_file_extent(struct btrfs_trans_handle *trans,
+                                      struct inode *inode, struct btrfs_key *ins,
+                                      u64 file_offset)
+{
+       struct btrfs_file_extent_item stack_fi;
+       u64 start = ins->objectid;
+       u64 len = ins->offset;
+       int ret;
+
+       memset(&stack_fi, 0, sizeof(stack_fi));
+
+       btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC);
+       btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start);
+       btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len);
+       btrfs_set_stack_file_extent_num_bytes(&stack_fi, len);
+       btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len);
+       btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
+       /* Encryption and other encoding are reserved and all 0 */
+
+       ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len);
+       if (ret < 0)
+               return ret;
+       return insert_reserved_file_extent(trans, BTRFS_I(inode), file_offset,
+                                          &stack_fi, ret);
+}
 static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                                       u64 start, u64 num_bytes, u64 min_size,
                                       loff_t actual_len, u64 *alloc_hint,
@@ -9642,11 +9710,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 
                last_alloc = ins.offset;
-               ret = insert_reserved_file_extent(trans, inode,
-                                                 cur_offset, ins.objectid,
-                                                 ins.offset, ins.offset,
-                                                 ins.offset, 0, 0, 0,
-                                                 BTRFS_FILE_EXTENT_PREALLOC);
+               ret = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
                if (ret) {
                        btrfs_free_reserved_extent(fs_info, ins.objectid,
                                                   ins.offset, 0);
@@ -9719,7 +9783,7 @@ next:
                        btrfs_end_transaction(trans);
        }
        if (clear_offset < end)
-               btrfs_free_reserved_data_space(inode, NULL, clear_offset,
+               btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset,
                        end - clear_offset + 1);
        return ret;
 }
index e8f7c5f..bd3511c 100644 (file)
@@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
        return 0;
 }
 
-/* Check if @flags are a supported and valid set of FS_*_FL flags */
-static int check_fsflags(unsigned int flags)
+/*
+ * Check if @flags are a supported and valid set of FS_*_FL flags and that
+ * the old and new flags are not conflicting
+ */
+static int check_fsflags(unsigned int old_flags, unsigned int flags)
 {
        if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
                      FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -174,9 +177,19 @@ static int check_fsflags(unsigned int flags)
                      FS_NOCOW_FL))
                return -EOPNOTSUPP;
 
+       /* COMPR and NOCOMP on new/old are valid */
        if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
                return -EINVAL;
 
+       if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
+               return -EINVAL;
+
+       /* NOCOW and compression options are mutually exclusive */
+       if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+               return -EINVAL;
+       if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
+               return -EINVAL;
+
        return 0;
 }
 
@@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        unsigned int fsflags, old_fsflags;
        int ret;
        const char *comp = NULL;
-       u32 binode_flags = binode->flags;
+       u32 binode_flags;
 
        if (!inode_owner_or_capable(inode))
                return -EPERM;
@@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
                return -EFAULT;
 
-       ret = check_fsflags(fsflags);
-       if (ret)
-               return ret;
-
        ret = mnt_want_write_file(file);
        if (ret)
                return ret;
 
        inode_lock(inode);
-
        fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
        old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+
        ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
        if (ret)
                goto out_unlock;
 
+       ret = check_fsflags(old_fsflags, fsflags);
+       if (ret)
+               goto out_unlock;
+
+       binode_flags = binode->flags;
        if (fsflags & FS_SYNC_FL)
                binode_flags |= BTRFS_INODE_SYNC;
        else
@@ -566,6 +580,7 @@ static noinline int create_subvol(struct inode *dir,
        struct inode *inode;
        int ret;
        int err;
+       dev_t anon_dev = 0;
        u64 objectid;
        u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
        u64 index = 0;
@@ -578,6 +593,10 @@ static noinline int create_subvol(struct inode *dir,
        if (ret)
                goto fail_free;
 
+       ret = get_anon_bdev(&anon_dev);
+       if (ret < 0)
+               goto fail_free;
+
        /*
         * Don't create subvolume whose level is not zero. Or qgroup will be
         * screwed up since it assumes subvolume qgroup's level to be 0.
@@ -660,12 +679,15 @@ static noinline int create_subvol(struct inode *dir,
                goto fail;
 
        key.offset = (u64)-1;
-       new_root = btrfs_get_fs_root(fs_info, objectid, true);
+       new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
        if (IS_ERR(new_root)) {
+               free_anon_bdev(anon_dev);
                ret = PTR_ERR(new_root);
                btrfs_abort_transaction(trans, ret);
                goto fail;
        }
+       /* Freeing will be done in btrfs_put_root() of new_root */
+       anon_dev = 0;
 
        btrfs_record_root_in_trans(trans, new_root);
 
@@ -735,6 +757,8 @@ fail:
        return ret;
 
 fail_free:
+       if (anon_dev)
+               free_anon_bdev(anon_dev);
        kfree(root_item);
        return ret;
 }
@@ -762,6 +786,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        if (!pending_snapshot)
                return -ENOMEM;
 
+       ret = get_anon_bdev(&pending_snapshot->anon_dev);
+       if (ret < 0)
+               goto free_pending;
        pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
                        GFP_KERNEL);
        pending_snapshot->path = btrfs_alloc_path();
@@ -823,10 +850,16 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 
        d_instantiate(dentry, inode);
        ret = 0;
+       pending_snapshot->anon_dev = 0;
 fail:
+       /* Prevent double freeing of anon_dev */
+       if (ret && pending_snapshot->snap)
+               pending_snapshot->snap->anon_dev = 0;
        btrfs_put_root(pending_snapshot->snap);
        btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
 free_pending:
+       if (pending_snapshot->anon_dev)
+               free_anon_bdev(pending_snapshot->anon_dev);
        kfree(pending_snapshot->root_item);
        btrfs_free_path(pending_snapshot->path);
        kfree(pending_snapshot);
@@ -1243,7 +1276,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 
        page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
 
-       ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+       ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
                        start_index << PAGE_SHIFT,
                        page_cnt << PAGE_SHIFT);
        if (ret)
@@ -1265,7 +1298,7 @@ again:
                while (1) {
                        lock_extent_bits(tree, page_start, page_end,
                                         &cached_state);
-                       ordered = btrfs_lookup_ordered_extent(inode,
+                       ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
                                                              page_start);
                        unlock_extent_cached(tree, page_start, page_end,
                                             &cached_state);
@@ -1333,7 +1366,7 @@ again:
                spin_lock(&BTRFS_I(inode)->lock);
                btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
                spin_unlock(&BTRFS_I(inode)->lock);
-               btrfs_delalloc_release_space(inode, data_reserved,
+               btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
                                start_index << PAGE_SHIFT,
                                (page_cnt - i_done) << PAGE_SHIFT, true);
        }
@@ -1361,7 +1394,7 @@ out:
                unlock_page(pages[i]);
                put_page(pages[i]);
        }
-       btrfs_delalloc_release_space(inode, data_reserved,
+       btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
                        start_index << PAGE_SHIFT,
                        page_cnt << PAGE_SHIFT, true);
        btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
@@ -3198,11 +3231,15 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
        struct btrfs_ioctl_fs_info_args *fi_args;
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       u64 flags_in;
        int ret = 0;
 
-       fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
-       if (!fi_args)
-               return -ENOMEM;
+       fi_args = memdup_user(arg, sizeof(*fi_args));
+       if (IS_ERR(fi_args))
+               return PTR_ERR(fi_args);
+
+       flags_in = fi_args->flags;
+       memset(fi_args, 0, sizeof(*fi_args));
 
        rcu_read_lock();
        fi_args->num_devices = fs_devices->num_devices;
@@ -3218,6 +3255,23 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
        fi_args->sectorsize = fs_info->sectorsize;
        fi_args->clone_alignment = fs_info->sectorsize;
 
+       if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
+               fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
+               fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
+               fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO;
+       }
+
+       if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) {
+               fi_args->generation = fs_info->generation;
+               fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION;
+       }
+
+       if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
+               memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid,
+                      sizeof(fi_args->metadata_uuid));
+               fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID;
+       }
+
        if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
                ret = -EFAULT;
 
index e13b3d2..ebac133 100644 (file)
@@ -15,6 +15,7 @@
 #include "disk-io.h"
 #include "compression.h"
 #include "delalloc-space.h"
+#include "qgroup.h"
 
 static struct kmem_cache *btrfs_ordered_extent_cache;
 
@@ -152,23 +153,39 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
        return ret;
 }
 
-/* allocate and add a new ordered_extent into the per-inode tree.
+/*
+ * Allocate and add a new ordered_extent into the per-inode tree.
  *
  * The tree is given a single reference on the ordered extent that was
  * inserted.
  */
-static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
                                      u64 disk_bytenr, u64 num_bytes,
                                      u64 disk_num_bytes, int type, int dio,
                                      int compress_type)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_ordered_inode_tree *tree;
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
        struct rb_node *node;
        struct btrfs_ordered_extent *entry;
+       int ret;
 
-       tree = &BTRFS_I(inode)->ordered_tree;
+       if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
+               /* For nocow write, we can release the qgroup rsv right now */
+               ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
+               if (ret < 0)
+                       return ret;
+               ret = 0;
+       } else {
+               /*
+                * The ordered extent has reserved qgroup space, release now
+                * and pass the reserved number for qgroup_record to free.
+                */
+               ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
+               if (ret < 0)
+                       return ret;
+       }
        entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
        if (!entry)
                return -ENOMEM;
@@ -178,9 +195,10 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        entry->num_bytes = num_bytes;
        entry->disk_num_bytes = disk_num_bytes;
        entry->bytes_left = num_bytes;
-       entry->inode = igrab(inode);
+       entry->inode = igrab(&inode->vfs_inode);
        entry->compress_type = compress_type;
        entry->truncated_len = (u64)-1;
+       entry->qgroup_rsv = ret;
        if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                set_bit(type, &entry->flags);
 
@@ -197,10 +215,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        INIT_LIST_HEAD(&entry->root_extent_list);
        INIT_LIST_HEAD(&entry->work_list);
        init_completion(&entry->completion);
-       INIT_LIST_HEAD(&entry->log_list);
-       INIT_LIST_HEAD(&entry->trans_list);
 
-       trace_btrfs_ordered_extent_add(inode, entry);
+       trace_btrfs_ordered_extent_add(&inode->vfs_inode, entry);
 
        spin_lock_irq(&tree->lock);
        node = tree_insert(&tree->tree, file_offset,
@@ -228,14 +244,14 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
         * that work has been done at higher layers, so this is truly the
         * smallest the extent is going to get.
         */
-       spin_lock(&BTRFS_I(inode)->lock);
-       btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
-       spin_unlock(&BTRFS_I(inode)->lock);
+       spin_lock(&inode->lock);
+       btrfs_mod_outstanding_extents(inode, 1);
+       spin_unlock(&inode->lock);
 
        return 0;
 }
 
-int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
                             u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
                             int type)
 {
@@ -244,7 +260,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
                                          BTRFS_COMPRESS_NONE);
 }
 
-int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
+int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
                                 u64 disk_bytenr, u64 num_bytes,
                                 u64 disk_num_bytes, int type)
 {
@@ -253,7 +269,7 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
                                          BTRFS_COMPRESS_NONE);
 }
 
-int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
+int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
                                      u64 disk_bytenr, u64 num_bytes,
                                      u64 disk_num_bytes, int type,
                                      int compress_type)
@@ -291,12 +307,12 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
  * file_offset is updated to one byte past the range that is recorded as
  * complete.  This allows you to walk forward in the file.
  */
-int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
                                   struct btrfs_ordered_extent **cached,
                                   u64 *file_offset, u64 io_size, int uptodate)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_ordered_inode_tree *tree;
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
        struct rb_node *node;
        struct btrfs_ordered_extent *entry = NULL;
        int ret;
@@ -305,7 +321,6 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
        u64 dec_start;
        u64 to_dec;
 
-       tree = &BTRFS_I(inode)->ordered_tree;
        spin_lock_irqsave(&tree->lock, flags);
        node = tree_search(tree, *file_offset);
        if (!node) {
@@ -429,8 +444,6 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
        trace_btrfs_ordered_extent_put(entry->inode, entry);
 
        if (refcount_dec_and_test(&entry->refs)) {
-               ASSERT(list_empty(&entry->log_list));
-               ASSERT(list_empty(&entry->trans_list));
                ASSERT(list_empty(&entry->root_extent_list));
                ASSERT(RB_EMPTY_NODE(&entry->rb_node));
                if (entry->inode)
@@ -698,14 +711,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
  * find an ordered extent corresponding to file_offset.  return NULL if
  * nothing is found, otherwise take a reference on the extent and return it
  */
-struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
+struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
                                                         u64 file_offset)
 {
        struct btrfs_ordered_inode_tree *tree;
        struct rb_node *node;
        struct btrfs_ordered_extent *entry = NULL;
 
-       tree = &BTRFS_I(inode)->ordered_tree;
+       tree = &inode->ordered_tree;
        spin_lock_irq(&tree->lock);
        node = tree_search(tree, file_offset);
        if (!node)
@@ -803,7 +816,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
        const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
        int index = 0;
 
-       ordered = btrfs_lookup_ordered_extent(inode, offset);
+       ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), offset);
        if (!ordered)
                return 0;
 
index c01c969..d61ea9c 100644 (file)
@@ -92,6 +92,9 @@ struct btrfs_ordered_extent {
        /* compression algorithm */
        int compress_type;
 
+       /* Qgroup reserved space */
+       int qgroup_rsv;
+
        /* reference count */
        refcount_t refs;
 
@@ -101,12 +104,6 @@ struct btrfs_ordered_extent {
        /* list of checksums for insertion when the extent io is done */
        struct list_head list;
 
-       /* If we need to wait on this to be done */
-       struct list_head log_list;
-
-       /* If the transaction needs to wait on this ordered extent */
-       struct list_head trans_list;
-
        /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
        wait_queue_head_t wait;
 
@@ -150,23 +147,23 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 int btrfs_dec_test_ordered_pending(struct inode *inode,
                                   struct btrfs_ordered_extent **cached,
                                   u64 file_offset, u64 io_size, int uptodate);
-int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
                                   struct btrfs_ordered_extent **cached,
                                   u64 *file_offset, u64 io_size,
                                   int uptodate);
-int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
                             u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
                             int type);
-int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
+int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
                                 u64 disk_bytenr, u64 num_bytes,
                                 u64 disk_num_bytes, int type);
-int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
+int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
                                      u64 disk_bytenr, u64 num_bytes,
                                      u64 disk_num_bytes, int type,
                                      int compress_type);
 void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
                           struct btrfs_ordered_sum *sum);
-struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
+struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
                                                         u64 file_offset);
 void btrfs_start_ordered_extent(struct inode *inode,
                                struct btrfs_ordered_extent *entry, int wait);
index 5bd4089..c0f350c 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/btrfs.h>
-#include <linux/sizes.h>
 
 #include "ctree.h"
 #include "transaction.h"
@@ -22,6 +21,7 @@
 #include "extent_io.h"
 #include "qgroup.h"
 #include "block-group.h"
+#include "sysfs.h"
 
 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
@@ -220,10 +220,12 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
        return qgroup;
 }
 
-static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
+static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
+                           struct btrfs_qgroup *qgroup)
 {
        struct btrfs_qgroup_list *list;
 
+       btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
        list_del(&qgroup->dirty);
        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
@@ -252,7 +254,7 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
                return -ENOENT;
 
        rb_erase(&qgroup->node, &fs_info->qgroup_tree);
-       __del_qgroup_rb(qgroup);
+       __del_qgroup_rb(fs_info, qgroup);
        return 0;
 }
 
@@ -351,6 +353,9 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
                goto out;
        }
 
+       ret = btrfs_sysfs_add_qgroups(fs_info);
+       if (ret < 0)
+               goto out;
        /* default this to quota off, in case no status key is found */
        fs_info->qgroup_flags = 0;
 
@@ -412,6 +417,10 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
                                goto out;
                        }
                }
+               ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+               if (ret < 0)
+                       goto out;
+
                switch (found_key.type) {
                case BTRFS_QGROUP_INFO_KEY: {
                        struct btrfs_qgroup_info_item *ptr;
@@ -500,11 +509,50 @@ out:
                ulist_free(fs_info->qgroup_ulist);
                fs_info->qgroup_ulist = NULL;
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+               btrfs_sysfs_del_qgroups(fs_info);
        }
 
        return ret < 0 ? ret : 0;
 }
 
+/*
+ * Called in close_ctree() when quota is still enabled.  Walks all qgroups and
+ * warns about every reservation type that still has a non-zero value.
+ *
+ * Return false if quota is disabled or no reserved space is left.
+ * Return true if some reserved space is leaked.
+ */
+bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info)
+{
+       struct rb_node *node;
+       bool ret = false;
+
+       if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+               return ret;
+       /*
+        * Since we're unmounting, there is no race and no need to grab qgroup
+        * lock.  Iterate in-order (rb_first/rb_next) rather than post-order so
+        * the warnings come out sorted, which is friendlier to the user.
+        */
+       for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) {
+               struct btrfs_qgroup *qgroup;
+               int i;
+
+               qgroup = rb_entry(node, struct btrfs_qgroup, node);
+               for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) {
+                       if (qgroup->rsv.values[i]) {
+                               ret = true;
+                               btrfs_warn(fs_info,
+               "qgroup %hu/%llu has unreleased space, type %d rsv %llu",
+                                  btrfs_qgroup_level(qgroup->qgroupid),
+                                  btrfs_qgroup_subvolid(qgroup->qgroupid),
+                                  i, qgroup->rsv.values[i]);
+                       }
+               }
+       }
+       return ret;
+}
+
 /*
  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
  * first two are in single-threaded paths.And for the third one, we have set
@@ -519,7 +567,7 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
        while ((n = rb_first(&fs_info->qgroup_tree))) {
                qgroup = rb_entry(n, struct btrfs_qgroup, node);
                rb_erase(n, &fs_info->qgroup_tree);
-               __del_qgroup_rb(qgroup);
+               __del_qgroup_rb(fs_info, qgroup);
        }
        /*
         * We call btrfs_free_qgroup_config() when unmounting
@@ -528,6 +576,7 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
         */
        ulist_free(fs_info->qgroup_ulist);
        fs_info->qgroup_ulist = NULL;
+       btrfs_sysfs_del_qgroups(fs_info);
 }
 
 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
@@ -900,6 +949,9 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
                goto out;
        }
 
+       ret = btrfs_sysfs_add_qgroups(fs_info);
+       if (ret < 0)
+               goto out;
        /*
         * 1 for quota root item
         * 1 for BTRFS_QGROUP_STATUS item
@@ -987,6 +1039,11 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
                                btrfs_abort_transaction(trans, ret);
                                goto out_free_path;
                        }
+                       ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+                       if (ret < 0) {
+                               btrfs_abort_transaction(trans, ret);
+                               goto out_free_path;
+                       }
                }
                ret = btrfs_next_item(tree_root, path);
                if (ret < 0) {
@@ -1011,6 +1068,11 @@ out_add_root:
                btrfs_abort_transaction(trans, ret);
                goto out_free_path;
        }
+       ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+       if (ret < 0) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_free_path;
+       }
 
        ret = btrfs_commit_transaction(trans);
        trans = NULL;
@@ -1046,6 +1108,7 @@ out:
                fs_info->qgroup_ulist = NULL;
                if (trans)
                        btrfs_end_transaction(trans);
+               btrfs_sysfs_del_qgroups(fs_info);
        }
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
@@ -1398,8 +1461,11 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
        qgroup = add_qgroup_rb(fs_info, qgroupid);
        spin_unlock(&fs_info->qgroup_lock);
 
-       if (IS_ERR(qgroup))
+       if (IS_ERR(qgroup)) {
                ret = PTR_ERR(qgroup);
+               goto out;
+       }
+       ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
 out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
@@ -2818,6 +2884,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
 
 unlock:
        spin_unlock(&fs_info->qgroup_lock);
+       if (!ret)
+               ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
 out:
        if (!committing)
                mutex_unlock(&fs_info->qgroup_ioctl_lock);
@@ -2826,20 +2894,8 @@ out:
        return ret;
 }
 
-/*
- * Two limits to commit transaction in advance.
- *
- * For RATIO, it will be 1/RATIO of the remaining limit as threshold.
- * For SIZE, it will be in byte unit as threshold.
- */
-#define QGROUP_FREE_RATIO              32
-#define QGROUP_FREE_SIZE               SZ_32M
-static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
-                               const struct btrfs_qgroup *qg, u64 num_bytes)
+static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
 {
-       u64 free;
-       u64 threshold;
-
        if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
            qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
                return false;
@@ -2848,32 +2904,6 @@ static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
            qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
                return false;
 
-       /*
-        * Even if we passed the check, it's better to check if reservation
-        * for meta_pertrans is pushing us near limit.
-        * If there is too much pertrans reservation or it's near the limit,
-        * let's try commit transaction to free some, using transaction_kthread
-        */
-       if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
-                             BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
-               if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
-                       free = qg->max_excl - qgroup_rsv_total(qg) - qg->excl;
-                       threshold = min_t(u64, qg->max_excl / QGROUP_FREE_RATIO,
-                                         QGROUP_FREE_SIZE);
-               } else {
-                       free = qg->max_rfer - qgroup_rsv_total(qg) - qg->rfer;
-                       threshold = min_t(u64, qg->max_rfer / QGROUP_FREE_RATIO,
-                                         QGROUP_FREE_SIZE);
-               }
-
-               /*
-                * Use transaction_kthread to commit transaction, so we no
-                * longer need to bother nested transaction nor lock context.
-                */
-               if (free < threshold)
-                       btrfs_commit_transaction_locksafe(fs_info);
-       }
-
        return true;
 }
 
@@ -2921,7 +2951,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
 
                qg = unode_aux_to_qgroup(unode);
 
-               if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
+               if (enforce && !qgroup_check_limits(qg, num_bytes)) {
                        ret = -EDQUOT;
                        goto out;
                }
@@ -3378,28 +3408,132 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
        }
 }
 
+#define rbtree_iterate_from_safe(node, next, start)                            \
+       for (node = start; node && ({ next = rb_next(node); 1;}); node = next)
+
+/*
+ * Revert the qgroup data reservation recorded in @reserved for every entry
+ * that overlaps [start, start + len).
+ *
+ * For each such entry the EXTENT_QGROUP_RESERVED bit is cleared from the
+ * inode's io_tree, the entry is removed from @reserved->range_changed and its
+ * length is subtracted from @reserved->bytes_changed.
+ *
+ * Return 0 on success (including an empty changeset), or the first negative
+ * value returned by clear_extent_bits().
+ */
+static int qgroup_unreserve_range(struct btrfs_inode *inode,
+                                 struct extent_changeset *reserved, u64 start,
+                                 u64 len)
+{
+       struct rb_node *node;
+       struct rb_node *next;
+       struct ulist_node *entry = NULL;
+       int ret = 0;
+
+       /*
+        * Descend towards @start.  @node is non-NULL inside the loop, so
+        * @entry is always valid here and needs no separate NULL check; the
+        * walk simply ends when it falls off a leaf, leaving @entry at the
+        * last node visited.
+        */
+       node = reserved->range_changed.root.rb_node;
+       while (node) {
+               entry = rb_entry(node, struct ulist_node, rb_node);
+               if (entry->val < start)
+                       node = node->rb_right;
+               else
+                       node = node->rb_left;
+       }
+
+       /* Empty changeset */
+       if (!entry)
+               return 0;
+
+       /* Step back one entry, as the previous one may still cover @start. */
+       if (entry->val > start && rb_prev(&entry->rb_node))
+               entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node,
+                                rb_node);
+
+       /* Safe iteration: the current entry is deleted inside the loop. */
+       rbtree_iterate_from_safe(node, next, &entry->rb_node) {
+               u64 entry_start;
+               u64 entry_end;
+               u64 entry_len;
+               int clear_ret;
+
+               entry = rb_entry(node, struct ulist_node, rb_node);
+               entry_start = entry->val;
+               entry_end = entry->aux;
+               entry_len = entry_end - entry_start + 1;
+
+               if (entry_start >= start + len)
+                       break;
+               if (entry_start + entry_len <= start)
+                       continue;
+               /*
+                * Now the entry is in [start, start + len), revert the
+                * EXTENT_QGROUP_RESERVED bit.
+                */
+               clear_ret = clear_extent_bits(&inode->io_tree, entry_start,
+                                             entry_end, EXTENT_QGROUP_RESERVED);
+               /* Remember only the first failure but keep unwinding. */
+               if (!ret && clear_ret < 0)
+                       ret = clear_ret;
+
+               ulist_del(&reserved->range_changed, entry->val, entry->aux);
+               if (likely(reserved->bytes_changed >= entry_len)) {
+                       reserved->bytes_changed -= entry_len;
+               } else {
+                       /* Accounting underflow: warn and clamp to zero. */
+                       WARN_ON(1);
+                       reserved->bytes_changed = 0;
+               }
+       }
+
+       return ret;
+}
+
 /*
- * Reserve qgroup space for range [start, start + len).
+ * Try to free some space for qgroup.
  *
- * This function will either reserve space from related qgroups or doing
- * nothing if the range is already reserved.
+ * For qgroup, there are only 3 ways to free qgroup space:
+ * - Flush nodatacow write
+ *   Any nodatacow write will free its reserved data space at run_delalloc_range().
+ *   In theory, we should only flush nodatacow inodes, but it's not yet
+ *   possible, so we need to flush the whole root.
  *
- * Return 0 for successful reserve
- * Return <0 for error (including -EQUOT)
+ * - Wait for ordered extents
+ *   When ordered extents are finished, their reserved metadata is finally
+ *   converted to per_trans status, which can be freed by later commit
+ *   transaction.
  *
- * NOTE: this function may sleep for memory allocation.
- *       if btrfs_qgroup_reserve_data() is called multiple times with
- *       same @reserved, caller must ensure when error happens it's OK
- *       to free *ALL* reserved space.
+ * - Commit transaction
+ *   This would free the meta_per_trans space.
+ *   In theory this shouldn't free much space, but any extra qgroup space
+ *   helps.
  */
-int btrfs_qgroup_reserve_data(struct inode *inode,
+static int try_flush_qgroup(struct btrfs_root *root)
+{
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       /*
+        * We don't want to run flush again and again, so if there is a running
+        * one, don't start a new flush: wait for it to finish and return 0.
+        */
+       if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
+               wait_event(root->qgroup_flush_wait,
+                       !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
+               return 0;
+       }
+
+       ret = btrfs_start_delalloc_snapshot(root);
+       if (ret < 0)
+               goto out;
+       btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
+
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
+               goto out;
+       }
+
+       ret = btrfs_commit_transaction(trans);
+out:
+       clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
+       wake_up(&root->qgroup_flush_wait);
+       return ret;
+}
+
+static int qgroup_reserve_data(struct btrfs_inode *inode,
                        struct extent_changeset **reserved_ret, u64 start,
                        u64 len)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
+       struct btrfs_root *root = inode->root;
        struct extent_changeset *reserved;
+       bool new_reserved = false;
        u64 orig_reserved;
        u64 to_reserve;
        int ret;
@@ -3412,6 +3546,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
        if (WARN_ON(!reserved_ret))
                return -EINVAL;
        if (!*reserved_ret) {
+               new_reserved = true;
                *reserved_ret = extent_changeset_alloc();
                if (!*reserved_ret)
                        return -ENOMEM;
@@ -3419,15 +3554,15 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
        reserved = *reserved_ret;
        /* Record already reserved space */
        orig_reserved = reserved->bytes_changed;
-       ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
+       ret = set_record_extent_bits(&inode->io_tree, start,
                        start + len -1, EXTENT_QGROUP_RESERVED, reserved);
 
        /* Newly reserved space */
        to_reserve = reserved->bytes_changed - orig_reserved;
-       trace_btrfs_qgroup_reserve_data(inode, start, len,
+       trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len,
                                        to_reserve, QGROUP_RESERVE);
        if (ret < 0)
-               goto cleanup;
+               goto out;
        ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
        if (ret < 0)
                goto cleanup;
@@ -3435,23 +3570,49 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
        return ret;
 
 cleanup:
-       /* cleanup *ALL* already reserved ranges */
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(&reserved->range_changed, &uiter)))
-               clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
-                                unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
-       /* Also free data bytes of already reserved one */
-       btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid,
-                                 orig_reserved, BTRFS_QGROUP_RSV_DATA);
-       extent_changeset_release(reserved);
+       qgroup_unreserve_range(inode, reserved, start, len);
+out:
+       if (new_reserved) {
+               extent_changeset_release(reserved);
+               kfree(reserved);
+               *reserved_ret = NULL;
+       }
        return ret;
 }
 
+/*
+ * Reserve qgroup space for range [start, start + len).
+ *
+ * This function will either reserve space from related qgroups or do nothing
+ * if the range is already reserved.
+ *
+ * Return 0 for successful reservation
+ * Return <0 for error (including -EDQUOT)
+ *
+ * NOTE: This function may sleep for memory allocation, dirty page flushing and
+ *      commit transaction. So caller should not hold any dirty page locked.
+ */
+int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
+                       struct extent_changeset **reserved_ret, u64 start,
+                       u64 len)
+{
+       int ret;
+
+       ret = qgroup_reserve_data(inode, reserved_ret, start, len);
+       if (ret <= 0 && ret != -EDQUOT)
+               return ret;
+
+       ret = try_flush_qgroup(inode->root);
+       if (ret < 0)
+               return ret;
+       return qgroup_reserve_data(inode, reserved_ret, start, len);
+}
+
 /* Free ranges specified by @reserved, normally in error path */
-static int qgroup_free_reserved_data(struct inode *inode,
+static int qgroup_free_reserved_data(struct btrfs_inode *inode,
                        struct extent_changeset *reserved, u64 start, u64 len)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_root *root = inode->root;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct extent_changeset changeset;
@@ -3487,8 +3648,8 @@ static int qgroup_free_reserved_data(struct inode *inode,
                 * EXTENT_QGROUP_RESERVED, we won't double free.
                 * So no need to rush.
                 */
-               ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
-                               free_start, free_start + free_len - 1,
+               ret = clear_record_extent_bits(&inode->io_tree, free_start,
+                               free_start + free_len - 1,
                                EXTENT_QGROUP_RESERVED, &changeset);
                if (ret < 0)
                        goto out;
@@ -3502,7 +3663,7 @@ out:
        return ret;
 }
 
-static int __btrfs_qgroup_release_data(struct inode *inode,
+static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
                        struct extent_changeset *reserved, u64 start, u64 len,
                        int free)
 {
@@ -3510,8 +3671,7 @@ static int __btrfs_qgroup_release_data(struct inode *inode,
        int trace_op = QGROUP_RELEASE;
        int ret;
 
-       if (!test_bit(BTRFS_FS_QUOTA_ENABLED,
-                     &BTRFS_I(inode)->root->fs_info->flags))
+       if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags))
                return 0;
 
        /* In release case, we shouldn't have @reserved */
@@ -3519,18 +3679,18 @@ static int __btrfs_qgroup_release_data(struct inode *inode,
        if (free && reserved)
                return qgroup_free_reserved_data(inode, reserved, start, len);
        extent_changeset_init(&changeset);
-       ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 
-                       start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
+       ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
+                                      EXTENT_QGROUP_RESERVED, &changeset);
        if (ret < 0)
                goto out;
 
        if (free)
                trace_op = QGROUP_FREE;
-       trace_btrfs_qgroup_release_data(inode, start, len,
+       trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len,
                                        changeset.bytes_changed, trace_op);
        if (free)
-               btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
-                               BTRFS_I(inode)->root->root_key.objectid,
+               btrfs_qgroup_free_refroot(inode->root->fs_info,
+                               inode->root->root_key.objectid,
                                changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
        ret = changeset.bytes_changed;
 out:
@@ -3550,7 +3710,7 @@ out:
  *
  * NOTE: This function may sleep for memory allocation.
  */
-int btrfs_qgroup_free_data(struct inode *inode,
+int btrfs_qgroup_free_data(struct btrfs_inode *inode,
                        struct extent_changeset *reserved, u64 start, u64 len)
 {
        return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
@@ -3571,7 +3731,7 @@ int btrfs_qgroup_free_data(struct inode *inode,
  *
  * NOTE: This function may sleep for memory allocation.
  */
-int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
+int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len)
 {
        return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
 }
@@ -3616,7 +3776,7 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
        return num_bytes;
 }
 
-int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
                                enum btrfs_qgroup_rsv_type type, bool enforce)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -3643,6 +3803,21 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
        return ret;
 }
 
+int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+                               enum btrfs_qgroup_rsv_type type, bool enforce)
+{
+       int ret;
+
+       ret = qgroup_reserve_meta(root, num_bytes, type, enforce);
+       if (ret <= 0 && ret != -EDQUOT)
+               return ret;
+
+       ret = try_flush_qgroup(root);
+       if (ret < 0)
+               return ret;
+       return qgroup_reserve_meta(root, num_bytes, type, enforce);
+}
+
 void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -3742,7 +3917,7 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
  * Check qgroup reserved space leaking, normally at destroy inode
  * time
  */
-void btrfs_qgroup_check_reserved_leak(struct inode *inode)
+void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)
 {
        struct extent_changeset changeset;
        struct ulist_node *unode;
@@ -3750,19 +3925,19 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
        int ret;
 
        extent_changeset_init(&changeset);
-       ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+       ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1,
                        EXTENT_QGROUP_RESERVED, &changeset);
 
        WARN_ON(ret < 0);
        if (WARN_ON(changeset.bytes_changed)) {
                ULIST_ITER_INIT(&iter);
                while ((unode = ulist_next(&changeset.range_changed, &iter))) {
-                       btrfs_warn(BTRFS_I(inode)->root->fs_info,
-                               "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
-                               inode->i_ino, unode->val, unode->aux);
+                       btrfs_warn(inode->root->fs_info,
+               "leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu",
+                               btrfs_ino(inode), unode->val, unode->aux);
                }
-               btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
-                               BTRFS_I(inode)->root->root_key.objectid,
+               btrfs_qgroup_free_refroot(inode->root->fs_info,
+                               inode->root->root_key.objectid,
                                changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
 
        }
index 1bc6544..50dea9a 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
+#include <linux/kobject.h>
 #include "ulist.h"
 #include "delayed-ref.h"
 
@@ -223,8 +224,18 @@ struct btrfs_qgroup {
         */
        u64 old_refcnt;
        u64 new_refcnt;
+
+       /*
+        * Sysfs kobject
+        */
+       struct kobject kobj;
 };
 
+static inline u64 btrfs_qgroup_subvolid(u64 qgroupid)
+{
+       return (qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1));
+}
+
 /*
  * For qgroup event trace points only
  */
@@ -344,12 +355,12 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
 #endif
 
 /* New io_tree based accurate qgroup reserve API */
-int btrfs_qgroup_reserve_data(struct inode *inode,
+int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
                        struct extent_changeset **reserved, u64 start, u64 len);
-int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
-int btrfs_qgroup_free_data(struct inode *inode,
-                       struct extent_changeset *reserved, u64 start, u64 len);
-
+int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
+int btrfs_qgroup_free_data(struct btrfs_inode *inode,
+                          struct extent_changeset *reserved, u64 start,
+                          u64 len);
 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
                                enum btrfs_qgroup_rsv_type type, bool enforce);
 /* Reserve metadata space for pertrans and prealloc type */
@@ -399,7 +410,7 @@ void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
  */
 void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
 
-void btrfs_qgroup_check_reserved_leak(struct inode *inode);
+void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode);
 
 /* btrfs_qgroup_swapped_blocks related functions */
 void btrfs_qgroup_init_swapped_blocks(
@@ -415,5 +426,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
 int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
                struct btrfs_root *root, struct extent_buffer *eb);
 void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
+bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info);
 
 #endif
index c870ef7..255490f 100644 (file)
@@ -1083,7 +1083,6 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
                            unsigned long bio_max_len)
 {
        struct bio *last = bio_list->tail;
-       u64 last_end = 0;
        int ret;
        struct bio *bio;
        struct btrfs_bio_stripe *stripe;
@@ -1098,15 +1097,14 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
 
        /* see if we can add this page onto our existing bio */
        if (last) {
-               last_end = (u64)last->bi_iter.bi_sector << 9;
+               u64 last_end = (u64)last->bi_iter.bi_sector << 9;
                last_end += last->bi_iter.bi_size;
 
                /*
                 * we can't merge these if they are from different
                 * devices or if they are not contiguous
                 */
-               if (last_end == disk_start && stripe->dev->bdev &&
-                   !last->bi_status &&
+               if (last_end == disk_start && !last->bi_status &&
                    last->bi_disk == stripe->dev->bdev->bd_disk &&
                    last->bi_partno == stripe->dev->bdev->bd_partno) {
                        ret = bio_add_page(last, page, PAGE_SIZE, 0);
@@ -1117,6 +1115,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
 
        /* put a new bio on the list */
        bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
+       btrfs_io_bio(bio)->device = stripe->dev;
        bio->bi_iter.bi_size = 0;
        bio_set_dev(bio, stripe->dev->bdev);
        bio->bi_iter.bi_sector = disk_start >> 9;
@@ -1325,11 +1324,7 @@ write_data:
        atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
        BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
 
-       while (1) {
-               bio = bio_list_pop(&bio_list);
-               if (!bio)
-                       break;
-
+       while ((bio = bio_list_pop(&bio_list))) {
                bio->bi_private = rbio;
                bio->bi_end_io = raid_write_end_io;
                bio->bi_opf = REQ_OP_WRITE;
@@ -1354,7 +1349,6 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
                           struct bio *bio)
 {
        u64 physical = bio->bi_iter.bi_sector;
-       u64 stripe_start;
        int i;
        struct btrfs_bio_stripe *stripe;
 
@@ -1362,9 +1356,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
 
        for (i = 0; i < rbio->bbio->num_stripes; i++) {
                stripe = &rbio->bbio->stripes[i];
-               stripe_start = stripe->physical;
-               if (physical >= stripe_start &&
-                   physical < stripe_start + rbio->stripe_len &&
+               if (in_range(physical, stripe->physical, rbio->stripe_len) &&
                    stripe->dev->bdev &&
                    bio->bi_disk == stripe->dev->bdev->bd_disk &&
                    bio->bi_partno == stripe->dev->bdev->bd_partno) {
@@ -1382,18 +1374,14 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
 static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
                                   struct bio *bio)
 {
-       u64 logical = bio->bi_iter.bi_sector;
-       u64 stripe_start;
+       u64 logical = (u64)bio->bi_iter.bi_sector << 9;
        int i;
 
-       logical <<= 9;
-
        for (i = 0; i < rbio->nr_data; i++) {
-               stripe_start = rbio->bbio->raid_map[i];
-               if (logical >= stripe_start &&
-                   logical < stripe_start + rbio->stripe_len) {
+               u64 stripe_start = rbio->bbio->raid_map[i];
+
+               if (in_range(logical, stripe_start, rbio->stripe_len))
                        return i;
-               }
        }
        return -1;
 }
@@ -1567,11 +1555,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
         * not to touch it after that
         */
        atomic_set(&rbio->stripes_pending, bios_to_read);
-       while (1) {
-               bio = bio_list_pop(&bio_list);
-               if (!bio)
-                       break;
-
+       while ((bio = bio_list_pop(&bio_list))) {
                bio->bi_private = rbio;
                bio->bi_end_io = raid_rmw_end_io;
                bio->bi_opf = REQ_OP_READ;
@@ -1878,11 +1862,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
                        }
 
                        /* make sure our ps and qs are in order */
-                       if (faila > failb) {
-                               int tmp = failb;
-                               failb = faila;
-                               faila = tmp;
-                       }
+                       if (faila > failb)
+                               swap(faila, failb);
 
                        /* if the q stripe is failed, do a pstripe reconstruction
                         * from the xors.
@@ -2102,7 +2083,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
                 */
                if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
                        __raid_recover_end_io(rbio);
-                       goto out;
+                       return 0;
                } else {
                        goto cleanup;
                }
@@ -2113,11 +2094,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
         * not to touch it after that
         */
        atomic_set(&rbio->stripes_pending, bios_to_read);
-       while (1) {
-               bio = bio_list_pop(&bio_list);
-               if (!bio)
-                       break;
-
+       while ((bio = bio_list_pop(&bio_list))) {
                bio->bi_private = rbio;
                bio->bi_end_io = raid_recover_end_io;
                bio->bi_opf = REQ_OP_READ;
@@ -2126,7 +2103,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 
                submit_bio(bio);
        }
-out:
+
        return 0;
 
 cleanup:
@@ -2482,11 +2459,7 @@ submit_write:
 
        atomic_set(&rbio->stripes_pending, nr_data);
 
-       while (1) {
-               bio = bio_list_pop(&bio_list);
-               if (!bio)
-                       break;
-
+       while ((bio = bio_list_pop(&bio_list))) {
                bio->bi_private = rbio;
                bio->bi_end_io = raid_write_end_io;
                bio->bi_opf = REQ_OP_WRITE;
@@ -2664,11 +2637,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
         * not to touch it after that
         */
        atomic_set(&rbio->stripes_pending, bios_to_read);
-       while (1) {
-               bio = bio_list_pop(&bio_list);
-               if (!bio)
-                       break;
-
+       while ((bio = bio_list_pop(&bio_list))) {
                bio->bi_private = rbio;
                bio->bi_end_io = raid56_parity_scrub_end_io;
                bio->bi_opf = REQ_OP_READ;
index af92525..7f03dbe 100644 (file)
@@ -286,6 +286,8 @@ static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
                        exist_re = insert_root_entry(&exist->roots, re);
                        if (exist_re)
                                kfree(re);
+               } else {
+                       kfree(re);
                }
                kfree(be);
                return exist;
index 040009d..5cd0251 100644 (file)
@@ -68,8 +68,8 @@ static int copy_inline_to_page(struct inode *inode,
         * reservation here. Also we must not do the reservation while holding
         * a transaction open, otherwise we would deadlock.
         */
-       ret = btrfs_delalloc_reserve_space(inode, &data_reserved, file_offset,
-                                          block_size);
+       ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
+                                          file_offset, block_size);
        if (ret)
                goto out;
 
@@ -84,7 +84,8 @@ static int copy_inline_to_page(struct inode *inode,
        clear_extent_bit(&BTRFS_I(inode)->io_tree, file_offset, range_end,
                         EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                         0, 0, NULL);
-       ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL);
+       ret = btrfs_set_extent_delalloc(BTRFS_I(inode), file_offset, range_end,
+                                       0, NULL);
        if (ret)
                goto out_unlock;
 
@@ -133,8 +134,8 @@ out_unlock:
                put_page(page);
        }
        if (ret)
-               btrfs_delalloc_release_space(inode, data_reserved, file_offset,
-                                            block_size, true);
+               btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
+                                            file_offset, block_size, true);
        btrfs_delalloc_release_extents(BTRFS_I(inode), block_size);
 out:
        extent_changeset_free(data_reserved);
@@ -336,6 +337,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
        while (1) {
                u64 next_key_min_offset = key.offset + 1;
                struct btrfs_file_extent_item *extent;
+               u64 extent_gen;
                int type;
                u32 size;
                struct btrfs_key new_key;
@@ -384,6 +386,7 @@ process_slot:
 
                extent = btrfs_item_ptr(leaf, slot,
                                        struct btrfs_file_extent_item);
+               extent_gen = btrfs_file_extent_generation(leaf, extent);
                comp = btrfs_file_extent_compression(leaf, extent);
                type = btrfs_file_extent_type(leaf, extent);
                if (type == BTRFS_FILE_EXTENT_REG ||
@@ -488,6 +491,19 @@ process_slot:
 
                btrfs_release_path(path);
 
+               /*
+                * If this is a new extent update the last_reflink_trans of both
+                * inodes. This is used by fsync to make sure it does not log
+                * multiple checksum items with overlapping ranges. For older
+                * extents we don't need to do it since inode logging skips the
+                * checksums for older extents. Also ignore holes and inline
+                * extents because they don't have checksums in the csum tree.
+                */
+               if (extent_gen == trans->transid && disko > 0) {
+                       BTRFS_I(src)->last_reflink_trans = trans->transid;
+                       BTRFS_I(inode)->last_reflink_trans = trans->transid;
+               }
+
                last_dest_end = ALIGN(new_key.offset + datal,
                                      fs_info->sectorsize);
                ret = clone_finish_inode_update(trans, inode, last_dest_end,
index 3bbae80..4ba1ab9 100644 (file)
@@ -1686,12 +1686,20 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                btrfs_unlock_up_safe(path, 0);
        }
 
-       min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
+       /*
+        * In merge_reloc_root(), we modify the upper level pointer to swap the
+        * tree blocks between reloc tree and subvolume tree.  Thus for tree
+        * block COW, we COW at most from level 1 to root level for each tree.
+        *
+        * Thus the needed metadata size is at most root_level * nodesize,
+        * and * 2 since we have two trees to COW.
+        */
+       min_reserved = fs_info->nodesize * btrfs_root_level(root_item) * 2;
        memset(&next_key, 0, sizeof(next_key));
 
        while (1) {
                ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
-                                            BTRFS_RESERVE_FLUSH_ALL);
+                                            BTRFS_RESERVE_FLUSH_LIMIT);
                if (ret) {
                        err = ret;
                        goto out;
@@ -2571,58 +2579,50 @@ out_free_blocks:
        return err;
 }
 
-static noinline_for_stack
-int prealloc_file_extent_cluster(struct inode *inode,
-                                struct file_extent_cluster *cluster)
+static noinline_for_stack int prealloc_file_extent_cluster(
+                               struct btrfs_inode *inode,
+                               struct file_extent_cluster *cluster)
 {
        u64 alloc_hint = 0;
        u64 start;
        u64 end;
-       u64 offset = BTRFS_I(inode)->index_cnt;
+       u64 offset = inode->index_cnt;
        u64 num_bytes;
-       int nr = 0;
+       int nr;
        int ret = 0;
        u64 prealloc_start = cluster->start - offset;
        u64 prealloc_end = cluster->end - offset;
-       u64 cur_offset;
-       struct extent_changeset *data_reserved = NULL;
+       u64 cur_offset = prealloc_start;
 
        BUG_ON(cluster->start != cluster->boundary[0]);
-       inode_lock(inode);
-
-       ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
-                                         prealloc_end + 1 - prealloc_start);
+       ret = btrfs_alloc_data_chunk_ondemand(inode,
+                                             prealloc_end + 1 - prealloc_start);
        if (ret)
-               goto out;
+               return ret;
 
-       cur_offset = prealloc_start;
-       while (nr < cluster->nr) {
+       inode_lock(&inode->vfs_inode);
+       for (nr = 0; nr < cluster->nr; nr++) {
                start = cluster->boundary[nr] - offset;
                if (nr + 1 < cluster->nr)
                        end = cluster->boundary[nr + 1] - 1 - offset;
                else
                        end = cluster->end - offset;
 
-               lock_extent(&BTRFS_I(inode)->io_tree, start, end);
+               lock_extent(&inode->io_tree, start, end);
                num_bytes = end + 1 - start;
-               if (cur_offset < start)
-                       btrfs_free_reserved_data_space(inode, data_reserved,
-                                       cur_offset, start - cur_offset);
-               ret = btrfs_prealloc_file_range(inode, 0, start,
+               ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
                                                num_bytes, num_bytes,
                                                end + 1, &alloc_hint);
                cur_offset = end + 1;
-               unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
+               unlock_extent(&inode->io_tree, start, end);
                if (ret)
                        break;
-               nr++;
        }
+       inode_unlock(&inode->vfs_inode);
+
        if (cur_offset < prealloc_end)
-               btrfs_free_reserved_data_space(inode, data_reserved,
-                               cur_offset, prealloc_end + 1 - cur_offset);
-out:
-       inode_unlock(inode);
-       extent_changeset_free(data_reserved);
+               btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
+                                              prealloc_end + 1 - cur_offset);
        return ret;
 }
 
@@ -2664,7 +2664,8 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
  */
 int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
 {
-       return atomic_read(&fs_info->balance_cancel_req);
+       return atomic_read(&fs_info->balance_cancel_req) ||
+               fatal_signal_pending(current);
 }
 ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
 
@@ -2690,7 +2691,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
        if (!ra)
                return -ENOMEM;
 
-       ret = prealloc_file_extent_cluster(inode, cluster);
+       ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
        if (ret)
                goto out;
 
@@ -2762,8 +2763,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
                        nr++;
                }
 
-               ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
-                                               NULL);
+               ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start,
+                                               page_end, 0, NULL);
                if (ret) {
                        unlock_page(page);
                        put_page(page);
@@ -3872,9 +3873,9 @@ out:
  * cloning checksum properly handles the nodatasum extents.
  * it also saves CPU time to re-calculate the checksum.
  */
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
+int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_ordered_sum *sums;
        struct btrfs_ordered_extent *ordered;
        int ret;
@@ -3885,7 +3886,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
        ordered = btrfs_lookup_ordered_extent(inode, file_pos);
        BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
 
-       disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
+       disk_bytenr = file_pos + inode->index_cnt;
        ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
                                       disk_bytenr + len - 1, &list, 0);
        if (ret)
index 016a025..5a6cb9d 100644 (file)
@@ -1616,13 +1616,9 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
        struct scrub_page *spage = sblock->pagev[page_num];
 
        BUG_ON(spage->page == NULL);
-       if (spage->io_error) {
-               void *mapped_buffer = kmap_atomic(spage->page);
+       if (spage->io_error)
+               clear_page(page_address(spage->page));
 
-               clear_page(mapped_buffer);
-               flush_dcache_page(spage->page);
-               kunmap_atomic(mapped_buffer);
-       }
        return scrub_add_page_to_wr_bio(sblock->sctx, spage);
 }
 
@@ -1790,42 +1786,21 @@ static int scrub_checksum_data(struct scrub_block *sblock)
        struct btrfs_fs_info *fs_info = sctx->fs_info;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        u8 csum[BTRFS_CSUM_SIZE];
-       u8 *on_disk_csum;
-       struct page *page;
-       void *buffer;
-       u64 len;
-       int index;
+       struct scrub_page *spage;
+       char *kaddr;
 
        BUG_ON(sblock->page_count < 1);
-       if (!sblock->pagev[0]->have_csum)
+       spage = sblock->pagev[0];
+       if (!spage->have_csum)
                return 0;
 
+       kaddr = page_address(spage->page);
+
        shash->tfm = fs_info->csum_shash;
        crypto_shash_init(shash);
+       crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
 
-       on_disk_csum = sblock->pagev[0]->csum;
-       page = sblock->pagev[0]->page;
-       buffer = kmap_atomic(page);
-
-       len = sctx->fs_info->sectorsize;
-       index = 0;
-       for (;;) {
-               u64 l = min_t(u64, len, PAGE_SIZE);
-
-               crypto_shash_update(shash, buffer, l);
-               kunmap_atomic(buffer);
-               len -= l;
-               if (len == 0)
-                       break;
-               index++;
-               BUG_ON(index >= sblock->page_count);
-               BUG_ON(!sblock->pagev[index]->page);
-               page = sblock->pagev[index]->page;
-               buffer = kmap_atomic(page);
-       }
-
-       crypto_shash_final(shash, csum);
-       if (memcmp(csum, on_disk_csum, sctx->csum_size))
+       if (memcmp(csum, spage->csum, sctx->csum_size))
                sblock->checksum_error = 1;
 
        return sblock->checksum_error;
@@ -1839,20 +1814,15 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        u8 calculated_csum[BTRFS_CSUM_SIZE];
        u8 on_disk_csum[BTRFS_CSUM_SIZE];
-       struct page *page;
-       void *mapped_buffer;
-       u64 mapped_size;
-       void *p;
-       u64 len;
-       int index;
-
-       shash->tfm = fs_info->csum_shash;
-       crypto_shash_init(shash);
+       const int num_pages = sctx->fs_info->nodesize >> PAGE_SHIFT;
+       int i;
+       struct scrub_page *spage;
+       char *kaddr;
 
        BUG_ON(sblock->page_count < 1);
-       page = sblock->pagev[0]->page;
-       mapped_buffer = kmap_atomic(page);
-       h = (struct btrfs_header *)mapped_buffer;
+       spage = sblock->pagev[0];
+       kaddr = page_address(spage->page);
+       h = (struct btrfs_header *)kaddr;
        memcpy(on_disk_csum, h->csum, sctx->csum_size);
 
        /*
@@ -1860,40 +1830,29 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
         * a) don't have an extent buffer and
         * b) the page is already kmapped
         */
-       if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
+       if (spage->logical != btrfs_stack_header_bytenr(h))
                sblock->header_error = 1;
 
-       if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
+       if (spage->generation != btrfs_stack_header_generation(h)) {
                sblock->header_error = 1;
                sblock->generation_error = 1;
        }
 
-       if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
+       if (!scrub_check_fsid(h->fsid, spage))
                sblock->header_error = 1;
 
        if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
                   BTRFS_UUID_SIZE))
                sblock->header_error = 1;
 
-       len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
-       mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
-       p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
-       index = 0;
-       for (;;) {
-               u64 l = min_t(u64, len, mapped_size);
+       shash->tfm = fs_info->csum_shash;
+       crypto_shash_init(shash);
+       crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
+                           PAGE_SIZE - BTRFS_CSUM_SIZE);
 
-               crypto_shash_update(shash, p, l);
-               kunmap_atomic(mapped_buffer);
-               len -= l;
-               if (len == 0)
-                       break;
-               index++;
-               BUG_ON(index >= sblock->page_count);
-               BUG_ON(!sblock->pagev[index]->page);
-               page = sblock->pagev[index]->page;
-               mapped_buffer = kmap_atomic(page);
-               mapped_size = PAGE_SIZE;
-               p = mapped_buffer;
+       for (i = 1; i < num_pages; i++) {
+               kaddr = page_address(sblock->pagev[i]->page);
+               crypto_shash_update(shash, kaddr, PAGE_SIZE);
        }
 
        crypto_shash_final(shash, calculated_csum);
@@ -1910,57 +1869,31 @@ static int scrub_checksum_super(struct scrub_block *sblock)
        struct btrfs_fs_info *fs_info = sctx->fs_info;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        u8 calculated_csum[BTRFS_CSUM_SIZE];
-       u8 on_disk_csum[BTRFS_CSUM_SIZE];
-       struct page *page;
-       void *mapped_buffer;
-       u64 mapped_size;
-       void *p;
+       struct scrub_page *spage;
+       char *kaddr;
        int fail_gen = 0;
        int fail_cor = 0;
-       u64 len;
-       int index;
-
-       shash->tfm = fs_info->csum_shash;
-       crypto_shash_init(shash);
 
        BUG_ON(sblock->page_count < 1);
-       page = sblock->pagev[0]->page;
-       mapped_buffer = kmap_atomic(page);
-       s = (struct btrfs_super_block *)mapped_buffer;
-       memcpy(on_disk_csum, s->csum, sctx->csum_size);
+       spage = sblock->pagev[0];
+       kaddr = page_address(spage->page);
+       s = (struct btrfs_super_block *)kaddr;
 
-       if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
+       if (spage->logical != btrfs_super_bytenr(s))
                ++fail_cor;
 
-       if (sblock->pagev[0]->generation != btrfs_super_generation(s))
+       if (spage->generation != btrfs_super_generation(s))
                ++fail_gen;
 
-       if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
+       if (!scrub_check_fsid(s->fsid, spage))
                ++fail_cor;
 
-       len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
-       mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
-       p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
-       index = 0;
-       for (;;) {
-               u64 l = min_t(u64, len, mapped_size);
-
-               crypto_shash_update(shash, p, l);
-               kunmap_atomic(mapped_buffer);
-               len -= l;
-               if (len == 0)
-                       break;
-               index++;
-               BUG_ON(index >= sblock->page_count);
-               BUG_ON(!sblock->pagev[index]->page);
-               page = sblock->pagev[index]->page;
-               mapped_buffer = kmap_atomic(page);
-               mapped_size = PAGE_SIZE;
-               p = mapped_buffer;
-       }
+       shash->tfm = fs_info->csum_shash;
+       crypto_shash_init(shash);
+       crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
+                       BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);
 
-       crypto_shash_final(shash, calculated_csum);
-       if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
+       if (memcmp(calculated_csum, s->csum, sctx->csum_size))
                ++fail_cor;
 
        if (fail_cor + fail_gen) {
@@ -1973,10 +1906,10 @@ static int scrub_checksum_super(struct scrub_block *sblock)
                ++sctx->stat.super_errors;
                spin_unlock(&sctx->stat_lock);
                if (fail_cor)
-                       btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
+                       btrfs_dev_stat_inc_and_print(spage->dev,
                                BTRFS_DEV_STAT_CORRUPTION_ERRS);
                else
-                       btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
+                       btrfs_dev_stat_inc_and_print(spage->dev,
                                BTRFS_DEV_STAT_GENERATION_ERRS);
        }
 
@@ -3758,7 +3691,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
        struct btrfs_fs_info *fs_info = sctx->fs_info;
 
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
-               return -EIO;
+               return -EROFS;
 
        /* Seed devices of a new filesystem have their own generation. */
        if (scrub_dev->fs_devices != fs_info->fs_devices)
index c7bd3fd..475968c 100644 (file)
@@ -468,8 +468,8 @@ again:
                        "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
                        cache->start, cache->length, cache->used, cache->pinned,
                        cache->reserved, cache->ro ? "[readonly]" : "");
-               btrfs_dump_free_space(cache, bytes);
                spin_unlock(&cache->lock);
+               btrfs_dump_free_space(cache, bytes);
        }
        if (++index < BTRFS_NR_RAID_TYPES)
                goto again;
index c3826ae..5a9dc31 100644 (file)
@@ -67,6 +67,21 @@ static struct file_system_type btrfs_root_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
+/*
+ * Generally the error codes correspond to their respective errors, but there
+ * are a few special cases.
+ *
+ * EUCLEAN: Any sort of corruption that we encounter.  The tree-checker for
+ *          instance will return EUCLEAN if any of the blocks are corrupted in
+ *          a way that is problematic.  We want to reserve EUCLEAN for these
+ *          sorts of corruptions.
+ *
+ * EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
+ *        need to use EROFS for this case.  We will have no idea of the
+ *        original failure, that will have been reported at the time we tripped
+ *        over the error.  Each subsequent error that doesn't have any context
+ *        of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
+ */
 const char * __attribute_const__ btrfs_decode_error(int errno)
 {
        char *errstr = "unknown";
@@ -326,7 +341,6 @@ enum {
        Opt_defrag, Opt_nodefrag,
        Opt_discard, Opt_nodiscard,
        Opt_discard_mode,
-       Opt_nologreplay,
        Opt_norecovery,
        Opt_ratio,
        Opt_rescan_uuid_tree,
@@ -340,13 +354,15 @@ enum {
        Opt_subvolid,
        Opt_thread_pool,
        Opt_treelog, Opt_notreelog,
-       Opt_usebackuproot,
        Opt_user_subvol_rm_allowed,
 
+       /* Rescue options */
+       Opt_rescue,
+       Opt_usebackuproot,
+       Opt_nologreplay,
+
        /* Deprecated options */
-       Opt_alloc_start,
        Opt_recovery,
-       Opt_subvolrootid,
 
        /* Debugging options */
        Opt_check_integrity,
@@ -390,7 +406,6 @@ static const match_table_t tokens = {
        {Opt_discard, "discard"},
        {Opt_discard_mode, "discard=%s"},
        {Opt_nodiscard, "nodiscard"},
-       {Opt_nologreplay, "nologreplay"},
        {Opt_norecovery, "norecovery"},
        {Opt_ratio, "metadata_ratio=%u"},
        {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
@@ -408,13 +423,17 @@ static const match_table_t tokens = {
        {Opt_thread_pool, "thread_pool=%u"},
        {Opt_treelog, "treelog"},
        {Opt_notreelog, "notreelog"},
-       {Opt_usebackuproot, "usebackuproot"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
 
+       /* Rescue options */
+       {Opt_rescue, "rescue=%s"},
+       /* Deprecated, with alias rescue=nologreplay */
+       {Opt_nologreplay, "nologreplay"},
+       /* Deprecated, with alias rescue=usebackuproot */
+       {Opt_usebackuproot, "usebackuproot"},
+
        /* Deprecated options */
-       {Opt_alloc_start, "alloc_start=%s"},
        {Opt_recovery, "recovery"},
-       {Opt_subvolrootid, "subvolrootid=%d"},
 
        /* Debugging options */
        {Opt_check_integrity, "check_int"},
@@ -433,6 +452,55 @@ static const match_table_t tokens = {
        {Opt_err, NULL},
 };
 
+static const match_table_t rescue_tokens = { /* sub-options accepted after "rescue=" */
+       {Opt_usebackuproot, "usebackuproot"},
+       {Opt_nologreplay, "nologreplay"},
+       {Opt_err, NULL}, /* terminating entry; parse_rescue_options() reports Opt_err as "unrecognized" */
+};
+
+static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
+{ /* Apply each ':'-separated rescue sub-option in @options to @info; returns 0, -ENOMEM or -EINVAL. */
+       char *opts;
+       char *orig;
+       char *p;
+       substring_t args[MAX_OPT_ARGS]; /* filled by match_token(); not read here, no rescue pattern takes an argument */
+       int ret = 0;
+
+       opts = kstrdup(options, GFP_KERNEL); /* private copy: @options is const and strsep() modifies its input */
+       if (!opts)
+               return -ENOMEM;
+       orig = opts; /* strsep() advances opts, so keep the base pointer for kfree() */
+
+       while ((p = strsep(&opts, ":")) != NULL) {
+               int token;
+
+               if (!*p) /* skip empty fields such as "a::b" or a trailing ':' */
+                       continue;
+               token = match_token(p, rescue_tokens, args);
+               switch (token){
+               case Opt_usebackuproot:
+                       btrfs_info(info,
+                                  "trying to use backup root at mount time");
+                       btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
+                       break;
+               case Opt_nologreplay:
+                       btrfs_set_and_info(info, NOLOGREPLAY,
+                                          "disabling log replay at mount time");
+                       break;
+               case Opt_err: /* nothing in rescue_tokens matched this field */
+                       btrfs_info(info, "unrecognized rescue option '%s'", p);
+                       ret = -EINVAL;
+                       goto out; /* stop at the first bad field; options set by earlier fields stay set */
+               default:
+                       break;
+               }
+
+       }
+out:
+       kfree(orig);
+       return ret;
+}
+
 /*
  * Regular mount options parser.  Everything that is needed only when
  * reading in a new superblock is parsed here.
@@ -479,7 +547,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                case Opt_subvol:
                case Opt_subvol_empty:
                case Opt_subvolid:
-               case Opt_subvolrootid:
                case Opt_device:
                        /*
                         * These are parsed by btrfs_parse_subvol_options or
@@ -663,10 +730,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                                goto out;
                        }
                        break;
-               case Opt_alloc_start:
-                       btrfs_info(info,
-                               "option alloc_start is obsolete, ignored");
-                       break;
                case Opt_acl:
 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
                        info->sb->s_flags |= SB_POSIXACL;
@@ -689,6 +752,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                        break;
                case Opt_norecovery:
                case Opt_nologreplay:
+                       btrfs_warn(info,
+               "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
                        btrfs_set_and_info(info, NOLOGREPLAY,
                                           "disabling log replay at mount time");
                        break;
@@ -762,6 +827,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                        }
                        break;
                case Opt_inode_cache:
+                       btrfs_warn(info,
+       "the 'inode_cache' option is deprecated and will have no effect from 5.11");
                        btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
                                           "enabling inode map caching");
                        break;
@@ -791,10 +858,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                                             "disabling auto defrag");
                        break;
                case Opt_recovery:
-                       btrfs_warn(info,
-                                  "'recovery' is deprecated, use 'usebackuproot' instead");
-                       fallthrough;
                case Opt_usebackuproot:
+                       btrfs_warn(info,
+                       "'%s' is deprecated, use 'rescue=usebackuproot' instead",
+                                  token == Opt_recovery ? "recovery" :
+                                  "usebackuproot");
                        btrfs_info(info,
                                   "trying to use backup root at mount time");
                        btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
@@ -859,6 +927,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                        }
                        info->commit_interval = intarg;
                        break;
+               case Opt_rescue:
+                       ret = parse_rescue_options(info, args[0].from);
+                       if (ret < 0)
+                               goto out;
+                       break;
 #ifdef CONFIG_BTRFS_DEBUG
                case Opt_fragment_all:
                        btrfs_info(info, "fragmenting all space");
@@ -1020,9 +1093,6 @@ static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
 
                        *subvol_objectid = subvolid;
                        break;
-               case Opt_subvolrootid:
-                       pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
-                       break;
                default:
                        break;
                }
@@ -1344,7 +1414,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
        if (btrfs_test_opt(info, NOTREELOG))
                seq_puts(seq, ",notreelog");
        if (btrfs_test_opt(info, NOLOGREPLAY))
-               seq_puts(seq, ",nologreplay");
+               seq_puts(seq, ",rescue=nologreplay");
        if (btrfs_test_opt(info, FLUSHONCOMMIT))
                seq_puts(seq, ",flushoncommit");
        if (btrfs_test_opt(info, DISCARD_SYNC))
@@ -1712,11 +1782,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
                                new_pool_size);
 }
 
-static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
-{
-       set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
-}
-
 static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
                                       unsigned long old_opts, int flags)
 {
@@ -1750,8 +1815,6 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
        else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
                 !btrfs_test_opt(fs_info, DISCARD_ASYNC))
                btrfs_discard_cleanup(fs_info);
-
-       clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
 }
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -1767,7 +1830,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
        int ret;
 
        sync_filesystem(sb);
-       btrfs_remount_prepare(fs_info);
+       set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
 
        if (data) {
                void *new_sec_opts = NULL;
@@ -1889,6 +1952,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 out:
        wake_up_process(fs_info->transaction_kthread);
        btrfs_remount_cleanup(fs_info, old_opts);
+       clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+
        return 0;
 
 restore:
@@ -1903,6 +1968,8 @@ restore:
                old_thread_pool_size, fs_info->thread_pool_size);
        fs_info->metadata_ratio = old_metadata_ratio;
        btrfs_remount_cleanup(fs_info, old_opts);
+       clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+
        return ret;
 }
 
@@ -2296,9 +2363,7 @@ static int btrfs_unfreeze(struct super_block *sb)
 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
-       struct btrfs_fs_devices *cur_devices;
        struct btrfs_device *dev, *first_dev = NULL;
-       struct list_head *head;
 
        /*
         * Lightweight locking of the devices. We should not need
@@ -2308,18 +2373,13 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
         * least until the rcu_read_unlock.
         */
        rcu_read_lock();
-       cur_devices = fs_info->fs_devices;
-       while (cur_devices) {
-               head = &cur_devices->devices;
-               list_for_each_entry_rcu(dev, head, dev_list) {
-                       if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
-                               continue;
-                       if (!dev->name)
-                               continue;
-                       if (!first_dev || dev->devid < first_dev->devid)
-                               first_dev = dev;
-               }
-               cur_devices = cur_devices->seed;
+       list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
+               if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
+                       continue;
+               if (!dev->name)
+                       continue;
+               if (!first_dev || dev->devid < first_dev->devid)
+                       first_dev = dev;
        }
 
        if (first_dev)
index a39bff6..104c80c 100644 (file)
@@ -19,6 +19,7 @@
 #include "volumes.h"
 #include "space-info.h"
 #include "block-group.h"
+#include "qgroup.h"
 
 struct btrfs_feature_attr {
        struct kobj_attribute kobj_attr;
@@ -936,8 +937,12 @@ void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
 
 void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
 {
+       struct kobject *fsid_kobj = &fs_info->fs_devices->fsid_kobj;
+
        btrfs_reset_fs_info_ptr(fs_info);
 
+       sysfs_remove_link(fsid_kobj, "bdi");
+
        if (fs_info->space_info_kobj) {
                sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
                kobject_del(fs_info->space_info_kobj);
@@ -957,8 +962,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
        }
 #endif
        addrm_unknown_feature_attrs(fs_info, false);
-       sysfs_remove_group(&fs_info->fs_devices->fsid_kobj, &btrfs_feature_attr_group);
-       sysfs_remove_files(&fs_info->fs_devices->fsid_kobj, btrfs_attrs);
+       sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+       sysfs_remove_files(fsid_kobj, btrfs_attrs);
        btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, NULL);
 }
 
@@ -1273,7 +1278,9 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
 {
        int error = 0;
        struct btrfs_device *dev;
+       unsigned int nofs_flag;
 
+       nofs_flag = memalloc_nofs_save();
        list_for_each_entry(dev, &fs_devices->devices, dev_list) {
 
                if (one_device && one_device != dev)
@@ -1301,6 +1308,7 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
                        break;
                }
        }
+       memalloc_nofs_restore(nofs_flag);
 
        return error;
 }
@@ -1438,6 +1446,10 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
        if (error)
                goto failure;
 
+       error = sysfs_create_link(fsid_kobj, &fs_info->sb->s_bdi->dev->kobj, "bdi");
+       if (error)
+               goto failure;
+
        fs_info->space_info_kobj = kobject_create_and_add("allocation",
                                                  fsid_kobj);
        if (!fs_info->space_info_kobj) {
@@ -1455,6 +1467,153 @@ failure:
        return error;
 }
 
+static inline struct btrfs_fs_info *qgroup_kobj_to_fs_info(struct kobject *kobj)
+{ /* Map a per-qgroup kobject to its fs_info via the kobject two levels up (qgroup -> "qgroups" -> fsid). */
+       return to_fs_info(kobj->parent->parent);
+}
+
+#define QGROUP_ATTR(_member, _show_name)                                       \
+static ssize_t btrfs_qgroup_show_##_member(struct kobject *qgroup_kobj,                \
+                                          struct kobj_attribute *a,            \
+                                          char *buf)                           \
+{                                                                              \
+       struct btrfs_fs_info *fs_info = qgroup_kobj_to_fs_info(qgroup_kobj);    \
+       struct btrfs_qgroup *qgroup = container_of(qgroup_kobj,                 \
+                       struct btrfs_qgroup, kobj);                             \
+       return btrfs_show_u64(&qgroup->_member, &fs_info->qgroup_lock, buf);    \
+}                                                                              \
+BTRFS_ATTR(qgroup, _show_name, btrfs_qgroup_show_##_member) /* generates a show handler for qgroup->_member and declares attribute _show_name for it */
+
+#define QGROUP_RSV_ATTR(_name, _type)                                          \
+static ssize_t btrfs_qgroup_rsv_show_##_name(struct kobject *qgroup_kobj,      \
+                                            struct kobj_attribute *a,          \
+                                            char *buf)                         \
+{                                                                              \
+       struct btrfs_fs_info *fs_info = qgroup_kobj_to_fs_info(qgroup_kobj);    \
+       struct btrfs_qgroup *qgroup = container_of(qgroup_kobj,                 \
+                       struct btrfs_qgroup, kobj);                             \
+       return btrfs_show_u64(&qgroup->rsv.values[_type],                       \
+                       &fs_info->qgroup_lock, buf);                            \
+}                                                                              \
+BTRFS_ATTR(qgroup, rsv_##_name, btrfs_qgroup_rsv_show_##_name) /* same as QGROUP_ATTR but for qgroup->rsv.values[_type], attribute name rsv_<_name> */
+
+QGROUP_ATTR(rfer, referenced); /* qgroup->rfer as attribute "referenced" */
+QGROUP_ATTR(excl, exclusive); /* qgroup->excl as "exclusive" */
+QGROUP_ATTR(max_rfer, max_referenced); /* qgroup->max_rfer as "max_referenced" */
+QGROUP_ATTR(max_excl, max_exclusive); /* qgroup->max_excl as "max_exclusive" */
+QGROUP_ATTR(lim_flags, limit_flags); /* qgroup->lim_flags as "limit_flags" */
+QGROUP_RSV_ATTR(data, BTRFS_QGROUP_RSV_DATA); /* rsv.values[BTRFS_QGROUP_RSV_DATA] as "rsv_data" */
+QGROUP_RSV_ATTR(meta_pertrans, BTRFS_QGROUP_RSV_META_PERTRANS); /* as "rsv_meta_pertrans" */
+QGROUP_RSV_ATTR(meta_prealloc, BTRFS_QGROUP_RSV_META_PREALLOC); /* as "rsv_meta_prealloc" */
+
+static struct attribute *qgroup_attrs[] = { /* one entry per QGROUP_ATTR / QGROUP_RSV_ATTR invocation */
+       BTRFS_ATTR_PTR(qgroup, referenced),
+       BTRFS_ATTR_PTR(qgroup, exclusive),
+       BTRFS_ATTR_PTR(qgroup, max_referenced),
+       BTRFS_ATTR_PTR(qgroup, max_exclusive),
+       BTRFS_ATTR_PTR(qgroup, limit_flags),
+       BTRFS_ATTR_PTR(qgroup, rsv_data),
+       BTRFS_ATTR_PTR(qgroup, rsv_meta_pertrans),
+       BTRFS_ATTR_PTR(qgroup, rsv_meta_prealloc),
+       NULL /* list terminator */
+};
+ATTRIBUTE_GROUPS(qgroup); /* presumably defines the qgroup_groups used as .default_groups in qgroup_ktype - confirm against linux/sysfs.h */
+
+static void qgroup_release(struct kobject *kobj)
+{ /* kobj_type release hook; only resets the kobject embedded in struct btrfs_qgroup, frees nothing. */
+       struct btrfs_qgroup *qgroup = container_of(kobj, struct btrfs_qgroup, kobj);
+
+       memset(&qgroup->kobj, 0, sizeof(*kobj)); /* also zeroes state_initialized, which the add/del helpers test */
+}
+
+static struct kobj_type qgroup_ktype = { /* kobject type passed to kobject_init_and_add() for each qgroup */
+       .sysfs_ops = &kobj_sysfs_ops,
+       .release = qgroup_release,
+       .default_groups = qgroup_groups,
+};
+
+int btrfs_sysfs_add_one_qgroup(struct btrfs_fs_info *fs_info,
+                               struct btrfs_qgroup *qgroup)
+{ /* Register @qgroup's kobject under fs_info->qgroups_kobj; returns 0 or a negative errno. */
+       struct kobject *qgroups_kobj = fs_info->qgroups_kobj;
+       int ret;
+
+       if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state)) /* dummy fs_info: skip sysfs, report success */
+               return 0;
+       if (qgroup->kobj.state_initialized) /* kobject already initialized: nothing to do */
+               return 0;
+       if (!qgroups_kobj) /* parent "qgroups" kobject does not exist */
+               return -EINVAL;
+
+       ret = kobject_init_and_add(&qgroup->kobj, &qgroup_ktype, qgroups_kobj,
+                       "%hu_%llu", btrfs_qgroup_level(qgroup->qgroupid), /* kobject name is "<level>_<subvolid>" */
+                       btrfs_qgroup_subvolid(qgroup->qgroupid));
+       if (ret < 0)
+               kobject_put(&qgroup->kobj); /* failed init_and_add still needs a put to clean up the kobject */
+
+       return ret;
+}
+
+void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info)
+{ /* Remove every per-qgroup kobject in qgroup_tree, then the "qgroups" parent kobject itself. */
+       struct btrfs_qgroup *qgroup;
+       struct btrfs_qgroup *next;
+
+       if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state)) /* dummy fs_info: nothing was added */
+               return;
+
+       rbtree_postorder_for_each_entry_safe(qgroup, next,
+                                            &fs_info->qgroup_tree, node)
+               btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+       kobject_del(fs_info->qgroups_kobj);
+       kobject_put(fs_info->qgroups_kobj);
+       fs_info->qgroups_kobj = NULL; /* btrfs_sysfs_add_qgroups() returns early while this is non-NULL */
+}
+
+/* Called when qgroups get initialized, thus there is no need for locking */
+int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info)
+{ /* Create the "qgroups" kobject under the fsid kobject and add every qgroup in qgroup_tree; returns 0 or a negative errno. */
+       struct kobject *fsid_kobj = &fs_info->fs_devices->fsid_kobj;
+       struct btrfs_qgroup *qgroup;
+       struct btrfs_qgroup *next;
+       int ret = 0;
+
+       if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state)) /* dummy fs_info: skip sysfs, report success */
+               return 0;
+
+       ASSERT(fsid_kobj); /* NOTE(review): fsid_kobj is the address of an embedded member, so this looks always true */
+       if (fs_info->qgroups_kobj) /* already created */
+               return 0;
+
+       fs_info->qgroups_kobj = kobject_create_and_add("qgroups", fsid_kobj);
+       if (!fs_info->qgroups_kobj) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       rbtree_postorder_for_each_entry_safe(qgroup, next,
+                                            &fs_info->qgroup_tree, node) {
+               ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
+               if (ret < 0)
+                       goto out;
+       }
+
+out:
+       if (ret < 0)
+               btrfs_sysfs_del_qgroups(fs_info); /* undo whatever was registered before the failure */
+       return ret;
+}
+
+void btrfs_sysfs_del_one_qgroup(struct btrfs_fs_info *fs_info,
+                               struct btrfs_qgroup *qgroup)
+{ /* Unregister @qgroup's kobject if it was initialized. */
+       if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state)) /* dummy fs_info: nothing was added */
+               return;
+
+       if (qgroup->kobj.state_initialized) { /* skip qgroups whose kobject was never set up */
+               kobject_del(&qgroup->kobj);
+               kobject_put(&qgroup->kobj);
+       }
+}
 
 /*
  * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
index 718a26c..cf839c4 100644 (file)
@@ -36,4 +36,11 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
 void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info);
 void btrfs_sysfs_update_devid(struct btrfs_device *device);
 
+int btrfs_sysfs_add_one_qgroup(struct btrfs_fs_info *fs_info,
+                               struct btrfs_qgroup *qgroup);
+void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info);
+int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info);
+void btrfs_sysfs_del_one_qgroup(struct btrfs_fs_info *fs_info,
+                               struct btrfs_qgroup *qgroup);
+
 #endif
index 914eea5..2c783d2 100644 (file)
@@ -60,8 +60,6 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
                                if (prev_bit == 0 && bit == 1) {
                                        extent_start = offset;
                                } else if (prev_bit == 1 && bit == 0) {
-                                       if (i >= num_extents)
-                                               goto invalid;
                                        if (i >= num_extents ||
                                            extent_start != extents[i].start ||
                                            offset - extent_start != extents[i].length)
index 24a8c71..894a63a 100644 (file)
@@ -954,8 +954,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
        btrfs_test_inode_set_ops(inode);
 
        /* [BTRFS_MAX_EXTENT_SIZE] */
-       ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0,
-                                       NULL);
+       ret = btrfs_set_extent_delalloc(BTRFS_I(inode), 0,
+                                       BTRFS_MAX_EXTENT_SIZE - 1, 0, NULL);
        if (ret) {
                test_err("btrfs_set_extent_delalloc returned %d", ret);
                goto out;
@@ -968,7 +968,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
        }
 
        /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
-       ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
+       ret = btrfs_set_extent_delalloc(BTRFS_I(inode), BTRFS_MAX_EXTENT_SIZE,
                                        BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
                                        0, NULL);
        if (ret) {
@@ -999,7 +999,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
        }
 
        /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
-       ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
+       ret = btrfs_set_extent_delalloc(BTRFS_I(inode), BTRFS_MAX_EXTENT_SIZE >> 1,
                                        (BTRFS_MAX_EXTENT_SIZE >> 1)
                                        + sectorsize - 1,
                                        0, NULL);
@@ -1017,7 +1017,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
        /*
         * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
         */
-       ret = btrfs_set_extent_delalloc(inode,
+       ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
                        BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
                        (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
                        0, NULL);
@@ -1035,7 +1035,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
        /*
        * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
        */
-       ret = btrfs_set_extent_delalloc(inode,
+       ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
                        BTRFS_MAX_EXTENT_SIZE + sectorsize,
                        BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL);
        if (ret) {
@@ -1069,7 +1069,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
         * Refill the hole again just for good measure, because I thought it
         * might fail and I'd rather satisfy my paranoia at this point.
         */
-       ret = btrfs_set_extent_delalloc(inode,
+       ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
                        BTRFS_MAX_EXTENT_SIZE + sectorsize,
                        BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL);
        if (ret) {
index b359d4b..20c6ac1 100644 (file)
@@ -937,7 +937,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        if (TRANS_ABORTED(trans) ||
            test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
                wake_up_process(info->transaction_kthread);
-               err = -EIO;
+               if (TRANS_ABORTED(trans))
+                       err = trans->aborted;
+               else
+                       err = -EROFS;
        }
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -1630,7 +1633,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        }
 
        key.offset = (u64)-1;
-       pending->snap = btrfs_get_fs_root(fs_info, objectid, true);
+       pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
        if (IS_ERR(pending->snap)) {
                ret = PTR_ERR(pending->snap);
                btrfs_abort_transaction(trans, ret);
@@ -2351,7 +2354,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
         */
        cur_trans->state = TRANS_STATE_COMPLETED;
        wake_up(&cur_trans->commit_wait);
-       clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
 
        spin_lock(&fs_info->trans_lock);
        list_del_init(&cur_trans->list);
index bf102e6..d60b055 100644 (file)
@@ -151,18 +151,20 @@ struct btrfs_pending_snapshot {
        struct btrfs_block_rsv block_rsv;
        /* extra metadata reservation for relocation */
        int error;
+       /* Preallocated anonymous block device number */
+       dev_t anon_dev;
        bool readonly;
        struct list_head list;
 };
 
 static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
-                                             struct inode *inode)
+                                             struct btrfs_inode *inode)
 {
-       spin_lock(&BTRFS_I(inode)->lock);
-       BTRFS_I(inode)->last_trans = trans->transaction->transid;
-       BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
-       BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
-       spin_unlock(&BTRFS_I(inode)->lock);
+       spin_lock(&inode->lock);
+       inode->last_trans = trans->transaction->transid;
+       inode->last_sub_trans = inode->root->log_transid;
+       inode->last_log_commit = inode->root->last_log_commit;
+       spin_unlock(&inode->lock);
 }
 
 /*
@@ -208,20 +210,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
                                   int wait_for_unblock);
-
-/*
- * Try to commit transaction asynchronously, so this is safe to call
- * even holding a spinlock.
- *
- * It's done by informing transaction_kthread to commit transaction without
- * waiting for commit interval.
- */
-static inline void btrfs_commit_transaction_locksafe(
-               struct btrfs_fs_info *fs_info)
-{
-       set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
-       wake_up_process(fs_info->transaction_kthread);
-}
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
 void btrfs_throttle(struct btrfs_fs_info *fs_info);
index 16c3a6d..d3f28b8 100644 (file)
@@ -133,10 +133,9 @@ out:
                ret = 0;
        }
 done:
-       if (ret != -EAGAIN) {
+       if (ret != -EAGAIN)
                memset(&root->defrag_progress, 0,
                       sizeof(root->defrag_progress));
-               root->defrag_trans_start = trans->transid;
-       }
+
        return ret;
 }
index cd5348f..ea8136d 100644 (file)
@@ -3116,29 +3116,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        btrfs_init_log_ctx(&root_log_ctx, NULL);
 
        mutex_lock(&log_root_tree->log_mutex);
-       atomic_inc(&log_root_tree->log_batch);
-       atomic_inc(&log_root_tree->log_writers);
 
        index2 = log_root_tree->log_transid % 2;
        list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
        root_log_ctx.log_transid = log_root_tree->log_transid;
 
-       mutex_unlock(&log_root_tree->log_mutex);
-
-       mutex_lock(&log_root_tree->log_mutex);
-
        /*
         * Now we are safe to update the log_root_tree because we're under the
         * log_mutex, and we're a current writer so we're holding the commit
         * open until we drop the log_mutex.
         */
        ret = update_log_root(trans, log, &new_root_item);
-
-       if (atomic_dec_and_test(&log_root_tree->log_writers)) {
-               /* atomic_dec_and_test implies a barrier */
-               cond_wake_up_nomb(&log_root_tree->log_writer_wait);
-       }
-
        if (ret) {
                if (!list_empty(&root_log_ctx.list))
                        list_del_init(&root_log_ctx.list);
@@ -3184,8 +3172,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                                root_log_ctx.log_transid - 1);
        }
 
-       wait_for_writer(log_root_tree);
-
        /*
         * now that we've moved on to the tree of log tree roots,
         * check the full commit flag again
@@ -3906,6 +3892,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
 }
 
 static int log_csums(struct btrfs_trans_handle *trans,
+                    struct btrfs_inode *inode,
                     struct btrfs_root *log_root,
                     struct btrfs_ordered_sum *sums)
 {
@@ -3913,6 +3900,14 @@ static int log_csums(struct btrfs_trans_handle *trans,
        struct extent_state *cached_state = NULL;
        int ret;
 
+       /*
+        * If this inode was not used for reflink operations in the current
+        * transaction with new extents, then do the fast path, no need to
+        * worry about logging checksum items with overlapping ranges.
+        */
+       if (inode->last_reflink_trans < trans->transid)
+               return btrfs_csum_file_blocks(trans, log_root, sums);
+
        /*
         * Serialize logging for checksums. This is to avoid racing with the
         * same checksum being logged by another task that is logging another
@@ -4064,7 +4059,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                                   struct btrfs_ordered_sum,
                                                   list);
                if (!ret)
-                       ret = log_csums(trans, log, sums);
+                       ret = log_csums(trans, inode, log, sums);
                list_del(&sums->list);
                kfree(sums);
        }
@@ -4123,7 +4118,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
                                                   struct btrfs_ordered_sum,
                                                   list);
                if (!ret)
-                       ret = log_csums(trans, log_root, sums);
+                       ret = log_csums(trans, inode, log_root, sums);
                list_del(&sums->list);
                kfree(sums);
        }
@@ -4151,7 +4146,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
-       ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
+       ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
                                   em->start + em->len, NULL, 0, 1,
                                   sizeof(*fi), &extent_inserted);
        if (ret)
@@ -5123,14 +5118,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                           const loff_t end,
                           struct btrfs_log_ctx *ctx)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_path *path;
        struct btrfs_path *dst_path;
        struct btrfs_key min_key;
        struct btrfs_key max_key;
        struct btrfs_root *log = root->log_root;
        int err = 0;
-       int ret;
+       int ret = 0;
        bool fast_search = false;
        u64 ino = btrfs_ino(inode);
        struct extent_map_tree *em_tree = &inode->extent_tree;
@@ -5166,15 +5160,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        max_key.offset = (u64)-1;
 
        /*
-        * Only run delayed items if we are a dir or a new file.
-        * Otherwise commit the delayed inode only, which is needed in
-        * order for the log replay code to mark inodes for link count
-        * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
+        * Only run delayed items if we are a directory. We want to make sure
+        * all directory indexes hit the fs/subvolume tree so we can find them
+        * and figure out which index ranges have to be logged.
+        *
+        * Otherwise commit the delayed inode only if the full sync flag is set,
+        * as we want to make sure an up to date version is in the subvolume
+        * tree so copy_inode_items_to_log() / copy_items() can find it and copy
+        * it to the log tree. For a non full sync, we always log the inode item
+        * based on the in-memory struct btrfs_inode which is always up to date.
         */
-       if (S_ISDIR(inode->vfs_inode.i_mode) ||
-           inode->generation > fs_info->last_trans_committed)
+       if (S_ISDIR(inode->vfs_inode.i_mode))
                ret = btrfs_commit_inode_delayed_items(trans, inode);
-       else
+       else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
                ret = btrfs_commit_inode_delayed_inode(inode);
 
        if (ret) {
index f403fb1..d7670e2 100644 (file)
@@ -245,7 +245,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
  *
  * global::fs_devs - add, remove, updates to the global list
  *
- * does not protect: manipulation of the fs_devices::devices list!
+ * does not protect: manipulation of the fs_devices::devices list in general,
+ * but in mount context it could be used to exclude list modifications by e.g.
+ * the scan ioctl
  *
  * btrfs_device::name - renames (write side), read is RCU
  *
@@ -258,6 +260,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
  * may be used to exclude some operations from running concurrently without any
  * modifications to the list (see write_all_supers)
  *
+ * Is not required at mount and close times, because our device list is
+ * protected by the uuid_mutex at that point.
+ *
  * balance_mutex
  * -------------
  * protects balance structures (status, state) and context accessed from
@@ -602,6 +607,11 @@ static int btrfs_free_stale_devices(const char *path,
        return ret;
 }
 
+/*
+ * This is only used on mount, and we are protected from competing things
+ * messing with our fs_devices by the uuid_mutex, thus we do not need the
+ * fs_devices->device_list_mutex here.
+ */
 static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
                        struct btrfs_device *device, fmode_t flags,
                        void *holder)
@@ -1229,8 +1239,14 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
        int ret;
 
        lockdep_assert_held(&uuid_mutex);
+       /*
+        * The device_list_mutex cannot be taken here in case opening the
+        * underlying device takes further locks like bd_mutex.
+        *
+        * We also don't need the lock here as this is called during mount and
+        * exclusion is provided by uuid_mutex
+        */
 
-       mutex_lock(&fs_devices->device_list_mutex);
        if (fs_devices->opened) {
                fs_devices->opened++;
                ret = 0;
@@ -1238,7 +1254,6 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                list_sort(NULL, &fs_devices->devices, devid_cmp);
                ret = open_fs_devices(fs_devices, flags, holder);
        }
-       mutex_unlock(&fs_devices->device_list_mutex);
 
        return ret;
 }
@@ -3231,7 +3246,7 @@ static int del_balance_item(struct btrfs_fs_info *fs_info)
        if (!path)
                return -ENOMEM;
 
-       trans = btrfs_start_transaction(root, 0);
+       trans = btrfs_start_transaction_fallback_global_rsv(root, 0);
        if (IS_ERR(trans)) {
                btrfs_free_path(path);
                return PTR_ERR(trans);
@@ -4135,7 +4150,22 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
        mutex_lock(&fs_info->balance_mutex);
        if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
                btrfs_info(fs_info, "balance: paused");
-       else if (ret == -ECANCELED && atomic_read(&fs_info->balance_cancel_req))
+       /*
+        * Balance can be canceled by:
+        *
+        * - Regular cancel request
+        *   Then ret == -ECANCELED and balance_cancel_req > 0
+        *
+        * - Fatal signal to "btrfs" process
+        *   Either the signal is caught by wait_reserve_ticket() and callers
+        *   get -EINTR, or it is caught by btrfs_should_cancel_balance() and
+        *   they get -ECANCELED.
+        *   Either way, in this case balance_cancel_req = 0, and
+        *   ret == -EINTR or ret == -ECANCELED.
+        *
+        * So here we only check the return value to catch canceled balance.
+        */
+       else if (ret == -ECANCELED || ret == -EINTR)
                btrfs_info(fs_info, "balance: canceled");
        else
                btrfs_info(fs_info, "balance: ended with status: %d", ret);
@@ -5522,6 +5552,9 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
        atomic_set(&bbio->error, 0);
        refcount_set(&bbio->refs, 1);
 
+       bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes);
+       bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes);
+
        return bbio;
 }
 
@@ -6144,8 +6177,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                ret = -ENOMEM;
                goto out;
        }
-       if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
-               bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
+
+       for (i = 0; i < num_stripes; i++) {
+               bbio->stripes[i].physical = map->stripes[stripe_index].physical +
+                       stripe_offset + stripe_nr * map->stripe_len;
+               bbio->stripes[i].dev = map->stripes[stripe_index].dev;
+               stripe_index++;
+       }
 
        /* build raid_map */
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
@@ -6153,11 +6191,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                u64 tmp;
                unsigned rot;
 
-               bbio->raid_map = (u64 *)((void *)bbio->stripes +
-                                sizeof(struct btrfs_bio_stripe) *
-                                num_alloc_stripes +
-                                sizeof(int) * tgtdev_indexes);
-
                /* Work out the disk rotation on this stripe-set */
                div_u64_rem(stripe_nr, num_stripes, &rot);
 
@@ -6171,25 +6204,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                if (map->type & BTRFS_BLOCK_GROUP_RAID6)
                        bbio->raid_map[(i+rot+1) % num_stripes] =
                                RAID6_Q_STRIPE;
-       }
-
 
-       for (i = 0; i < num_stripes; i++) {
-               bbio->stripes[i].physical =
-                       map->stripes[stripe_index].physical +
-                       stripe_offset +
-                       stripe_nr * map->stripe_len;
-               bbio->stripes[i].dev =
-                       map->stripes[stripe_index].dev;
-               stripe_index++;
+               sort_parity_stripes(bbio, num_stripes);
        }
 
        if (need_full_stripe(op))
                max_errors = btrfs_chunk_max_errors(map);
 
-       if (bbio->raid_map)
-               sort_parity_stripes(bbio, num_stripes);
-
        if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
            need_full_stripe(op)) {
                handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
@@ -6261,23 +6282,18 @@ static void btrfs_end_bio(struct bio *bio)
                atomic_inc(&bbio->error);
                if (bio->bi_status == BLK_STS_IOERR ||
                    bio->bi_status == BLK_STS_TARGET) {
-                       unsigned int stripe_index =
-                               btrfs_io_bio(bio)->stripe_index;
-                       struct btrfs_device *dev;
-
-                       BUG_ON(stripe_index >= bbio->num_stripes);
-                       dev = bbio->stripes[stripe_index].dev;
-                       if (dev->bdev) {
-                               if (bio_op(bio) == REQ_OP_WRITE)
-                                       btrfs_dev_stat_inc_and_print(dev,
+                       struct btrfs_device *dev = btrfs_io_bio(bio)->device;
+
+                       ASSERT(dev->bdev);
+                       if (bio_op(bio) == REQ_OP_WRITE)
+                               btrfs_dev_stat_inc_and_print(dev,
                                                BTRFS_DEV_STAT_WRITE_ERRS);
-                               else if (!(bio->bi_opf & REQ_RAHEAD))
-                                       btrfs_dev_stat_inc_and_print(dev,
+                       else if (!(bio->bi_opf & REQ_RAHEAD))
+                               btrfs_dev_stat_inc_and_print(dev,
                                                BTRFS_DEV_STAT_READ_ERRS);
-                               if (bio->bi_opf & REQ_PREFLUSH)
-                                       btrfs_dev_stat_inc_and_print(dev,
+                       if (bio->bi_opf & REQ_PREFLUSH)
+                               btrfs_dev_stat_inc_and_print(dev,
                                                BTRFS_DEV_STAT_FLUSH_ERRS);
-                       }
                }
        }
 
@@ -6313,13 +6329,12 @@ static void btrfs_end_bio(struct bio *bio)
 }
 
 static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
-                             u64 physical, int dev_nr)
+                             u64 physical, struct btrfs_device *dev)
 {
-       struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
        struct btrfs_fs_info *fs_info = bbio->fs_info;
 
        bio->bi_private = bbio;
-       btrfs_io_bio(bio)->stripe_index = dev_nr;
+       btrfs_io_bio(bio)->device = dev;
        bio->bi_end_io = btrfs_end_bio;
        bio->bi_iter.bi_sector = physical >> 9;
        btrfs_debug_in_rcu(fs_info,
@@ -6420,8 +6435,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
                else
                        bio = first_bio;
 
-               submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
-                                 dev_nr);
+               submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev);
        }
        btrfs_bio_counter_dec(fs_info);
        return BLK_STS_OK;
@@ -7029,6 +7043,19 @@ out:
        return ret;
 }
 
+static void readahead_tree_node_children(struct extent_buffer *node)
+{
+       int i;
+       const int nr_items = btrfs_header_nritems(node);
+
+       for (i = 0; i < nr_items; i++) {
+               u64 start;
+
+               start = btrfs_node_blockptr(node, i);
+               readahead_tree_block(node->fs_info, start);
+       }
+}
+
 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_root *root = fs_info->chunk_root;
@@ -7039,6 +7066,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
        int ret;
        int slot;
        u64 total_dev = 0;
+       u64 last_ra_node = 0;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -7049,7 +7077,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
         * otherwise we don't need it.
         */
        mutex_lock(&uuid_mutex);
-       mutex_lock(&fs_info->chunk_mutex);
 
        /*
         * It is possible for mount and umount to race in such a way that
@@ -7072,6 +7099,8 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
        if (ret < 0)
                goto error;
        while (1) {
+               struct extent_buffer *node;
+
                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
@@ -7082,6 +7111,17 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
                                goto error;
                        break;
                }
+               /*
+                * The nodes on level 1 are not locked but we don't need to do
+                * that during mount time as nothing else can access the tree
+                */
+               node = path->nodes[1];
+               if (node) {
+                       if (last_ra_node != node->start) {
+                               readahead_tree_node_children(node);
+                               last_ra_node = node->start;
+                       }
+               }
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
                if (found_key.type == BTRFS_DEV_ITEM_KEY) {
                        struct btrfs_dev_item *dev_item;
@@ -7094,7 +7134,9 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
                } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
                        struct btrfs_chunk *chunk;
                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+                       mutex_lock(&fs_info->chunk_mutex);
                        ret = read_one_chunk(&found_key, leaf, chunk);
+                       mutex_unlock(&fs_info->chunk_mutex);
                        if (ret)
                                goto error;
                }
@@ -7124,7 +7166,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
        }
        ret = 0;
 error:
-       mutex_unlock(&fs_info->chunk_mutex);
        mutex_unlock(&uuid_mutex);
 
        btrfs_free_path(path);
index 75af233..5eea939 100644 (file)
@@ -288,7 +288,7 @@ struct btrfs_fs_devices {
  */
 struct btrfs_io_bio {
        unsigned int mirror_num;
-       unsigned int stripe_index;
+       struct btrfs_device *device;
        u64 logical;
        u8 *csum;
        u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
index 64fe82e..061dd20 100644 (file)
@@ -320,9 +320,8 @@ static void decrypt_bh(struct work_struct *work)
 static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
 {
        /* Decrypt if needed */
-       if (uptodate && IS_ENABLED(CONFIG_FS_ENCRYPTION) &&
-           IS_ENCRYPTED(bh->b_page->mapping->host) &&
-           S_ISREG(bh->b_page->mapping->host->i_mode)) {
+       if (uptodate &&
+           fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
                struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
 
                if (ctx) {
@@ -3040,12 +3039,10 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
        if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
                clear_buffer_write_io_error(bh);
 
-       /*
-        * from here on down, it's all bio -- do the initial mapping,
-        * submit_bio -> generic_make_request may further map this bio around
-        */
        bio = bio_alloc(GFP_NOIO, 1);
 
+       fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
+
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio_set_dev(bio, bh->b_bdev);
        bio->bi_write_hint = write_hint;
index 8046d7c..a5f5c30 100644 (file)
@@ -4,6 +4,7 @@ config FS_ENCRYPTION
        select CRYPTO
        select CRYPTO_HASH
        select CRYPTO_SKCIPHER
+       select CRYPTO_LIB_SHA256
        select KEYS
        help
          Enable encryption of files and directories.  This
@@ -21,6 +22,11 @@ config FS_ENCRYPTION_ALGS
        select CRYPTO_CTS
        select CRYPTO_ECB
        select CRYPTO_HMAC
-       select CRYPTO_SHA256
        select CRYPTO_SHA512
        select CRYPTO_XTS
+
+config FS_ENCRYPTION_INLINE_CRYPT
+       bool "Enable fscrypt to use inline crypto"
+       depends on FS_ENCRYPTION && BLK_INLINE_ENCRYPTION
+       help
+         Enable fscrypt to use inline encryption hardware if available.
index 232e2bb..652c718 100644 (file)
@@ -11,3 +11,4 @@ fscrypto-y := crypto.o \
              policy.o
 
 fscrypto-$(CONFIG_BLOCK) += bio.o
+fscrypto-$(CONFIG_FS_ENCRYPTION_INLINE_CRYPT) += inline_crypt.o
index 4fa18ff..b048a0e 100644 (file)
@@ -41,6 +41,53 @@ void fscrypt_decrypt_bio(struct bio *bio)
 }
 EXPORT_SYMBOL(fscrypt_decrypt_bio);
 
+static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
+                                             pgoff_t lblk, sector_t pblk,
+                                             unsigned int len)
+{
+       const unsigned int blockbits = inode->i_blkbits;
+       const unsigned int blocks_per_page = 1 << (PAGE_SHIFT - blockbits);
+       struct bio *bio;
+       int ret, err = 0;
+       int num_pages = 0;
+
+       /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */
+       bio = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+
+       while (len) {
+               unsigned int blocks_this_page = min(len, blocks_per_page);
+               unsigned int bytes_this_page = blocks_this_page << blockbits;
+
+               if (num_pages == 0) {
+                       fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS);
+                       bio_set_dev(bio, inode->i_sb->s_bdev);
+                       bio->bi_iter.bi_sector =
+                                       pblk << (blockbits - SECTOR_SHIFT);
+                       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+               }
+               ret = bio_add_page(bio, ZERO_PAGE(0), bytes_this_page, 0);
+               if (WARN_ON(ret != bytes_this_page)) {
+                       err = -EIO;
+                       goto out;
+               }
+               num_pages++;
+               len -= blocks_this_page;
+               lblk += blocks_this_page;
+               pblk += blocks_this_page;
+               if (num_pages == BIO_MAX_PAGES || !len ||
+                   !fscrypt_mergeable_bio(bio, inode, lblk)) {
+                       err = submit_bio_wait(bio);
+                       if (err)
+                               goto out;
+                       bio_reset(bio);
+                       num_pages = 0;
+               }
+       }
+out:
+       bio_put(bio);
+       return err;
+}
+
 /**
  * fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file
  * @inode: the file's inode
@@ -75,6 +122,10 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
        if (len == 0)
                return 0;
 
+       if (fscrypt_inode_uses_inline_crypto(inode))
+               return fscrypt_zeroout_range_inline_crypt(inode, lblk, pblk,
+                                                         len);
+
        BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_PAGES);
        nr_pages = min_t(unsigned int, ARRAY_SIZE(pages),
                         (len + blocks_per_page - 1) >> blocks_per_page_bits);
index ed015cb..9212325 100644 (file)
@@ -84,7 +84,7 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
                WARN_ON_ONCE(lblk_num > U32_MAX);
                lblk_num = (u32)(ci->ci_hashed_ino + lblk_num);
        } else if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
-               memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+               memcpy(iv->nonce, ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE);
        }
        iv->lblk_num = cpu_to_le64(lblk_num);
 }
@@ -100,7 +100,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
        DECLARE_CRYPTO_WAIT(wait);
        struct scatterlist dst, src;
        struct fscrypt_info *ci = inode->i_crypt_info;
-       struct crypto_skcipher *tfm = ci->ci_ctfm;
+       struct crypto_skcipher *tfm = ci->ci_enc_key.tfm;
        int res = 0;
 
        if (WARN_ON_ONCE(len <= 0))
index 83ca5f1..011830f 100644 (file)
@@ -61,30 +61,13 @@ struct fscrypt_nokey_name {
  */
 #define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256)
 
-static struct crypto_shash *sha256_hash_tfm;
-
-static int fscrypt_do_sha256(const u8 *data, unsigned int data_len, u8 *result)
+static void fscrypt_do_sha256(const u8 *data, unsigned int data_len, u8 *result)
 {
-       struct crypto_shash *tfm = READ_ONCE(sha256_hash_tfm);
-
-       if (unlikely(!tfm)) {
-               struct crypto_shash *prev_tfm;
-
-               tfm = crypto_alloc_shash("sha256", 0, 0);
-               if (IS_ERR(tfm)) {
-                       fscrypt_err(NULL,
-                                   "Error allocating SHA-256 transform: %ld",
-                                   PTR_ERR(tfm));
-                       return PTR_ERR(tfm);
-               }
-               prev_tfm = cmpxchg(&sha256_hash_tfm, NULL, tfm);
-               if (prev_tfm) {
-                       crypto_free_shash(tfm);
-                       tfm = prev_tfm;
-               }
-       }
+       struct sha256_state sctx;
 
-       return crypto_shash_tfm_digest(tfm, data, data_len, result);
+       sha256_init(&sctx);
+       sha256_update(&sctx, data, data_len);
+       sha256_final(&sctx, result);
 }
 
 static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
@@ -115,7 +98,7 @@ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname,
        struct skcipher_request *req = NULL;
        DECLARE_CRYPTO_WAIT(wait);
        const struct fscrypt_info *ci = inode->i_crypt_info;
-       struct crypto_skcipher *tfm = ci->ci_ctfm;
+       struct crypto_skcipher *tfm = ci->ci_enc_key.tfm;
        union fscrypt_iv iv;
        struct scatterlist sg;
        int res;
@@ -171,7 +154,7 @@ static int fname_decrypt(const struct inode *inode,
        DECLARE_CRYPTO_WAIT(wait);
        struct scatterlist src_sg, dst_sg;
        const struct fscrypt_info *ci = inode->i_crypt_info;
-       struct crypto_skcipher *tfm = ci->ci_ctfm;
+       struct crypto_skcipher *tfm = ci->ci_enc_key.tfm;
        union fscrypt_iv iv;
        int res;
 
@@ -349,7 +332,6 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
        const struct qstr qname = FSTR_TO_QSTR(iname);
        struct fscrypt_nokey_name nokey_name;
        u32 size; /* size of the unencoded no-key name */
-       int err;
 
        if (fscrypt_is_dot_dotdot(&qname)) {
                oname->name[0] = '.';
@@ -387,11 +369,9 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
        } else {
                memcpy(nokey_name.bytes, iname->name, sizeof(nokey_name.bytes));
                /* Compute strong hash of remaining part of name. */
-               err = fscrypt_do_sha256(&iname->name[sizeof(nokey_name.bytes)],
-                                       iname->len - sizeof(nokey_name.bytes),
-                                       nokey_name.sha256);
-               if (err)
-                       return err;
+               fscrypt_do_sha256(&iname->name[sizeof(nokey_name.bytes)],
+                                 iname->len - sizeof(nokey_name.bytes),
+                                 nokey_name.sha256);
                size = FSCRYPT_NOKEY_NAME_MAX;
        }
        oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name);
@@ -530,9 +510,8 @@ bool fscrypt_match_name(const struct fscrypt_name *fname,
                return false;
        if (memcmp(de_name, nokey_name->bytes, sizeof(nokey_name->bytes)))
                return false;
-       if (fscrypt_do_sha256(&de_name[sizeof(nokey_name->bytes)],
-                             de_name_len - sizeof(nokey_name->bytes), sha256))
-               return false;
+       fscrypt_do_sha256(&de_name[sizeof(nokey_name->bytes)],
+                         de_name_len - sizeof(nokey_name->bytes), sha256);
        return !memcmp(sha256, nokey_name->sha256, sizeof(sha256));
 }
 EXPORT_SYMBOL_GPL(fscrypt_match_name);
index eb7fcd2..8117a61 100644 (file)
 #include <linux/fscrypt.h>
 #include <linux/siphash.h>
 #include <crypto/hash.h>
+#include <linux/blk-crypto.h>
 
 #define CONST_STRLEN(str)      (sizeof(str) - 1)
 
-#define FS_KEY_DERIVATION_NONCE_SIZE   16
+#define FSCRYPT_FILE_NONCE_SIZE        16
 
-#define FSCRYPT_MIN_KEY_SIZE           16
+#define FSCRYPT_MIN_KEY_SIZE   16
 
 #define FSCRYPT_CONTEXT_V1     1
 #define FSCRYPT_CONTEXT_V2     2
@@ -30,7 +31,7 @@ struct fscrypt_context_v1 {
        u8 filenames_encryption_mode;
        u8 flags;
        u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
-       u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+       u8 nonce[FSCRYPT_FILE_NONCE_SIZE];
 };
 
 struct fscrypt_context_v2 {
@@ -40,7 +41,7 @@ struct fscrypt_context_v2 {
        u8 flags;
        u8 __reserved[4];
        u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
-       u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+       u8 nonce[FSCRYPT_FILE_NONCE_SIZE];
 };
 
 /*
@@ -166,6 +167,20 @@ struct fscrypt_symlink_data {
        char encrypted_path[1];
 } __packed;
 
+/**
+ * struct fscrypt_prepared_key - a key prepared for actual encryption/decryption
+ * @tfm: crypto API transform object
+ * @blk_key: key for blk-crypto
+ *
+ * Normally only one of the fields will be non-NULL.
+ */
+struct fscrypt_prepared_key {
+       struct crypto_skcipher *tfm;
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+       struct fscrypt_blk_crypto_key *blk_key;
+#endif
+};
+
 /*
  * fscrypt_info - the "encryption key" for an inode
  *
@@ -175,12 +190,20 @@ struct fscrypt_symlink_data {
  */
 struct fscrypt_info {
 
-       /* The actual crypto transform used for encryption and decryption */
-       struct crypto_skcipher *ci_ctfm;
+       /* The key in a form prepared for actual encryption/decryption */
+       struct fscrypt_prepared_key ci_enc_key;
 
-       /* True if the key should be freed when this fscrypt_info is freed */
+       /* True if ci_enc_key should be freed when this fscrypt_info is freed */
        bool ci_owns_key;
 
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+       /*
+        * True if this inode will use inline encryption (blk-crypto) instead of
+        * the traditional filesystem-layer encryption.
+        */
+       bool ci_inlinecrypt;
+#endif
+
        /*
         * Encryption mode used for this inode.  It corresponds to either the
         * contents or filenames encryption mode, depending on the inode type.
@@ -205,7 +228,7 @@ struct fscrypt_info {
 
        /*
         * If non-NULL, then encryption is done using the master key directly
-        * and ci_ctfm will equal ci_direct_key->dk_ctfm.
+        * and ci_enc_key will equal ci_direct_key->dk_key.
         */
        struct fscrypt_direct_key *ci_direct_key;
 
@@ -221,7 +244,7 @@ struct fscrypt_info {
        union fscrypt_policy ci_policy;
 
        /* This inode's nonce, copied from the fscrypt_context */
-       u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+       u8 ci_nonce[FSCRYPT_FILE_NONCE_SIZE];
 
        /* Hashed inode number.  Only set for IV_INO_LBLK_32 */
        u32 ci_hashed_ino;
@@ -257,9 +280,10 @@ union fscrypt_iv {
                __le64 lblk_num;
 
                /* per-file nonce; only set in DIRECT_KEY mode */
-               u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+               u8 nonce[FSCRYPT_FILE_NONCE_SIZE];
        };
        u8 raw[FSCRYPT_MAX_IV_SIZE];
+       __le64 dun[FSCRYPT_MAX_IV_SIZE / sizeof(__le64)];
 };
 
 void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
@@ -288,13 +312,13 @@ int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
  * outputs are unique and cryptographically isolated, i.e. knowledge of one
  * output doesn't reveal another.
  */
-#define HKDF_CONTEXT_KEY_IDENTIFIER    1
-#define HKDF_CONTEXT_PER_FILE_ENC_KEY  2
-#define HKDF_CONTEXT_DIRECT_KEY                3
-#define HKDF_CONTEXT_IV_INO_LBLK_64_KEY        4
-#define HKDF_CONTEXT_DIRHASH_KEY       5
-#define HKDF_CONTEXT_IV_INO_LBLK_32_KEY        6
-#define HKDF_CONTEXT_INODE_HASH_KEY    7
+#define HKDF_CONTEXT_KEY_IDENTIFIER    1 /* info=<empty>               */
+#define HKDF_CONTEXT_PER_FILE_ENC_KEY  2 /* info=file_nonce            */
+#define HKDF_CONTEXT_DIRECT_KEY                3 /* info=mode_num              */
+#define HKDF_CONTEXT_IV_INO_LBLK_64_KEY        4 /* info=mode_num||fs_uuid     */
+#define HKDF_CONTEXT_DIRHASH_KEY       5 /* info=file_nonce            */
+#define HKDF_CONTEXT_IV_INO_LBLK_32_KEY        6 /* info=mode_num||fs_uuid     */
+#define HKDF_CONTEXT_INODE_HASH_KEY    7 /* info=<empty>               */
 
 int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context,
                        const u8 *info, unsigned int infolen,
@@ -302,6 +326,78 @@ int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context,
 
 void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf);
 
+/* inline_crypt.c */
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+int fscrypt_select_encryption_impl(struct fscrypt_info *ci);
+
+static inline bool
+fscrypt_using_inline_encryption(const struct fscrypt_info *ci)
+{
+       return ci->ci_inlinecrypt;
+}
+
+int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
+                                    const u8 *raw_key,
+                                    const struct fscrypt_info *ci);
+
+void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key);
+
+/*
+ * Check whether the crypto transform or blk-crypto key has been allocated in
+ * @prep_key, depending on which encryption implementation the file will use.
+ */
+static inline bool
+fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key,
+                       const struct fscrypt_info *ci)
+{
+       /*
+        * The two smp_load_acquire()'s here pair with the smp_store_release()'s
+        * in fscrypt_prepare_inline_crypt_key() and fscrypt_prepare_key().
+        * I.e., in some cases (namely, if this prep_key is a per-mode
+        * encryption key) another task can publish blk_key or tfm concurrently,
+        * executing a RELEASE barrier.  We need to use smp_load_acquire() here
+        * to safely ACQUIRE the memory the other task published.
+        */
+       if (fscrypt_using_inline_encryption(ci))
+               return smp_load_acquire(&prep_key->blk_key) != NULL;
+       return smp_load_acquire(&prep_key->tfm) != NULL;
+}
+
+#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
+
+static inline int fscrypt_select_encryption_impl(struct fscrypt_info *ci)
+{
+       return 0;
+}
+
+static inline bool
+fscrypt_using_inline_encryption(const struct fscrypt_info *ci)
+{
+       return false;
+}
+
+static inline int
+fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
+                                const u8 *raw_key,
+                                const struct fscrypt_info *ci)
+{
+       WARN_ON(1);
+       return -EOPNOTSUPP;
+}
+
+static inline void
+fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key)
+{
+}
+
+static inline bool
+fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key,
+                       const struct fscrypt_info *ci)
+{
+       return smp_load_acquire(&prep_key->tfm) != NULL;
+}
+#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
+
 /* keyring.c */
 
 /*
@@ -395,9 +491,9 @@ struct fscrypt_master_key {
         * Per-mode encryption keys for the various types of encryption policies
         * that use them.  Allocated and derived on-demand.
         */
-       struct crypto_skcipher *mk_direct_keys[__FSCRYPT_MODE_MAX + 1];
-       struct crypto_skcipher *mk_iv_ino_lblk_64_keys[__FSCRYPT_MODE_MAX + 1];
-       struct crypto_skcipher *mk_iv_ino_lblk_32_keys[__FSCRYPT_MODE_MAX + 1];
+       struct fscrypt_prepared_key mk_direct_keys[__FSCRYPT_MODE_MAX + 1];
+       struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[__FSCRYPT_MODE_MAX + 1];
+       struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[__FSCRYPT_MODE_MAX + 1];
 
        /* Hash key for inode numbers.  Initialized only when needed. */
        siphash_key_t           mk_ino_hash_key;
@@ -461,13 +557,15 @@ struct fscrypt_mode {
        int keysize;
        int ivsize;
        int logged_impl_name;
+       enum blk_crypto_mode_num blk_crypto_mode;
 };
 
 extern struct fscrypt_mode fscrypt_modes[];
 
-struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode,
-                                                 const u8 *raw_key,
-                                                 const struct inode *inode);
+int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key,
+                       const u8 *raw_key, const struct fscrypt_info *ci);
+
+void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key);
 
 int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key);
 
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
new file mode 100644 (file)
index 0000000..b6b8574
--- /dev/null
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inline encryption support for fscrypt
+ *
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * With "inline encryption", the block layer handles the decryption/encryption
+ * as part of the bio, instead of the filesystem doing the crypto itself via
+ * crypto API.  See Documentation/block/inline-encryption.rst.  fscrypt still
+ * provides the key and IV to use.
+ */
+
+#include <linux/blk-crypto.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/sched/mm.h>
+
+#include "fscrypt_private.h"
+
+struct fscrypt_blk_crypto_key {
+       struct blk_crypto_key base;
+       int num_devs;
+       struct request_queue *devs[];
+};
+
+static int fscrypt_get_num_devices(struct super_block *sb)
+{
+       if (sb->s_cop->get_num_devices)
+               return sb->s_cop->get_num_devices(sb);
+       return 1;
+}
+
+static void fscrypt_get_devices(struct super_block *sb, int num_devs,
+                               struct request_queue **devs)
+{
+       if (num_devs == 1)
+               devs[0] = bdev_get_queue(sb->s_bdev);
+       else
+               sb->s_cop->get_devices(sb, devs);
+}
+
+static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci)
+{
+       struct super_block *sb = ci->ci_inode->i_sb;
+       unsigned int flags = fscrypt_policy_flags(&ci->ci_policy);
+       int ino_bits = 64, lblk_bits = 64;
+
+       if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY)
+               return offsetofend(union fscrypt_iv, nonce);
+
+       if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64)
+               return sizeof(__le64);
+
+       if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)
+               return sizeof(__le32);
+
+       /* Default case: IVs are just the file logical block number */
+       if (sb->s_cop->get_ino_and_lblk_bits)
+               sb->s_cop->get_ino_and_lblk_bits(sb, &ino_bits, &lblk_bits);
+       return DIV_ROUND_UP(lblk_bits, 8);
+}
+
+/* Enable inline encryption for this file if supported. */
+int fscrypt_select_encryption_impl(struct fscrypt_info *ci)
+{
+       const struct inode *inode = ci->ci_inode;
+       struct super_block *sb = inode->i_sb;
+       struct blk_crypto_config crypto_cfg;
+       int num_devs;
+       struct request_queue **devs;
+       int i;
+
+       /* The file must need contents encryption, not filenames encryption */
+       if (!fscrypt_needs_contents_encryption(inode))
+               return 0;
+
+       /* The crypto mode must have a blk-crypto counterpart */
+       if (ci->ci_mode->blk_crypto_mode == BLK_ENCRYPTION_MODE_INVALID)
+               return 0;
+
+       /* The filesystem must be mounted with -o inlinecrypt */
+       if (!(sb->s_flags & SB_INLINECRYPT))
+               return 0;
+
+       /*
+        * When a page contains multiple logically contiguous filesystem blocks,
+        * some filesystem code only calls fscrypt_mergeable_bio() for the first
+        * block in the page. This is fine for most of fscrypt's IV generation
+        * strategies, where contiguous blocks imply contiguous IVs. But it
+        * doesn't work with IV_INO_LBLK_32. For now, simply exclude
+        * IV_INO_LBLK_32 with blocksize != PAGE_SIZE from inline encryption.
+        */
+       if ((fscrypt_policy_flags(&ci->ci_policy) &
+            FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) &&
+           sb->s_blocksize != PAGE_SIZE)
+               return 0;
+
+       /*
+        * On all the filesystem's devices, blk-crypto must support the crypto
+        * configuration that the file would use.
+        */
+       crypto_cfg.crypto_mode = ci->ci_mode->blk_crypto_mode;
+       crypto_cfg.data_unit_size = sb->s_blocksize;
+       crypto_cfg.dun_bytes = fscrypt_get_dun_bytes(ci);
+       num_devs = fscrypt_get_num_devices(sb);
+       devs = kmalloc_array(num_devs, sizeof(*devs), GFP_NOFS);
+       if (!devs)
+               return -ENOMEM;
+       fscrypt_get_devices(sb, num_devs, devs);
+
+       for (i = 0; i < num_devs; i++) {
+               if (!blk_crypto_config_supported(devs[i], &crypto_cfg))
+                       goto out_free_devs;
+       }
+
+       ci->ci_inlinecrypt = true;
+out_free_devs:
+       kfree(devs);
+
+       return 0;
+}
+
+int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key,
+                                    const u8 *raw_key,
+                                    const struct fscrypt_info *ci)
+{
+       const struct inode *inode = ci->ci_inode;
+       struct super_block *sb = inode->i_sb;
+       enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode;
+       int num_devs = fscrypt_get_num_devices(sb);
+       int queue_refs = 0;
+       struct fscrypt_blk_crypto_key *blk_key;
+       int err;
+       int i;
+       unsigned int flags;
+
+       blk_key = kzalloc(struct_size(blk_key, devs, num_devs), GFP_NOFS);
+       if (!blk_key)
+               return -ENOMEM;
+
+       blk_key->num_devs = num_devs;
+       fscrypt_get_devices(sb, num_devs, blk_key->devs);
+
+       err = blk_crypto_init_key(&blk_key->base, raw_key, crypto_mode,
+                                 fscrypt_get_dun_bytes(ci), sb->s_blocksize);
+       if (err) {
+               fscrypt_err(inode, "error %d initializing blk-crypto key", err);
+               goto fail;
+       }
+
+       /*
+        * We have to start using blk-crypto on all the filesystem's devices.
+        * We also have to save all the request_queue's for later so that the
+        * key can be evicted from them.  This is needed because some keys
+        * aren't destroyed until after the filesystem was already unmounted
+        * (namely, the per-mode keys in struct fscrypt_master_key).
+        */
+       for (i = 0; i < num_devs; i++) {
+               if (!blk_get_queue(blk_key->devs[i])) {
+                       fscrypt_err(inode, "couldn't get request_queue");
+                       err = -EAGAIN;
+                       goto fail;
+               }
+               queue_refs++;
+
+               flags = memalloc_nofs_save();
+               err = blk_crypto_start_using_key(&blk_key->base,
+                                                blk_key->devs[i]);
+               memalloc_nofs_restore(flags);
+               if (err) {
+                       fscrypt_err(inode,
+                                   "error %d starting to use blk-crypto", err);
+                       goto fail;
+               }
+       }
+       /*
+        * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared().
+        * I.e., here we publish ->blk_key with a RELEASE barrier so that
+        * concurrent tasks can ACQUIRE it.  Note that this concurrency is only
+        * possible for per-mode keys, not for per-file keys.
+        */
+       smp_store_release(&prep_key->blk_key, blk_key);
+       return 0;
+
+fail:
+       for (i = 0; i < queue_refs; i++)
+               blk_put_queue(blk_key->devs[i]);
+       kzfree(blk_key);
+       return err;
+}
+
+void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key)
+{
+       struct fscrypt_blk_crypto_key *blk_key = prep_key->blk_key;
+       int i;
+
+       if (blk_key) {
+               for (i = 0; i < blk_key->num_devs; i++) {
+                       blk_crypto_evict_key(blk_key->devs[i], &blk_key->base);
+                       blk_put_queue(blk_key->devs[i]);
+               }
+               kzfree(blk_key);
+       }
+}
+
+bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
+{
+       return inode->i_crypt_info->ci_inlinecrypt;
+}
+EXPORT_SYMBOL_GPL(__fscrypt_inode_uses_inline_crypto);
+
+static void fscrypt_generate_dun(const struct fscrypt_info *ci, u64 lblk_num,
+                                u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE])
+{
+       union fscrypt_iv iv;
+       int i;
+
+       fscrypt_generate_iv(&iv, lblk_num, ci);
+
+       BUILD_BUG_ON(FSCRYPT_MAX_IV_SIZE > BLK_CRYPTO_MAX_IV_SIZE);
+       memset(dun, 0, BLK_CRYPTO_MAX_IV_SIZE);
+       for (i = 0; i < ci->ci_mode->ivsize/sizeof(dun[0]); i++)
+               dun[i] = le64_to_cpu(iv.dun[i]);
+}
+
+/**
+ * fscrypt_set_bio_crypt_ctx() - prepare a file contents bio for inline crypto
+ * @bio: a bio which will eventually be submitted to the file
+ * @inode: the file's inode
+ * @first_lblk: the first file logical block number in the I/O
+ * @gfp_mask: memory allocation flags - these must be a waiting mask so that
+ *                                     bio_crypt_set_ctx can't fail.
+ *
+ * If the contents of the file should be encrypted (or decrypted) with inline
+ * encryption, then assign the appropriate encryption context to the bio.
+ *
+ * Normally the bio should be newly allocated (i.e. no pages added yet), as
+ * otherwise fscrypt_mergeable_bio() won't work as intended.
+ *
+ * The encryption context will be freed automatically when the bio is freed.
+ */
+void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
+                              u64 first_lblk, gfp_t gfp_mask)
+{
+       const struct fscrypt_info *ci;
+       u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
+
+       if (!fscrypt_inode_uses_inline_crypto(inode))
+               return;
+       ci = inode->i_crypt_info;
+
+       fscrypt_generate_dun(ci, first_lblk, dun);
+       bio_crypt_set_ctx(bio, &ci->ci_enc_key.blk_key->base, dun, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx);
+
+/* Extract the inode and logical block number from a buffer_head. */
+static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh,
+                                     const struct inode **inode_ret,
+                                     u64 *lblk_num_ret)
+{
+       struct page *page = bh->b_page;
+       const struct address_space *mapping;
+       const struct inode *inode;
+
+       /*
+        * The ext4 journal (jbd2) can submit a buffer_head it directly created
+        * for a non-pagecache page.  fscrypt doesn't care about these.
+        */
+       mapping = page_mapping(page);
+       if (!mapping)
+               return false;
+       inode = mapping->host;
+
+       *inode_ret = inode;
+       *lblk_num_ret = ((u64)page->index << (PAGE_SHIFT - inode->i_blkbits)) +
+                       (bh_offset(bh) >> inode->i_blkbits);
+       return true;
+}
+
+/**
+ * fscrypt_set_bio_crypt_ctx_bh() - prepare a file contents bio for inline
+ *                                 crypto
+ * @bio: a bio which will eventually be submitted to the file
+ * @first_bh: the first buffer_head for which I/O will be submitted
+ * @gfp_mask: memory allocation flags
+ *
+ * Same as fscrypt_set_bio_crypt_ctx(), except this takes a buffer_head instead
+ * of an inode and block number directly.
+ */
+void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio,
+                                 const struct buffer_head *first_bh,
+                                 gfp_t gfp_mask)
+{
+       const struct inode *inode;
+       u64 first_lblk;
+
+       if (bh_get_inode_and_lblk_num(first_bh, &inode, &first_lblk))
+               fscrypt_set_bio_crypt_ctx(bio, inode, first_lblk, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh);
+
+/**
+ * fscrypt_mergeable_bio() - test whether data can be added to a bio
+ * @bio: the bio being built up
+ * @inode: the inode for the next part of the I/O
+ * @next_lblk: the next file logical block number in the I/O
+ *
+ * When building a bio which may contain data which should undergo inline
+ * encryption (or decryption) via fscrypt, filesystems should call this function
+ * to ensure that the resulting bio contains only contiguous data unit numbers.
+ * This will return false if the next part of the I/O cannot be merged with the
+ * bio because either the encryption key would be different or the encryption
+ * data unit numbers would be discontiguous.
+ *
+ * fscrypt_set_bio_crypt_ctx() must have already been called on the bio.
+ *
+ * Return: true iff the I/O is mergeable
+ */
+bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
+                          u64 next_lblk)
+{
+       const struct bio_crypt_ctx *bc = bio->bi_crypt_context;
+       u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
+
+       if (!!bc != fscrypt_inode_uses_inline_crypto(inode))
+               return false;
+       if (!bc)
+               return true;
+
+       /*
+        * Comparing the key pointers is good enough, as all I/O for each key
+        * uses the same pointer.  I.e., there's currently no need to support
+        * merging requests where the keys are the same but the pointers differ.
+        */
+       if (bc->bc_key != &inode->i_crypt_info->ci_enc_key.blk_key->base)
+               return false;
+
+       fscrypt_generate_dun(inode->i_crypt_info, next_lblk, next_dun);
+       return bio_crypt_dun_is_contiguous(bc, bio->bi_iter.bi_size, next_dun);
+}
+EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio);
+
+/**
+ * fscrypt_mergeable_bio_bh() - test whether data can be added to a bio
+ * @bio: the bio being built up
+ * @next_bh: the next buffer_head for which I/O will be submitted
+ *
+ * Same as fscrypt_mergeable_bio(), except this takes a buffer_head instead of
+ * an inode and block number directly.
+ *
+ * Return: true iff the I/O is mergeable
+ */
+bool fscrypt_mergeable_bio_bh(struct bio *bio,
+                             const struct buffer_head *next_bh)
+{
+       const struct inode *inode;
+       u64 next_lblk;
+
+       if (!bh_get_inode_and_lblk_num(next_bh, &inode, &next_lblk))
+               return !bio->bi_crypt_context;
+
+       return fscrypt_mergeable_bio(bio, inode, next_lblk);
+}
+EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh);
index e24eb48..71d56f8 100644 (file)
@@ -45,9 +45,9 @@ static void free_master_key(struct fscrypt_master_key *mk)
        wipe_master_key_secret(&mk->mk_secret);
 
        for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) {
-               crypto_free_skcipher(mk->mk_direct_keys[i]);
-               crypto_free_skcipher(mk->mk_iv_ino_lblk_64_keys[i]);
-               crypto_free_skcipher(mk->mk_iv_ino_lblk_32_keys[i]);
+               fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]);
+               fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]);
+               fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]);
        }
 
        key_put(mk->mk_users);
@@ -213,7 +213,11 @@ static int allocate_filesystem_keyring(struct super_block *sb)
        if (IS_ERR(keyring))
                return PTR_ERR(keyring);
 
-       /* Pairs with READ_ONCE() in fscrypt_find_master_key() */
+       /*
+        * Pairs with the smp_load_acquire() in fscrypt_find_master_key().
+        * I.e., here we publish ->s_master_keys with a RELEASE barrier so that
+        * concurrent tasks can ACQUIRE it.
+        */
        smp_store_release(&sb->s_master_keys, keyring);
        return 0;
 }
@@ -234,8 +238,13 @@ struct key *fscrypt_find_master_key(struct super_block *sb,
        struct key *keyring;
        char description[FSCRYPT_MK_DESCRIPTION_SIZE];
 
-       /* pairs with smp_store_release() in allocate_filesystem_keyring() */
-       keyring = READ_ONCE(sb->s_master_keys);
+       /*
+        * Pairs with the smp_store_release() in allocate_filesystem_keyring().
+        * I.e., another task can publish ->s_master_keys concurrently,
+        * executing a RELEASE barrier.  We need to use smp_load_acquire() here
+        * to safely ACQUIRE the memory the other task published.
+        */
+       keyring = smp_load_acquire(&sb->s_master_keys);
        if (keyring == NULL)
                return ERR_PTR(-ENOKEY); /* No keyring yet, so no keys yet. */
 
index 1129adf..fea6226 100644 (file)
@@ -19,6 +19,7 @@ struct fscrypt_mode fscrypt_modes[] = {
                .cipher_str = "xts(aes)",
                .keysize = 64,
                .ivsize = 16,
+               .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_256_XTS,
        },
        [FSCRYPT_MODE_AES_256_CTS] = {
                .friendly_name = "AES-256-CTS-CBC",
@@ -31,6 +32,7 @@ struct fscrypt_mode fscrypt_modes[] = {
                .cipher_str = "essiv(cbc(aes),sha256)",
                .keysize = 16,
                .ivsize = 16,
+               .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV,
        },
        [FSCRYPT_MODE_AES_128_CTS] = {
                .friendly_name = "AES-128-CTS-CBC",
@@ -43,6 +45,7 @@ struct fscrypt_mode fscrypt_modes[] = {
                .cipher_str = "adiantum(xchacha12,aes)",
                .keysize = 32,
                .ivsize = 32,
+               .blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM,
        },
 };
 
@@ -64,9 +67,9 @@ select_encryption_mode(const union fscrypt_policy *policy,
 }
 
 /* Create a symmetric cipher object for the given encryption mode and key */
-struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode,
-                                                 const u8 *raw_key,
-                                                 const struct inode *inode)
+static struct crypto_skcipher *
+fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key,
+                         const struct inode *inode)
 {
        struct crypto_skcipher *tfm;
        int err;
@@ -109,30 +112,56 @@ err_free_tfm:
        return ERR_PTR(err);
 }
 
-/* Given a per-file encryption key, set up the file's crypto transform object */
-int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key)
+/*
+ * Prepare the crypto transform object or blk-crypto key in @prep_key, given the
+ * raw key, encryption mode, and flag indicating which encryption implementation
+ * (fs-layer or blk-crypto) will be used.
+ */
+int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key,
+                       const u8 *raw_key, const struct fscrypt_info *ci)
 {
        struct crypto_skcipher *tfm;
 
+       if (fscrypt_using_inline_encryption(ci))
+               return fscrypt_prepare_inline_crypt_key(prep_key, raw_key, ci);
+
        tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);
+       /*
+        * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared().
+        * I.e., here we publish ->tfm with a RELEASE barrier so that
+        * concurrent tasks can ACQUIRE it.  Note that this concurrency is only
+        * possible for per-mode keys, not for per-file keys.
+        */
+       smp_store_release(&prep_key->tfm, tfm);
+       return 0;
+}
 
-       ci->ci_ctfm = tfm;
+/* Destroy a crypto transform object and/or blk-crypto key. */
+void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key)
+{
+       crypto_free_skcipher(prep_key->tfm);
+       fscrypt_destroy_inline_crypt_key(prep_key);
+}
+
+/* Given a per-file encryption key, set up the file's crypto transform object */
+int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key)
+{
        ci->ci_owns_key = true;
-       return 0;
+       return fscrypt_prepare_key(&ci->ci_enc_key, raw_key, ci);
 }
 
 static int setup_per_mode_enc_key(struct fscrypt_info *ci,
                                  struct fscrypt_master_key *mk,
-                                 struct crypto_skcipher **tfms,
+                                 struct fscrypt_prepared_key *keys,
                                  u8 hkdf_context, bool include_fs_uuid)
 {
        const struct inode *inode = ci->ci_inode;
        const struct super_block *sb = inode->i_sb;
        struct fscrypt_mode *mode = ci->ci_mode;
        const u8 mode_num = mode - fscrypt_modes;
-       struct crypto_skcipher *tfm;
+       struct fscrypt_prepared_key *prep_key;
        u8 mode_key[FSCRYPT_MAX_KEY_SIZE];
        u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)];
        unsigned int hkdf_infolen = 0;
@@ -141,16 +170,15 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci,
        if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX))
                return -EINVAL;
 
-       /* pairs with smp_store_release() below */
-       tfm = READ_ONCE(tfms[mode_num]);
-       if (likely(tfm != NULL)) {
-               ci->ci_ctfm = tfm;
+       prep_key = &keys[mode_num];
+       if (fscrypt_is_key_prepared(prep_key, ci)) {
+               ci->ci_enc_key = *prep_key;
                return 0;
        }
 
        mutex_lock(&fscrypt_mode_key_setup_mutex);
 
-       if (tfms[mode_num])
+       if (fscrypt_is_key_prepared(prep_key, ci))
                goto done_unlock;
 
        BUILD_BUG_ON(sizeof(mode_num) != 1);
@@ -167,16 +195,12 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci,
                                  mode_key, mode->keysize);
        if (err)
                goto out_unlock;
-       tfm = fscrypt_allocate_skcipher(mode, mode_key, inode);
+       err = fscrypt_prepare_key(prep_key, mode_key, ci);
        memzero_explicit(mode_key, mode->keysize);
-       if (IS_ERR(tfm)) {
-               err = PTR_ERR(tfm);
+       if (err)
                goto out_unlock;
-       }
-       /* pairs with READ_ONCE() above */
-       smp_store_release(&tfms[mode_num], tfm);
 done_unlock:
-       ci->ci_ctfm = tfm;
+       ci->ci_enc_key = *prep_key;
        err = 0;
 out_unlock:
        mutex_unlock(&fscrypt_mode_key_setup_mutex);
@@ -189,7 +213,7 @@ int fscrypt_derive_dirhash_key(struct fscrypt_info *ci,
        int err;
 
        err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, HKDF_CONTEXT_DIRHASH_KEY,
-                                 ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE,
+                                 ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE,
                                  (u8 *)&ci->ci_dirhash_key,
                                  sizeof(ci->ci_dirhash_key));
        if (err)
@@ -270,8 +294,7 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
 
                err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
                                          HKDF_CONTEXT_PER_FILE_ENC_KEY,
-                                         ci->ci_nonce,
-                                         FS_KEY_DERIVATION_NONCE_SIZE,
+                                         ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE,
                                          derived_key, ci->ci_mode->keysize);
                if (err)
                        return err;
@@ -310,6 +333,10 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
        struct fscrypt_key_specifier mk_spec;
        int err;
 
+       err = fscrypt_select_encryption_impl(ci);
+       if (err)
+               return err;
+
        switch (ci->ci_policy.version) {
        case FSCRYPT_POLICY_V1:
                mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
@@ -402,7 +429,7 @@ static void put_crypt_info(struct fscrypt_info *ci)
        if (ci->ci_direct_key)
                fscrypt_put_direct_key(ci->ci_direct_key);
        else if (ci->ci_owns_key)
-               crypto_free_skcipher(ci->ci_ctfm);
+               fscrypt_destroy_prepared_key(&ci->ci_enc_key);
 
        key = ci->ci_master_key;
        if (key) {
@@ -472,7 +499,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
        }
 
        memcpy(crypt_info->ci_nonce, fscrypt_context_nonce(&ctx),
-              FS_KEY_DERIVATION_NONCE_SIZE);
+              FSCRYPT_FILE_NONCE_SIZE);
 
        if (!fscrypt_supported_policy(&crypt_info->ci_policy, inode)) {
                res = -EINVAL;
@@ -491,7 +518,17 @@ int fscrypt_get_encryption_info(struct inode *inode)
        if (res)
                goto out;
 
+       /*
+        * Multiple tasks may race to set ->i_crypt_info, so use
+        * cmpxchg_release().  This pairs with the smp_load_acquire() in
+        * fscrypt_get_info().  I.e., here we publish ->i_crypt_info with a
+        * RELEASE barrier so that other tasks can ACQUIRE it.
+        */
        if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL) {
+               /*
+                * We won the race and set ->i_crypt_info to our crypt_info.
+                * Now link it into the master key's inode list.
+                */
                if (master_key) {
                        struct fscrypt_master_key *mk =
                                master_key->payload.data[0];
@@ -562,7 +599,7 @@ EXPORT_SYMBOL(fscrypt_free_inode);
  */
 int fscrypt_drop_inode(struct inode *inode)
 {
-       const struct fscrypt_info *ci = READ_ONCE(inode->i_crypt_info);
+       const struct fscrypt_info *ci = fscrypt_get_info(inode);
        const struct fscrypt_master_key *mk;
 
        /*
index 801b48c..e4e707f 100644 (file)
@@ -45,7 +45,7 @@ static DEFINE_SPINLOCK(fscrypt_direct_keys_lock);
  * key is longer, then only the first 'derived_keysize' bytes are used.
  */
 static int derive_key_aes(const u8 *master_key,
-                         const u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE],
+                         const u8 nonce[FSCRYPT_FILE_NONCE_SIZE],
                          u8 *derived_key, unsigned int derived_keysize)
 {
        int res = 0;
@@ -68,7 +68,7 @@ static int derive_key_aes(const u8 *master_key,
        skcipher_request_set_callback(req,
                        CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
                        crypto_req_done, &wait);
-       res = crypto_skcipher_setkey(tfm, nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+       res = crypto_skcipher_setkey(tfm, nonce, FSCRYPT_FILE_NONCE_SIZE);
        if (res < 0)
                goto out;
 
@@ -146,7 +146,7 @@ struct fscrypt_direct_key {
        struct hlist_node               dk_node;
        refcount_t                      dk_refcount;
        const struct fscrypt_mode       *dk_mode;
-       struct crypto_skcipher          *dk_ctfm;
+       struct fscrypt_prepared_key     dk_key;
        u8                              dk_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
        u8                              dk_raw[FSCRYPT_MAX_KEY_SIZE];
 };
@@ -154,7 +154,7 @@ struct fscrypt_direct_key {
 static void free_direct_key(struct fscrypt_direct_key *dk)
 {
        if (dk) {
-               crypto_free_skcipher(dk->dk_ctfm);
+               fscrypt_destroy_prepared_key(&dk->dk_key);
                kzfree(dk);
        }
 }
@@ -199,6 +199,8 @@ find_or_insert_direct_key(struct fscrypt_direct_key *to_insert,
                        continue;
                if (ci->ci_mode != dk->dk_mode)
                        continue;
+               if (!fscrypt_is_key_prepared(&dk->dk_key, ci))
+                       continue;
                if (crypto_memneq(raw_key, dk->dk_raw, ci->ci_mode->keysize))
                        continue;
                /* using existing tfm with same (descriptor, mode, raw_key) */
@@ -231,13 +233,9 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key)
                return ERR_PTR(-ENOMEM);
        refcount_set(&dk->dk_refcount, 1);
        dk->dk_mode = ci->ci_mode;
-       dk->dk_ctfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key,
-                                               ci->ci_inode);
-       if (IS_ERR(dk->dk_ctfm)) {
-               err = PTR_ERR(dk->dk_ctfm);
-               dk->dk_ctfm = NULL;
+       err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci);
+       if (err)
                goto err_free_dk;
-       }
        memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor,
               FSCRYPT_KEY_DESCRIPTOR_SIZE);
        memcpy(dk->dk_raw, raw_key, ci->ci_mode->keysize);
@@ -259,7 +257,7 @@ static int setup_v1_file_key_direct(struct fscrypt_info *ci,
        if (IS_ERR(dk))
                return PTR_ERR(dk);
        ci->ci_direct_key = dk;
-       ci->ci_ctfm = dk->dk_ctfm;
+       ci->ci_enc_key = dk->dk_key;
        return 0;
 }
 
index d23ff16..2d73fd3 100644 (file)
@@ -77,6 +77,20 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy,
        struct super_block *sb = inode->i_sb;
        int ino_bits = 64, lblk_bits = 64;
 
+       /*
+        * IV_INO_LBLK_* exist only because of hardware limitations, and
+        * currently the only known use case for them involves AES-256-XTS.
+        * That's also all we test currently.  For these reasons, for now only
+        * allow AES-256-XTS here.  This can be relaxed later if a use case for
+        * IV_INO_LBLK_* with other encryption modes arises.
+        */
+       if (policy->contents_encryption_mode != FSCRYPT_MODE_AES_256_XTS) {
+               fscrypt_warn(inode,
+                            "Can't use %s policy with contents mode other than AES-256-XTS",
+                            type);
+               return false;
+       }
+
        /*
         * It's unsafe to include inode numbers in the IVs if the filesystem can
         * potentially renumber inodes, e.g. via filesystem shrinking.
@@ -338,7 +352,7 @@ static int fscrypt_get_policy(struct inode *inode, union fscrypt_policy *policy)
        union fscrypt_context ctx;
        int ret;
 
-       ci = READ_ONCE(inode->i_crypt_info);
+       ci = fscrypt_get_info(inode);
        if (ci) {
                /* key available, use the cached policy */
                *policy = ci->ci_policy;
@@ -529,7 +543,7 @@ int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg)
        if (!fscrypt_context_is_valid(&ctx, ret))
                return -EINVAL;
        if (copy_to_user(arg, fscrypt_context_nonce(&ctx),
-                        FS_KEY_DERIVATION_NONCE_SIZE))
+                        FSCRYPT_FILE_NONCE_SIZE))
                return -EFAULT;
        return 0;
 }
@@ -627,7 +641,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
        if (res < 0)
                return res;
 
-       ci = READ_ONCE(parent->i_crypt_info);
+       ci = fscrypt_get_info(parent);
        if (ci == NULL)
                return -ENOKEY;
 
index 6d5370e..1832998 100644 (file)
@@ -1387,8 +1387,8 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         * Attempt to prefetch the pieces we likely need later.
         */
        prefetch(&bdev->bd_disk->part_tbl);
-       prefetch(bdev->bd_queue);
-       prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+       prefetch(bdev->bd_disk->queue);
+       prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES);
 
        return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block,
                                     end_io, submit_io, flags);
index 4a6ebff..a4a945d 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
+#include <linux/blkdev.h>
 
 #include "efs.h"
 #include <linux/efs_vh.h>
index 10dd470..44bad4b 100644 (file)
@@ -1096,7 +1096,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
        }
        if (unlikely(err)) {
                page_zero_new_buffers(page, from, to);
-       } else if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
+       } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
                for (i = 0; i < nr_wait; i++) {
                        int err2;
 
@@ -3737,7 +3737,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
                /* Uhhuh. Read error. Complain and punt. */
                if (!buffer_uptodate(bh))
                        goto unlock;
-               if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
+               if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
                        /* We expect the key to be set. */
                        BUG_ON(!fscrypt_has_encryption_key(inode));
                        err = fscrypt_decrypt_pagecache_blocks(page, blocksize,
index de6fe96..defd2e1 100644 (file)
@@ -402,6 +402,7 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
         * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
         */
        bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+       fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio_set_dev(bio, bh->b_bdev);
        bio->bi_end_io = ext4_end_bio;
@@ -418,7 +419,8 @@ static void io_submit_add_bh(struct ext4_io_submit *io,
 {
        int ret;
 
-       if (io->io_bio && bh->b_blocknr != io->io_next_block) {
+       if (io->io_bio && (bh->b_blocknr != io->io_next_block ||
+                          !fscrypt_mergeable_bio_bh(io->io_bio, bh))) {
 submit_and_retry:
                ext4_io_submit(io);
        }
@@ -506,7 +508,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
         * (e.g. holes) to be unnecessarily encrypted, but this is rare and
         * can't happen in the common case of blocksize == PAGE_SIZE.
         */
-       if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && nr_to_submit) {
+       if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) {
                gfp_t gfp_flags = GFP_NOFS;
                unsigned int enc_bytes = round_up(len, i_blocksize(inode));
 
index 5761e99..f2df2db 100644 (file)
@@ -195,7 +195,7 @@ static void ext4_set_bio_post_read_ctx(struct bio *bio,
 {
        unsigned int post_read_steps = 0;
 
-       if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
+       if (fscrypt_inode_uses_fs_layer_crypto(inode))
                post_read_steps |= 1 << STEP_DECRYPT;
 
        if (ext4_need_verity(inode, first_idx))
@@ -230,6 +230,7 @@ int ext4_mpage_readpages(struct inode *inode,
        const unsigned blkbits = inode->i_blkbits;
        const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
        const unsigned blocksize = 1 << blkbits;
+       sector_t next_block;
        sector_t block_in_file;
        sector_t last_block;
        sector_t last_block_in_file;
@@ -258,7 +259,8 @@ int ext4_mpage_readpages(struct inode *inode,
                if (page_has_buffers(page))
                        goto confused;
 
-               block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
+               block_in_file = next_block =
+                       (sector_t)page->index << (PAGE_SHIFT - blkbits);
                last_block = block_in_file + nr_pages * blocks_per_page;
                last_block_in_file = (ext4_readpage_limit(inode) +
                                      blocksize - 1) >> blkbits;
@@ -358,7 +360,8 @@ int ext4_mpage_readpages(struct inode *inode,
                 * This page will go to BIO.  Do we need to send this
                 * BIO off first?
                 */
-               if (bio && (last_block_in_bio != blocks[0] - 1)) {
+               if (bio && (last_block_in_bio != blocks[0] - 1 ||
+                           !fscrypt_mergeable_bio(bio, inode, next_block))) {
                submit_and_realloc:
                        submit_bio(bio);
                        bio = NULL;
@@ -370,6 +373,8 @@ int ext4_mpage_readpages(struct inode *inode,
                         */
                        bio = bio_alloc(GFP_KERNEL,
                                min_t(int, nr_pages, BIO_MAX_PAGES));
+                       fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
+                                                 GFP_KERNEL);
                        ext4_set_bio_post_read_ctx(bio, inode, page->index);
                        bio_set_dev(bio, bdev);
                        bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
index 330957e..0907f90 100644 (file)
@@ -1508,6 +1508,7 @@ enum {
        Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
+       Opt_inlinecrypt,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
@@ -1610,6 +1611,7 @@ static const match_table_t tokens = {
        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
        {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
+       {Opt_inlinecrypt, "inlinecrypt"},
        {Opt_nombcache, "nombcache"},
        {Opt_nombcache, "no_mbcache"},  /* for backward compatibility */
        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
@@ -1946,6 +1948,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
        case Opt_nolazytime:
                sb->s_flags &= ~SB_LAZYTIME;
                return 1;
+       case Opt_inlinecrypt:
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+               sb->s_flags |= SB_INLINECRYPT;
+#else
+               ext4_msg(sb, KERN_ERR, "inline encryption not supported");
+#endif
+               return 1;
        }
 
        for (m = ext4_mount_opts; m->token != Opt_err; m++)
@@ -2404,6 +2413,9 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 
        fscrypt_show_test_dummy_encryption(seq, sep, sb);
 
+       if (sb->s_flags & SB_INLINECRYPT)
+               SEQ_OPTS_PUTS("inlinecrypt");
+
        if (test_opt(sb, DAX_ALWAYS)) {
                if (IS_EXT2_SB(sb))
                        SEQ_OPTS_PUTS("dax");
index 1e02a8c..29e50fb 100644 (file)
@@ -1086,7 +1086,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
                .submitted = false,
                .io_type = io_type,
                .io_wbc = wbc,
-               .encrypted = f2fs_encrypted_file(cc->inode),
+               .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode),
        };
        struct dnode_of_data dn;
        struct node_info ni;
index 326c638..b964260 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
+#include <linux/blk-crypto.h>
 #include <linux/swap.h>
 #include <linux/prefetch.h>
 #include <linux/uio.h>
@@ -459,6 +460,33 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
        return bio;
 }
 
+static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
+                                 pgoff_t first_idx,
+                                 const struct f2fs_io_info *fio,
+                                 gfp_t gfp_mask)
+{
+       /*
+        * The f2fs garbage collector sets ->encrypted_page when it wants to
+        * read/write raw data without encryption.
+        */
+       if (!fio || !fio->encrypted_page)
+               fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
+}
+
+static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
+                                    pgoff_t next_idx,
+                                    const struct f2fs_io_info *fio)
+{
+       /*
+        * The f2fs garbage collector sets ->encrypted_page when it wants to
+        * read/write raw data without encryption.
+        */
+       if (fio && fio->encrypted_page)
+               return !bio_has_crypt_ctx(bio);
+
+       return fscrypt_mergeable_bio(bio, inode, next_idx);
+}
+
 static inline void __submit_bio(struct f2fs_sb_info *sbi,
                                struct bio *bio, enum page_type type)
 {
@@ -684,6 +712,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
        /* Allocate a new bio */
        bio = __bio_alloc(fio, 1);
 
+       f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
+                              fio->page->index, fio, GFP_NOIO);
+
        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
                bio_put(bio);
                return -EFAULT;
@@ -763,9 +794,10 @@ static void del_bio_entry(struct bio_entry *be)
        kmem_cache_free(bio_entry_slab, be);
 }
 
-static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio,
+static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
                                                        struct page *page)
 {
+       struct f2fs_sb_info *sbi = fio->sbi;
        enum temp_type temp;
        bool found = false;
        int ret = -EAGAIN;
@@ -782,13 +814,19 @@ static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio,
 
                        found = true;
 
-                       if (bio_add_page(*bio, page, PAGE_SIZE, 0) ==
-                                                       PAGE_SIZE) {
+                       f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
+                                                           *fio->last_block,
+                                                           fio->new_blkaddr));
+                       if (f2fs_crypt_mergeable_bio(*bio,
+                                       fio->page->mapping->host,
+                                       fio->page->index, fio) &&
+                           bio_add_page(*bio, page, PAGE_SIZE, 0) ==
+                                       PAGE_SIZE) {
                                ret = 0;
                                break;
                        }
 
-                       /* bio is full */
+                       /* page can't be merged into bio; submit the bio */
                        del_bio_entry(be);
                        __submit_bio(sbi, *bio, DATA);
                        break;
@@ -880,11 +918,13 @@ alloc_new:
        if (!bio) {
                bio = __bio_alloc(fio, BIO_MAX_PAGES);
                __attach_io_flag(fio);
+               f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
+                                      fio->page->index, fio, GFP_NOIO);
                bio_set_op_attrs(bio, fio->op, fio->op_flags);
 
                add_bio_entry(fio->sbi, bio, page, fio->temp);
        } else {
-               if (add_ipu_page(fio->sbi, &bio, page))
+               if (add_ipu_page(fio, &bio, page))
                        goto alloc_new;
        }
 
@@ -936,8 +976,11 @@ next:
 
        inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 
-       if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio,
-                       io->last_block_in_bio, fio->new_blkaddr))
+       if (io->bio &&
+           (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
+                             fio->new_blkaddr) ||
+            !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
+                                      bio_page->index, fio)))
                __submit_merged_bio(io);
 alloc_new:
        if (io->bio == NULL) {
@@ -949,6 +992,8 @@ alloc_new:
                        goto skip;
                }
                io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
+               f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
+                                      bio_page->index, fio, GFP_NOIO);
                io->fio = *fio;
        }
 
@@ -993,11 +1038,14 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
                                                                for_write);
        if (!bio)
                return ERR_PTR(-ENOMEM);
+
+       f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
+
        f2fs_target_device(sbi, blkaddr, bio);
        bio->bi_end_io = f2fs_read_end_io;
        bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
 
-       if (f2fs_encrypted_file(inode))
+       if (fscrypt_inode_uses_fs_layer_crypto(inode))
                post_read_steps |= 1 << STEP_DECRYPT;
        if (f2fs_compressed_file(inode))
                post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
@@ -2073,8 +2121,9 @@ zero_out:
         * This page will go to BIO.  Do we need to send this
         * BIO off first?
         */
-       if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio,
-                               *last_block_in_bio, block_nr)) {
+       if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
+                                      *last_block_in_bio, block_nr) ||
+                   !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
 submit_and_realloc:
                __submit_bio(F2FS_I_SB(inode), bio, DATA);
                bio = NULL;
@@ -2204,8 +2253,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
                blkaddr = data_blkaddr(dn.inode, dn.node_page,
                                                dn.ofs_in_node + i + 1);
 
-               if (bio && !page_is_mergeable(sbi, bio,
-                                       *last_block_in_bio, blkaddr)) {
+               if (bio && (!page_is_mergeable(sbi, bio,
+                                       *last_block_in_bio, blkaddr) ||
+                   !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
 submit_and_realloc:
                        __submit_bio(sbi, bio, DATA);
                        bio = NULL;
@@ -2421,6 +2471,9 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
        /* wait for GCed page writeback via META_MAPPING */
        f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
 
+       if (fscrypt_inode_uses_inline_crypto(inode))
+               return 0;
+
 retry_encrypt:
        fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
                                        PAGE_SIZE, 0, gfp_flags);
@@ -2594,7 +2647,7 @@ got_it:
                        f2fs_unlock_op(fio->sbi);
                err = f2fs_inplace_write_data(fio);
                if (err) {
-                       if (f2fs_encrypted_file(inode))
+                       if (fscrypt_inode_uses_fs_layer_crypto(inode))
                                fscrypt_finalize_bounce_page(&fio->encrypted_page);
                        if (PageWriteback(page))
                                end_page_writeback(page);
index 20e56b0..23c49c3 100644 (file)
@@ -138,6 +138,7 @@ enum {
        Opt_alloc,
        Opt_fsync,
        Opt_test_dummy_encryption,
+       Opt_inlinecrypt,
        Opt_checkpoint_disable,
        Opt_checkpoint_disable_cap,
        Opt_checkpoint_disable_cap_perc,
@@ -204,6 +205,7 @@ static match_table_t f2fs_tokens = {
        {Opt_fsync, "fsync_mode=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
+       {Opt_inlinecrypt, "inlinecrypt"},
        {Opt_checkpoint_disable, "checkpoint=disable"},
        {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
        {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
@@ -833,6 +835,13 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        if (ret)
                                return ret;
                        break;
+               case Opt_inlinecrypt:
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+                       sb->s_flags |= SB_INLINECRYPT;
+#else
+                       f2fs_info(sbi, "inline encryption not supported");
+#endif
+                       break;
                case Opt_checkpoint_disable_cap_perc:
                        if (args->from && match_int(args, &arg))
                                return -EINVAL;
@@ -1590,6 +1599,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 
        fscrypt_show_test_dummy_encryption(seq, ',', sbi->sb);
 
+       if (sbi->sb->s_flags & SB_INLINECRYPT)
+               seq_puts(seq, ",inlinecrypt");
+
        if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT)
                seq_printf(seq, ",alloc_mode=%s", "default");
        else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
@@ -1624,6 +1636,8 @@ static void default_options(struct f2fs_sb_info *sbi)
        F2FS_OPTION(sbi).compress_ext_cnt = 0;
        F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
 
+       sbi->sb->s_flags &= ~SB_INLINECRYPT;
+
        set_opt(sbi, INLINE_XATTR);
        set_opt(sbi, INLINE_DATA);
        set_opt(sbi, INLINE_DENTRY);
@@ -2470,6 +2484,25 @@ static void f2fs_get_ino_and_lblk_bits(struct super_block *sb,
        *lblk_bits_ret = 8 * sizeof(block_t);
 }
 
+static int f2fs_get_num_devices(struct super_block *sb)
+{
+       struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+       if (f2fs_is_multi_device(sbi))
+               return sbi->s_ndevs;
+       return 1;
+}
+
+static void f2fs_get_devices(struct super_block *sb,
+                            struct request_queue **devs)
+{
+       struct f2fs_sb_info *sbi = F2FS_SB(sb);
+       int i;
+
+       for (i = 0; i < sbi->s_ndevs; i++)
+               devs[i] = bdev_get_queue(FDEV(i).bdev);
+}
+
 static const struct fscrypt_operations f2fs_cryptops = {
        .key_prefix             = "f2fs:",
        .get_context            = f2fs_get_context,
@@ -2479,6 +2512,8 @@ static const struct fscrypt_operations f2fs_cryptops = {
        .max_namelen            = F2FS_NAME_LEN,
        .has_stable_inodes      = f2fs_has_stable_inodes,
        .get_ino_and_lblk_bits  = f2fs_get_ino_and_lblk_bits,
+       .get_num_devices        = f2fs_get_num_devices,
+       .get_devices            = f2fs_get_devices,
 };
 #endif
 
index 2f224b9..f35a37c 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/cred.h>
 #include <linux/uio.h>
 #include <linux/xattr.h>
+#include <linux/blkdev.h>
 
 #include "hfs_fs.h"
 #include "btree.h"
index 9b863a7..969988d 100644 (file)
@@ -23,7 +23,9 @@ struct user_namespace;
 extern void __init bdev_cache_init(void);
 
 extern int __sync_blockdev(struct block_device *bdev, int wait);
-
+void iterate_bdevs(void (*)(struct block_device *, void *), void *);
+void emergency_thaw_bdev(struct super_block *sb);
+void bd_forget(struct inode *inode);
 #else
 static inline void bdev_cache_init(void)
 {
@@ -33,7 +35,18 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
 {
        return 0;
 }
-#endif
+static inline void iterate_bdevs(void (*f)(struct block_device *, void *),
+               void *arg)
+{
+}
+static inline int emergency_thaw_bdev(struct super_block *sb)
+{
+       return 0;
+}
+static inline void bd_forget(struct inode *inode)
+{
+}
+#endif /* CONFIG_BLOCK */
 
 /*
  * buffer.c
index 47c5f3a..e92c472 100644 (file)
@@ -462,6 +462,7 @@ static void io_impersonate_work(struct io_worker *worker,
                io_wq_switch_mm(worker, work);
        if (worker->cur_creds != work->creds)
                io_wq_switch_creds(worker, work);
+       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize;
 }
 
 static void io_assign_current_work(struct io_worker *worker,
@@ -489,7 +490,6 @@ static void io_worker_handle_work(struct io_worker *worker)
 
        do {
                struct io_wq_work *work;
-               unsigned int hash;
 get_next:
                /*
                 * If we got some work, mark us as busy. If we didn't, but
@@ -512,6 +512,7 @@ get_next:
                /* handle a whole dependent link */
                do {
                        struct io_wq_work *old_work, *next_hashed, *linked;
+                       unsigned int hash = io_get_work_hash(work);
 
                        next_hashed = wq_next_work(work);
                        io_impersonate_work(worker, work);
@@ -522,10 +523,8 @@ get_next:
                        if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
                                work->flags |= IO_WQ_WORK_CANCEL;
 
-                       hash = io_get_work_hash(work);
-                       linked = old_work = work;
-                       wq->do_work(&linked);
-                       linked = (old_work == linked) ? NULL : linked;
+                       old_work = work;
+                       linked = wq->do_work(work);
 
                        work = next_hashed;
                        if (!work && linked && !io_wq_is_hashed(linked)) {
@@ -542,8 +541,6 @@ get_next:
                                spin_lock_irq(&wqe->lock);
                                wqe->hash_map &= ~BIT_ULL(hash);
                                wqe->flags &= ~IO_WQE_FLAG_STALLED;
-                               /* dependent work is not hashed */
-                               hash = -1U;
                                /* skip unnecessary unlock-lock wqe->lock */
                                if (!work)
                                        goto get_next;
@@ -781,8 +778,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
                struct io_wq_work *old_work = work;
 
                work->flags |= IO_WQ_WORK_CANCEL;
-               wq->do_work(&work);
-               work = (work == old_work) ? NULL : work;
+               work = wq->do_work(work);
                wq->free_work(old_work);
        } while (work);
 }
index 071f1a9..ddaf961 100644 (file)
@@ -5,10 +5,10 @@ struct io_wq;
 
 enum {
        IO_WQ_WORK_CANCEL       = 1,
-       IO_WQ_WORK_HASHED       = 4,
-       IO_WQ_WORK_UNBOUND      = 32,
-       IO_WQ_WORK_NO_CANCEL    = 256,
-       IO_WQ_WORK_CONCURRENT   = 512,
+       IO_WQ_WORK_HASHED       = 2,
+       IO_WQ_WORK_UNBOUND      = 4,
+       IO_WQ_WORK_NO_CANCEL    = 8,
+       IO_WQ_WORK_CONCURRENT   = 16,
 
        IO_WQ_HASH_SHIFT        = 24,   /* upper 8 bits are used for hash key */
 };
@@ -89,6 +89,7 @@ struct io_wq_work {
        struct mm_struct *mm;
        const struct cred *creds;
        struct fs_struct *fs;
+       unsigned long fsize;
        unsigned flags;
 };
 
@@ -101,7 +102,7 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 }
 
 typedef void (free_work_fn)(struct io_wq_work *);
-typedef void (io_wq_work_fn)(struct io_wq_work **);
+typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *);
 
 struct io_wq_data {
        struct user_struct *user;
index 32b0064..2a3af95 100644 (file)
@@ -78,6 +78,7 @@
 #include <linux/fs_struct.h>
 #include <linux/splice.h>
 #include <linux/task_work.h>
+#include <linux/pagemap.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -226,7 +227,7 @@ struct io_ring_ctx {
        struct {
                unsigned int            flags;
                unsigned int            compat: 1;
-               unsigned int            account_mem: 1;
+               unsigned int            limit_mem: 1;
                unsigned int            cq_overflow_flushed: 1;
                unsigned int            drain_next: 1;
                unsigned int            eventfd_async: 1;
@@ -319,12 +320,12 @@ struct io_ring_ctx {
                spinlock_t              completion_lock;
 
                /*
-                * ->poll_list is protected by the ctx->uring_lock for
+                * ->iopoll_list is protected by the ctx->uring_lock for
                 * io_uring instances that don't use IORING_SETUP_SQPOLL.
                 * For SQPOLL, only the single threaded io_sq_thread() will
                 * manipulate the list, hence no extra locking is needed there.
                 */
-               struct list_head        poll_list;
+               struct list_head        iopoll_list;
                struct hlist_head       *cancel_hash;
                unsigned                cancel_hash_bits;
                bool                    poll_multi_file;
@@ -395,6 +396,7 @@ struct io_timeout {
        int                             flags;
        u32                             off;
        u32                             target_seq;
+       struct list_head                list;
 };
 
 struct io_rw {
@@ -413,7 +415,7 @@ struct io_connect {
 struct io_sr_msg {
        struct file                     *file;
        union {
-               struct user_msghdr __user *msg;
+               struct user_msghdr __user *umsg;
                void __user             *buf;
        };
        int                             msg_flags;
@@ -486,6 +488,12 @@ struct io_statx {
        struct statx __user             *buffer;
 };
 
+struct io_completion {
+       struct file                     *file;
+       struct list_head                list;
+       int                             cflags;
+};
+
 struct io_async_connect {
        struct sockaddr_storage         address;
 };
@@ -503,6 +511,7 @@ struct io_async_rw {
        struct iovec                    *iov;
        ssize_t                         nr_segs;
        ssize_t                         size;
+       struct wait_page_queue          wpq;
 };
 
 struct io_async_ctx {
@@ -523,23 +532,18 @@ enum {
        REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
 
        REQ_F_LINK_HEAD_BIT,
-       REQ_F_LINK_NEXT_BIT,
        REQ_F_FAIL_LINK_BIT,
        REQ_F_INFLIGHT_BIT,
        REQ_F_CUR_POS_BIT,
        REQ_F_NOWAIT_BIT,
        REQ_F_LINK_TIMEOUT_BIT,
-       REQ_F_TIMEOUT_BIT,
        REQ_F_ISREG_BIT,
-       REQ_F_MUST_PUNT_BIT,
-       REQ_F_TIMEOUT_NOSEQ_BIT,
        REQ_F_COMP_LOCKED_BIT,
        REQ_F_NEED_CLEANUP_BIT,
        REQ_F_OVERFLOW_BIT,
        REQ_F_POLLED_BIT,
        REQ_F_BUFFER_SELECTED_BIT,
        REQ_F_NO_FILE_TABLE_BIT,
-       REQ_F_QUEUE_TIMEOUT_BIT,
        REQ_F_WORK_INITIALIZED_BIT,
        REQ_F_TASK_PINNED_BIT,
 
@@ -563,8 +567,6 @@ enum {
 
        /* head of a link */
        REQ_F_LINK_HEAD         = BIT(REQ_F_LINK_HEAD_BIT),
-       /* already grabbed next link */
-       REQ_F_LINK_NEXT         = BIT(REQ_F_LINK_NEXT_BIT),
        /* fail rest of links */
        REQ_F_FAIL_LINK         = BIT(REQ_F_FAIL_LINK_BIT),
        /* on inflight list */
@@ -575,14 +577,8 @@ enum {
        REQ_F_NOWAIT            = BIT(REQ_F_NOWAIT_BIT),
        /* has linked timeout */
        REQ_F_LINK_TIMEOUT      = BIT(REQ_F_LINK_TIMEOUT_BIT),
-       /* timeout request */
-       REQ_F_TIMEOUT           = BIT(REQ_F_TIMEOUT_BIT),
        /* regular file */
        REQ_F_ISREG             = BIT(REQ_F_ISREG_BIT),
-       /* must be punted even for NONBLOCK */
-       REQ_F_MUST_PUNT         = BIT(REQ_F_MUST_PUNT_BIT),
-       /* no timeout sequence */
-       REQ_F_TIMEOUT_NOSEQ     = BIT(REQ_F_TIMEOUT_NOSEQ_BIT),
        /* completion under lock */
        REQ_F_COMP_LOCKED       = BIT(REQ_F_COMP_LOCKED_BIT),
        /* needs cleanup */
@@ -595,8 +591,6 @@ enum {
        REQ_F_BUFFER_SELECTED   = BIT(REQ_F_BUFFER_SELECTED_BIT),
        /* doesn't need file table for this request */
        REQ_F_NO_FILE_TABLE     = BIT(REQ_F_NO_FILE_TABLE_BIT),
-       /* needs to queue linked timeout */
-       REQ_F_QUEUE_TIMEOUT     = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
        /* io_wq_work is initialized */
        REQ_F_WORK_INITIALIZED  = BIT(REQ_F_WORK_INITIALIZED_BIT),
        /* req->task is refcounted */
@@ -606,7 +600,6 @@ enum {
 struct async_poll {
        struct io_poll_iocb     poll;
        struct io_poll_iocb     *double_poll;
-       struct io_wq_work       work;
 };
 
 /*
@@ -635,51 +628,54 @@ struct io_kiocb {
                struct io_splice        splice;
                struct io_provide_buf   pbuf;
                struct io_statx         statx;
+               /* use only after cleaning per-op data, see io_clean_op() */
+               struct io_completion    compl;
        };
 
        struct io_async_ctx             *io;
-       int                             cflags;
        u8                              opcode;
        /* polled IO has completed */
        u8                              iopoll_completed;
 
        u16                             buf_index;
+       u32                             result;
 
-       struct io_ring_ctx      *ctx;
-       struct list_head        list;
-       unsigned int            flags;
-       refcount_t              refs;
-       struct task_struct      *task;
-       unsigned long           fsize;
-       u64                     user_data;
-       u32                     result;
-       u32                     sequence;
-
-       struct list_head        link_list;
+       struct io_ring_ctx              *ctx;
+       unsigned int                    flags;
+       refcount_t                      refs;
+       struct task_struct              *task;
+       u64                             user_data;
 
-       struct list_head        inflight_entry;
+       struct list_head                link_list;
 
-       struct percpu_ref       *fixed_file_refs;
+       /*
+        * 1. used with ctx->iopoll_list with reads/writes
+        * 2. to track reqs with ->files (see io_op_def::file_table)
+        */
+       struct list_head                inflight_entry;
+
+       struct percpu_ref               *fixed_file_refs;
+       struct callback_head            task_work;
+       /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
+       struct hlist_node               hash_node;
+       struct async_poll               *apoll;
+       struct io_wq_work               work;
+};
 
-       union {
-               /*
-                * Only commands that never go async can use the below fields,
-                * obviously. Right now only IORING_OP_POLL_ADD uses them, and
-                * async armed poll handlers for regular commands. The latter
-                * restore the work, if needed.
-                */
-               struct {
-                       struct callback_head    task_work;
-                       struct hlist_node       hash_node;
-                       struct async_poll       *apoll;
-               };
-               struct io_wq_work       work;
-       };
+struct io_defer_entry {
+       struct list_head        list;
+       struct io_kiocb         *req;
+       u32                     seq;
 };
 
-#define IO_PLUG_THRESHOLD              2
 #define IO_IOPOLL_BATCH                        8
 
+struct io_comp_state {
+       unsigned int            nr;
+       struct list_head        list;
+       struct io_ring_ctx      *ctx;
+};
+
 struct io_submit_state {
        struct blk_plug         plug;
 
@@ -689,13 +685,17 @@ struct io_submit_state {
        void                    *reqs[IO_IOPOLL_BATCH];
        unsigned int            free_reqs;
 
+       /*
+        * Batch completion logic
+        */
+       struct io_comp_state    comp;
+
        /*
         * File reference cache
         */
        struct file             *file;
        unsigned int            fd;
        unsigned int            has_refs;
-       unsigned int            used_refs;
        unsigned int            ios_left;
 };
 
@@ -723,6 +723,7 @@ struct io_op_def {
        unsigned                pollout : 1;
        /* op supports buffer selection */
        unsigned                buffer_select : 1;
+       unsigned                needs_fsize : 1;
 };
 
 static const struct io_op_def io_op_defs[] = {
@@ -742,6 +743,7 @@ static const struct io_op_def io_op_defs[] = {
                .hash_reg_file          = 1,
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
+               .needs_fsize            = 1,
        },
        [IORING_OP_FSYNC] = {
                .needs_file             = 1,
@@ -756,6 +758,7 @@ static const struct io_op_def io_op_defs[] = {
                .hash_reg_file          = 1,
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
+               .needs_fsize            = 1,
        },
        [IORING_OP_POLL_ADD] = {
                .needs_file             = 1,
@@ -808,6 +811,7 @@ static const struct io_op_def io_op_defs[] = {
        },
        [IORING_OP_FALLOCATE] = {
                .needs_file             = 1,
+               .needs_fsize            = 1,
        },
        [IORING_OP_OPENAT] = {
                .file_table             = 1,
@@ -839,6 +843,7 @@ static const struct io_op_def io_op_defs[] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
+               .needs_fsize            = 1,
        },
        [IORING_OP_FADVISE] = {
                .needs_file             = 1,
@@ -881,22 +886,37 @@ static const struct io_op_def io_op_defs[] = {
        },
 };
 
-static void io_wq_submit_work(struct io_wq_work **workptr);
+enum io_mem_account {
+       ACCT_LOCKED,
+       ACCT_PINNED,
+};
+
+static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+                            struct io_comp_state *cs);
 static void io_cqring_fill_event(struct io_kiocb *req, long res);
 static void io_put_req(struct io_kiocb *req);
+static void io_double_put_req(struct io_kiocb *req);
 static void __io_double_put_req(struct io_kiocb *req);
 static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
 static void io_queue_linked_timeout(struct io_kiocb *req);
 static int __io_sqe_files_update(struct io_ring_ctx *ctx,
                                 struct io_uring_files_update *ip,
                                 unsigned nr_args);
-static int io_grab_files(struct io_kiocb *req);
-static void io_complete_rw_common(struct kiocb *kiocb, long res);
-static void io_cleanup_req(struct io_kiocb *req);
+static int io_prep_work_files(struct io_kiocb *req);
+static void __io_clean_op(struct io_kiocb *req);
 static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
                       int fd, struct file **out_file, bool fixed);
 static void __io_queue_sqe(struct io_kiocb *req,
-                          const struct io_uring_sqe *sqe);
+                          const struct io_uring_sqe *sqe,
+                          struct io_comp_state *cs);
+static void io_file_put_work(struct work_struct *work);
+
+static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
+                              struct iovec **iovec, struct iov_iter *iter,
+                              bool needs_lock);
+static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
+                            struct iovec *iovec, struct iovec *fast_iov,
+                            struct iov_iter *iter);
 
 static struct kmem_cache *req_cachep;
 
@@ -923,6 +943,12 @@ static void io_get_req_task(struct io_kiocb *req)
        req->flags |= REQ_F_TASK_PINNED;
 }
 
+static inline void io_clean_op(struct io_kiocb *req)
+{
+       if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
+               __io_clean_op(req);
+}
+
 /* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */
 static void __io_put_req_task(struct io_kiocb *req)
 {
@@ -930,7 +956,41 @@ static void __io_put_req_task(struct io_kiocb *req)
                put_task_struct(req->task);
 }
 
-static void io_file_put_work(struct work_struct *work);
+static void io_sq_thread_drop_mm(void)
+{
+       struct mm_struct *mm = current->mm;
+
+       if (mm) {
+               kthread_unuse_mm(mm);
+               mmput(mm);
+       }
+}
+
+static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
+{
+       if (!current->mm) {
+               if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) ||
+                            !mmget_not_zero(ctx->sqo_mm)))
+                       return -EFAULT;
+               kthread_use_mm(ctx->sqo_mm);
+       }
+
+       return 0;
+}
+
+static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
+                                  struct io_kiocb *req)
+{
+       if (!io_op_defs[req->opcode].needs_mm)
+               return 0;
+       return __io_sq_thread_acquire_mm(ctx);
+}
+
+static inline void req_set_fail_links(struct io_kiocb *req)
+{
+       if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
+               req->flags |= REQ_F_FAIL_LINK;
+}
 
 /*
  * Note: must call io_req_init_async() for the first time you
@@ -957,6 +1017,11 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref)
        complete(&ctx->ref_comp);
 }
 
+static inline bool io_is_timeout_noseq(struct io_kiocb *req)
+{
+       return !req->timeout.off;
+}
+
 static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
        struct io_ring_ctx *ctx;
@@ -1000,7 +1065,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        mutex_init(&ctx->uring_lock);
        init_waitqueue_head(&ctx->wait);
        spin_lock_init(&ctx->completion_lock);
-       INIT_LIST_HEAD(&ctx->poll_list);
+       INIT_LIST_HEAD(&ctx->iopoll_list);
        INIT_LIST_HEAD(&ctx->defer_list);
        INIT_LIST_HEAD(&ctx->timeout_list);
        init_waitqueue_head(&ctx->inflight_wait);
@@ -1017,18 +1082,14 @@ err:
        return NULL;
 }
 
-static inline bool __req_need_defer(struct io_kiocb *req)
+static bool req_need_defer(struct io_kiocb *req, u32 seq)
 {
-       struct io_ring_ctx *ctx = req->ctx;
+       if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
+               struct io_ring_ctx *ctx = req->ctx;
 
-       return req->sequence != ctx->cached_cq_tail
+               return seq != ctx->cached_cq_tail
                                + atomic_read(&ctx->cached_cq_overflow);
-}
-
-static inline bool req_need_defer(struct io_kiocb *req)
-{
-       if (unlikely(req->flags & REQ_F_IO_DRAIN))
-               return __req_need_defer(req);
+       }
 
        return false;
 }
@@ -1046,28 +1107,7 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
        }
 }
 
-static inline void io_req_work_grab_env(struct io_kiocb *req,
-                                       const struct io_op_def *def)
-{
-       if (!req->work.mm && def->needs_mm) {
-               mmgrab(current->mm);
-               req->work.mm = current->mm;
-       }
-       if (!req->work.creds)
-               req->work.creds = get_current_cred();
-       if (!req->work.fs && def->needs_fs) {
-               spin_lock(&current->fs->lock);
-               if (!current->fs->in_exec) {
-                       req->work.fs = current->fs;
-                       req->work.fs->users++;
-               } else {
-                       req->work.flags |= IO_WQ_WORK_CANCEL;
-               }
-               spin_unlock(&current->fs->lock);
-       }
-}
-
-static inline void io_req_work_drop_env(struct io_kiocb *req)
+static void io_req_clean_work(struct io_kiocb *req)
 {
        if (!(req->flags & REQ_F_WORK_INITIALIZED))
                return;
@@ -1089,11 +1129,12 @@ static inline void io_req_work_drop_env(struct io_kiocb *req)
                spin_unlock(&req->work.fs->lock);
                if (fs)
                        free_fs_struct(fs);
+               req->work.fs = NULL;
        }
+       req->flags &= ~REQ_F_WORK_INITIALIZED;
 }
 
-static inline void io_prep_async_work(struct io_kiocb *req,
-                                     struct io_kiocb **link)
+static void io_prep_async_work(struct io_kiocb *req)
 {
        const struct io_op_def *def = &io_op_defs[req->opcode];
 
@@ -1106,18 +1147,42 @@ static inline void io_prep_async_work(struct io_kiocb *req,
                if (def->unbound_nonreg_file)
                        req->work.flags |= IO_WQ_WORK_UNBOUND;
        }
+       if (!req->work.mm && def->needs_mm) {
+               mmgrab(current->mm);
+               req->work.mm = current->mm;
+       }
+       if (!req->work.creds)
+               req->work.creds = get_current_cred();
+       if (!req->work.fs && def->needs_fs) {
+               spin_lock(&current->fs->lock);
+               if (!current->fs->in_exec) {
+                       req->work.fs = current->fs;
+                       req->work.fs->users++;
+               } else {
+                       req->work.flags |= IO_WQ_WORK_CANCEL;
+               }
+               spin_unlock(&current->fs->lock);
+       }
+       if (def->needs_fsize)
+               req->work.fsize = rlimit(RLIMIT_FSIZE);
+       else
+               req->work.fsize = RLIM_INFINITY;
+}
 
-       io_req_work_grab_env(req, def);
+static void io_prep_async_link(struct io_kiocb *req)
+{
+       struct io_kiocb *cur;
 
-       *link = io_prep_linked_timeout(req);
+       io_prep_async_work(req);
+       if (req->flags & REQ_F_LINK_HEAD)
+               list_for_each_entry(cur, &req->link_list, link_list)
+                       io_prep_async_work(cur);
 }
 
-static inline void io_queue_async_work(struct io_kiocb *req)
+static void __io_queue_async_work(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       struct io_kiocb *link;
-
-       io_prep_async_work(req, &link);
+       struct io_kiocb *link = io_prep_linked_timeout(req);
 
        trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
                                        &req->work, req->flags);
@@ -1127,14 +1192,22 @@ static inline void io_queue_async_work(struct io_kiocb *req)
                io_queue_linked_timeout(link);
 }
 
+static void io_queue_async_work(struct io_kiocb *req)
+{
+       /* init ->work of the whole link before punting */
+       io_prep_async_link(req);
+       __io_queue_async_work(req);
+}
+
 static void io_kill_timeout(struct io_kiocb *req)
 {
        int ret;
 
        ret = hrtimer_try_to_cancel(&req->io->timeout.timer);
        if (ret != -1) {
-               atomic_inc(&req->ctx->cq_timeouts);
-               list_del_init(&req->list);
+               atomic_set(&req->ctx->cq_timeouts,
+                       atomic_read(&req->ctx->cq_timeouts) + 1);
+               list_del_init(&req->timeout.list);
                req->flags |= REQ_F_COMP_LOCKED;
                io_cqring_fill_event(req, 0);
                io_put_req(req);
@@ -1146,7 +1219,7 @@ static void io_kill_timeouts(struct io_ring_ctx *ctx)
        struct io_kiocb *req, *tmp;
 
        spin_lock_irq(&ctx->completion_lock);
-       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list)
                io_kill_timeout(req);
        spin_unlock_irq(&ctx->completion_lock);
 }
@@ -1154,13 +1227,15 @@ static void io_kill_timeouts(struct io_ring_ctx *ctx)
 static void __io_queue_deferred(struct io_ring_ctx *ctx)
 {
        do {
-               struct io_kiocb *req = list_first_entry(&ctx->defer_list,
-                                                       struct io_kiocb, list);
+               struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
+                                               struct io_defer_entry, list);
 
-               if (req_need_defer(req))
+               if (req_need_defer(de->req, de->seq))
                        break;
-               list_del_init(&req->list);
-               io_queue_async_work(req);
+               list_del_init(&de->list);
+               /* punt-init is done before queueing for defer */
+               __io_queue_async_work(de->req);
+               kfree(de);
        } while (!list_empty(&ctx->defer_list));
 }
 
@@ -1168,15 +1243,15 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 {
        while (!list_empty(&ctx->timeout_list)) {
                struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
-                                                       struct io_kiocb, list);
+                                               struct io_kiocb, timeout.list);
 
-               if (req->flags & REQ_F_TIMEOUT_NOSEQ)
+               if (io_is_timeout_noseq(req))
                        break;
                if (req->timeout.target_seq != ctx->cached_cq_tail
                                        - atomic_read(&ctx->cq_timeouts))
                        break;
 
-               list_del_init(&req->list);
+               list_del_init(&req->timeout.list);
                io_kill_timeout(req);
        }
 }
@@ -1229,6 +1304,15 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
                eventfd_signal(ctx->cq_ev_fd, 1);
 }
 
+static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
+{
+       if (list_empty(&ctx->cq_overflow_list)) {
+               clear_bit(0, &ctx->sq_check_overflow);
+               clear_bit(0, &ctx->cq_check_overflow);
+               ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
+       }
+}
+
 /* Returns true if there are no backlogged entries after the flush */
 static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 {
@@ -1259,13 +1343,13 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
                        break;
 
                req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
-                                               list);
-               list_move(&req->list, &list);
+                                               compl.list);
+               list_move(&req->compl.list, &list);
                req->flags &= ~REQ_F_OVERFLOW;
                if (cqe) {
                        WRITE_ONCE(cqe->user_data, req->user_data);
                        WRITE_ONCE(cqe->res, req->result);
-                       WRITE_ONCE(cqe->flags, req->cflags);
+                       WRITE_ONCE(cqe->flags, req->compl.cflags);
                } else {
                        WRITE_ONCE(ctx->rings->cq_overflow,
                                atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1273,17 +1357,14 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
        }
 
        io_commit_cqring(ctx);
-       if (cqe) {
-               clear_bit(0, &ctx->sq_check_overflow);
-               clear_bit(0, &ctx->cq_check_overflow);
-               ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
-       }
+       io_cqring_mark_overflow(ctx);
+
        spin_unlock_irqrestore(&ctx->completion_lock, flags);
        io_cqring_ev_posted(ctx);
 
        while (!list_empty(&list)) {
-               req = list_first_entry(&list, struct io_kiocb, list);
-               list_del(&req->list);
+               req = list_first_entry(&list, struct io_kiocb, compl.list);
+               list_del(&req->compl.list);
                io_put_req(req);
        }
 
@@ -1316,11 +1397,12 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
                        set_bit(0, &ctx->cq_check_overflow);
                        ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
                }
+               io_clean_op(req);
                req->flags |= REQ_F_OVERFLOW;
-               refcount_inc(&req->refs);
                req->result = res;
-               req->cflags = cflags;
-               list_add_tail(&req->list, &ctx->cq_overflow_list);
+               req->compl.cflags = cflags;
+               refcount_inc(&req->refs);
+               list_add_tail(&req->compl.list, &ctx->cq_overflow_list);
        }
 }
 
@@ -1329,7 +1411,7 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
        __io_cqring_fill_event(req, res, 0);
 }
 
-static void __io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
+static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
 {
        struct io_ring_ctx *ctx = req->ctx;
        unsigned long flags;
@@ -1342,9 +1424,52 @@ static void __io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
        io_cqring_ev_posted(ctx);
 }
 
-static void io_cqring_add_event(struct io_kiocb *req, long res)
+static void io_submit_flush_completions(struct io_comp_state *cs)
 {
-       __io_cqring_add_event(req, res, 0);
+       struct io_ring_ctx *ctx = cs->ctx;
+
+       spin_lock_irq(&ctx->completion_lock);
+       while (!list_empty(&cs->list)) {
+               struct io_kiocb *req;
+
+               req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
+               list_del(&req->compl.list);
+               __io_cqring_fill_event(req, req->result, req->compl.cflags);
+               if (!(req->flags & REQ_F_LINK_HEAD)) {
+                       req->flags |= REQ_F_COMP_LOCKED;
+                       io_put_req(req);
+               } else {
+                       spin_unlock_irq(&ctx->completion_lock);
+                       io_put_req(req);
+                       spin_lock_irq(&ctx->completion_lock);
+               }
+       }
+       io_commit_cqring(ctx);
+       spin_unlock_irq(&ctx->completion_lock);
+
+       io_cqring_ev_posted(ctx);
+       cs->nr = 0;
+}
+
+static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
+                             struct io_comp_state *cs)
+{
+       if (!cs) {
+               io_cqring_add_event(req, res, cflags);
+               io_put_req(req);
+       } else {
+               io_clean_op(req);
+               req->result = res;
+               req->compl.cflags = cflags;
+               list_add_tail(&req->compl.list, &cs->list);
+               if (++cs->nr >= 32)
+                       io_submit_flush_completions(cs);
+       }
+}
+
+static void io_req_complete(struct io_kiocb *req, long res)
+{
+       __io_req_complete(req, res, 0, NULL);
 }
 
 static inline bool io_is_fallback_req(struct io_kiocb *req)
@@ -1370,11 +1495,7 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx,
        gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
        struct io_kiocb *req;
 
-       if (!state) {
-               req = kmem_cache_alloc(req_cachep, gfp);
-               if (unlikely(!req))
-                       goto fallback;
-       } else if (!state->free_reqs) {
+       if (!state->free_reqs) {
                size_t sz;
                int ret;
 
@@ -1412,21 +1533,15 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file,
                fput(file);
 }
 
-static void __io_req_aux_free(struct io_kiocb *req)
+static void io_dismantle_req(struct io_kiocb *req)
 {
-       if (req->flags & REQ_F_NEED_CLEANUP)
-               io_cleanup_req(req);
+       io_clean_op(req);
 
-       kfree(req->io);
+       if (req->io)
+               kfree(req->io);
        if (req->file)
                io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
-       __io_put_req_task(req);
-       io_req_work_drop_env(req);
-}
-
-static void __io_free_req(struct io_kiocb *req)
-{
-       __io_req_aux_free(req);
+       io_req_clean_work(req);
 
        if (req->flags & REQ_F_INFLIGHT) {
                struct io_ring_ctx *ctx = req->ctx;
@@ -1438,57 +1553,20 @@ static void __io_free_req(struct io_kiocb *req)
                        wake_up(&ctx->inflight_wait);
                spin_unlock_irqrestore(&ctx->inflight_lock, flags);
        }
-
-       percpu_ref_put(&req->ctx->refs);
-       if (likely(!io_is_fallback_req(req)))
-               kmem_cache_free(req_cachep, req);
-       else
-               clear_bit_unlock(0, (unsigned long *) &req->ctx->fallback_req);
 }
 
-struct req_batch {
-       void *reqs[IO_IOPOLL_BATCH];
-       int to_free;
-       int need_iter;
-};
-
-static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb)
+static void __io_free_req(struct io_kiocb *req)
 {
-       if (!rb->to_free)
-               return;
-       if (rb->need_iter) {
-               int i, inflight = 0;
-               unsigned long flags;
-
-               for (i = 0; i < rb->to_free; i++) {
-                       struct io_kiocb *req = rb->reqs[i];
-
-                       if (req->flags & REQ_F_INFLIGHT)
-                               inflight++;
-                       __io_req_aux_free(req);
-               }
-               if (!inflight)
-                       goto do_free;
-
-               spin_lock_irqsave(&ctx->inflight_lock, flags);
-               for (i = 0; i < rb->to_free; i++) {
-                       struct io_kiocb *req = rb->reqs[i];
-
-                       if (req->flags & REQ_F_INFLIGHT) {
-                               list_del(&req->inflight_entry);
-                               if (!--inflight)
-                                       break;
-                       }
-               }
-               spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+       struct io_ring_ctx *ctx;
 
-               if (waitqueue_active(&ctx->inflight_wait))
-                       wake_up(&ctx->inflight_wait);
-       }
-do_free:
-       kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs);
-       percpu_ref_put_many(&ctx->refs, rb->to_free);
-       rb->to_free = rb->need_iter = 0;
+       io_dismantle_req(req);
+       __io_put_req_task(req);
+       ctx = req->ctx;
+       if (likely(!io_is_fallback_req(req)))
+               kmem_cache_free(req_cachep, req);
+       else
+               clear_bit_unlock(0, (unsigned long *) &ctx->fallback_req);
+       percpu_ref_put(&ctx->refs);
 }
 
 static bool io_link_cancel_timeout(struct io_kiocb *req)
@@ -1508,53 +1586,67 @@ static bool io_link_cancel_timeout(struct io_kiocb *req)
        return false;
 }
 
-static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
+static bool __io_kill_linked_timeout(struct io_kiocb *req)
+{
+       struct io_kiocb *link;
+       bool wake_ev;
+
+       if (list_empty(&req->link_list))
+               return false;
+       link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
+       if (link->opcode != IORING_OP_LINK_TIMEOUT)
+               return false;
+
+       list_del_init(&link->link_list);
+       wake_ev = io_link_cancel_timeout(link);
+       req->flags &= ~REQ_F_LINK_TIMEOUT;
+       return wake_ev;
+}
+
+static void io_kill_linked_timeout(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       bool wake_ev = false;
+       bool wake_ev;
 
-       /* Already got next link */
-       if (req->flags & REQ_F_LINK_NEXT)
-               return;
+       if (!(req->flags & REQ_F_COMP_LOCKED)) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&ctx->completion_lock, flags);
+               wake_ev = __io_kill_linked_timeout(req);
+               spin_unlock_irqrestore(&ctx->completion_lock, flags);
+       } else {
+               wake_ev = __io_kill_linked_timeout(req);
+       }
+
+       if (wake_ev)
+               io_cqring_ev_posted(ctx);
+}
+
+static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
+{
+       struct io_kiocb *nxt;
 
        /*
         * The list should never be empty when we are called here. But could
         * potentially happen if the chain is messed up, check to be on the
         * safe side.
         */
-       while (!list_empty(&req->link_list)) {
-               struct io_kiocb *nxt = list_first_entry(&req->link_list,
-                                               struct io_kiocb, link_list);
-
-               if (unlikely((req->flags & REQ_F_LINK_TIMEOUT) &&
-                            (nxt->flags & REQ_F_TIMEOUT))) {
-                       list_del_init(&nxt->link_list);
-                       wake_ev |= io_link_cancel_timeout(nxt);
-                       req->flags &= ~REQ_F_LINK_TIMEOUT;
-                       continue;
-               }
-
-               list_del_init(&req->link_list);
-               if (!list_empty(&nxt->link_list))
-                       nxt->flags |= REQ_F_LINK_HEAD;
-               *nxtptr = nxt;
-               break;
-       }
+       if (unlikely(list_empty(&req->link_list)))
+               return NULL;
 
-       req->flags |= REQ_F_LINK_NEXT;
-       if (wake_ev)
-               io_cqring_ev_posted(ctx);
+       nxt = list_first_entry(&req->link_list, struct io_kiocb, link_list);
+       list_del_init(&req->link_list);
+       if (!list_empty(&nxt->link_list))
+               nxt->flags |= REQ_F_LINK_HEAD;
+       return nxt;
 }
 
 /*
  * Called if REQ_F_LINK_HEAD is set, and we fail the head request
  */
-static void io_fail_links(struct io_kiocb *req)
+static void __io_fail_links(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ctx->completion_lock, flags);
 
        while (!list_empty(&req->link_list)) {
                struct io_kiocb *link = list_first_entry(&req->link_list,
@@ -1563,25 +1655,37 @@ static void io_fail_links(struct io_kiocb *req)
                list_del_init(&link->link_list);
                trace_io_uring_fail_link(req, link);
 
-               if ((req->flags & REQ_F_LINK_TIMEOUT) &&
-                   link->opcode == IORING_OP_LINK_TIMEOUT) {
-                       io_link_cancel_timeout(link);
-               } else {
-                       io_cqring_fill_event(link, -ECANCELED);
-                       __io_double_put_req(link);
-               }
+               io_cqring_fill_event(link, -ECANCELED);
+               __io_double_put_req(link);
                req->flags &= ~REQ_F_LINK_TIMEOUT;
        }
 
        io_commit_cqring(ctx);
-       spin_unlock_irqrestore(&ctx->completion_lock, flags);
        io_cqring_ev_posted(ctx);
 }
 
-static void io_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
+static void io_fail_links(struct io_kiocb *req)
 {
-       if (likely(!(req->flags & REQ_F_LINK_HEAD)))
-               return;
+       struct io_ring_ctx *ctx = req->ctx;
+
+       if (!(req->flags & REQ_F_COMP_LOCKED)) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&ctx->completion_lock, flags);
+               __io_fail_links(req);
+               spin_unlock_irqrestore(&ctx->completion_lock, flags);
+       } else {
+               __io_fail_links(req);
+       }
+
+       io_cqring_ev_posted(ctx);
+}
+
+static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
+{
+       req->flags &= ~REQ_F_LINK_HEAD;
+       if (req->flags & REQ_F_LINK_TIMEOUT)
+               io_kill_linked_timeout(req);
 
        /*
         * If LINK is set, we have dependent requests in this chain. If we
@@ -1589,62 +1693,187 @@ static void io_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
         * dependencies to the next request. In case of failure, fail the rest
         * of the chain.
         */
-       if (req->flags & REQ_F_FAIL_LINK) {
-               io_fail_links(req);
-       } else if ((req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_COMP_LOCKED)) ==
-                       REQ_F_LINK_TIMEOUT) {
-               struct io_ring_ctx *ctx = req->ctx;
-               unsigned long flags;
+       if (likely(!(req->flags & REQ_F_FAIL_LINK)))
+               return io_req_link_next(req);
+       io_fail_links(req);
+       return NULL;
+}
 
-               /*
-                * If this is a timeout link, we could be racing with the
-                * timeout timer. Grab the completion lock for this case to
-                * protect against that.
-                */
-               spin_lock_irqsave(&ctx->completion_lock, flags);
-               io_req_link_next(req, nxt);
-               spin_unlock_irqrestore(&ctx->completion_lock, flags);
+static struct io_kiocb *io_req_find_next(struct io_kiocb *req)
+{
+       if (likely(!(req->flags & REQ_F_LINK_HEAD)))
+               return NULL;
+       return __io_req_find_next(req);
+}
+
+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
+{
+       struct task_struct *tsk = req->task;
+       struct io_ring_ctx *ctx = req->ctx;
+       int ret, notify = TWA_RESUME;
+
+       /*
+        * SQPOLL kernel thread doesn't need notification, just a wakeup.
+        * If we're not using an eventfd, then TWA_RESUME is always fine,
+        * as we won't have dependencies between request completions for
+        * other kernel wait conditions.
+        */
+       if (ctx->flags & IORING_SETUP_SQPOLL)
+               notify = 0;
+       else if (ctx->cq_ev_fd)
+               notify = TWA_SIGNAL;
+
+       ret = task_work_add(tsk, cb, notify);
+       if (!ret)
+               wake_up_process(tsk);
+       return ret;
+}
+
+static void __io_req_task_cancel(struct io_kiocb *req, int error)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+
+       spin_lock_irq(&ctx->completion_lock);
+       io_cqring_fill_event(req, error);
+       io_commit_cqring(ctx);
+       spin_unlock_irq(&ctx->completion_lock);
+
+       io_cqring_ev_posted(ctx);
+       req_set_fail_links(req);
+       io_double_put_req(req);
+}
+
+static void io_req_task_cancel(struct callback_head *cb)
+{
+       struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+
+       __io_req_task_cancel(req, -ECANCELED);
+}
+
+static void __io_req_task_submit(struct io_kiocb *req)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+
+       if (!__io_sq_thread_acquire_mm(ctx)) {
+               mutex_lock(&ctx->uring_lock);
+               __io_queue_sqe(req, NULL, NULL);
+               mutex_unlock(&ctx->uring_lock);
        } else {
-               io_req_link_next(req, nxt);
+               __io_req_task_cancel(req, -EFAULT);
        }
 }
 
-static void io_free_req(struct io_kiocb *req)
+static void io_req_task_submit(struct callback_head *cb)
 {
-       struct io_kiocb *nxt = NULL;
+       struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
 
-       io_req_find_next(req, &nxt);
-       __io_free_req(req);
+       __io_req_task_submit(req);
+}
+
+static void io_req_task_queue(struct io_kiocb *req)
+{
+       int ret;
+
+       init_task_work(&req->task_work, io_req_task_submit);
+
+       ret = io_req_task_work_add(req, &req->task_work);
+       if (unlikely(ret)) {
+               struct task_struct *tsk;
+
+               init_task_work(&req->task_work, io_req_task_cancel);
+               tsk = io_wq_get_task(req->ctx->io_wq);
+               task_work_add(tsk, &req->task_work, 0);
+               wake_up_process(tsk);
+       }
+}
+
+static void io_queue_next(struct io_kiocb *req)
+{
+       struct io_kiocb *nxt = io_req_find_next(req);
 
        if (nxt)
-               io_queue_async_work(nxt);
+               io_req_task_queue(nxt);
 }
 
-static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
+static void io_free_req(struct io_kiocb *req)
 {
-       struct io_kiocb *link;
-       const struct io_op_def *def = &io_op_defs[nxt->opcode];
+       io_queue_next(req);
+       __io_free_req(req);
+}
 
-       if ((nxt->flags & REQ_F_ISREG) && def->hash_reg_file)
-               io_wq_hash_work(&nxt->work, file_inode(nxt->file));
+struct req_batch {
+       void *reqs[IO_IOPOLL_BATCH];
+       int to_free;
 
-       *workptr = &nxt->work;
-       link = io_prep_linked_timeout(nxt);
-       if (link)
-               nxt->flags |= REQ_F_QUEUE_TIMEOUT;
+       struct task_struct      *task;
+       int                     task_refs;
+};
+
+static inline void io_init_req_batch(struct req_batch *rb)
+{
+       rb->to_free = 0;
+       rb->task_refs = 0;
+       rb->task = NULL;
+}
+
+static void __io_req_free_batch_flush(struct io_ring_ctx *ctx,
+                                     struct req_batch *rb)
+{
+       kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs);
+       percpu_ref_put_many(&ctx->refs, rb->to_free);
+       rb->to_free = 0;
+}
+
+static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
+                                    struct req_batch *rb)
+{
+       if (rb->to_free)
+               __io_req_free_batch_flush(ctx, rb);
+       if (rb->task) {
+               put_task_struct_many(rb->task, rb->task_refs);
+               rb->task = NULL;
+       }
+}
+
+static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
+{
+       if (unlikely(io_is_fallback_req(req))) {
+               io_free_req(req);
+               return;
+       }
+       if (req->flags & REQ_F_LINK_HEAD)
+               io_queue_next(req);
+
+       if (req->flags & REQ_F_TASK_PINNED) {
+               if (req->task != rb->task) {
+                       if (rb->task)
+                               put_task_struct_many(rb->task, rb->task_refs);
+                       rb->task = req->task;
+                       rb->task_refs = 0;
+               }
+               rb->task_refs++;
+               req->flags &= ~REQ_F_TASK_PINNED;
+       }
+
+       io_dismantle_req(req);
+       rb->reqs[rb->to_free++] = req;
+       if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs)))
+               __io_req_free_batch_flush(req->ctx, rb);
 }
 
 /*
  * Drop reference to request, return next in chain (if there is one) if this
  * was the last reference to this request.
  */
-__attribute__((nonnull))
-static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
+static struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
 {
+       struct io_kiocb *nxt = NULL;
+
        if (refcount_dec_and_test(&req->refs)) {
-               io_req_find_next(req, nxtptr);
+               nxt = io_req_find_next(req);
                __io_free_req(req);
        }
+       return nxt;
 }
 
 static void io_put_req(struct io_kiocb *req)
@@ -1653,24 +1882,20 @@ static void io_put_req(struct io_kiocb *req)
                io_free_req(req);
 }
 
-static void io_steal_work(struct io_kiocb *req,
-                         struct io_wq_work **workptr)
+static struct io_wq_work *io_steal_work(struct io_kiocb *req)
 {
+       struct io_kiocb *nxt;
+
        /*
-        * It's in an io-wq worker, so there always should be at least
-        * one reference, which will be dropped in io_put_work() just
-        * after the current handler returns.
-        *
-        * It also means, that if the counter dropped to 1, then there is
-        * no asynchronous users left, so it's safe to steal the next work.
+        * io-wq, in whose context we are running, owns a ref. So, if that's
+        * the last one, it's safe to steal the next work. False negatives are
+        * OK: the work will just be re-punted async in io_put_work().
         */
-       if (refcount_read(&req->refs) == 1) {
-               struct io_kiocb *nxt = NULL;
+       if (refcount_read(&req->refs) != 1)
+               return NULL;
 
-               io_req_find_next(req, &nxt);
-               if (nxt)
-                       io_wq_assign_next(workptr, nxt);
-       }
+       nxt = io_req_find_next(req);
+       return nxt ? &nxt->work : NULL;
 }
 
 /*
@@ -1720,31 +1945,34 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
        return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
 }
 
-static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req)
+static unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf)
 {
-       if ((req->flags & REQ_F_LINK_HEAD) || io_is_fallback_req(req))
-               return false;
+       unsigned int cflags;
 
-       if (req->file || req->io)
-               rb->need_iter++;
-
-       rb->reqs[rb->to_free++] = req;
-       if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs)))
-               io_free_req_many(req->ctx, rb);
-       return true;
+       cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
+       cflags |= IORING_CQE_F_BUFFER;
+       req->flags &= ~REQ_F_BUFFER_SELECTED;
+       kfree(kbuf);
+       return cflags;
 }
 
-static int io_put_kbuf(struct io_kiocb *req)
+static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
 {
        struct io_buffer *kbuf;
-       int cflags;
 
        kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
-       cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
-       cflags |= IORING_CQE_F_BUFFER;
-       req->rw.addr = 0;
-       kfree(kbuf);
-       return cflags;
+       return io_put_kbuf(req, kbuf);
+}
+
+static inline bool io_run_task_work(void)
+{
+       if (current->task_works) {
+               __set_current_state(TASK_RUNNING);
+               task_work_run();
+               return true;
+       }
+
+       return false;
 }
 
 static void io_iopoll_queue(struct list_head *again)
@@ -1752,18 +1980,9 @@ static void io_iopoll_queue(struct list_head *again)
        struct io_kiocb *req;
 
        do {
-               req = list_first_entry(again, struct io_kiocb, list);
-               list_del(&req->list);
-
-               /* shouldn't happen unless io_uring is dying, cancel reqs */
-               if (unlikely(!current->mm)) {
-                       io_complete_rw_common(&req->rw.kiocb, -EAGAIN);
-                       io_put_req(req);
-                       continue;
-               }
-
-               refcount_inc(&req->refs);
-               io_queue_async_work(req);
+               req = list_first_entry(again, struct io_kiocb, inflight_entry);
+               list_del(&req->inflight_entry);
+               __io_complete_rw(req, -EAGAIN, 0, NULL);
        } while (!list_empty(again));
 }
 
@@ -1780,33 +1999,32 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
        /* order with ->result store in io_complete_rw_iopoll() */
        smp_rmb();
 
-       rb.to_free = rb.need_iter = 0;
+       io_init_req_batch(&rb);
        while (!list_empty(done)) {
                int cflags = 0;
 
-               req = list_first_entry(done, struct io_kiocb, list);
+               req = list_first_entry(done, struct io_kiocb, inflight_entry);
                if (READ_ONCE(req->result) == -EAGAIN) {
                        req->iopoll_completed = 0;
-                       list_move_tail(&req->list, &again);
+                       list_move_tail(&req->inflight_entry, &again);
                        continue;
                }
-               list_del(&req->list);
+               list_del(&req->inflight_entry);
 
                if (req->flags & REQ_F_BUFFER_SELECTED)
-                       cflags = io_put_kbuf(req);
+                       cflags = io_put_rw_kbuf(req);
 
                __io_cqring_fill_event(req, req->result, cflags);
                (*nr_events)++;
 
-               if (refcount_dec_and_test(&req->refs) &&
-                   !io_req_multi_free(&rb, req))
-                       io_free_req(req);
+               if (refcount_dec_and_test(&req->refs))
+                       io_req_free_batch(&rb, req);
        }
 
        io_commit_cqring(ctx);
        if (ctx->flags & IORING_SETUP_SQPOLL)
                io_cqring_ev_posted(ctx);
-       io_free_req_many(ctx, &rb);
+       io_req_free_batch_finish(ctx, &rb);
 
        if (!list_empty(&again))
                io_iopoll_queue(&again);
@@ -1827,7 +2045,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
        spin = !ctx->poll_multi_file && *nr_events < min;
 
        ret = 0;
-       list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
+       list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
                struct kiocb *kiocb = &req->rw.kiocb;
 
                /*
@@ -1836,7 +2054,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
                 * and complete those lists first, if we have entries there.
                 */
                if (READ_ONCE(req->iopoll_completed)) {
-                       list_move_tail(&req->list, &done);
+                       list_move_tail(&req->inflight_entry, &done);
                        continue;
                }
                if (!list_empty(&done))
@@ -1846,6 +2064,10 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
                if (ret < 0)
                        break;
 
+               /* iopoll may have completed current req */
+               if (READ_ONCE(req->iopoll_completed))
+                       list_move_tail(&req->inflight_entry, &done);
+
                if (ret && spin)
                        spin = false;
                ret = 0;
@@ -1865,13 +2087,13 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
                                long min)
 {
-       while (!list_empty(&ctx->poll_list) && !need_resched()) {
+       while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
                int ret;
 
                ret = io_do_iopoll(ctx, nr_events, min);
                if (ret < 0)
                        return ret;
-               if (!min || *nr_events >= min)
+               if (*nr_events >= min)
                        return 0;
        }
 
@@ -1882,29 +2104,37 @@ static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
  * We can't just wait for polled events to come to us, we have to actively
  * find and complete them.
  */
-static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
+static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
 {
        if (!(ctx->flags & IORING_SETUP_IOPOLL))
                return;
 
        mutex_lock(&ctx->uring_lock);
-       while (!list_empty(&ctx->poll_list)) {
+       while (!list_empty(&ctx->iopoll_list)) {
                unsigned int nr_events = 0;
 
-               io_iopoll_getevents(ctx, &nr_events, 1);
+               io_do_iopoll(ctx, &nr_events, 0);
 
+               /* let it sleep and repeat later if no request could be completed */
+               if (nr_events == 0)
+                       break;
                /*
                 * Ensure we allow local-to-the-cpu processing to take place,
                 * in this case we need to ensure that we reap all events.
+                * Also let task_work, etc. make progress by releasing the mutex
                 */
-               cond_resched();
+               if (need_resched()) {
+                       mutex_unlock(&ctx->uring_lock);
+                       cond_resched();
+                       mutex_lock(&ctx->uring_lock);
+               }
        }
        mutex_unlock(&ctx->uring_lock);
 }
 
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
-                          long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 {
+       unsigned int nr_events = 0;
        int iters = 0, ret = 0;
 
        /*
@@ -1914,8 +2144,6 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
         */
        mutex_lock(&ctx->uring_lock);
        do {
-               int tmin = 0;
-
                /*
                 * Don't enter poll loop if we already have events pending.
                 * If we do, we can potentially be spinning for commands that
@@ -1936,17 +2164,15 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
                 */
                if (!(++iters & 7)) {
                        mutex_unlock(&ctx->uring_lock);
+                       io_run_task_work();
                        mutex_lock(&ctx->uring_lock);
                }
 
-               if (*nr_events < min)
-                       tmin = min - *nr_events;
-
-               ret = io_iopoll_getevents(ctx, nr_events, tmin);
+               ret = io_iopoll_getevents(ctx, &nr_events, min);
                if (ret <= 0)
                        break;
                ret = 0;
-       } while (min && !*nr_events && !need_resched());
+       } while (min && !nr_events && !need_resched());
 
        mutex_unlock(&ctx->uring_lock);
        return ret;
@@ -1966,13 +2192,8 @@ static void kiocb_end_write(struct io_kiocb *req)
        file_end_write(req->file);
 }
 
-static inline void req_set_fail_links(struct io_kiocb *req)
-{
-       if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
-               req->flags |= REQ_F_FAIL_LINK;
-}
-
-static void io_complete_rw_common(struct kiocb *kiocb, long res)
+static void io_complete_rw_common(struct kiocb *kiocb, long res,
+                                 struct io_comp_state *cs)
 {
        struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
        int cflags = 0;
@@ -1983,16 +2204,96 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
        if (res != req->result)
                req_set_fail_links(req);
        if (req->flags & REQ_F_BUFFER_SELECTED)
-               cflags = io_put_kbuf(req);
-       __io_cqring_add_event(req, res, cflags);
+               cflags = io_put_rw_kbuf(req);
+       __io_req_complete(req, res, cflags, cs);
+}
+
+#ifdef CONFIG_BLOCK
+static bool io_resubmit_prep(struct io_kiocb *req, int error)
+{
+       struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+       ssize_t ret = -ECANCELED;
+       struct iov_iter iter;
+       int rw;
+
+       if (error) {
+               ret = error;
+               goto end_req;
+       }
+
+       switch (req->opcode) {
+       case IORING_OP_READV:
+       case IORING_OP_READ_FIXED:
+       case IORING_OP_READ:
+               rw = READ;
+               break;
+       case IORING_OP_WRITEV:
+       case IORING_OP_WRITE_FIXED:
+       case IORING_OP_WRITE:
+               rw = WRITE;
+               break;
+       default:
+               printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n",
+                               req->opcode);
+               goto end_req;
+       }
+
+       ret = io_import_iovec(rw, req, &iovec, &iter, false);
+       if (ret < 0)
+               goto end_req;
+       ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter);
+       if (!ret)
+               return true;
+       kfree(iovec);
+end_req:
+       req_set_fail_links(req);
+       io_req_complete(req, ret);
+       return false;
+}
+
+static void io_rw_resubmit(struct callback_head *cb)
+{
+       struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+       struct io_ring_ctx *ctx = req->ctx;
+       int err;
+
+       err = io_sq_thread_acquire_mm(ctx, req);
+
+       if (io_resubmit_prep(req, err)) {
+               refcount_inc(&req->refs);
+               io_queue_async_work(req);
+       }
+}
+#endif
+
+static bool io_rw_reissue(struct io_kiocb *req, long res)
+{
+#ifdef CONFIG_BLOCK
+       int ret;
+
+       if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
+               return false;
+
+       init_task_work(&req->task_work, io_rw_resubmit);
+       ret = io_req_task_work_add(req, &req->task_work);
+       if (!ret)
+               return true;
+#endif
+       return false;
+}
+
+static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+                            struct io_comp_state *cs)
+{
+       if (!io_rw_reissue(req, res))
+               io_complete_rw_common(&req->rw.kiocb, res, cs);
 }
 
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
        struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
-       io_complete_rw_common(kiocb, res);
-       io_put_req(req);
+       __io_complete_rw(req, res, res2, NULL);
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -2026,13 +2327,13 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
         * how we do polling eventually, not spinning if we're on potentially
         * different devices.
         */
-       if (list_empty(&ctx->poll_list)) {
+       if (list_empty(&ctx->iopoll_list)) {
                ctx->poll_multi_file = false;
        } else if (!ctx->poll_multi_file) {
                struct io_kiocb *list_req;
 
-               list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
-                                               list);
+               list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
+                                               inflight_entry);
                if (list_req->file != req->file)
                        ctx->poll_multi_file = true;
        }
@@ -2042,9 +2343,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
         * it to the front so we find it first.
         */
        if (READ_ONCE(req->iopoll_completed))
-               list_add(&req->list, &ctx->poll_list);
+               list_add(&req->inflight_entry, &ctx->iopoll_list);
        else
-               list_add_tail(&req->list, &ctx->poll_list);
+               list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
 
        if ((ctx->flags & IORING_SETUP_SQPOLL) &&
            wq_has_sleeper(&ctx->sqo_wait))
@@ -2053,10 +2354,8 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 
 static void __io_state_file_put(struct io_submit_state *state)
 {
-       int diff = state->has_refs - state->used_refs;
-
-       if (diff)
-               fput_many(state->file, diff);
+       if (state->has_refs)
+               fput_many(state->file, state->has_refs);
        state->file = NULL;
 }
 
@@ -2078,7 +2377,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd)
 
        if (state->file) {
                if (state->fd == fd) {
-                       state->used_refs++;
+                       state->has_refs--;
                        state->ios_left--;
                        return state->file;
                }
@@ -2089,12 +2388,20 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd)
                return NULL;
 
        state->fd = fd;
-       state->has_refs = state->ios_left;
-       state->used_refs = 1;
        state->ios_left--;
+       state->has_refs = state->ios_left;
        return state->file;
 }
 
+static bool io_bdev_nowait(struct block_device *bdev)
+{
+#ifdef CONFIG_BLOCK
+       return !bdev || queue_is_mq(bdev_get_queue(bdev));
+#else
+       return true;
+#endif
+}
+
 /*
  * If we tracked the file through the SCM inflight mechanism, we could support
  * any file. For now, just ensure that anything potentially problematic is done
@@ -2104,10 +2411,19 @@ static bool io_file_supports_async(struct file *file, int rw)
 {
        umode_t mode = file_inode(file)->i_mode;
 
-       if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISSOCK(mode))
-               return true;
-       if (S_ISREG(mode) && file->f_op != &io_uring_fops)
+       if (S_ISBLK(mode)) {
+               if (io_bdev_nowait(file->f_inode->i_bdev))
+                       return true;
+               return false;
+       }
+       if (S_ISCHR(mode) || S_ISSOCK(mode))
                return true;
+       if (S_ISREG(mode)) {
+               if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
+                   file->f_op != &io_uring_fops)
+                       return true;
+               return false;
+       }
 
        /* any ->read/write should understand O_NONBLOCK */
        if (file->f_flags & O_NONBLOCK)
@@ -2158,6 +2474,9 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        if (kiocb->ki_flags & IOCB_NOWAIT)
                req->flags |= REQ_F_NOWAIT;
 
+       if (kiocb->ki_flags & IOCB_DIRECT)
+               io_get_req_task(req);
+
        if (force_nonblock)
                kiocb->ki_flags |= IOCB_NOWAIT;
 
@@ -2168,8 +2487,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 
                kiocb->ki_flags |= IOCB_HIPRI;
                kiocb->ki_complete = io_complete_rw_iopoll;
-               req->result = 0;
                req->iopoll_completed = 0;
+               io_get_req_task(req);
        } else {
                if (kiocb->ki_flags & IOCB_HIPRI)
                        return -EINVAL;
@@ -2203,14 +2522,15 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
        }
 }
 
-static void kiocb_done(struct kiocb *kiocb, ssize_t ret)
+static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
+                      struct io_comp_state *cs)
 {
        struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
        if (req->flags & REQ_F_CUR_POS)
                req->file->f_pos = kiocb->ki_pos;
        if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
-               io_complete_rw(kiocb, ret, 0);
+               __io_complete_rw(req, ret, 0, cs);
        else
                io_rw_done(kiocb, ret);
 }
@@ -2466,10 +2786,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
        if (req->io) {
                struct io_async_rw *iorw = &req->io->rw;
 
-               *iovec = iorw->iov;
-               iov_iter_init(iter, rw, *iovec, iorw->nr_segs, iorw->size);
-               if (iorw->iov == iorw->fast_iov)
-                       *iovec = NULL;
+               iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size);
+               *iovec = NULL;
                return iorw->size;
        }
 
@@ -2554,15 +2872,17 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
                          struct iovec *iovec, struct iovec *fast_iov,
                          struct iov_iter *iter)
 {
-       req->io->rw.nr_segs = iter->nr_segs;
-       req->io->rw.size = io_size;
-       req->io->rw.iov = iovec;
-       if (!req->io->rw.iov) {
-               req->io->rw.iov = req->io->rw.fast_iov;
-               if (req->io->rw.iov != fast_iov)
-                       memcpy(req->io->rw.iov, fast_iov,
+       struct io_async_rw *rw = &req->io->rw;
+
+       rw->nr_segs = iter->nr_segs;
+       rw->size = io_size;
+       if (!iovec) {
+               rw->iov = rw->fast_iov;
+               if (rw->iov != fast_iov)
+                       memcpy(rw->iov, fast_iov,
                               sizeof(struct iovec) * iter->nr_segs);
        } else {
+               rw->iov = iovec;
                req->flags |= REQ_F_NEED_CLEANUP;
        }
 }
@@ -2596,11 +2916,27 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
        return 0;
 }
 
+static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
+                                  bool force_nonblock)
+{
+       struct io_async_ctx *io = req->io;
+       struct iov_iter iter;
+       ssize_t ret;
+
+       io->rw.iov = io->rw.fast_iov;
+       req->io = NULL;
+       ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock);
+       req->io = io;
+       if (unlikely(ret < 0))
+               return ret;
+
+       io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+       return 0;
+}
+
 static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        bool force_nonblock)
 {
-       struct io_async_ctx *io;
-       struct iov_iter iter;
        ssize_t ret;
 
        ret = io_prep_rw(req, sqe, force_nonblock);
@@ -2613,75 +2949,169 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        /* either don't need iovec imported or already have it */
        if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
                return 0;
+       return io_rw_prep_async(req, READ, force_nonblock);
+}
 
-       io = req->io;
-       io->rw.iov = io->rw.fast_iov;
-       req->io = NULL;
-       ret = io_import_iovec(READ, req, &io->rw.iov, &iter, !force_nonblock);
-       req->io = io;
-       if (ret < 0)
-               return ret;
+static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
+                            int sync, void *arg)
+{
+       struct wait_page_queue *wpq;
+       struct io_kiocb *req = wait->private;
+       struct wait_page_key *key = arg;
+       int ret;
 
-       io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
-       return 0;
+       wpq = container_of(wait, struct wait_page_queue, wait);
+
+       if (!wake_page_match(wpq, key))
+               return 0;
+
+       /* Stop waking things up if the page is locked again */
+       if (test_bit(key->bit_nr, &key->page->flags))
+               return -1;
+
+       list_del_init(&wait->entry);
+
+       init_task_work(&req->task_work, io_req_task_submit);
+       /* submit ref gets dropped, acquire a new one */
+       refcount_inc(&req->refs);
+       ret = io_req_task_work_add(req, &req->task_work);
+       if (unlikely(ret)) {
+               struct task_struct *tsk;
+
+               /* queue just for cancelation */
+               init_task_work(&req->task_work, io_req_task_cancel);
+               tsk = io_wq_get_task(req->ctx->io_wq);
+               task_work_add(tsk, &req->task_work, 0);
+               wake_up_process(tsk);
+       }
+       return 1;
+}
+
+static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
+                                            struct wait_page_queue *wait,
+                                            wait_queue_func_t func,
+                                            void *data)
+{
+       /* Can't support async wakeup with polled IO */
+       if (kiocb->ki_flags & IOCB_HIPRI)
+               return -EINVAL;
+       if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
+               wait->wait.func = func;
+               wait->wait.private = data;
+               wait->wait.flags = 0;
+               INIT_LIST_HEAD(&wait->wait.entry);
+               kiocb->ki_flags |= IOCB_WAITQ;
+               kiocb->ki_waitq = wait;
+               return 0;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+
+static bool io_rw_should_retry(struct io_kiocb *req)
+{
+       struct kiocb *kiocb = &req->rw.kiocb;
+       int ret;
+
+       /* never retry for NOWAIT, we just complete with -EAGAIN */
+       if (req->flags & REQ_F_NOWAIT)
+               return false;
+
+       /* already tried, or we're doing O_DIRECT */
+       if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+               return false;
+       /*
+        * just use poll if we can, and don't attempt if the fs doesn't
+        * support callback based unlocks
+        */
+       if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
+               return false;
+
+       /*
+        * If request type doesn't require req->io to defer in general,
+        * we need to allocate it here
+        */
+       if (!req->io && __io_alloc_async_ctx(req))
+               return false;
+
+       ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
+                                               io_async_buf_func, req);
+       if (!ret) {
+               io_get_req_task(req);
+               return true;
+       }
+
+       return false;
+}
+
+static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
+{
+       if (req->file->f_op->read_iter)
+               return call_read_iter(req->file, &req->rw.kiocb, iter);
+       return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter);
 }
 
-static int io_read(struct io_kiocb *req, bool force_nonblock)
+static int io_read(struct io_kiocb *req, bool force_nonblock,
+                  struct io_comp_state *cs)
 {
        struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
        struct kiocb *kiocb = &req->rw.kiocb;
        struct iov_iter iter;
        size_t iov_count;
-       ssize_t io_size, ret;
+       ssize_t io_size, ret, ret2;
+       unsigned long nr_segs;
 
        ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock);
        if (ret < 0)
                return ret;
+       io_size = ret;
+       req->result = io_size;
 
        /* Ensure we clear previously set non-block flag */
        if (!force_nonblock)
                kiocb->ki_flags &= ~IOCB_NOWAIT;
 
-       req->result = 0;
-       io_size = ret;
-       if (req->flags & REQ_F_LINK_HEAD)
-               req->result = io_size;
-
-       /*
-        * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
-        * we know to async punt it even if it was opened O_NONBLOCK
-        */
+       /* If the file doesn't support async, just async punt */
        if (force_nonblock && !io_file_supports_async(req->file, READ))
                goto copy_iov;
 
        iov_count = iov_iter_count(&iter);
+       nr_segs = iter.nr_segs;
        ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
-       if (!ret) {
-               ssize_t ret2;
+       if (unlikely(ret))
+               goto out_free;
 
-               if (req->file->f_op->read_iter)
-                       ret2 = call_read_iter(req->file, kiocb, &iter);
-               else
-                       ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
+       ret2 = io_iter_do_read(req, &iter);
 
-               /* Catch -EAGAIN return for forced non-blocking submission */
-               if (!force_nonblock || ret2 != -EAGAIN) {
-                       kiocb_done(kiocb, ret2);
-               } else {
+       /* Catch -EAGAIN return for forced non-blocking submission */
+       if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
+               kiocb_done(kiocb, ret2, cs);
+       } else {
+               iter.count = iov_count;
+               iter.nr_segs = nr_segs;
 copy_iov:
-                       ret = io_setup_async_rw(req, io_size, iovec,
-                                               inline_vecs, &iter);
-                       if (ret)
+               ret = io_setup_async_rw(req, io_size, iovec, inline_vecs,
+                                       &iter);
+               if (ret)
+                       goto out_free;
+               /* it's copied and will be cleaned with ->io */
+               iovec = NULL;
+               /* if we can retry, do so with the callbacks armed */
+               if (io_rw_should_retry(req)) {
+                       ret2 = io_iter_do_read(req, &iter);
+                       if (ret2 == -EIOCBQUEUED) {
                                goto out_free;
-                       /* any defer here is final, must blocking retry */
-                       if (!(req->flags & REQ_F_NOWAIT) &&
-                           !file_can_poll(req->file))
-                               req->flags |= REQ_F_MUST_PUNT;
-                       return -EAGAIN;
+                       } else if (ret2 != -EAGAIN) {
+                               kiocb_done(kiocb, ret2, cs);
+                               goto out_free;
+                       }
                }
+               kiocb->ki_flags &= ~IOCB_WAITQ;
+               return -EAGAIN;
        }
 out_free:
-       if (!(req->flags & REQ_F_NEED_CLEANUP))
+       if (iovec)
                kfree(iovec);
        return ret;
 }
@@ -2689,8 +3119,6 @@ out_free:
 static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                         bool force_nonblock)
 {
-       struct io_async_ctx *io;
-       struct iov_iter iter;
        ssize_t ret;
 
        ret = io_prep_rw(req, sqe, force_nonblock);
@@ -2700,49 +3128,33 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
                return -EBADF;
 
-       req->fsize = rlimit(RLIMIT_FSIZE);
-
        /* either don't need iovec imported or already have it */
        if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
                return 0;
-
-       io = req->io;
-       io->rw.iov = io->rw.fast_iov;
-       req->io = NULL;
-       ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter, !force_nonblock);
-       req->io = io;
-       if (ret < 0)
-               return ret;
-
-       io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
-       return 0;
+       return io_rw_prep_async(req, WRITE, force_nonblock);
 }
 
-static int io_write(struct io_kiocb *req, bool force_nonblock)
+static int io_write(struct io_kiocb *req, bool force_nonblock,
+                   struct io_comp_state *cs)
 {
        struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
        struct kiocb *kiocb = &req->rw.kiocb;
        struct iov_iter iter;
        size_t iov_count;
-       ssize_t ret, io_size;
+       ssize_t ret, ret2, io_size;
+       unsigned long nr_segs;
 
        ret = io_import_iovec(WRITE, req, &iovec, &iter, !force_nonblock);
        if (ret < 0)
                return ret;
+       io_size = ret;
+       req->result = io_size;
 
        /* Ensure we clear previously set non-block flag */
        if (!force_nonblock)
                req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
 
-       req->result = 0;
-       io_size = ret;
-       if (req->flags & REQ_F_LINK_HEAD)
-               req->result = io_size;
-
-       /*
-        * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
-        * we know to async punt it even if it was opened O_NONBLOCK
-        */
+       /* If the file doesn't support async, just async punt */
        if (force_nonblock && !io_file_supports_async(req->file, WRITE))
                goto copy_iov;
 
@@ -2752,59 +3164,53 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
                goto copy_iov;
 
        iov_count = iov_iter_count(&iter);
+       nr_segs = iter.nr_segs;
        ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
-       if (!ret) {
-               ssize_t ret2;
-
-               /*
-                * Open-code file_start_write here to grab freeze protection,
-                * which will be released by another thread in
-                * io_complete_rw().  Fool lockdep by telling it the lock got
-                * released so that it doesn't complain about the held lock when
-                * we return to userspace.
-                */
-               if (req->flags & REQ_F_ISREG) {
-                       __sb_start_write(file_inode(req->file)->i_sb,
-                                               SB_FREEZE_WRITE, true);
-                       __sb_writers_release(file_inode(req->file)->i_sb,
-                                               SB_FREEZE_WRITE);
-               }
-               kiocb->ki_flags |= IOCB_WRITE;
-
-               if (!force_nonblock)
-                       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
+       if (unlikely(ret))
+               goto out_free;
 
-               if (req->file->f_op->write_iter)
-                       ret2 = call_write_iter(req->file, kiocb, &iter);
-               else
-                       ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
+       /*
+        * Open-code file_start_write here to grab freeze protection,
+        * which will be released by another thread in
+        * io_complete_rw().  Fool lockdep by telling it the lock got
+        * released so that it doesn't complain about the held lock when
+        * we return to userspace.
+        */
+       if (req->flags & REQ_F_ISREG) {
+               __sb_start_write(file_inode(req->file)->i_sb,
+                                       SB_FREEZE_WRITE, true);
+               __sb_writers_release(file_inode(req->file)->i_sb,
+                                       SB_FREEZE_WRITE);
+       }
+       kiocb->ki_flags |= IOCB_WRITE;
 
-               if (!force_nonblock)
-                       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+       if (req->file->f_op->write_iter)
+               ret2 = call_write_iter(req->file, kiocb, &iter);
+       else
+               ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
 
-               /*
-                * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
-                * retry them without IOCB_NOWAIT.
-                */
-               if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
-                       ret2 = -EAGAIN;
-               if (!force_nonblock || ret2 != -EAGAIN) {
-                       kiocb_done(kiocb, ret2);
-               } else {
+       /*
+        * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
+        * retry them without IOCB_NOWAIT.
+        */
+       if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
+               ret2 = -EAGAIN;
+       if (!force_nonblock || ret2 != -EAGAIN) {
+               kiocb_done(kiocb, ret2, cs);
+       } else {
+               iter.count = iov_count;
+               iter.nr_segs = nr_segs;
 copy_iov:
-                       ret = io_setup_async_rw(req, io_size, iovec,
-                                               inline_vecs, &iter);
-                       if (ret)
-                               goto out_free;
-                       /* any defer here is final, must blocking retry */
-                       if (!(req->flags & REQ_F_NOWAIT) &&
-                           !file_can_poll(req->file))
-                               req->flags |= REQ_F_MUST_PUNT;
-                       return -EAGAIN;
-               }
+               ret = io_setup_async_rw(req, io_size, iovec, inline_vecs,
+                                       &iter);
+               if (ret)
+                       goto out_free;
+               /* it's copied and will be cleaned with ->io */
+               iovec = NULL;
+               return -EAGAIN;
        }
 out_free:
-       if (!(req->flags & REQ_F_NEED_CLEANUP))
+       if (iovec)
                kfree(iovec);
        return ret;
 }
@@ -2870,10 +3276,9 @@ static int io_tee(struct io_kiocb *req, bool force_nonblock)
        io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
        req->flags &= ~REQ_F_NEED_CLEANUP;
 
-       io_cqring_add_event(req, ret);
        if (ret != sp->len)
                req_set_fail_links(req);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
@@ -2907,25 +3312,23 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
        io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
        req->flags &= ~REQ_F_NEED_CLEANUP;
 
-       io_cqring_add_event(req, ret);
        if (ret != sp->len)
                req_set_fail_links(req);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
 /*
  * IORING_OP_NOP just posts a completion event, nothing else.
  */
-static int io_nop(struct io_kiocb *req)
+static int io_nop(struct io_kiocb *req, struct io_comp_state *cs)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
        if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
                return -EINVAL;
 
-       io_cqring_add_event(req, 0);
-       io_put_req(req);
+       __io_req_complete(req, 0, 0, cs);
        return 0;
 }
 
@@ -2964,8 +3367,7 @@ static int io_fsync(struct io_kiocb *req, bool force_nonblock)
                                req->sync.flags & IORING_FSYNC_DATASYNC);
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
@@ -2980,7 +3382,6 @@ static int io_fallocate_prep(struct io_kiocb *req,
        req->sync.off = READ_ONCE(sqe->off);
        req->sync.len = READ_ONCE(sqe->addr);
        req->sync.mode = READ_ONCE(sqe->len);
-       req->fsize = rlimit(RLIMIT_FSIZE);
        return 0;
 }
 
@@ -2991,15 +3392,11 @@ static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
        /* fallocate always requiring blocking context */
        if (force_nonblock)
                return -EAGAIN;
-
-       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
        ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
                                req->sync.len);
-       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
@@ -3095,8 +3492,7 @@ err:
        req->flags &= ~REQ_F_NEED_CLEANUP;
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
@@ -3150,7 +3546,8 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
        return i;
 }
 
-static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
+static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock,
+                            struct io_comp_state *cs)
 {
        struct io_provide_buf *p = &req->pbuf;
        struct io_ring_ctx *ctx = req->ctx;
@@ -3169,8 +3566,7 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
        io_ring_submit_lock(ctx, !force_nonblock);
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 }
 
@@ -3228,7 +3624,8 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
        return i ? i : -ENOMEM;
 }
 
-static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
+static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock,
+                             struct io_comp_state *cs)
 {
        struct io_provide_buf *p = &req->pbuf;
        struct io_ring_ctx *ctx = req->ctx;
@@ -3257,8 +3654,7 @@ out:
        io_ring_submit_unlock(ctx, !force_nonblock);
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 }
 
@@ -3289,7 +3685,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
 #endif
 }
 
-static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
+static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock,
+                       struct io_comp_state *cs)
 {
 #if defined(CONFIG_EPOLL)
        struct io_epoll *ie = &req->epoll;
@@ -3301,8 +3698,7 @@ static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
 
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 #else
        return -EOPNOTSUPP;
@@ -3338,8 +3734,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock)
        ret = do_madvise(ma->addr, ma->len, ma->advice);
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 #else
        return -EOPNOTSUPP;
@@ -3378,8 +3773,7 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
        ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
@@ -3418,8 +3812,7 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock)
 
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
@@ -3450,7 +3843,8 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static int io_close(struct io_kiocb *req, bool force_nonblock)
+static int io_close(struct io_kiocb *req, bool force_nonblock,
+                   struct io_comp_state *cs)
 {
        struct io_close *close = &req->close;
        int ret;
@@ -3464,8 +3858,10 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
 
        /* if the file has a flush method, be safe and punt to async */
        if (close->put_file->f_op->flush && force_nonblock) {
+               /* was never set, but play safe */
+               req->flags &= ~REQ_F_NOWAIT;
                /* avoid grabbing files - we don't need the files */
-               req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT;
+               req->flags |= REQ_F_NO_FILE_TABLE;
                return -EAGAIN;
        }
 
@@ -3473,10 +3869,9 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
        ret = filp_close(close->put_file, req->work.files);
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
        fput(close->put_file);
        close->put_file = NULL;
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 }
 
@@ -3510,8 +3905,7 @@ static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
                                req->sync.flags);
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
@@ -3531,6 +3925,15 @@ static int io_setup_async_msg(struct io_kiocb *req,
        return -EAGAIN;
 }
 
+static int io_sendmsg_copy_hdr(struct io_kiocb *req,
+                              struct io_async_msghdr *iomsg)
+{
+       iomsg->iov = iomsg->fast_iov;
+       iomsg->msg.msg_name = &iomsg->addr;
+       return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg,
+                                  req->sr_msg.msg_flags, &iomsg->iov);
+}
+
 static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_sr_msg *sr = &req->sr_msg;
@@ -3541,7 +3944,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return -EINVAL;
 
        sr->msg_flags = READ_ONCE(sqe->msg_flags);
-       sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
 
 #ifdef CONFIG_COMPAT
@@ -3555,136 +3958,126 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (req->flags & REQ_F_NEED_CLEANUP)
                return 0;
 
-       io->msg.msg.msg_name = &io->msg.addr;
-       io->msg.iov = io->msg.fast_iov;
-       ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
-                                       &io->msg.iov);
+       ret = io_sendmsg_copy_hdr(req, &io->msg);
        if (!ret)
                req->flags |= REQ_F_NEED_CLEANUP;
        return ret;
 }
 
-static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
+static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
+                     struct io_comp_state *cs)
 {
-       struct io_async_msghdr *kmsg = NULL;
+       struct io_async_msghdr iomsg, *kmsg;
        struct socket *sock;
+       unsigned flags;
        int ret;
 
        sock = sock_from_file(req->file, &ret);
-       if (sock) {
-               struct io_async_ctx io;
-               unsigned flags;
-
-               if (req->io) {
-                       kmsg = &req->io->msg;
-                       kmsg->msg.msg_name = &req->io->msg.addr;
-                       /* if iov is set, it's allocated already */
-                       if (!kmsg->iov)
-                               kmsg->iov = kmsg->fast_iov;
-                       kmsg->msg.msg_iter.iov = kmsg->iov;
-               } else {
-                       struct io_sr_msg *sr = &req->sr_msg;
-
-                       kmsg = &io.msg;
-                       kmsg->msg.msg_name = &io.msg.addr;
+       if (unlikely(!sock))
+               return ret;
 
-                       io.msg.iov = io.msg.fast_iov;
-                       ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg,
-                                       sr->msg_flags, &io.msg.iov);
-                       if (ret)
-                               return ret;
-               }
+       if (req->io) {
+               kmsg = &req->io->msg;
+               kmsg->msg.msg_name = &req->io->msg.addr;
+               /* if iov is set, it's allocated already */
+               if (!kmsg->iov)
+                       kmsg->iov = kmsg->fast_iov;
+               kmsg->msg.msg_iter.iov = kmsg->iov;
+       } else {
+               ret = io_sendmsg_copy_hdr(req, &iomsg);
+               if (ret)
+                       return ret;
+               kmsg = &iomsg;
+       }
 
-               flags = req->sr_msg.msg_flags;
-               if (flags & MSG_DONTWAIT)
-                       req->flags |= REQ_F_NOWAIT;
-               else if (force_nonblock)
-                       flags |= MSG_DONTWAIT;
+       flags = req->sr_msg.msg_flags;
+       if (flags & MSG_DONTWAIT)
+               req->flags |= REQ_F_NOWAIT;
+       else if (force_nonblock)
+               flags |= MSG_DONTWAIT;
 
-               ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
-               if (force_nonblock && ret == -EAGAIN)
-                       return io_setup_async_msg(req, kmsg);
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-       }
+       ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+       if (force_nonblock && ret == -EAGAIN)
+               return io_setup_async_msg(req, kmsg);
+       if (ret == -ERESTARTSYS)
+               ret = -EINTR;
 
-       if (kmsg && kmsg->iov != kmsg->fast_iov)
+       if (kmsg->iov != kmsg->fast_iov)
                kfree(kmsg->iov);
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       io_cqring_add_event(req, ret);
        if (ret < 0)
                req_set_fail_links(req);
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 }
 
-static int io_send(struct io_kiocb *req, bool force_nonblock)
+static int io_send(struct io_kiocb *req, bool force_nonblock,
+                  struct io_comp_state *cs)
 {
+       struct io_sr_msg *sr = &req->sr_msg;
+       struct msghdr msg;
+       struct iovec iov;
        struct socket *sock;
+       unsigned flags;
        int ret;
 
        sock = sock_from_file(req->file, &ret);
-       if (sock) {
-               struct io_sr_msg *sr = &req->sr_msg;
-               struct msghdr msg;
-               struct iovec iov;
-               unsigned flags;
+       if (unlikely(!sock))
+               return ret;
 
-               ret = import_single_range(WRITE, sr->buf, sr->len, &iov,
-                                               &msg.msg_iter);
-               if (ret)
-                       return ret;
+       ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
+       if (unlikely(ret))
+               return ret;;
 
-               msg.msg_name = NULL;
-               msg.msg_control = NULL;
-               msg.msg_controllen = 0;
-               msg.msg_namelen = 0;
+       msg.msg_name = NULL;
+       msg.msg_control = NULL;
+       msg.msg_controllen = 0;
+       msg.msg_namelen = 0;
 
-               flags = req->sr_msg.msg_flags;
-               if (flags & MSG_DONTWAIT)
-                       req->flags |= REQ_F_NOWAIT;
-               else if (force_nonblock)
-                       flags |= MSG_DONTWAIT;
+       flags = req->sr_msg.msg_flags;
+       if (flags & MSG_DONTWAIT)
+               req->flags |= REQ_F_NOWAIT;
+       else if (force_nonblock)
+               flags |= MSG_DONTWAIT;
 
-               msg.msg_flags = flags;
-               ret = sock_sendmsg(sock, &msg);
-               if (force_nonblock && ret == -EAGAIN)
-                       return -EAGAIN;
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-       }
+       msg.msg_flags = flags;
+       ret = sock_sendmsg(sock, &msg);
+       if (force_nonblock && ret == -EAGAIN)
+               return -EAGAIN;
+       if (ret == -ERESTARTSYS)
+               ret = -EINTR;
 
-       io_cqring_add_event(req, ret);
        if (ret < 0)
                req_set_fail_links(req);
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 }
 
-static int __io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io)
+static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
+                                struct io_async_msghdr *iomsg)
 {
        struct io_sr_msg *sr = &req->sr_msg;
        struct iovec __user *uiov;
        size_t iov_len;
        int ret;
 
-       ret = __copy_msghdr_from_user(&io->msg.msg, sr->msg, &io->msg.uaddr,
-                                       &uiov, &iov_len);
+       ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
+                                       &iomsg->uaddr, &uiov, &iov_len);
        if (ret)
                return ret;
 
        if (req->flags & REQ_F_BUFFER_SELECT) {
                if (iov_len > 1)
                        return -EINVAL;
-               if (copy_from_user(io->msg.iov, uiov, sizeof(*uiov)))
+               if (copy_from_user(iomsg->iov, uiov, sizeof(*uiov)))
                        return -EFAULT;
-               sr->len = io->msg.iov[0].iov_len;
-               iov_iter_init(&io->msg.msg.msg_iter, READ, io->msg.iov, 1,
+               sr->len = iomsg->iov[0].iov_len;
+               iov_iter_init(&iomsg->msg.msg_iter, READ, iomsg->iov, 1,
                                sr->len);
-               io->msg.iov = NULL;
+               iomsg->iov = NULL;
        } else {
                ret = import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
-                                       &io->msg.iov, &io->msg.msg.msg_iter);
+                                       &iomsg->iov, &iomsg->msg.msg_iter);
                if (ret > 0)
                        ret = 0;
        }
@@ -3694,7 +4087,7 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io)
 
 #ifdef CONFIG_COMPAT
 static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
-                                       struct io_async_ctx *io)
+                                       struct io_async_msghdr *iomsg)
 {
        struct compat_msghdr __user *msg_compat;
        struct io_sr_msg *sr = &req->sr_msg;
@@ -3703,8 +4096,8 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
        compat_size_t len;
        int ret;
 
-       msg_compat = (struct compat_msghdr __user *) sr->msg;
-       ret = __get_compat_msghdr(&io->msg.msg, msg_compat, &io->msg.uaddr,
+       msg_compat = (struct compat_msghdr __user *) sr->umsg;
+       ret = __get_compat_msghdr(&iomsg->msg, msg_compat, &iomsg->uaddr,
                                        &ptr, &len);
        if (ret)
                return ret;
@@ -3721,12 +4114,12 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
                        return -EFAULT;
                if (clen < 0)
                        return -EINVAL;
-               sr->len = io->msg.iov[0].iov_len;
-               io->msg.iov = NULL;
+               sr->len = iomsg->iov[0].iov_len;
+               iomsg->iov = NULL;
        } else {
                ret = compat_import_iovec(READ, uiov, len, UIO_FASTIOV,
-                                               &io->msg.iov,
-                                               &io->msg.msg.msg_iter);
+                                               &iomsg->iov,
+                                               &iomsg->msg.msg_iter);
                if (ret < 0)
                        return ret;
        }
@@ -3735,40 +4128,40 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
 }
 #endif
 
-static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_recvmsg_copy_hdr(struct io_kiocb *req,
+                              struct io_async_msghdr *iomsg)
 {
-       io->msg.msg.msg_name = &io->msg.addr;
-       io->msg.iov = io->msg.fast_iov;
+       iomsg->msg.msg_name = &iomsg->addr;
+       iomsg->iov = iomsg->fast_iov;
 
 #ifdef CONFIG_COMPAT
        if (req->ctx->compat)
-               return __io_compat_recvmsg_copy_hdr(req, io);
+               return __io_compat_recvmsg_copy_hdr(req, iomsg);
 #endif
 
-       return __io_recvmsg_copy_hdr(req, io);
+       return __io_recvmsg_copy_hdr(req, iomsg);
 }
 
 static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
-                                              int *cflags, bool needs_lock)
+                                              bool needs_lock)
 {
        struct io_sr_msg *sr = &req->sr_msg;
        struct io_buffer *kbuf;
 
-       if (!(req->flags & REQ_F_BUFFER_SELECT))
-               return NULL;
-
        kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock);
        if (IS_ERR(kbuf))
                return kbuf;
 
        sr->kbuf = kbuf;
        req->flags |= REQ_F_BUFFER_SELECTED;
-
-       *cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
-       *cflags |= IORING_CQE_F_BUFFER;
        return kbuf;
 }
 
+static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req)
+{
+       return io_put_kbuf(req, req->sr_msg.kbuf);
+}
+
 static int io_recvmsg_prep(struct io_kiocb *req,
                           const struct io_uring_sqe *sqe)
 {
@@ -3780,7 +4173,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
                return -EINVAL;
 
        sr->msg_flags = READ_ONCE(sqe->msg_flags);
-       sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
        sr->bgid = READ_ONCE(sqe->buf_group);
 
@@ -3795,133 +4188,123 @@ static int io_recvmsg_prep(struct io_kiocb *req,
        if (req->flags & REQ_F_NEED_CLEANUP)
                return 0;
 
-       ret = io_recvmsg_copy_hdr(req, io);
+       ret = io_recvmsg_copy_hdr(req, &io->msg);
        if (!ret)
                req->flags |= REQ_F_NEED_CLEANUP;
        return ret;
 }
 
-static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
+static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
+                     struct io_comp_state *cs)
 {
-       struct io_async_msghdr *kmsg = NULL;
+       struct io_async_msghdr iomsg, *kmsg;
        struct socket *sock;
+       struct io_buffer *kbuf;
+       unsigned flags;
        int ret, cflags = 0;
 
        sock = sock_from_file(req->file, &ret);
-       if (sock) {
-               struct io_buffer *kbuf;
-               struct io_async_ctx io;
-               unsigned flags;
-
-               if (req->io) {
-                       kmsg = &req->io->msg;
-                       kmsg->msg.msg_name = &req->io->msg.addr;
-                       /* if iov is set, it's allocated already */
-                       if (!kmsg->iov)
-                               kmsg->iov = kmsg->fast_iov;
-                       kmsg->msg.msg_iter.iov = kmsg->iov;
-               } else {
-                       kmsg = &io.msg;
-                       kmsg->msg.msg_name = &io.msg.addr;
+       if (unlikely(!sock))
+               return ret;
 
-                       ret = io_recvmsg_copy_hdr(req, &io);
-                       if (ret)
-                               return ret;
-               }
+       if (req->io) {
+               kmsg = &req->io->msg;
+               kmsg->msg.msg_name = &req->io->msg.addr;
+               /* if iov is set, it's allocated already */
+               if (!kmsg->iov)
+                       kmsg->iov = kmsg->fast_iov;
+               kmsg->msg.msg_iter.iov = kmsg->iov;
+       } else {
+               ret = io_recvmsg_copy_hdr(req, &iomsg);
+               if (ret)
+                       return ret;
+               kmsg = &iomsg;
+       }
 
-               kbuf = io_recv_buffer_select(req, &cflags, !force_nonblock);
-               if (IS_ERR(kbuf)) {
+       if (req->flags & REQ_F_BUFFER_SELECT) {
+               kbuf = io_recv_buffer_select(req, !force_nonblock);
+               if (IS_ERR(kbuf))
                        return PTR_ERR(kbuf);
-               } else if (kbuf) {
-                       kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
-                       iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->iov,
-                                       1, req->sr_msg.len);
-               }
+               kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
+               iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->iov,
+                               1, req->sr_msg.len);
+       }
 
-               flags = req->sr_msg.msg_flags;
-               if (flags & MSG_DONTWAIT)
-                       req->flags |= REQ_F_NOWAIT;
-               else if (force_nonblock)
-                       flags |= MSG_DONTWAIT;
+       flags = req->sr_msg.msg_flags;
+       if (flags & MSG_DONTWAIT)
+               req->flags |= REQ_F_NOWAIT;
+       else if (force_nonblock)
+               flags |= MSG_DONTWAIT;
 
-               ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
-                                               kmsg->uaddr, flags);
-               if (force_nonblock && ret == -EAGAIN) {
-                       ret = io_setup_async_msg(req, kmsg);
-                       if (ret != -EAGAIN)
-                               kfree(kbuf);
-                       return ret;
-               }
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-               if (kbuf)
-                       kfree(kbuf);
-       }
+       ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
+                                       kmsg->uaddr, flags);
+       if (force_nonblock && ret == -EAGAIN)
+               return io_setup_async_msg(req, kmsg);
+       if (ret == -ERESTARTSYS)
+               ret = -EINTR;
 
-       if (kmsg && kmsg->iov != kmsg->fast_iov)
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               cflags = io_put_recv_kbuf(req);
+       if (kmsg->iov != kmsg->fast_iov)
                kfree(kmsg->iov);
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       __io_cqring_add_event(req, ret, cflags);
        if (ret < 0)
                req_set_fail_links(req);
-       io_put_req(req);
+       __io_req_complete(req, ret, cflags, cs);
        return 0;
 }
 
-static int io_recv(struct io_kiocb *req, bool force_nonblock)
+static int io_recv(struct io_kiocb *req, bool force_nonblock,
+                  struct io_comp_state *cs)
 {
-       struct io_buffer *kbuf = NULL;
+       struct io_buffer *kbuf;
+       struct io_sr_msg *sr = &req->sr_msg;
+       struct msghdr msg;
+       void __user *buf = sr->buf;
        struct socket *sock;
+       struct iovec iov;
+       unsigned flags;
        int ret, cflags = 0;
 
        sock = sock_from_file(req->file, &ret);
-       if (sock) {
-               struct io_sr_msg *sr = &req->sr_msg;
-               void __user *buf = sr->buf;
-               struct msghdr msg;
-               struct iovec iov;
-               unsigned flags;
+       if (unlikely(!sock))
+               return ret;
 
-               kbuf = io_recv_buffer_select(req, &cflags, !force_nonblock);
+       if (req->flags & REQ_F_BUFFER_SELECT) {
+               kbuf = io_recv_buffer_select(req, !force_nonblock);
                if (IS_ERR(kbuf))
                        return PTR_ERR(kbuf);
-               else if (kbuf)
-                       buf = u64_to_user_ptr(kbuf->addr);
+               buf = u64_to_user_ptr(kbuf->addr);
+       }
 
-               ret = import_single_range(READ, buf, sr->len, &iov,
-                                               &msg.msg_iter);
-               if (ret) {
-                       kfree(kbuf);
-                       return ret;
-               }
+       ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
+       if (unlikely(ret))
+               goto out_free;
 
-               req->flags |= REQ_F_NEED_CLEANUP;
-               msg.msg_name = NULL;
-               msg.msg_control = NULL;
-               msg.msg_controllen = 0;
-               msg.msg_namelen = 0;
-               msg.msg_iocb = NULL;
-               msg.msg_flags = 0;
-
-               flags = req->sr_msg.msg_flags;
-               if (flags & MSG_DONTWAIT)
-                       req->flags |= REQ_F_NOWAIT;
-               else if (force_nonblock)
-                       flags |= MSG_DONTWAIT;
-
-               ret = sock_recvmsg(sock, &msg, flags);
-               if (force_nonblock && ret == -EAGAIN)
-                       return -EAGAIN;
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-       }
+       msg.msg_name = NULL;
+       msg.msg_control = NULL;
+       msg.msg_controllen = 0;
+       msg.msg_namelen = 0;
+       msg.msg_iocb = NULL;
+       msg.msg_flags = 0;
 
-       kfree(kbuf);
-       req->flags &= ~REQ_F_NEED_CLEANUP;
-       __io_cqring_add_event(req, ret, cflags);
+       flags = req->sr_msg.msg_flags;
+       if (flags & MSG_DONTWAIT)
+               req->flags |= REQ_F_NOWAIT;
+       else if (force_nonblock)
+               flags |= MSG_DONTWAIT;
+
+       ret = sock_recvmsg(sock, &msg, flags);
+       if (force_nonblock && ret == -EAGAIN)
+               return -EAGAIN;
+       if (ret == -ERESTARTSYS)
+               ret = -EINTR;
+out_free:
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               cflags = io_put_recv_kbuf(req);
        if (ret < 0)
                req_set_fail_links(req);
-       io_put_req(req);
+       __io_req_complete(req, ret, cflags, cs);
        return 0;
 }
 
@@ -3941,7 +4324,8 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static int io_accept(struct io_kiocb *req, bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock,
+                    struct io_comp_state *cs)
 {
        struct io_accept *accept = &req->accept;
        unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -3960,8 +4344,7 @@ static int io_accept(struct io_kiocb *req, bool force_nonblock)
                        ret = -EINTR;
                req_set_fail_links(req);
        }
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 }
 
@@ -3985,7 +4368,8 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                                        &io->connect.address);
 }
 
-static int io_connect(struct io_kiocb *req, bool force_nonblock)
+static int io_connect(struct io_kiocb *req, bool force_nonblock,
+                     struct io_comp_state *cs)
 {
        struct io_async_ctx __io, *io;
        unsigned file_flags;
@@ -4021,8 +4405,7 @@ static int io_connect(struct io_kiocb *req, bool force_nonblock)
 out:
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 }
 #else /* !CONFIG_NET */
@@ -4031,12 +4414,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return -EOPNOTSUPP;
 }
 
-static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
+static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
+                     struct io_comp_state *cs)
 {
        return -EOPNOTSUPP;
 }
 
-static int io_send(struct io_kiocb *req, bool force_nonblock)
+static int io_send(struct io_kiocb *req, bool force_nonblock,
+                  struct io_comp_state *cs)
 {
        return -EOPNOTSUPP;
 }
@@ -4047,12 +4432,14 @@ static int io_recvmsg_prep(struct io_kiocb *req,
        return -EOPNOTSUPP;
 }
 
-static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
+static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
+                     struct io_comp_state *cs)
 {
        return -EOPNOTSUPP;
 }
 
-static int io_recv(struct io_kiocb *req, bool force_nonblock)
+static int io_recv(struct io_kiocb *req, bool force_nonblock,
+                  struct io_comp_state *cs)
 {
        return -EOPNOTSUPP;
 }
@@ -4062,7 +4449,8 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return -EOPNOTSUPP;
 }
 
-static int io_accept(struct io_kiocb *req, bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock,
+                    struct io_comp_state *cs)
 {
        return -EOPNOTSUPP;
 }
@@ -4072,7 +4460,8 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return -EOPNOTSUPP;
 }
 
-static int io_connect(struct io_kiocb *req, bool force_nonblock)
+static int io_connect(struct io_kiocb *req, bool force_nonblock,
+                     struct io_comp_state *cs)
 {
        return -EOPNOTSUPP;
 }
@@ -4084,33 +4473,9 @@ struct io_poll_table {
        int error;
 };
 
-static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
-{
-       struct task_struct *tsk = req->task;
-       struct io_ring_ctx *ctx = req->ctx;
-       int ret, notify = TWA_RESUME;
-
-       /*
-        * SQPOLL kernel thread doesn't need notification, just a wakeup.
-        * If we're not using an eventfd, then TWA_RESUME is always fine,
-        * as we won't have dependencies between request completions for
-        * other kernel wait conditions.
-        */
-       if (ctx->flags & IORING_SETUP_SQPOLL)
-               notify = 0;
-       else if (ctx->cq_ev_fd)
-               notify = TWA_SIGNAL;
-
-       ret = task_work_add(tsk, cb, notify);
-       if (!ret)
-               wake_up_process(tsk);
-       return ret;
-}
-
 static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
                           __poll_t mask, task_work_func_t func)
 {
-       struct task_struct *tsk;
        int ret;
 
        /* for instances that support it check for an event match first: */
@@ -4121,7 +4486,6 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 
        list_del_init(&poll->wait.entry);
 
-       tsk = req->task;
        req->result = mask;
        init_task_work(&req->task_work, func);
        /*
@@ -4132,6 +4496,8 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
         */
        ret = io_req_task_work_add(req, &req->task_work);
        if (unlikely(ret)) {
+               struct task_struct *tsk;
+
                WRITE_ONCE(poll->canceled, true);
                tsk = io_wq_get_task(req->ctx->io_wq);
                task_work_add(tsk, &req->task_work, 0);
@@ -4200,7 +4566,7 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
        hash_del(&req->hash_node);
        io_poll_complete(req, req->result, 0);
        req->flags |= REQ_F_COMP_LOCKED;
-       io_put_req_find_next(req, nxt);
+       *nxt = io_put_req_find_next(req);
        spin_unlock_irq(&ctx->completion_lock);
 
        io_cqring_ev_posted(ctx);
@@ -4212,13 +4578,8 @@ static void io_poll_task_func(struct callback_head *cb)
        struct io_kiocb *nxt = NULL;
 
        io_poll_task_handler(req, &nxt);
-       if (nxt) {
-               struct io_ring_ctx *ctx = nxt->ctx;
-
-               mutex_lock(&ctx->uring_lock);
-               __io_queue_sqe(nxt, NULL);
-               mutex_unlock(&ctx->uring_lock);
-       }
+       if (nxt)
+               __io_req_task_submit(nxt);
 }
 
 static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
@@ -4288,7 +4649,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 
        pt->error = 0;
        poll->head = head;
-       add_wait_queue(head, &poll->wait);
+
+       if (poll->events & EPOLLEXCLUSIVE)
+               add_wait_queue_exclusive(head, &poll->wait);
+       else
+               add_wait_queue(head, &poll->wait);
 }
 
 static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
@@ -4300,34 +4665,11 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
        __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
 }
 
-static void io_sq_thread_drop_mm(struct io_ring_ctx *ctx)
-{
-       struct mm_struct *mm = current->mm;
-
-       if (mm) {
-               kthread_unuse_mm(mm);
-               mmput(mm);
-       }
-}
-
-static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
-                                  struct io_kiocb *req)
-{
-       if (io_op_defs[req->opcode].needs_mm && !current->mm) {
-               if (unlikely(!mmget_not_zero(ctx->sqo_mm)))
-                       return -EFAULT;
-               kthread_use_mm(ctx->sqo_mm);
-       }
-
-       return 0;
-}
-
 static void io_async_task_func(struct callback_head *cb)
 {
        struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
        struct async_poll *apoll = req->apoll;
        struct io_ring_ctx *ctx = req->ctx;
-       bool canceled = false;
 
        trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
 
@@ -4337,40 +4679,19 @@ static void io_async_task_func(struct callback_head *cb)
        }
 
        /* If req is still hashed, it cannot have been canceled. Don't check. */
-       if (hash_hashed(&req->hash_node)) {
+       if (hash_hashed(&req->hash_node))
                hash_del(&req->hash_node);
-       } else {
-               canceled = READ_ONCE(apoll->poll.canceled);
-               if (canceled) {
-                       io_cqring_fill_event(req, -ECANCELED);
-                       io_commit_cqring(ctx);
-               }
-       }
 
        io_poll_remove_double(req, apoll->double_poll);
        spin_unlock_irq(&ctx->completion_lock);
 
-       /* restore ->work in case we need to retry again */
-       if (req->flags & REQ_F_WORK_INITIALIZED)
-               memcpy(&req->work, &apoll->work, sizeof(req->work));
+       if (!READ_ONCE(apoll->poll.canceled))
+               __io_req_task_submit(req);
+       else
+               __io_req_task_cancel(req, -ECANCELED);
+
        kfree(apoll->double_poll);
        kfree(apoll);
-
-       if (!canceled) {
-               __set_current_state(TASK_RUNNING);
-               if (io_sq_thread_acquire_mm(ctx, req)) {
-                       io_cqring_add_event(req, -EFAULT);
-                       goto end_req;
-               }
-               mutex_lock(&ctx->uring_lock);
-               __io_queue_sqe(req, NULL);
-               mutex_unlock(&ctx->uring_lock);
-       } else {
-               io_cqring_ev_posted(ctx);
-end_req:
-               req_set_fail_links(req);
-               io_double_put_req(req);
-       }
 }
 
 static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -4403,8 +4724,8 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
        struct io_ring_ctx *ctx = req->ctx;
        bool cancel = false;
 
-       poll->file = req->file;
        io_init_poll_iocb(poll, mask, wake_func);
+       poll->file = req->file;
        poll->wait.private = req;
 
        ipt->pt._key = mask;
@@ -4444,7 +4765,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 
        if (!req->file || !file_can_poll(req->file))
                return false;
-       if (req->flags & (REQ_F_MUST_PUNT | REQ_F_POLLED))
+       if (req->flags & REQ_F_POLLED)
                return false;
        if (!def->pollin && !def->pollout)
                return false;
@@ -4455,9 +4776,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
        apoll->double_poll = NULL;
 
        req->flags |= REQ_F_POLLED;
-       if (req->flags & REQ_F_WORK_INITIALIZED)
-               memcpy(&apoll->work, &req->work, sizeof(req->work));
-
        io_get_req_task(req);
        req->apoll = apoll;
        INIT_HLIST_NODE(&req->hash_node);
@@ -4476,8 +4794,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
        if (ret) {
                io_poll_remove_double(req, apoll->double_poll);
                spin_unlock_irq(&ctx->completion_lock);
-               if (req->flags & REQ_F_WORK_INITIALIZED)
-                       memcpy(&req->work, &apoll->work, sizeof(req->work));
                kfree(apoll->double_poll);
                kfree(apoll);
                return false;
@@ -4520,14 +4836,6 @@ static bool io_poll_remove_one(struct io_kiocb *req)
                do_complete = __io_poll_remove_one(req, &apoll->poll);
                if (do_complete) {
                        io_put_req(req);
-                       /*
-                        * restore ->work because we will call
-                        * io_req_work_drop_env below when dropping the
-                        * final reference.
-                        */
-                       if (req->flags & REQ_F_WORK_INITIALIZED)
-                               memcpy(&req->work, &apoll->work,
-                                      sizeof(req->work));
                        kfree(apoll->double_poll);
                        kfree(apoll);
                }
@@ -4608,10 +4916,9 @@ static int io_poll_remove(struct io_kiocb *req)
        ret = io_poll_cancel(ctx, addr);
        spin_unlock_irq(&ctx->completion_lock);
 
-       io_cqring_add_event(req, ret);
        if (ret < 0)
                req_set_fail_links(req);
-       io_put_req(req);
+       io_req_complete(req, ret);
        return 0;
 }
 
@@ -4635,7 +4942,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_poll_iocb *poll = &req->poll;
-       u16 events;
+       u32 events;
 
        if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                return -EINVAL;
@@ -4644,8 +4951,12 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
        if (!poll->file)
                return -EBADF;
 
-       events = READ_ONCE(sqe->poll_events);
-       poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+       events = READ_ONCE(sqe->poll32_events);
+#ifdef __BIG_ENDIAN
+       events = swahw32(events);
+#endif
+       poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP |
+                      (events & EPOLLEXCLUSIVE);
 
        io_get_req_task(req);
        return 0;
@@ -4659,7 +4970,6 @@ static int io_poll_add(struct io_kiocb *req)
        __poll_t mask;
 
        INIT_HLIST_NODE(&req->hash_node);
-       INIT_LIST_HEAD(&req->list);
        ipt.pt._qproc = io_poll_queue_proc;
 
        mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
@@ -4686,15 +4996,16 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
        struct io_ring_ctx *ctx = req->ctx;
        unsigned long flags;
 
-       atomic_inc(&ctx->cq_timeouts);
-
        spin_lock_irqsave(&ctx->completion_lock, flags);
+       atomic_set(&req->ctx->cq_timeouts,
+               atomic_read(&req->ctx->cq_timeouts) + 1);
+
        /*
         * We could be racing with timeout deletion. If the list is empty,
         * then timeout lookup already found it and will be handling it.
         */
-       if (!list_empty(&req->list))
-               list_del_init(&req->list);
+       if (!list_empty(&req->timeout.list))
+               list_del_init(&req->timeout.list);
 
        io_cqring_fill_event(req, -ETIME);
        io_commit_cqring(ctx);
@@ -4711,9 +5022,9 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
        struct io_kiocb *req;
        int ret = -ENOENT;
 
-       list_for_each_entry(req, &ctx->timeout_list, list) {
+       list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
                if (user_data == req->user_data) {
-                       list_del_init(&req->list);
+                       list_del_init(&req->timeout.list);
                        ret = 0;
                        break;
                }
@@ -4795,7 +5106,6 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 
        data = &req->io->timeout;
        data->req = req;
-       req->flags |= REQ_F_TIMEOUT;
 
        if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
                return -EFAULT;
@@ -4823,8 +5133,7 @@ static int io_timeout(struct io_kiocb *req)
         * timeout event to be satisfied. If it isn't set, then this is
         * a pure timeout request, sequence isn't used.
         */
-       if (!off) {
-               req->flags |= REQ_F_TIMEOUT_NOSEQ;
+       if (io_is_timeout_noseq(req)) {
                entry = ctx->timeout_list.prev;
                goto add;
        }
@@ -4837,16 +5146,17 @@ static int io_timeout(struct io_kiocb *req)
         * the one we need first.
         */
        list_for_each_prev(entry, &ctx->timeout_list) {
-               struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+               struct io_kiocb *nxt = list_entry(entry, struct io_kiocb,
+                                                 timeout.list);
 
-               if (nxt->flags & REQ_F_TIMEOUT_NOSEQ)
+               if (io_is_timeout_noseq(nxt))
                        continue;
                /* nxt.seq is behind @tail, otherwise would've been completed */
                if (off >= nxt->timeout.target_seq - tail)
                        break;
        }
 add:
-       list_add(&req->list, entry);
+       list_add(&req->timeout.list, entry);
        data->timer.function = io_timeout_fn;
        hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
        spin_unlock_irq(&ctx->completion_lock);
@@ -4950,7 +5260,8 @@ static int io_files_update_prep(struct io_kiocb *req,
        return 0;
 }
 
-static int io_files_update(struct io_kiocb *req, bool force_nonblock)
+static int io_files_update(struct io_kiocb *req, bool force_nonblock,
+                          struct io_comp_state *cs)
 {
        struct io_ring_ctx *ctx = req->ctx;
        struct io_uring_files_update up;
@@ -4968,8 +5279,7 @@ static int io_files_update(struct io_kiocb *req, bool force_nonblock)
 
        if (ret < 0)
                req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
+       __io_req_complete(req, ret, 0, cs);
        return 0;
 }
 
@@ -4981,15 +5291,11 @@ static int io_req_defer_prep(struct io_kiocb *req,
        if (!sqe)
                return 0;
 
-       io_req_init_async(req);
-
-       if (io_op_defs[req->opcode].file_table) {
-               ret = io_grab_files(req);
-               if (unlikely(ret))
-                       return ret;
-       }
-
-       io_req_work_grab_env(req, &io_op_defs[req->opcode]);
+       if (io_alloc_async_ctx(req))
+               return -EAGAIN;
+       ret = io_prep_work_files(req);
+       if (unlikely(ret))
+               return ret;
 
        switch (req->opcode) {
        case IORING_OP_NOP:
@@ -5091,86 +5397,117 @@ static int io_req_defer_prep(struct io_kiocb *req,
        return ret;
 }
 
+static u32 io_get_sequence(struct io_kiocb *req)
+{
+       struct io_kiocb *pos;
+       struct io_ring_ctx *ctx = req->ctx;
+       u32 total_submitted, nr_reqs = 1;
+
+       if (req->flags & REQ_F_LINK_HEAD)
+               list_for_each_entry(pos, &req->link_list, link_list)
+                       nr_reqs++;
+
+       total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped;
+       return total_submitted - nr_reqs;
+}
+
 static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_ring_ctx *ctx = req->ctx;
+       struct io_defer_entry *de;
        int ret;
+       u32 seq;
 
        /* Still need defer if there is pending req in defer list. */
-       if (!req_need_defer(req) && list_empty_careful(&ctx->defer_list))
+       if (likely(list_empty_careful(&ctx->defer_list) &&
+               !(req->flags & REQ_F_IO_DRAIN)))
+               return 0;
+
+       seq = io_get_sequence(req);
+       /* Still a chance to pass the sequence check */
+       if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
                return 0;
 
        if (!req->io) {
-               if (io_alloc_async_ctx(req))
-                       return -EAGAIN;
                ret = io_req_defer_prep(req, sqe);
-               if (ret < 0)
+               if (ret)
                        return ret;
        }
+       io_prep_async_link(req);
+       de = kmalloc(sizeof(*de), GFP_KERNEL);
+       if (!de)
+               return -ENOMEM;
 
        spin_lock_irq(&ctx->completion_lock);
-       if (!req_need_defer(req) && list_empty(&ctx->defer_list)) {
+       if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
                spin_unlock_irq(&ctx->completion_lock);
-               return 0;
+               kfree(de);
+               io_queue_async_work(req);
+               return -EIOCBQUEUED;
        }
 
        trace_io_uring_defer(ctx, req, req->user_data);
-       list_add_tail(&req->list, &ctx->defer_list);
+       de->req = req;
+       de->seq = seq;
+       list_add_tail(&de->list, &ctx->defer_list);
        spin_unlock_irq(&ctx->completion_lock);
        return -EIOCBQUEUED;
 }
 
-static void io_cleanup_req(struct io_kiocb *req)
+static void __io_clean_op(struct io_kiocb *req)
 {
        struct io_async_ctx *io = req->io;
 
-       switch (req->opcode) {
-       case IORING_OP_READV:
-       case IORING_OP_READ_FIXED:
-       case IORING_OP_READ:
-               if (req->flags & REQ_F_BUFFER_SELECTED)
+       if (req->flags & REQ_F_BUFFER_SELECTED) {
+               switch (req->opcode) {
+               case IORING_OP_READV:
+               case IORING_OP_READ_FIXED:
+               case IORING_OP_READ:
                        kfree((void *)(unsigned long)req->rw.addr);
-               /* fallthrough */
-       case IORING_OP_WRITEV:
-       case IORING_OP_WRITE_FIXED:
-       case IORING_OP_WRITE:
-               if (io->rw.iov != io->rw.fast_iov)
-                       kfree(io->rw.iov);
-               break;
-       case IORING_OP_RECVMSG:
-               if (req->flags & REQ_F_BUFFER_SELECTED)
-                       kfree(req->sr_msg.kbuf);
-               /* fallthrough */
-       case IORING_OP_SENDMSG:
-               if (io->msg.iov != io->msg.fast_iov)
-                       kfree(io->msg.iov);
-               break;
-       case IORING_OP_RECV:
-               if (req->flags & REQ_F_BUFFER_SELECTED)
+                       break;
+               case IORING_OP_RECVMSG:
+               case IORING_OP_RECV:
                        kfree(req->sr_msg.kbuf);
-               break;
-       case IORING_OP_OPENAT:
-       case IORING_OP_OPENAT2:
-               break;
-       case IORING_OP_SPLICE:
-       case IORING_OP_TEE:
-               io_put_file(req, req->splice.file_in,
-                           (req->splice.flags & SPLICE_F_FD_IN_FIXED));
-               break;
+                       break;
+               }
+               req->flags &= ~REQ_F_BUFFER_SELECTED;
+       }
+
+       if (req->flags & REQ_F_NEED_CLEANUP) {
+               switch (req->opcode) {
+               case IORING_OP_READV:
+               case IORING_OP_READ_FIXED:
+               case IORING_OP_READ:
+               case IORING_OP_WRITEV:
+               case IORING_OP_WRITE_FIXED:
+               case IORING_OP_WRITE:
+                       if (io->rw.iov != io->rw.fast_iov)
+                               kfree(io->rw.iov);
+                       break;
+               case IORING_OP_RECVMSG:
+               case IORING_OP_SENDMSG:
+                       if (io->msg.iov != io->msg.fast_iov)
+                               kfree(io->msg.iov);
+                       break;
+               case IORING_OP_SPLICE:
+               case IORING_OP_TEE:
+                       io_put_file(req, req->splice.file_in,
+                                   (req->splice.flags & SPLICE_F_FD_IN_FIXED));
+                       break;
+               }
+               req->flags &= ~REQ_F_NEED_CLEANUP;
        }
-
-       req->flags &= ~REQ_F_NEED_CLEANUP;
 }
 
 static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-                       bool force_nonblock)
+                       bool force_nonblock, struct io_comp_state *cs)
 {
        struct io_ring_ctx *ctx = req->ctx;
        int ret;
 
        switch (req->opcode) {
        case IORING_OP_NOP:
-               ret = io_nop(req);
+               ret = io_nop(req, cs);
                break;
        case IORING_OP_READV:
        case IORING_OP_READ_FIXED:
@@ -5180,7 +5517,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret < 0)
                                break;
                }
-               ret = io_read(req, force_nonblock);
+               ret = io_read(req, force_nonblock, cs);
                break;
        case IORING_OP_WRITEV:
        case IORING_OP_WRITE_FIXED:
@@ -5190,7 +5527,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret < 0)
                                break;
                }
-               ret = io_write(req, force_nonblock);
+               ret = io_write(req, force_nonblock, cs);
                break;
        case IORING_OP_FSYNC:
                if (sqe) {
@@ -5232,9 +5569,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                                break;
                }
                if (req->opcode == IORING_OP_SENDMSG)
-                       ret = io_sendmsg(req, force_nonblock);
+                       ret = io_sendmsg(req, force_nonblock, cs);
                else
-                       ret = io_send(req, force_nonblock);
+                       ret = io_send(req, force_nonblock, cs);
                break;
        case IORING_OP_RECVMSG:
        case IORING_OP_RECV:
@@ -5244,9 +5581,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                                break;
                }
                if (req->opcode == IORING_OP_RECVMSG)
-                       ret = io_recvmsg(req, force_nonblock);
+                       ret = io_recvmsg(req, force_nonblock, cs);
                else
-                       ret = io_recv(req, force_nonblock);
+                       ret = io_recv(req, force_nonblock, cs);
                break;
        case IORING_OP_TIMEOUT:
                if (sqe) {
@@ -5270,7 +5607,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret)
                                break;
                }
-               ret = io_accept(req, force_nonblock);
+               ret = io_accept(req, force_nonblock, cs);
                break;
        case IORING_OP_CONNECT:
                if (sqe) {
@@ -5278,7 +5615,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret)
                                break;
                }
-               ret = io_connect(req, force_nonblock);
+               ret = io_connect(req, force_nonblock, cs);
                break;
        case IORING_OP_ASYNC_CANCEL:
                if (sqe) {
@@ -5310,7 +5647,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret)
                                break;
                }
-               ret = io_close(req, force_nonblock);
+               ret = io_close(req, force_nonblock, cs);
                break;
        case IORING_OP_FILES_UPDATE:
                if (sqe) {
@@ -5318,7 +5655,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret)
                                break;
                }
-               ret = io_files_update(req, force_nonblock);
+               ret = io_files_update(req, force_nonblock, cs);
                break;
        case IORING_OP_STATX:
                if (sqe) {
@@ -5358,7 +5695,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret)
                                break;
                }
-               ret = io_epoll_ctl(req, force_nonblock);
+               ret = io_epoll_ctl(req, force_nonblock, cs);
                break;
        case IORING_OP_SPLICE:
                if (sqe) {
@@ -5374,7 +5711,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret)
                                break;
                }
-               ret = io_provide_buffers(req, force_nonblock);
+               ret = io_provide_buffers(req, force_nonblock, cs);
                break;
        case IORING_OP_REMOVE_BUFFERS:
                if (sqe) {
@@ -5382,7 +5719,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        if (ret)
                                break;
                }
-               ret = io_remove_buffers(req, force_nonblock);
+               ret = io_remove_buffers(req, force_nonblock, cs);
                break;
        case IORING_OP_TEE:
                if (sqe) {
@@ -5417,25 +5754,15 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        return 0;
 }
 
-static void io_arm_async_linked_timeout(struct io_kiocb *req)
-{
-       struct io_kiocb *link;
-
-       /* link head's timeout is queued in io_queue_async_work() */
-       if (!(req->flags & REQ_F_QUEUE_TIMEOUT))
-               return;
-
-       link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
-       io_queue_linked_timeout(link);
-}
-
-static void io_wq_submit_work(struct io_wq_work **workptr)
+static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
 {
-       struct io_wq_work *work = *workptr;
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+       struct io_kiocb *timeout;
        int ret = 0;
 
-       io_arm_async_linked_timeout(req);
+       timeout = io_prep_linked_timeout(req);
+       if (timeout)
+               io_queue_linked_timeout(timeout);
 
        /* if NO_CANCEL is set, we must still run the work */
        if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
@@ -5445,7 +5772,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 
        if (!ret) {
                do {
-                       ret = io_issue_sqe(req, NULL, false);
+                       ret = io_issue_sqe(req, NULL, false, NULL);
                        /*
                         * We can get EAGAIN for polled IO even though we're
                         * forcing a sync submission from here, since we can't
@@ -5459,11 +5786,10 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 
        if (ret) {
                req_set_fail_links(req);
-               io_cqring_add_event(req, ret);
-               io_put_req(req);
+               io_req_complete(req, ret);
        }
 
-       io_steal_work(req, workptr);
+       return io_steal_work(req);
 }
 
 static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
@@ -5520,6 +5846,8 @@ static int io_grab_files(struct io_kiocb *req)
        int ret = -EBADF;
        struct io_ring_ctx *ctx = req->ctx;
 
+       io_req_init_async(req);
+
        if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE))
                return 0;
        if (!ctx->ring_file)
@@ -5545,6 +5873,13 @@ static int io_grab_files(struct io_kiocb *req)
        return ret;
 }
 
+static inline int io_prep_work_files(struct io_kiocb *req)
+{
+       if (!io_op_defs[req->opcode].file_table)
+               return 0;
+       return io_grab_files(req);
+}
+
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 {
        struct io_timeout_data *data = container_of(timer,
@@ -5577,8 +5912,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
                io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
                io_put_req(prev);
        } else {
-               io_cqring_add_event(req, -ETIME);
-               io_put_req(req);
+               io_req_complete(req, -ETIME);
        }
        return HRTIMER_NORESTART;
 }
@@ -5611,8 +5945,7 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
 
        if (!(req->flags & REQ_F_LINK_HEAD))
                return NULL;
-       /* for polled retry, if flag is set, we already went through here */
-       if (req->flags & REQ_F_POLLED)
+       if (req->flags & REQ_F_LINK_TIMEOUT)
                return NULL;
 
        nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb,
@@ -5624,7 +5957,8 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
        return nxt;
 }
 
-static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+                          struct io_comp_state *cs)
 {
        struct io_kiocb *linked_timeout;
        struct io_kiocb *nxt;
@@ -5644,54 +5978,45 @@ again:
                        old_creds = override_creds(req->work.creds);
        }
 
-       ret = io_issue_sqe(req, sqe, true);
+       ret = io_issue_sqe(req, sqe, true, cs);
 
        /*
         * We async punt it if the file wasn't marked NOWAIT, or if the file
         * doesn't support non-blocking read/write attempts
         */
-       if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
-           (req->flags & REQ_F_MUST_PUNT))) {
-               if (io_arm_poll_handler(req)) {
-                       if (linked_timeout)
-                               io_queue_linked_timeout(linked_timeout);
-                       goto exit;
-               }
+       if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+               if (!io_arm_poll_handler(req)) {
 punt:
-               io_req_init_async(req);
-
-               if (io_op_defs[req->opcode].file_table) {
-                       ret = io_grab_files(req);
-                       if (ret)
+                       ret = io_prep_work_files(req);
+                       if (unlikely(ret))
                                goto err;
+                       /*
+                        * Queued up for async execution, worker will release
+                        * submit reference when the iocb is actually submitted.
+                        */
+                       io_queue_async_work(req);
                }
 
-               /*
-                * Queued up for async execution, worker will release
-                * submit reference when the iocb is actually submitted.
-                */
-               io_queue_async_work(req);
+               if (linked_timeout)
+                       io_queue_linked_timeout(linked_timeout);
                goto exit;
        }
 
+       if (unlikely(ret)) {
 err:
-       nxt = NULL;
-       /* drop submission reference */
-       io_put_req_find_next(req, &nxt);
-
-       if (linked_timeout) {
-               if (!ret)
-                       io_queue_linked_timeout(linked_timeout);
-               else
-                       io_put_req(linked_timeout);
-       }
-
-       /* and drop final reference, if we failed */
-       if (ret) {
-               io_cqring_add_event(req, ret);
+               /* un-prep timeout, so it'll be killed as any other linked */
+               req->flags &= ~REQ_F_LINK_TIMEOUT;
                req_set_fail_links(req);
                io_put_req(req);
+               io_req_complete(req, ret);
+               goto exit;
        }
+
+       /* drop submission reference */
+       nxt = io_put_req_find_next(req);
+       if (linked_timeout)
+               io_queue_linked_timeout(linked_timeout);
+
        if (nxt) {
                req = nxt;
 
@@ -5704,7 +6029,8 @@ exit:
                revert_creds(old_creds);
 }
 
-static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+                        struct io_comp_state *cs)
 {
        int ret;
 
@@ -5712,17 +6038,14 @@ static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (ret) {
                if (ret != -EIOCBQUEUED) {
 fail_req:
-                       io_cqring_add_event(req, ret);
                        req_set_fail_links(req);
-                       io_double_put_req(req);
+                       io_put_req(req);
+                       io_req_complete(req, ret);
                }
        } else if (req->flags & REQ_F_FORCE_ASYNC) {
                if (!req->io) {
-                       ret = -EAGAIN;
-                       if (io_alloc_async_ctx(req))
-                               goto fail_req;
                        ret = io_req_defer_prep(req, sqe);
-                       if (unlikely(ret < 0))
+                       if (unlikely(ret))
                                goto fail_req;
                }
 
@@ -5734,21 +6057,22 @@ fail_req:
                req->work.flags |= IO_WQ_WORK_CONCURRENT;
                io_queue_async_work(req);
        } else {
-               __io_queue_sqe(req, sqe);
+               __io_queue_sqe(req, sqe, cs);
        }
 }
 
-static inline void io_queue_link_head(struct io_kiocb *req)
+static inline void io_queue_link_head(struct io_kiocb *req,
+                                     struct io_comp_state *cs)
 {
        if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
-               io_cqring_add_event(req, -ECANCELED);
-               io_double_put_req(req);
+               io_put_req(req);
+               io_req_complete(req, -ECANCELED);
        } else
-               io_queue_sqe(req, NULL);
+               io_queue_sqe(req, NULL, cs);
 }
 
 static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-                        struct io_kiocb **link)
+                        struct io_kiocb **link, struct io_comp_state *cs)
 {
        struct io_ring_ctx *ctx = req->ctx;
        int ret;
@@ -5774,21 +6098,19 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        head->flags |= REQ_F_IO_DRAIN;
                        ctx->drain_next = 1;
                }
-               if (io_alloc_async_ctx(req))
-                       return -EAGAIN;
-
                ret = io_req_defer_prep(req, sqe);
-               if (ret) {
+               if (unlikely(ret)) {
                        /* fail even hard links since we don't submit */
                        head->flags |= REQ_F_FAIL_LINK;
                        return ret;
                }
                trace_io_uring_link(ctx, req, head);
+               io_get_req_task(req);
                list_add_tail(&req->link_list, &head->link_list);
 
                /* last request of a link, enqueue the link */
                if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
-                       io_queue_link_head(head);
+                       io_queue_link_head(head, cs);
                        *link = NULL;
                }
        } else {
@@ -5800,15 +6122,12 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        req->flags |= REQ_F_LINK_HEAD;
                        INIT_LIST_HEAD(&req->link_list);
 
-                       if (io_alloc_async_ctx(req))
-                               return -EAGAIN;
-
                        ret = io_req_defer_prep(req, sqe);
-                       if (ret)
+                       if (unlikely(ret))
                                req->flags |= REQ_F_FAIL_LINK;
                        *link = req;
                } else {
-                       io_queue_sqe(req, sqe);
+                       io_queue_sqe(req, sqe, cs);
                }
        }
 
@@ -5820,6 +6139,8 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
  */
 static void io_submit_state_end(struct io_submit_state *state)
 {
+       if (!list_empty(&state->comp.list))
+               io_submit_flush_completions(&state->comp);
        blk_finish_plug(&state->plug);
        io_state_file_put(state);
        if (state->free_reqs)
@@ -5830,9 +6151,15 @@ static void io_submit_state_end(struct io_submit_state *state)
  * Start submission side cache.
  */
 static void io_submit_state_start(struct io_submit_state *state,
-                                 unsigned int max_ios)
+                                 struct io_ring_ctx *ctx, unsigned int max_ios)
 {
        blk_start_plug(&state->plug);
+#ifdef CONFIG_BLOCK
+       state->plug.nowait = true;
+#endif
+       state->comp.nr = 0;
+       INIT_LIST_HEAD(&state->comp.list);
+       state->comp.ctx = ctx;
        state->free_reqs = 0;
        state->file = NULL;
        state->ios_left = max_ios;
@@ -5897,12 +6224,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        unsigned int sqe_flags;
        int id;
 
-       /*
-        * All io need record the previous position, if LINK vs DARIN,
-        * it can be used to mark the position of the first IO in the
-        * link list.
-        */
-       req->sequence = ctx->cached_sq_head - ctx->cached_sq_dropped;
        req->opcode = READ_ONCE(sqe->opcode);
        req->user_data = READ_ONCE(sqe->user_data);
        req->io = NULL;
@@ -5950,7 +6271,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
                          struct file *ring_file, int ring_fd)
 {
-       struct io_submit_state state, *statep = NULL;
+       struct io_submit_state state;
        struct io_kiocb *link = NULL;
        int i, submitted = 0;
 
@@ -5967,10 +6288,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
        if (!percpu_ref_tryget_many(&ctx->refs, nr))
                return -EAGAIN;
 
-       if (nr > IO_PLUG_THRESHOLD) {
-               io_submit_state_start(&state, nr);
-               statep = &state;
-       }
+       io_submit_state_start(&state, ctx, nr);
 
        ctx->ring_fd = ring_fd;
        ctx->ring_file = ring_file;
@@ -5985,28 +6303,28 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
                        io_consume_sqe(ctx);
                        break;
                }
-               req = io_alloc_req(ctx, statep);
+               req = io_alloc_req(ctx, &state);
                if (unlikely(!req)) {
                        if (!submitted)
                                submitted = -EAGAIN;
                        break;
                }
 
-               err = io_init_req(ctx, req, sqe, statep);
+               err = io_init_req(ctx, req, sqe, &state);
                io_consume_sqe(ctx);
                /* will complete beyond this point, count as submitted */
                submitted++;
 
                if (unlikely(err)) {
 fail_req:
-                       io_cqring_add_event(req, err);
-                       io_double_put_req(req);
+                       io_put_req(req);
+                       io_req_complete(req, err);
                        break;
                }
 
                trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
                                                true, io_async_submit(ctx));
-               err = io_submit_sqe(req, sqe, &link);
+               err = io_submit_sqe(req, sqe, &link, &state.comp);
                if (err)
                        goto fail_req;
        }
@@ -6017,9 +6335,8 @@ fail_req:
                percpu_ref_put_many(&ctx->refs, nr - ref_used);
        }
        if (link)
-               io_queue_link_head(link);
-       if (statep)
-               io_submit_state_end(&state);
+               io_queue_link_head(link, &state.comp);
+       io_submit_state_end(&state);
 
         /* Commit SQ ring head once we've consumed and submitted all SQEs */
        io_commit_sqring(ctx);
@@ -6027,6 +6344,21 @@ fail_req:
        return submitted;
 }
 
+static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
+{
+       /* Tell userspace we may need a wakeup call */
+       spin_lock_irq(&ctx->completion_lock);
+       ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
+       spin_unlock_irq(&ctx->completion_lock);
+}
+
+static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
+{
+       spin_lock_irq(&ctx->completion_lock);
+       ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+       spin_unlock_irq(&ctx->completion_lock);
+}
+
 static int io_sq_thread(void *data)
 {
        struct io_ring_ctx *ctx = data;
@@ -6043,12 +6375,12 @@ static int io_sq_thread(void *data)
        while (!kthread_should_park()) {
                unsigned int to_submit;
 
-               if (!list_empty(&ctx->poll_list)) {
+               if (!list_empty(&ctx->iopoll_list)) {
                        unsigned nr_events = 0;
 
                        mutex_lock(&ctx->uring_lock);
-                       if (!list_empty(&ctx->poll_list))
-                               io_iopoll_getevents(ctx, &nr_events, 0);
+                       if (!list_empty(&ctx->iopoll_list) && !need_resched())
+                               io_do_iopoll(ctx, &nr_events, 0);
                        else
                                timeout = jiffies + ctx->sq_thread_idle;
                        mutex_unlock(&ctx->uring_lock);
@@ -6067,7 +6399,7 @@ static int io_sq_thread(void *data)
                         * adding ourselves to the waitqueue, as the unuse/drop
                         * may sleep.
                         */
-                       io_sq_thread_drop_mm(ctx);
+                       io_sq_thread_drop_mm();
 
                        /*
                         * We're polling. If we're within the defined idle
@@ -6076,11 +6408,10 @@ static int io_sq_thread(void *data)
                         * more IO, we should wait for the application to
                         * reap events and wake us up.
                         */
-                       if (!list_empty(&ctx->poll_list) || need_resched() ||
+                       if (!list_empty(&ctx->iopoll_list) || need_resched() ||
                            (!time_after(jiffies, timeout) && ret != -EBUSY &&
                            !percpu_ref_is_dying(&ctx->refs))) {
-                               if (current->task_works)
-                                       task_work_run();
+                               io_run_task_work();
                                cond_resched();
                                continue;
                        }
@@ -6090,21 +6421,18 @@ static int io_sq_thread(void *data)
 
                        /*
                         * While doing polled IO, before going to sleep, we need
-                        * to check if there are new reqs added to poll_list, it
-                        * is because reqs may have been punted to io worker and
-                        * will be added to poll_list later, hence check the
-                        * poll_list again.
+                        * to check if there are new reqs added to iopoll_list,
+                        * it is because reqs may have been punted to io worker
+                        * and will be added to iopoll_list later, hence check
+                        * the iopoll_list again.
                         */
                        if ((ctx->flags & IORING_SETUP_IOPOLL) &&
-                           !list_empty_careful(&ctx->poll_list)) {
+                           !list_empty_careful(&ctx->iopoll_list)) {
                                finish_wait(&ctx->sqo_wait, &wait);
                                continue;
                        }
 
-                       /* Tell userspace we may need a wakeup call */
-                       spin_lock_irq(&ctx->completion_lock);
-                       ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
-                       spin_unlock_irq(&ctx->completion_lock);
+                       io_ring_set_wakeup_flag(ctx);
 
                        to_submit = io_sqring_entries(ctx);
                        if (!to_submit || ret == -EBUSY) {
@@ -6112,9 +6440,9 @@ static int io_sq_thread(void *data)
                                        finish_wait(&ctx->sqo_wait, &wait);
                                        break;
                                }
-                               if (current->task_works) {
-                                       task_work_run();
+                               if (io_run_task_work()) {
                                        finish_wait(&ctx->sqo_wait, &wait);
+                                       io_ring_clear_wakeup_flag(ctx);
                                        continue;
                                }
                                if (signal_pending(current))
@@ -6122,17 +6450,13 @@ static int io_sq_thread(void *data)
                                schedule();
                                finish_wait(&ctx->sqo_wait, &wait);
 
-                               spin_lock_irq(&ctx->completion_lock);
-                               ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
-                               spin_unlock_irq(&ctx->completion_lock);
+                               io_ring_clear_wakeup_flag(ctx);
                                ret = 0;
                                continue;
                        }
                        finish_wait(&ctx->sqo_wait, &wait);
 
-                       spin_lock_irq(&ctx->completion_lock);
-                       ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
-                       spin_unlock_irq(&ctx->completion_lock);
+                       io_ring_clear_wakeup_flag(ctx);
                }
 
                mutex_lock(&ctx->uring_lock);
@@ -6142,10 +6466,9 @@ static int io_sq_thread(void *data)
                timeout = jiffies + ctx->sq_thread_idle;
        }
 
-       if (current->task_works)
-               task_work_run();
+       io_run_task_work();
 
-       io_sq_thread_drop_mm(ctx);
+       io_sq_thread_drop_mm();
        revert_creds(old_cred);
 
        kthread_parkme();
@@ -6208,9 +6531,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
        do {
                if (io_cqring_events(ctx, false) >= min_events)
                        return 0;
-               if (!current->task_works)
+               if (!io_run_task_work())
                        break;
-               task_work_run();
        } while (1);
 
        if (sig) {
@@ -6232,8 +6554,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
                                                TASK_INTERRUPTIBLE);
                /* make sure we run task_work before checking for signals */
-               if (current->task_works)
-                       task_work_run();
+               if (io_run_task_work())
+                       continue;
                if (signal_pending(current)) {
                        if (current->jobctl & JOBCTL_TASK_WORK) {
                                spin_lock_irq(&current->sighand->siglock);
@@ -7019,17 +7341,21 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
        return 0;
 err:
        io_finish_async(ctx);
-       mmdrop(ctx->sqo_mm);
-       ctx->sqo_mm = NULL;
+       if (ctx->sqo_mm) {
+               mmdrop(ctx->sqo_mm);
+               ctx->sqo_mm = NULL;
+       }
        return ret;
 }
 
-static void io_unaccount_mem(struct user_struct *user, unsigned long nr_pages)
+static inline void __io_unaccount_mem(struct user_struct *user,
+                                     unsigned long nr_pages)
 {
        atomic_long_sub(nr_pages, &user->locked_vm);
 }
 
-static int io_account_mem(struct user_struct *user, unsigned long nr_pages)
+static inline int __io_account_mem(struct user_struct *user,
+                                  unsigned long nr_pages)
 {
        unsigned long page_limit, cur_pages, new_pages;
 
@@ -7047,6 +7373,41 @@ static int io_account_mem(struct user_struct *user, unsigned long nr_pages)
        return 0;
 }
 
+static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
+                            enum io_mem_account acct)
+{
+       if (ctx->limit_mem)
+               __io_unaccount_mem(ctx->user, nr_pages);
+
+       if (ctx->sqo_mm) {
+               if (acct == ACCT_LOCKED)
+                       ctx->sqo_mm->locked_vm -= nr_pages;
+               else if (acct == ACCT_PINNED)
+                       atomic64_sub(nr_pages, &ctx->sqo_mm->pinned_vm);
+       }
+}
+
+static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
+                         enum io_mem_account acct)
+{
+       int ret;
+
+       if (ctx->limit_mem) {
+               ret = __io_account_mem(ctx->user, nr_pages);
+               if (ret)
+                       return ret;
+       }
+
+       if (ctx->sqo_mm) {
+               if (acct == ACCT_LOCKED)
+                       ctx->sqo_mm->locked_vm += nr_pages;
+               else if (acct == ACCT_PINNED)
+                       atomic64_add(nr_pages, &ctx->sqo_mm->pinned_vm);
+       }
+
+       return 0;
+}
+
 static void io_mem_free(void *ptr)
 {
        struct page *page;
@@ -7083,6 +7444,9 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
                return SIZE_MAX;
 #endif
 
+       if (sq_offset)
+               *sq_offset = off;
+
        sq_array_size = array_size(sizeof(u32), sq_entries);
        if (sq_array_size == SIZE_MAX)
                return SIZE_MAX;
@@ -7090,9 +7454,6 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
        if (check_add_overflow(off, sq_array_size, &off))
                return SIZE_MAX;
 
-       if (sq_offset)
-               *sq_offset = off;
-
        return off;
 }
 
@@ -7121,8 +7482,7 @@ static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
                for (j = 0; j < imu->nr_bvecs; j++)
                        unpin_user_page(imu->bvec[j].bv_page);
 
-               if (ctx->account_mem)
-                       io_unaccount_mem(ctx->user, imu->nr_bvecs);
+               io_unaccount_mem(ctx, imu->nr_bvecs, ACCT_PINNED);
                kvfree(imu->bvec);
                imu->nr_bvecs = 0;
        }
@@ -7205,11 +7565,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
                start = ubuf >> PAGE_SHIFT;
                nr_pages = end - start;
 
-               if (ctx->account_mem) {
-                       ret = io_account_mem(ctx->user, nr_pages);
-                       if (ret)
-                               goto err;
-               }
+               ret = io_account_mem(ctx, nr_pages, ACCT_PINNED);
+               if (ret)
+                       goto err;
 
                ret = 0;
                if (!pages || nr_pages > got_pages) {
@@ -7222,8 +7580,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
                                        GFP_KERNEL);
                        if (!pages || !vmas) {
                                ret = -ENOMEM;
-                               if (ctx->account_mem)
-                                       io_unaccount_mem(ctx->user, nr_pages);
+                               io_unaccount_mem(ctx, nr_pages, ACCT_PINNED);
                                goto err;
                        }
                        got_pages = nr_pages;
@@ -7233,8 +7590,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
                                                GFP_KERNEL);
                ret = -ENOMEM;
                if (!imu->bvec) {
-                       if (ctx->account_mem)
-                               io_unaccount_mem(ctx->user, nr_pages);
+                       io_unaccount_mem(ctx, nr_pages, ACCT_PINNED);
                        goto err;
                }
 
@@ -7265,8 +7621,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
                         */
                        if (pret > 0)
                                unpin_user_pages(pages, pret);
-                       if (ctx->account_mem)
-                               io_unaccount_mem(ctx->user, nr_pages);
+                       io_unaccount_mem(ctx, nr_pages, ACCT_PINNED);
                        kvfree(imu->bvec);
                        goto err;
                }
@@ -7350,11 +7705,12 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
        io_finish_async(ctx);
-       if (ctx->sqo_mm)
+       io_sqe_buffer_unregister(ctx);
+       if (ctx->sqo_mm) {
                mmdrop(ctx->sqo_mm);
+               ctx->sqo_mm = NULL;
+       }
 
-       io_iopoll_reap_events(ctx);
-       io_sqe_buffer_unregister(ctx);
        io_sqe_files_unregister(ctx);
        io_eventfd_unregister(ctx);
        io_destroy_buffers(ctx);
@@ -7418,11 +7774,8 @@ static int io_remove_personalities(int id, void *p, void *data)
 
 static void io_ring_exit_work(struct work_struct *work)
 {
-       struct io_ring_ctx *ctx;
-
-       ctx = container_of(work, struct io_ring_ctx, exit_work);
-       if (ctx->rings)
-               io_cqring_overflow_flush(ctx, true);
+       struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
+                                              exit_work);
 
        /*
         * If we're doing polled IO and end up having requests being
@@ -7430,11 +7783,11 @@ static void io_ring_exit_work(struct work_struct *work)
         * we're waiting for refs to drop. We need to reap these manually,
         * as nobody else will be looking for them.
         */
-       while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) {
-               io_iopoll_reap_events(ctx);
+       do {
                if (ctx->rings)
                        io_cqring_overflow_flush(ctx, true);
-       }
+               io_iopoll_try_reap_events(ctx);
+       } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
        io_ring_ctx_free(ctx);
 }
 
@@ -7450,10 +7803,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
        if (ctx->io_wq)
                io_wq_cancel_all(ctx->io_wq);
 
-       io_iopoll_reap_events(ctx);
        /* if we failed setting up the ctx, we might not have any rings */
        if (ctx->rings)
                io_cqring_overflow_flush(ctx, true);
+       io_iopoll_try_reap_events(ctx);
        idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
 
        /*
@@ -7461,9 +7814,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
         * is closed but resources aren't reaped yet. This can cause
         * spurious failure in setting up a new ring.
         */
-       if (ctx->account_mem)
-               io_unaccount_mem(ctx->user,
-                               ring_pages(ctx->sq_entries, ctx->cq_entries));
+       io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries),
+                        ACCT_LOCKED);
 
        INIT_WORK(&ctx->exit_work, io_ring_exit_work);
        queue_work(system_wq, &ctx->exit_work);
@@ -7519,17 +7871,14 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 
                if (cancel_req->flags & REQ_F_OVERFLOW) {
                        spin_lock_irq(&ctx->completion_lock);
-                       list_del(&cancel_req->list);
+                       list_del(&cancel_req->compl.list);
                        cancel_req->flags &= ~REQ_F_OVERFLOW;
-                       if (list_empty(&ctx->cq_overflow_list)) {
-                               clear_bit(0, &ctx->sq_check_overflow);
-                               clear_bit(0, &ctx->cq_check_overflow);
-                               ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
-                       }
-                       spin_unlock_irq(&ctx->completion_lock);
 
+                       io_cqring_mark_overflow(ctx);
                        WRITE_ONCE(ctx->rings->cq_overflow,
                                atomic_inc_return(&ctx->cached_cq_overflow));
+                       io_commit_cqring(ctx);
+                       spin_unlock_irq(&ctx->completion_lock);
 
                        /*
                         * Put inflight ref and overflow ref. If that's
@@ -7652,8 +8001,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
        int submitted = 0;
        struct fd f;
 
-       if (current->task_works)
-               task_work_run();
+       io_run_task_work();
 
        if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
                return -EINVAL;
@@ -7692,8 +8040,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                        goto out;
        }
        if (flags & IORING_ENTER_GETEVENTS) {
-               unsigned nr_events = 0;
-
                min_complete = min(min_complete, ctx->cq_entries);
 
                /*
@@ -7704,7 +8050,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                 */
                if (ctx->flags & IORING_SETUP_IOPOLL &&
                    !(ctx->flags & IORING_SETUP_SQPOLL)) {
-                       ret = io_iopoll_check(ctx, &nr_events, min_complete);
+                       ret = io_iopoll_check(ctx, min_complete);
                } else {
                        ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
                }
@@ -7909,7 +8255,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 {
        struct user_struct *user = NULL;
        struct io_ring_ctx *ctx;
-       bool account_mem;
+       bool limit_mem;
        int ret;
 
        if (!entries)
@@ -7948,10 +8294,10 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
        }
 
        user = get_uid(current_user());
-       account_mem = !capable(CAP_IPC_LOCK);
+       limit_mem = !capable(CAP_IPC_LOCK);
 
-       if (account_mem) {
-               ret = io_account_mem(user,
+       if (limit_mem) {
+               ret = __io_account_mem(user,
                                ring_pages(p->sq_entries, p->cq_entries));
                if (ret) {
                        free_uid(user);
@@ -7961,14 +8307,13 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 
        ctx = io_ring_ctx_alloc(p);
        if (!ctx) {
-               if (account_mem)
-                       io_unaccount_mem(user, ring_pages(p->sq_entries,
+               if (limit_mem)
+                       __io_unaccount_mem(user, ring_pages(p->sq_entries,
                                                                p->cq_entries));
                free_uid(user);
                return -ENOMEM;
        }
        ctx->compat = in_compat_syscall();
-       ctx->account_mem = account_mem;
        ctx->user = user;
        ctx->creds = get_current_cred();
 
@@ -8000,12 +8345,22 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 
        p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
                        IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
-                       IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
+                       IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
+                       IORING_FEAT_POLL_32BITS;
 
        if (copy_to_user(params, p, sizeof(*p))) {
                ret = -EFAULT;
                goto err;
        }
+
+       /*
+        * Account memory _before_ installing the file descriptor. Once
+        * the descriptor is installed, it can get closed at any time.
+        */
+       io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries),
+                      ACCT_LOCKED);
+       ctx->limit_mem = limit_mem;
+
        /*
         * Install ring fd as the very last thing, so we don't risk someone
         * having closed it before we finish setup
@@ -8289,7 +8644,8 @@ static int __init io_uring_init(void)
        BUILD_BUG_SQE_ELEM(28, /* compat */   int, rw_flags);
        BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
        BUILD_BUG_SQE_ELEM(28, __u32,  fsync_flags);
-       BUILD_BUG_SQE_ELEM(28, __u16,  poll_events);
+       BUILD_BUG_SQE_ELEM(28, /* compat */ __u16,  poll_events);
+       BUILD_BUG_SQE_ELEM(28, __u32,  poll32_events);
        BUILD_BUG_SQE_ELEM(28, __u32,  sync_range_flags);
        BUILD_BUG_SQE_ELEM(28, __u32,  msg_flags);
        BUILD_BUG_SQE_ELEM(28, __u32,  timeout_flags);
index d634561..78f5c96 100644 (file)
@@ -612,9 +612,6 @@ static bool rootdir_empty(struct super_block *sb, unsigned long block)
 
 /*
  * Initialize the superblock and read the root inode.
- *
- * Note: a check_disk_change() has been done immediately prior
- * to this call, so we don't need to check again.
  */
 static int isofs_fill_super(struct super_block *s, void *data, int silent)
 {
index eb8b9e2..2935d4c 100644 (file)
@@ -36,6 +36,7 @@
 
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
+#include <linux/blkdev.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
index 66acea9..bde787c 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
 #include <linux/quotaops.h>
+#include <linux/blkdev.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
index 7df0f9f..938fe32 100644 (file)
@@ -1282,6 +1282,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
                                if (!new_fl)
                                        goto out;
                                locks_copy_lock(new_fl, request);
+                               locks_move_blocks(new_fl, request);
                                request = new_fl;
                                new_fl = NULL;
                                locks_insert_lock_ctx(request, &fl->fl_list);
index 3c48114..a87d439 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 
 #include "dir.h"
 #include "aops.h"
index 37d3869..837971e 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/blkdev.h>
 
 static int devinfo_show(struct seq_file *f, void *v)
 {
index 7b4bac9..bb02989 100644 (file)
@@ -78,6 +78,7 @@
 #include <linux/namei.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
+#include <linux/blkdev.h>
 #include "../internal.h" /* ugh */
 
 #include <linux/uaccess.h>
index ff33651..155b828 100644 (file)
@@ -15,6 +15,7 @@
 #include "reiserfs.h"
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <linux/blkdev.h>
 
 /*
  * LOCKING:
index 52de290..6e264dd 100644 (file)
@@ -339,7 +339,6 @@ out:
        return ret;
 }
 
-/* Should pair with userfaultfd_signal_pending() */
 static inline long userfaultfd_get_blocking_state(unsigned int flags)
 {
        if (flags & FAULT_FLAG_INTERRUPTIBLE)
@@ -351,18 +350,6 @@ static inline long userfaultfd_get_blocking_state(unsigned int flags)
        return TASK_UNINTERRUPTIBLE;
 }
 
-/* Should pair with userfaultfd_get_blocking_state() */
-static inline bool userfaultfd_signal_pending(unsigned int flags)
-{
-       if (flags & FAULT_FLAG_INTERRUPTIBLE)
-               return signal_pending(current);
-
-       if (flags & FAULT_FLAG_KILLABLE)
-               return fatal_signal_pending(current);
-
-       return false;
-}
-
 /*
  * The locking rules involved in returning VM_FAULT_RETRY depending on
  * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
@@ -516,33 +503,9 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
                                                       vmf->flags, reason);
        mmap_read_unlock(mm);
 
-       if (likely(must_wait && !READ_ONCE(ctx->released) &&
-                  !userfaultfd_signal_pending(vmf->flags))) {
+       if (likely(must_wait && !READ_ONCE(ctx->released))) {
                wake_up_poll(&ctx->fd_wqh, EPOLLIN);
                schedule();
-               ret |= VM_FAULT_MAJOR;
-
-               /*
-                * False wakeups can orginate even from rwsem before
-                * up_read() however userfaults will wait either for a
-                * targeted wakeup on the specific uwq waitqueue from
-                * wake_userfault() or for signals or for uffd
-                * release.
-                */
-               while (!READ_ONCE(uwq.waken)) {
-                       /*
-                        * This needs the full smp_store_mb()
-                        * guarantee as the state write must be
-                        * visible to other CPUs before reading
-                        * uwq.waken from other CPUs.
-                        */
-                       set_current_state(blocking_state);
-                       if (READ_ONCE(uwq.waken) ||
-                           READ_ONCE(ctx->released) ||
-                           userfaultfd_signal_pending(vmf->flags))
-                               break;
-                       schedule();
-               }
        }
 
        __set_current_state(TASK_RUNNING);
index d007db0..bfe0280 100644 (file)
@@ -221,11 +221,20 @@ out:
 void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
 {
        /*
-        * Multiple processes may race to set ->i_verity_info, so use cmpxchg.
-        * This pairs with the READ_ONCE() in fsverity_get_info().
+        * Multiple tasks may race to set ->i_verity_info, so use
+        * cmpxchg_release().  This pairs with the smp_load_acquire() in
+        * fsverity_get_info().  I.e., here we publish ->i_verity_info with a
+        * RELEASE barrier so that other tasks can ACQUIRE it.
         */
-       if (cmpxchg(&inode->i_verity_info, NULL, vi) != NULL)
+       if (cmpxchg_release(&inode->i_verity_info, NULL, vi) != NULL) {
+               /* Lost the race, so free the fsverity_info we allocated. */
                fsverity_free_info(vi);
+               /*
+                * Afterwards, the caller may access ->i_verity_info directly,
+                * so make sure to ACQUIRE the winning fsverity_info.
+                */
+               (void)fsverity_get_info(inode);
+       }
 }
 
 void fsverity_free_info(struct fsverity_info *vi)
index 00db81e..fdbff48 100644 (file)
@@ -1080,7 +1080,7 @@ xfs_file_open(
                return -EFBIG;
        if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
                return -EIO;
-       file->f_mode |= FMODE_NOWAIT;
+       file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
        return 0;
 }
 
index 4bcc3e6..b03333f 100644 (file)
@@ -132,5 +132,5 @@ xfs_pwork_guess_datadev_parallelism(
         * For now we'll go with the most conservative setting possible,
         * which is two threads for an SSD and 1 thread everywhere else.
         */
-       return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1;
+       return blk_queue_nonrot(btp->bt_bdev->bd_disk->queue) ? 2 : 1;
 }
index 5afb6ce..a3abcc4 100644 (file)
@@ -588,8 +588,13 @@ bool acpi_dma_supported(struct acpi_device *adev);
 enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
 int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
                       u64 *size);
-int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr);
-
+int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
+                          const u32 *input_id);
+static inline int acpi_dma_configure(struct device *dev,
+                                    enum dev_dma_attr attr)
+{
+       return acpi_dma_configure_id(dev, attr, NULL);
+}
 struct acpi_device *acpi_find_child_device(struct acpi_device *parent,
                                           u64 address, bool check_children);
 int acpi_is_root_bridge(acpi_handle);
index b0b163b..bdcac69 100644 (file)
@@ -415,6 +415,13 @@ struct acpi_table_tpm2 {
        /* Platform-specific data follows */
 };
 
+/* Optional trailer for revision 4 holding platform-specific data */
+struct acpi_tpm2_phy {
+       u8  start_method_specific[12];
+       u32 log_area_minimum_length;
+       u64 log_area_start_address;
+};
+
 /* Values for start_method above */
 
 #define ACPI_TPM2_NOT_ALLOWED                       0
index 44ec80e..74b0612 100644 (file)
@@ -45,6 +45,7 @@ mandatory-y += pci.h
 mandatory-y += percpu.h
 mandatory-y += pgalloc.h
 mandatory-y += preempt.h
+mandatory-y += rwonce.h
 mandatory-y += sections.h
 mandatory-y += serial.h
 mandatory-y += shmparam.h
index 286867f..11f96f4 100644 (file)
@@ -159,8 +159,6 @@ ATOMIC_OP(xor, ^)
  * resource counting etc..
  */
 
-#define ATOMIC_INIT(i) { (i) }
-
 /**
  * atomic_read - read atomic variable
  * @v: pointer of type atomic_t
index 2eacaf7..fec97dc 100644 (file)
@@ -13,7 +13,7 @@
 
 #ifndef __ASSEMBLY__
 
-#include <linux/compiler.h>
+#include <asm/rwonce.h>
 
 #ifndef nop
 #define nop()  asm volatile ("nop")
 #define dma_wmb()      wmb()
 #endif
 
-#ifndef read_barrier_depends
-#define read_barrier_depends()         do { } while (0)
-#endif
-
 #ifndef __smp_mb
 #define __smp_mb()     mb()
 #endif
 #define __smp_wmb()    wmb()
 #endif
 
-#ifndef __smp_read_barrier_depends
-#define __smp_read_barrier_depends()   read_barrier_depends()
-#endif
-
 #ifdef CONFIG_SMP
 
 #ifndef smp_mb
 #define smp_wmb()      __smp_wmb()
 #endif
 
-#ifndef smp_read_barrier_depends
-#define smp_read_barrier_depends()     __smp_read_barrier_depends()
-#endif
-
 #else  /* !CONFIG_SMP */
 
 #ifndef smp_mb
 #define smp_wmb()      barrier()
 #endif
 
-#ifndef smp_read_barrier_depends
-#define smp_read_barrier_depends()     do { } while (0)
-#endif
-
 #endif /* CONFIG_SMP */
 
 #ifndef __smp_store_mb
@@ -196,7 +180,6 @@ do {                                                                        \
 #define virt_mb() __smp_mb()
 #define virt_rmb() __smp_rmb()
 #define virt_wmb() __smp_wmb()
-#define virt_read_barrier_depends() __smp_read_barrier_depends()
 #define virt_store_mb(var, value) __smp_store_mb(var, value)
 #define virt_mb__before_atomic() __smp_mb__before_atomic()
 #define virt_mb__after_atomic()        __smp_mb__after_atomic()
index c94e33a..18b0f4e 100644 (file)
@@ -3,6 +3,7 @@
 #define _ASM_GENERIC_BUG_H
 
 #include <linux/compiler.h>
+#include <linux/instrumentation.h>
 
 #define CUT_HERE               "------------[ cut here ]------------\n"
 
index 8b1e020..30a3aab 100644 (file)
@@ -456,7 +456,7 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer,
 
 #if !defined(inb) && !defined(_inb)
 #define _inb _inb
-static inline u16 _inb(unsigned long addr)
+static inline u8 _inb(unsigned long addr)
 {
        u8 val;
 
@@ -482,7 +482,7 @@ static inline u16 _inw(unsigned long addr)
 
 #if !defined(inl) && !defined(_inl)
 #define _inl _inl
-static inline u16 _inl(unsigned long addr)
+static inline u32 _inl(unsigned long addr)
 {
        u32 val;
 
index fde943d..2b26cd7 100644 (file)
@@ -11,6 +11,7 @@
 #define __ASM_GENERIC_QSPINLOCK_H
 
 #include <asm-generic/qspinlock_types.h>
+#include <linux/atomic.h>
 
 /**
  * queued_spin_is_locked - is the spinlock locked?
index 56d1309..2fd1fb8 100644 (file)
@@ -9,15 +9,7 @@
 #ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H
 #define __ASM_GENERIC_QSPINLOCK_TYPES_H
 
-/*
- * Including atomic.h with PARAVIRT on will cause compilation errors because
- * of recursive header file incluson via paravirt_types.h. So don't include
- * it if PARAVIRT is on.
- */
-#ifndef CONFIG_PARAVIRT
 #include <linux/types.h>
-#include <linux/atomic.h>
-#endif
 
 typedef struct qspinlock {
        union {
diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
new file mode 100644 (file)
index 0000000..8d0a628
--- /dev/null
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
+ * particular ordering. One way to make the compiler aware of ordering is to
+ * put the two invocations of READ_ONCE or WRITE_ONCE in different C
+ * statements.
+ *
+ * These two macros will also work on aggregate data types like structs or
+ * unions.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+#ifndef __ASM_GENERIC_RWONCE_H
+#define __ASM_GENERIC_RWONCE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler_types.h>
+#include <linux/kasan-checks.h>
+#include <linux/kcsan-checks.h>
+
+/*
+ * Yes, this permits 64-bit accesses on 32-bit architectures. These will
+ * actually be atomic in some cases (namely Armv7 + LPAE), but for others we
+ * rely on the access being split into 2x32-bit accesses for a 32-bit quantity
+ * (e.g. a virtual address) and a strong prevailing wind.
+ */
+#define compiletime_assert_rwonce_type(t)                                      \
+       compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),  \
+               "Unsupported access size for {READ,WRITE}_ONCE().")
+
+/*
+ * Use __READ_ONCE() instead of READ_ONCE() if you do not require any
+ * atomicity. Note that this may result in tears!
+ */
+#ifndef __READ_ONCE
+#define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x))
+#endif
+
+#define READ_ONCE(x)                                                   \
+({                                                                     \
+       compiletime_assert_rwonce_type(x);                              \
+       __READ_ONCE(x);                                                 \
+})
+
+#define __WRITE_ONCE(x, val)                                           \
+do {                                                                   \
+       *(volatile typeof(x) *)&(x) = (val);                            \
+} while (0)
+
+#define WRITE_ONCE(x, val)                                             \
+do {                                                                   \
+       compiletime_assert_rwonce_type(x);                              \
+       __WRITE_ONCE(x, val);                                           \
+} while (0)
+
+static __no_sanitize_or_inline
+unsigned long __read_once_word_nocheck(const void *addr)
+{
+       return __READ_ONCE(*(unsigned long *)addr);
+}
+
+/*
+ * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a
+ * word from memory atomically but without telling KASAN/KCSAN. This is
+ * usually used by unwinding code when walking the stack of a running process.
+ */
+#define READ_ONCE_NOCHECK(x)                                           \
+({                                                                     \
+       compiletime_assert(sizeof(x) == sizeof(unsigned long),          \
+               "Unsupported access size for READ_ONCE_NOCHECK().");    \
+       (typeof(x))__read_once_word_nocheck(&(x));                      \
+})
+
+static __no_kasan_or_inline
+unsigned long read_word_at_a_time(const void *addr)
+{
+       kasan_check_read(addr, 1);
+       return *(unsigned long *)addr;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_GENERIC_RWONCE_H */
index 3f1649a..ef75ec8 100644 (file)
@@ -512,6 +512,38 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
 }
 #endif
 
+/*
+ * tlb_flush_{pte|pmd|pud|p4d}_range() adjusts tlb->start and tlb->end,
+ * and sets the corresponding cleared_* flag.
+ */
+static inline void tlb_flush_pte_range(struct mmu_gather *tlb,
+                                    unsigned long address, unsigned long size)
+{
+       __tlb_adjust_range(tlb, address, size);
+       tlb->cleared_ptes = 1;
+}
+
+static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
+                                    unsigned long address, unsigned long size)
+{
+       __tlb_adjust_range(tlb, address, size);
+       tlb->cleared_pmds = 1;
+}
+
+static inline void tlb_flush_pud_range(struct mmu_gather *tlb,
+                                    unsigned long address, unsigned long size)
+{
+       __tlb_adjust_range(tlb, address, size);
+       tlb->cleared_puds = 1;
+}
+
+static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
+                                    unsigned long address, unsigned long size)
+{
+       __tlb_adjust_range(tlb, address, size);
+       tlb->cleared_p4ds = 1;
+}
+
 #ifndef __tlb_remove_tlb_entry
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 #endif
@@ -525,19 +557,17 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
  */
 #define tlb_remove_tlb_entry(tlb, ptep, address)               \
        do {                                                    \
-               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
-               tlb->cleared_ptes = 1;                          \
+               tlb_flush_pte_range(tlb, address, PAGE_SIZE);   \
                __tlb_remove_tlb_entry(tlb, ptep, address);     \
        } while (0)
 
 #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)       \
        do {                                                    \
                unsigned long _sz = huge_page_size(h);          \
-               __tlb_adjust_range(tlb, address, _sz);          \
                if (_sz == PMD_SIZE)                            \
-                       tlb->cleared_pmds = 1;                  \
+                       tlb_flush_pmd_range(tlb, address, _sz); \
                else if (_sz == PUD_SIZE)                       \
-                       tlb->cleared_puds = 1;                  \
+                       tlb_flush_pud_range(tlb, address, _sz); \
                __tlb_remove_tlb_entry(tlb, ptep, address);     \
        } while (0)
 
@@ -551,8 +581,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
 
 #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)                   \
        do {                                                            \
-               __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);       \
-               tlb->cleared_pmds = 1;                                  \
+               tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE);      \
                __tlb_remove_pmd_tlb_entry(tlb, pmdp, address);         \
        } while (0)
 
@@ -566,8 +595,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
 
 #define tlb_remove_pud_tlb_entry(tlb, pudp, address)                   \
        do {                                                            \
-               __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);       \
-               tlb->cleared_puds = 1;                                  \
+               tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE);      \
                __tlb_remove_pud_tlb_entry(tlb, pudp, address);         \
        } while (0)
 
@@ -592,9 +620,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
 #ifndef pte_free_tlb
 #define pte_free_tlb(tlb, ptep, address)                       \
        do {                                                    \
-               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
+               tlb_flush_pmd_range(tlb, address, PAGE_SIZE);   \
                tlb->freed_tables = 1;                          \
-               tlb->cleared_pmds = 1;                          \
                __pte_free_tlb(tlb, ptep, address);             \
        } while (0)
 #endif
@@ -602,9 +629,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
 #ifndef pmd_free_tlb
 #define pmd_free_tlb(tlb, pmdp, address)                       \
        do {                                                    \
-               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
+               tlb_flush_pud_range(tlb, address, PAGE_SIZE);   \
                tlb->freed_tables = 1;                          \
-               tlb->cleared_puds = 1;                          \
                __pmd_free_tlb(tlb, pmdp, address);             \
        } while (0)
 #endif
@@ -612,9 +638,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
 #ifndef pud_free_tlb
 #define pud_free_tlb(tlb, pudp, address)                       \
        do {                                                    \
-               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
+               tlb_flush_p4d_range(tlb, address, PAGE_SIZE);   \
                tlb->freed_tables = 1;                          \
-               tlb->cleared_p4ds = 1;                          \
                __pud_free_tlb(tlb, pudp, address);             \
        } while (0)
 #endif
index 052e0f0..de8493c 100644 (file)
 #endif
 
 /*
- * Align to a 32 byte boundary equal to the
- * alignment gcc 4.5 uses for a struct
+ * GCC 4.5 and later have a 32 bytes section alignment for structures.
+ * Except GCC 4.9, that feels the need to align on 64 bytes.
  */
+#if __GNUC__ == 4 && __GNUC_MINOR__ == 9
+#define STRUCT_ALIGNMENT 64
+#else
 #define STRUCT_ALIGNMENT 32
+#endif
 #define STRUCT_ALIGN() . = ALIGN(STRUCT_ALIGNMENT)
 
+/*
+ * The order of the sched class addresses is important, as it is
+ * used to determine the priority of each sched class in
+ * relation to the others.
+ */
+#define SCHED_DATA                             \
+       STRUCT_ALIGN();                         \
+       __begin_sched_classes = .;              \
+       *(__idle_sched_class)                   \
+       *(__fair_sched_class)                   \
+       *(__rt_sched_class)                     \
+       *(__dl_sched_class)                     \
+       *(__stop_sched_class)                   \
+       __end_sched_classes = .;
+
 /* The actual configuration determine if the init/exit sections
  * are handled as text/data or they can be discarded (which
  * often happens at runtime)
        .rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {           \
                __start_rodata = .;                                     \
                *(.rodata) *(.rodata.*)                                 \
+               SCHED_DATA                                              \
                RO_AFTER_INIT_DATA      /* Read only after init */      \
                . = ALIGN(8);                                           \
                __start___tracepoints_ptrs = .;                         \
index 2b4d2b0..fcde59c 100644 (file)
@@ -106,6 +106,24 @@ struct acomp_alg {
  */
 struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
                                        u32 mask);
+/**
+ * crypto_alloc_acomp_node() -- allocate ACOMPRESS tfm handle with desired NUMA node
+ * @alg_name:  is the cra_name / name or cra_driver_name / driver name of the
+ *             compression algorithm e.g. "deflate"
+ * @type:      specifies the type of the algorithm
+ * @mask:      specifies the mask for the algorithm
+ * @node:      specifies the NUMA node the ZIP hardware belongs to
+ *
+ * Allocate a handle for a compression algorithm. Drivers should try to use
+ * (de)compressors on the specified NUMA node.
+ * The returned struct crypto_acomp is the handle that is required for any
+ * subsequent API invocation for the compression operations.
+ *
+ * Return:     allocated handle in case of success; IS_ERR() is true in case
+ *             of an error, PTR_ERR() returns the error code.
+ */
+struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
+                                       u32 mask, int node);
 
 static inline struct crypto_tfm *crypto_acomp_tfm(struct crypto_acomp *tfm)
 {
index 00a9cf9..143d884 100644 (file)
@@ -116,7 +116,7 @@ struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
 void *crypto_spawn_tfm2(struct crypto_spawn *spawn);
 
 struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb);
-int crypto_check_attr_type(struct rtattr **tb, u32 type);
+int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret);
 const char *crypto_attr_alg_name(struct rtattr *rta);
 int crypto_attr_u32(struct rtattr *rta, u32 *num);
 int crypto_inst_setname(struct crypto_instance *inst, const char *name,
@@ -235,18 +235,29 @@ static inline struct crypto_async_request *crypto_get_backlog(
               container_of(queue->backlog, struct crypto_async_request, list);
 }
 
-static inline int crypto_requires_off(u32 type, u32 mask, u32 off)
+static inline u32 crypto_requires_off(struct crypto_attr_type *algt, u32 off)
 {
-       return (type ^ off) & mask & off;
+       return (algt->type ^ off) & algt->mask & off;
 }
 
 /*
- * Returns CRYPTO_ALG_ASYNC if type/mask requires the use of sync algorithms.
- * Otherwise returns zero.
+ * When an algorithm uses another algorithm (e.g., if it's an instance of a
+ * template), these are the flags that should always be set on the "outer"
+ * algorithm if any "inner" algorithm has them set.
  */
-static inline int crypto_requires_sync(u32 type, u32 mask)
+#define CRYPTO_ALG_INHERITED_FLAGS     \
+       (CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK |  \
+        CRYPTO_ALG_ALLOCATES_MEMORY)
+
+/*
+ * Given the type and mask that specify the flags restrictions on a template
+ * instance being created, return the mask that should be passed to
+ * crypto_grab_*() (along with type=0) to honor any request the user made to
+ * have any of the CRYPTO_ALG_INHERITED_FLAGS clear.
+ */
+static inline u32 crypto_algt_inherited_mask(struct crypto_attr_type *algt)
 {
-       return crypto_requires_off(type, mask, CRYPTO_ALG_ASYNC);
+       return crypto_requires_off(algt, CRYPTO_ALG_INHERITED_FLAGS);
 }
 
 noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size);
index 2676f4f..3a1c72f 100644 (file)
 #define CHACHA_BLOCK_SIZE      64
 #define CHACHAPOLY_IV_SIZE     12
 
-#ifdef CONFIG_X86_64
-#define CHACHA_STATE_WORDS     ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
-#else
 #define CHACHA_STATE_WORDS     (CHACHA_BLOCK_SIZE / sizeof(u32))
-#endif
 
 /* 192-bit nonce, then 64-bit stream position */
 #define XCHACHA_IV_SIZE                32
index 234ee28..d2ac3ff 100644 (file)
@@ -45,4 +45,6 @@ bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len
                                         const u64 nonce,
                                         const u8 key[CHACHA20POLY1305_KEY_SIZE]);
 
+bool chacha20poly1305_selftest(void);
+
 #endif /* __CHACHA20POLY1305_H */
index 4829d23..19ce91f 100644 (file)
@@ -687,7 +687,7 @@ static inline void ahash_request_set_crypt(struct ahash_request *req,
  * The message digest API is able to maintain state information for the
  * caller.
  *
- * The synchronous message digest API can store user-related context in in its
+ * The synchronous message digest API can store user-related context in its
  * shash_desc request data structure.
  */
 
index 088c1de..ee64123 100644 (file)
@@ -135,6 +135,7 @@ struct af_alg_async_req {
  *                     SG?
  * @enc:               Cryptographic operation to be performed when
  *                     recvmsg is invoked.
+ * @init:              True if metadata has been sent.
  * @len:               Length of memory allocated for this data structure.
  */
 struct af_alg_ctx {
@@ -151,6 +152,7 @@ struct af_alg_ctx {
        bool more;
        bool merge;
        bool enc;
+       bool init;
 
        unsigned int len;
 };
@@ -226,7 +228,7 @@ unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset);
 void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
                      size_t dst_offset);
 void af_alg_wmem_wakeup(struct sock *sk);
-int af_alg_wait_for_data(struct sock *sk, unsigned flags);
+int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min);
 int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
                   unsigned int ivsize);
 ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
index 229d376..7fd7126 100644 (file)
@@ -20,7 +20,7 @@ struct aead_geniv_ctx {
 };
 
 struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
-                                      struct rtattr **tb, u32 type, u32 mask);
+                                      struct rtattr **tb);
 int aead_init_geniv(struct crypto_aead *tfm);
 void aead_exit_geniv(struct crypto_aead *tfm);
 
index 10753ff..4ff3da8 100644 (file)
@@ -147,6 +147,7 @@ static inline void sha256_init(struct sha256_state *sctx)
 }
 void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len);
 void sha256_final(struct sha256_state *sctx, u8 *out);
+void sha256(const u8 *data, unsigned int len, u8 *out);
 
 static inline void sha224_init(struct sha256_state *sctx)
 {
index 141e769..5663f71 100644 (file)
@@ -18,7 +18,7 @@
  *     @iv: Initialisation Vector
  *     @src: Source SG list
  *     @dst: Destination SG list
- *     @base: Underlying async request request
+ *     @base: Underlying async request
  *     @__ctx: Start of private context data
  */
 struct skcipher_request {
index 6c3ef49..e73dea5 100644 (file)
@@ -865,6 +865,18 @@ struct drm_mode_config {
         */
        bool prefer_shadow_fbdev;
 
+       /**
+        * @fbdev_use_iomem:
+        *
+        * Set to true if the framebuffer resides in iomem.
+        * When set to true memcpy_toio() is used when copying the framebuffer in
+        * drm_fb_helper.drm_fb_helper_dirty_blit_real().
+        *
+        * FIXME: This should be replaced with a per-mapping is_iomem
+        * flag (like ttm does), and then used everywhere in fbdev code.
+        */
+       bool fbdev_use_iomem;
+
        /**
         * @quirk_addfb_prefer_xbgr_30bpp:
         *
index d661cd0..6d2c474 100644 (file)
@@ -905,6 +905,13 @@ static inline int acpi_dma_configure(struct device *dev,
        return 0;
 }
 
+static inline int acpi_dma_configure_id(struct device *dev,
+                                       enum dev_dma_attr attr,
+                                       const u32 *input_id)
+{
+       return 0;
+}
+
 #define ACPI_PTR(_ptr) (NULL)
 
 static inline void acpi_device_set_enumerated(struct acpi_device *adev)
index 8e7e2ec..20a3212 100644 (file)
@@ -28,27 +28,29 @@ void iort_deregister_domain_token(int trans_id);
 struct fwnode_handle *iort_find_domain_token(int trans_id);
 #ifdef CONFIG_ACPI_IORT
 void acpi_iort_init(void);
-u32 iort_msi_map_rid(struct device *dev, u32 req_id);
-struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
+u32 iort_msi_map_id(struct device *dev, u32 id);
+struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
+                                         enum irq_domain_bus_token bus_token);
 void acpi_configure_pmsi_domain(struct device *dev);
 int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
 void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
-const struct iommu_ops *iort_iommu_configure(struct device *dev);
+const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
+                                               const u32 *id_in);
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
 #else
 static inline void acpi_iort_init(void) { }
-static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
-{ return req_id; }
-static inline struct irq_domain *iort_get_device_domain(struct device *dev,
-                                                       u32 req_id)
+static inline u32 iort_msi_map_id(struct device *dev, u32 id)
+{ return id; }
+static inline struct irq_domain *iort_get_device_domain(
+       struct device *dev, u32 id, enum irq_domain_bus_token bus_token)
 { return NULL; }
 static inline void acpi_configure_pmsi_domain(struct device *dev) { }
 /* IOMMU interface */
 static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
                                  u64 *size) { }
-static inline const struct iommu_ops *iort_iommu_configure(
-                                     struct device *dev)
+static inline const struct iommu_ops *iort_iommu_configure_id(
+                                     struct device *dev, const u32 *id_in)
 { return NULL; }
 static inline
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
index 0566cb3..69b1dab 100644 (file)
@@ -39,8 +39,8 @@ static inline unsigned long topology_get_thermal_pressure(int cpu)
        return per_cpu(thermal_pressure, cpu);
 }
 
-void arch_set_thermal_pressure(struct cpumask *cpus,
-                              unsigned long th_pressure);
+void topology_set_thermal_pressure(const struct cpumask *cpus,
+                                  unsigned long th_pressure);
 
 struct cpu_topology {
        int thread_id;
index 56d6a5c..efcbde7 100644 (file)
                           ARM_SMCCC_SMC_32,                            \
                           0, 0x7fff)
 
+/* Paravirtualised time calls (defined by ARM DEN0057A) */
+#define ARM_SMCCC_HV_PV_TIME_FEATURES                          \
+       ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,                 \
+                          ARM_SMCCC_SMC_64,                    \
+                          ARM_SMCCC_OWNER_STANDARD_HYP,        \
+                          0x20)
+
+#define ARM_SMCCC_HV_PV_TIME_ST                                        \
+       ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,                 \
+                          ARM_SMCCC_SMC_64,                    \
+                          ARM_SMCCC_OWNER_STANDARD_HYP,        \
+                          0x21)
+
+/*
+ * Return codes defined in ARM DEN 0070A
+ * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C
+ */
+#define SMCCC_RET_SUCCESS                      0
+#define SMCCC_RET_NOT_SUPPORTED                        -1
+#define SMCCC_RET_NOT_REQUIRED                 -2
+#define SMCCC_RET_INVALID_PARAMETER            -3
+
 #ifndef __ASSEMBLY__
 
 #include <linux/linkage.h>
@@ -331,15 +353,6 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
  */
 #define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__)
 
-/*
- * Return codes defined in ARM DEN 0070A
- * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C
- */
-#define SMCCC_RET_SUCCESS                      0
-#define SMCCC_RET_NOT_SUPPORTED                        -1
-#define SMCCC_RET_NOT_REQUIRED                 -2
-#define SMCCC_RET_INVALID_PARAMETER            -3
-
 /*
  * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED.
  * Used when the SMCCC conduit is not defined. The empty asm statement
@@ -385,18 +398,5 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
                method;                                                 \
        })
 
-/* Paravirtualised time calls (defined by ARM DEN0057A) */
-#define ARM_SMCCC_HV_PV_TIME_FEATURES                          \
-       ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,                 \
-                          ARM_SMCCC_SMC_64,                    \
-                          ARM_SMCCC_OWNER_STANDARD_HYP,        \
-                          0x20)
-
-#define ARM_SMCCC_HV_PV_TIME_ST                                        \
-       ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,                 \
-                          ARM_SMCCC_SMC_64,                    \
-                          ARM_SMCCC_OWNER_STANDARD_HYP,        \
-                          0x21)
-
 #endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
index 90a7e84..fff9367 100644 (file)
@@ -33,8 +33,6 @@ enum wb_congested_state {
        WB_sync_congested,      /* The sync queue is getting full */
 };
 
-typedef int (congested_fn)(void *, int);
-
 enum wb_stat_item {
        WB_RECLAIMABLE,
        WB_WRITEBACK,
@@ -87,26 +85,6 @@ struct wb_completion {
 #define DEFINE_WB_COMPLETION(cmpl, bdi)        \
        struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
 
-/*
- * For cgroup writeback, multiple wb's may map to the same blkcg.  Those
- * wb's can operate mostly independently but should share the congested
- * state.  To facilitate such sharing, the congested state is tracked using
- * the following struct which is created on demand, indexed by blkcg ID on
- * its bdi, and refcounted.
- */
-struct bdi_writeback_congested {
-       unsigned long state;            /* WB_[a]sync_congested flags */
-       refcount_t refcnt;              /* nr of attached wb's and blkg */
-
-#ifdef CONFIG_CGROUP_WRITEBACK
-       struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
-                                        * on bdi unregistration. For memcg-wb
-                                        * internal use only! */
-       int blkcg_id;                   /* ID of the associated blkcg */
-       struct rb_node rb_node;         /* on bdi->cgwb_congestion_tree */
-#endif
-};
-
 /*
  * Each wb (bdi_writeback) can perform writeback operations, is measured
  * and throttled, independently.  Without cgroup writeback, each bdi
@@ -140,7 +118,7 @@ struct bdi_writeback {
 
        struct percpu_counter stat[NR_WB_STAT_ITEMS];
 
-       struct bdi_writeback_congested *congested;
+       unsigned long congested;        /* WB_[a]sync_congested flags */
 
        unsigned long bw_time_stamp;    /* last time write bw is updated */
        unsigned long dirtied_stamp;
@@ -190,8 +168,6 @@ struct backing_dev_info {
        struct list_head bdi_list;
        unsigned long ra_pages; /* max readahead in PAGE_SIZE units */
        unsigned long io_pages; /* max allowed IO size */
-       congested_fn *congested_fn; /* Function pointer if device is md/dm */
-       void *congested_data;   /* Pointer to aux data for congested func */
 
        struct kref refcnt;     /* Reference counter for the structure */
        unsigned int capabilities; /* Device capabilities */
@@ -208,11 +184,8 @@ struct backing_dev_info {
        struct list_head wb_list; /* list of all wbs */
 #ifdef CONFIG_CGROUP_WRITEBACK
        struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
-       struct rb_root cgwb_congested_tree; /* their congested states */
        struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
        struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
-#else
-       struct bdi_writeback_congested *wb_congested;
 #endif
        wait_queue_head_t wb_waitq;
 
@@ -232,18 +205,8 @@ enum {
        BLK_RW_SYNC     = 1,
 };
 
-void clear_wb_congested(struct bdi_writeback_congested *congested, int sync);
-void set_wb_congested(struct bdi_writeback_congested *congested, int sync);
-
-static inline void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
-{
-       clear_wb_congested(bdi->wb.congested, sync);
-}
-
-static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync)
-{
-       set_wb_congested(bdi->wb.congested, sync);
-}
+void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
+void set_bdi_congested(struct backing_dev_info *bdi, int sync);
 
 struct wb_lock_cookie {
        bool locked;
index 6b3504b..0b06b2d 100644 (file)
@@ -169,11 +169,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
 
 static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
 {
-       struct backing_dev_info *bdi = wb->bdi;
-
-       if (bdi->congested_fn)
-               return bdi->congested_fn(bdi->congested_data, cong_bits);
-       return wb->congested->state & cong_bits;
+       return wb->congested & cong_bits;
 }
 
 long congestion_wait(int sync, long timeout);
@@ -224,9 +220,6 @@ static inline int bdi_sched_wait(void *word)
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 
-struct bdi_writeback_congested *
-wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
-void wb_congested_put(struct bdi_writeback_congested *congested);
 struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css);
 struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
@@ -404,19 +397,6 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
        return false;
 }
 
-static inline struct bdi_writeback_congested *
-wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
-{
-       refcount_inc(&bdi->wb_congested->refcnt);
-       return bdi->wb_congested;
-}
-
-static inline void wb_congested_put(struct bdi_writeback_congested *congested)
-{
-       if (refcount_dec_and_test(&congested->refcnt))
-               kfree(congested);
-}
-
 static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
 {
        return &bdi->wb;
index 91676d4..c6d7653 100644 (file)
@@ -8,8 +8,6 @@
 #include <linux/highmem.h>
 #include <linux/mempool.h>
 #include <linux/ioprio.h>
-
-#ifdef CONFIG_BLOCK
 /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
 #include <linux/blk_types.h>
 
@@ -491,21 +489,12 @@ do {                                              \
 #define bio_dev(bio) \
        disk_devt((bio)->bi_disk)
 
-#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-void bio_associate_blkg_from_page(struct bio *bio, struct page *page);
-#else
-static inline void bio_associate_blkg_from_page(struct bio *bio,
-                                               struct page *page) { }
-#endif
-
 #ifdef CONFIG_BLK_CGROUP
-void bio_disassociate_blkg(struct bio *bio);
 void bio_associate_blkg(struct bio *bio);
 void bio_associate_blkg_from_css(struct bio *bio,
                                 struct cgroup_subsys_state *css);
 void bio_clone_blkg_association(struct bio *dst, struct bio *src);
 #else  /* CONFIG_BLK_CGROUP */
-static inline void bio_disassociate_blkg(struct bio *bio) { }
 static inline void bio_associate_blkg(struct bio *bio) { }
 static inline void bio_associate_blkg_from_css(struct bio *bio,
                                               struct cgroup_subsys_state *css)
@@ -824,5 +813,4 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
                bio->bi_opf |= REQ_NOWAIT;
 }
 
-#endif /* CONFIG_BLOCK */
 #endif /* __LINUX_BIO_H */
index a57ebe2..c8fc979 100644 (file)
@@ -109,12 +109,6 @@ struct blkcg_gq {
        struct hlist_node               blkcg_node;
        struct blkcg                    *blkcg;
 
-       /*
-        * Each blkg gets congested separately and the congestion state is
-        * propagated to the matching bdi_writeback_congested.
-        */
-       struct bdi_writeback_congested  *wb_congested;
-
        /* all non-root blkcg_gq's are guaranteed to have access to parent */
        struct blkcg_gq                 *parent;
 
@@ -183,10 +177,6 @@ extern bool blkcg_debug_stats;
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
                                      struct request_queue *q, bool update_hint);
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-                                     struct request_queue *q);
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-                                   struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
 void blkcg_exit_queue(struct request_queue *q);
 
@@ -480,32 +470,6 @@ static inline bool blkg_tryget(struct blkcg_gq *blkg)
        return blkg && percpu_ref_tryget(&blkg->refcnt);
 }
 
-/**
- * blkg_tryget_closest - try and get a blkg ref on the closet blkg
- * @blkg: blkg to get
- *
- * This needs to be called rcu protected.  As the failure mode here is to walk
- * up the blkg tree, this ensure that the blkg->parent pointers are always
- * valid.  This returns the blkg that it ended up taking a reference on or %NULL
- * if no reference was taken.
- */
-static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
-{
-       struct blkcg_gq *ret_blkg = NULL;
-
-       WARN_ON_ONCE(!rcu_read_lock_held());
-
-       while (blkg) {
-               if (blkg_tryget(blkg)) {
-                       ret_blkg = blkg;
-                       break;
-               }
-               blkg = blkg->parent;
-       }
-
-       return ret_blkg;
-}
-
 /**
  * blkg_put - put a blkg reference
  * @blkg: blkg to put
@@ -547,14 +511,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
                if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),    \
                                              (p_blkg)->q, false)))
 
-#ifdef CONFIG_BLK_DEV_THROTTLING
-extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
-                          struct bio *bio);
-#else
-static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
-                                 struct bio *bio) { return false; }
-#endif
-
 bool __blkcg_punt_bio_submit(struct bio *bio);
 
 static inline bool blkcg_punt_bio_submit(struct bio *bio)
@@ -570,65 +526,6 @@ static inline void blkcg_bio_issue_init(struct bio *bio)
        bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 }
 
-static inline bool blkcg_bio_issue_check(struct request_queue *q,
-                                        struct bio *bio)
-{
-       struct blkcg_gq *blkg;
-       bool throtl = false;
-
-       rcu_read_lock();
-
-       if (!bio->bi_blkg) {
-               char b[BDEVNAME_SIZE];
-
-               WARN_ONCE(1,
-                         "no blkg associated for bio on block-device: %s\n",
-                         bio_devname(bio, b));
-               bio_associate_blkg(bio);
-       }
-
-       blkg = bio->bi_blkg;
-
-       throtl = blk_throtl_bio(q, blkg, bio);
-
-       if (!throtl) {
-               struct blkg_iostat_set *bis;
-               int rwd, cpu;
-
-               if (op_is_discard(bio->bi_opf))
-                       rwd = BLKG_IOSTAT_DISCARD;
-               else if (op_is_write(bio->bi_opf))
-                       rwd = BLKG_IOSTAT_WRITE;
-               else
-                       rwd = BLKG_IOSTAT_READ;
-
-               cpu = get_cpu();
-               bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
-               u64_stats_update_begin(&bis->sync);
-
-               /*
-                * If the bio is flagged with BIO_CGROUP_ACCT it means this is a
-                * split bio and we would have already accounted for the size of
-                * the bio.
-                */
-               if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
-                       bio_set_flag(bio, BIO_CGROUP_ACCT);
-                       bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
-               }
-               bis->cur.ios[rwd]++;
-
-               u64_stats_update_end(&bis->sync);
-               if (cgroup_subsys_on_dfl(io_cgrp_subsys))
-                       cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
-               put_cpu();
-       }
-
-       blkcg_bio_issue_init(bio);
-
-       rcu_read_unlock();
-       return !throtl;
-}
-
 static inline void blkcg_use_delay(struct blkcg_gq *blkg)
 {
        if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
@@ -702,6 +599,7 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
                atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
 }
 
+void blk_cgroup_bio_start(struct bio *bio);
 void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
 void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
 void blkcg_maybe_throttle_current(void);
@@ -755,8 +653,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { }
 
 static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
 static inline void blkcg_bio_issue_init(struct bio *bio) { }
-static inline bool blkcg_bio_issue_check(struct request_queue *q,
-                                        struct bio *bio) { return true; }
+static inline void blk_cgroup_bio_start(struct bio *bio) { }
 
 #define blk_queue_for_each_rl(rl, q)   \
        for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
index d6fcae1..9d2d5ad 100644 (file)
@@ -267,27 +267,9 @@ struct blk_mq_queue_data {
        bool last;
 };
 
-typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
-               const struct blk_mq_queue_data *);
-typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *);
-typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
-typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
-typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
-typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
-typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
-typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *,
-               unsigned int, unsigned int);
-typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *,
-               unsigned int);
-
 typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
                bool);
 typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
-typedef int (poll_fn)(struct blk_mq_hw_ctx *);
-typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
-typedef bool (busy_fn)(struct request_queue *);
-typedef void (complete_fn)(struct request *);
-typedef void (cleanup_rq_fn)(struct request *);
 
 /**
  * struct blk_mq_ops - Callback functions that implements block driver
@@ -297,7 +279,8 @@ struct blk_mq_ops {
        /**
         * @queue_rq: Queue a new request from block IO.
         */
-       queue_rq_fn             *queue_rq;
+       blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *,
+                                const struct blk_mq_queue_data *);
 
        /**
         * @commit_rqs: If a driver uses bd->last to judge when to submit
@@ -306,7 +289,7 @@ struct blk_mq_ops {
         * purpose of kicking the hardware (which the last request otherwise
         * would have done).
         */
-       commit_rqs_fn           *commit_rqs;
+       void (*commit_rqs)(struct blk_mq_hw_ctx *);
 
        /**
         * @get_budget: Reserve budget before queue request, once .queue_rq is
@@ -314,37 +297,38 @@ struct blk_mq_ops {
         * reserved budget. Also we have to handle failure case
         * of .get_budget for avoiding I/O deadlock.
         */
-       get_budget_fn           *get_budget;
+       bool (*get_budget)(struct request_queue *);
+
        /**
         * @put_budget: Release the reserved budget.
         */
-       put_budget_fn           *put_budget;
+       void (*put_budget)(struct request_queue *);
 
        /**
         * @timeout: Called on request timeout.
         */
-       timeout_fn              *timeout;
+       enum blk_eh_timer_return (*timeout)(struct request *, bool);
 
        /**
         * @poll: Called to poll for completion of a specific tag.
         */
-       poll_fn                 *poll;
+       int (*poll)(struct blk_mq_hw_ctx *);
 
        /**
         * @complete: Mark the request as complete.
         */
-       complete_fn             *complete;
+       void (*complete)(struct request *);
 
        /**
         * @init_hctx: Called when the block layer side of a hardware queue has
         * been set up, allowing the driver to allocate/init matching
         * structures.
         */
-       init_hctx_fn            *init_hctx;
+       int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int);
        /**
         * @exit_hctx: Ditto for exit/teardown.
         */
-       exit_hctx_fn            *exit_hctx;
+       void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
 
        /**
         * @init_request: Called for every command allocated by the block layer
@@ -353,11 +337,13 @@ struct blk_mq_ops {
         * Tag greater than or equal to queue_depth is for setting up
         * flush request.
         */
-       init_request_fn         *init_request;
+       int (*init_request)(struct blk_mq_tag_set *set, struct request *,
+                           unsigned int, unsigned int);
        /**
         * @exit_request: Ditto for exit/teardown.
         */
-       exit_request_fn         *exit_request;
+       void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
+                            unsigned int);
 
        /**
         * @initialize_rq_fn: Called from inside blk_get_request().
@@ -368,18 +354,18 @@ struct blk_mq_ops {
         * @cleanup_rq: Called before freeing one request which isn't completed
         * yet, and usually for freeing the driver private data.
         */
-       cleanup_rq_fn           *cleanup_rq;
+       void (*cleanup_rq)(struct request *);
 
        /**
         * @busy: If set, returns whether or not this queue currently is busy.
         */
-       busy_fn                 *busy;
+       bool (*busy)(struct request_queue *);
 
        /**
         * @map_queues: This allows drivers specify their own queue mapping by
         * overriding the setup-time function that builds the mq_map.
         */
-       map_queues_fn           *map_queues;
+       int (*map_queues)(struct blk_mq_tag_set *set);
 
 #ifdef CONFIG_BLK_DEBUG_FS
        /**
@@ -447,8 +433,6 @@ enum {
        BLK_MQ_REQ_NOWAIT       = (__force blk_mq_req_flags_t)(1 << 0),
        /* allocate from reserved pool */
        BLK_MQ_REQ_RESERVED     = (__force blk_mq_req_flags_t)(1 << 1),
-       /* allocate internal/sched tag */
-       BLK_MQ_REQ_INTERNAL     = (__force blk_mq_req_flags_t)(1 << 2),
        /* set RQF_PREEMPT */
        BLK_MQ_REQ_PREEMPT      = (__force blk_mq_req_flags_t)(1 << 3),
 };
@@ -503,8 +487,8 @@ void __blk_mq_end_request(struct request *rq, blk_status_t error);
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-bool blk_mq_complete_request(struct request *rq);
-void blk_mq_force_complete_rq(struct request *rq);
+void blk_mq_complete_request(struct request *rq);
+bool blk_mq_complete_request_remote(struct request *rq);
 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
                           struct bio *bio, unsigned int nr_segs);
 bool blk_mq_queue_stopped(struct request_queue *q);
@@ -537,6 +521,15 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q);
 
 unsigned int blk_mq_rq_cpu(struct request *rq);
 
+bool __blk_should_fake_timeout(struct request_queue *q);
+static inline bool blk_should_fake_timeout(struct request_queue *q)
+{
+       if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) &&
+           test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
+               return __blk_should_fake_timeout(q);
+       return false;
+}
+
 /**
  * blk_mq_rq_from_pdu - cast a PDU to a request
  * @pdu: the PDU (Protocol Data Unit) to be casted
@@ -589,6 +582,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq)
                rq->q->mq_ops->cleanup_rq(rq);
 }
 
-blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio);
+blk_qc_t blk_mq_submit_bio(struct bio *bio);
 
 #endif
index ccb895f..4ecf4fe 100644 (file)
@@ -14,12 +14,39 @@ struct bio_set;
 struct bio;
 struct bio_integrity_payload;
 struct page;
-struct block_device;
 struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 struct bio_crypt_ctx;
 
+struct block_device {
+       dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
+       int                     bd_openers;
+       struct inode *          bd_inode;       /* will die */
+       struct super_block *    bd_super;
+       struct mutex            bd_mutex;       /* open/close mutex */
+       void *                  bd_claiming;
+       void *                  bd_holder;
+       int                     bd_holders;
+       bool                    bd_write_holder;
+#ifdef CONFIG_SYSFS
+       struct list_head        bd_holder_disks;
+#endif
+       struct block_device *   bd_contains;
+       u8                      bd_partno;
+       struct hd_struct *      bd_part;
+       /* number of times partitions within this device have been opened. */
+       unsigned                bd_part_count;
+       int                     bd_invalidated;
+       struct gendisk *        bd_disk;
+       struct backing_dev_info *bd_bdi;
+
+       /* The counter of freeze processes */
+       int                     bd_fsfreeze_count;
+       /* Mutex for freeze */
+       struct mutex            bd_fsfreeze_mutex;
+} __randomize_layout;
+
 /*
  * Block error status values.  See block/blk-core:blk_errors for the details.
  * Alpha cannot write a byte atomically, so we need to use 32-bit value.
@@ -300,12 +327,8 @@ enum req_opf {
        REQ_OP_DISCARD          = 3,
        /* securely erase sectors */
        REQ_OP_SECURE_ERASE     = 5,
-       /* reset a zone write pointer */
-       REQ_OP_ZONE_RESET       = 6,
        /* write the same sector many times */
        REQ_OP_WRITE_SAME       = 7,
-       /* reset all the zone present on the device */
-       REQ_OP_ZONE_RESET_ALL   = 8,
        /* write the zero filled sector many times */
        REQ_OP_WRITE_ZEROES     = 9,
        /* Open a zone */
@@ -316,6 +339,10 @@ enum req_opf {
        REQ_OP_ZONE_FINISH      = 12,
        /* write data at the current zone write pointer */
        REQ_OP_ZONE_APPEND      = 13,
+       /* reset a zone write pointer */
+       REQ_OP_ZONE_RESET       = 15,
+       /* reset all the zone present on the device */
+       REQ_OP_ZONE_RESET_ALL   = 17,
 
        /* SCSI passthrough using struct scsi_request */
        REQ_OP_SCSI_IN          = 32,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5724141..06ecb2c 100644 (file)
@@ -4,9 +4,6 @@
 
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
-
-#ifdef CONFIG_BLOCK
-
 #include <linux/major.h>
 #include <linux/genhd.h>
 #include <linux/list.h>
@@ -289,8 +286,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
 
 struct blk_queue_ctx;
 
-typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
-
 struct bio_vec;
 
 enum blk_eh_timer_return {
@@ -401,8 +396,6 @@ struct request_queue {
        struct blk_queue_stats  *stats;
        struct rq_qos           *rq_qos;
 
-       make_request_fn         *make_request_fn;
-
        const struct blk_mq_ops *mq_ops;
 
        /* sw queues */
@@ -528,9 +521,9 @@ struct request_queue {
        unsigned int            sg_timeout;
        unsigned int            sg_reserved_size;
        int                     node;
+       struct mutex            debugfs_mutex;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
        struct blk_trace __rcu  *blk_trace;
-       struct mutex            blk_trace_mutex;
 #endif
        /*
         * for flush operations
@@ -574,8 +567,9 @@ struct request_queue {
        struct list_head        tag_set_list;
        struct bio_set          bio_split;
 
-#ifdef CONFIG_BLK_DEBUG_FS
        struct dentry           *debugfs_dir;
+
+#ifdef CONFIG_BLK_DEBUG_FS
        struct dentry           *sched_debugfs_dir;
        struct dentry           *rqos_debugfs_dir;
 #endif
@@ -584,8 +578,6 @@ struct request_queue {
 
        size_t                  cmd_size;
 
-       struct work_struct      release_work;
-
 #define BLK_MAX_WRITE_HINTS    5
        u64                     write_hints[BLK_MAX_WRITE_HINTS];
 };
@@ -861,8 +853,7 @@ static inline void rq_flush_dcache_pages(struct request *rq)
 
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
-extern blk_qc_t generic_make_request(struct bio *bio);
-extern blk_qc_t direct_make_request(struct bio *bio);
+blk_qc_t submit_bio_noacct(struct bio *bio);
 extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern struct request *blk_get_request(struct request_queue *, unsigned int op,
@@ -876,7 +867,7 @@ extern void blk_rq_unprep_clone(struct request *rq);
 extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
 extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
-extern void blk_queue_split(struct request_queue *, struct bio **);
+extern void blk_queue_split(struct bio **);
 extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
                              unsigned int, void __user *);
@@ -1079,7 +1070,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq);
 extern bool blk_update_request(struct request *rq, blk_status_t error,
                               unsigned int nr_bytes);
 
-extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 
 /*
@@ -1166,13 +1156,13 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
        return __blk_rq_map_sg(q, rq, sglist, &last_sg);
 }
 extern void blk_dump_rq_flags(struct request *, char *);
-extern long nr_blockdev_pages(void);
 
 bool __must_check blk_get_queue(struct request_queue *);
-struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id);
+struct request_queue *blk_alloc_queue(int node_id);
 extern void blk_put_queue(struct request_queue *);
 extern void blk_set_queue_dying(struct request_queue *);
 
+#ifdef CONFIG_BLOCK
 /*
  * blk_plug permits building a queue of related requests by holding the I/O
  * fragments for a short period. This allows merging of sequential requests
@@ -1190,6 +1180,7 @@ struct blk_plug {
        struct list_head cb_list; /* md requires an unplug callback */
        unsigned short rq_count;
        bool multiple_queues;
+       bool nowait;
 };
 #define BLK_MAX_REQUEST_COUNT 16
 #define BLK_PLUG_FLUSH_SIZE (128 * 1024)
@@ -1232,9 +1223,47 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
                 !list_empty(&plug->cb_list));
 }
 
+int blkdev_issue_flush(struct block_device *, gfp_t);
+long nr_blockdev_pages(void);
+#else /* CONFIG_BLOCK */
+struct blk_plug {
+};
+
+static inline void blk_start_plug(struct blk_plug *plug)
+{
+}
+
+static inline void blk_finish_plug(struct blk_plug *plug)
+{
+}
+
+static inline void blk_flush_plug(struct task_struct *task)
+{
+}
+
+static inline void blk_schedule_flush_plug(struct task_struct *task)
+{
+}
+
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+       return false;
+}
+
+static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
+{
+       return 0;
+}
+
+static inline long nr_blockdev_pages(void)
+{
+       return 0;
+}
+#endif /* CONFIG_BLOCK */
+
 extern void blk_io_schedule(void);
 
-int blkdev_issue_flush(struct block_device *, gfp_t);
 extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, struct page *page);
 
@@ -1516,7 +1545,7 @@ static inline unsigned int blksize_bits(unsigned int size)
 
 static inline unsigned int block_size(struct block_device *bdev)
 {
-       return bdev->bd_block_size;
+       return 1 << bdev->bd_inode->i_blkbits;
 }
 
 int kblockd_schedule_work(struct work_struct *work);
@@ -1746,6 +1775,7 @@ static inline void blk_ksm_unregister(struct request_queue *q) { }
 
 
 struct block_device_operations {
+       blk_qc_t (*submit_bio) (struct bio *bio);
        int (*open) (struct block_device *, fmode_t);
        void (*release) (struct gendisk *, fmode_t);
        int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
@@ -1753,8 +1783,6 @@ struct block_device_operations {
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        unsigned int (*check_events) (struct gendisk *disk,
                                      unsigned int clearing);
-       /* ->media_changed() is DEPRECATED, use ->check_events() instead */
-       int (*media_changed) (struct gendisk *);
        void (*unlock_native_capacity) (struct gendisk *);
        int (*revalidate_disk) (struct gendisk *);
        int (*getgeo)(struct block_device *, struct hd_geometry *);
@@ -1834,52 +1862,6 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
 }
 #endif /* CONFIG_BLK_DEV_ZONED */
 
-#else /* CONFIG_BLOCK */
-
-struct block_device;
-
-/*
- * stubs for when the block layer is configured out
- */
-#define buffer_heads_over_limit 0
-
-static inline long nr_blockdev_pages(void)
-{
-       return 0;
-}
-
-struct blk_plug {
-};
-
-static inline void blk_start_plug(struct blk_plug *plug)
-{
-}
-
-static inline void blk_finish_plug(struct blk_plug *plug)
-{
-}
-
-static inline void blk_flush_plug(struct task_struct *task)
-{
-}
-
-static inline void blk_schedule_flush_plug(struct task_struct *task)
-{
-}
-
-
-static inline bool blk_needs_flush_plug(struct task_struct *tsk)
-{
-       return false;
-}
-
-static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
-{
-       return 0;
-}
-
-#endif /* CONFIG_BLOCK */
-
 static inline void blk_wake_io_task(struct task_struct *waiter)
 {
        /*
@@ -1893,7 +1875,6 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
                wake_up_process(waiter);
 }
 
-#ifdef CONFIG_BLOCK
 unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
                unsigned int op);
 void disk_end_io_acct(struct gendisk *disk, unsigned int op,
@@ -1919,6 +1900,53 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
 {
        return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
 }
-#endif /* CONFIG_BLOCK */
 
+int bdev_read_only(struct block_device *bdev);
+int set_blocksize(struct block_device *bdev, int size);
+
+const char *bdevname(struct block_device *bdev, char *buffer);
+struct block_device *lookup_bdev(const char *);
+
+void blkdev_show(struct seq_file *seqf, off_t offset);
+
+#define BDEVNAME_SIZE  32      /* Largest string for a blockdev identifier */
+#define BDEVT_SIZE     10      /* Largest string for MAJ:MIN for blkdev */
+#ifdef CONFIG_BLOCK
+#define BLKDEV_MAJOR_MAX       512
+#else
+#define BLKDEV_MAJOR_MAX       0
+#endif
+
+int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+               void *holder);
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder);
+int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
+               void *holder);
+void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
+               void *holder);
+void blkdev_put(struct block_device *bdev, fmode_t mode);
+
+struct block_device *I_BDEV(struct inode *inode);
+struct block_device *bdget(dev_t);
+struct block_device *bdgrab(struct block_device *bdev);
+void bdput(struct block_device *);
+
+#ifdef CONFIG_BLOCK
+void invalidate_bdev(struct block_device *bdev);
+int sync_blockdev(struct block_device *bdev);
+#else
+static inline void invalidate_bdev(struct block_device *bdev)
+{
+}
+static inline int sync_blockdev(struct block_device *bdev)
+{
+       return 0;
+}
 #endif
+int fsync_bdev(struct block_device *bdev);
+
+struct super_block *freeze_bdev(struct block_device *bdev);
+int thaw_bdev(struct block_device *bdev, struct super_block *sb);
+
+#endif /* _LINUX_BLKDEV_H */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 22fb11e..6b47f94 100644 (file)
@@ -406,6 +406,7 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; }
 static inline void invalidate_inode_buffers(struct inode *inode) {}
 static inline int remove_inode_buffers(struct inode *inode) { return 1; }
 static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+#define buffer_heads_over_limit 0
 
 #endif /* CONFIG_BLOCK */
 #endif /* _LINUX_BUFFER_HEAD_H */
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 8543fa5..f48d0a3 100644 (file)
@@ -73,7 +73,6 @@ struct cdrom_device_ops {
        int (*drive_status) (struct cdrom_device_info *, int);
        unsigned int (*check_events) (struct cdrom_device_info *cdi,
                                      unsigned int clearing, int slot);
-       int (*media_changed) (struct cdrom_device_info *, int);
        int (*tray_move) (struct cdrom_device_info *, int);
        int (*lock_door) (struct cdrom_device_info *, int);
        int (*select_speed) (struct cdrom_device_info *, int);
@@ -107,7 +106,6 @@ extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
                       fmode_t mode, unsigned int cmd, unsigned long arg);
 extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
                                       unsigned int clearing);
-extern int cdrom_media_changed(struct cdrom_device_info *);
 
 extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi);
 extern void unregister_cdrom(struct cdrom_device_info *cdi);
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 204e768..6810d80 100644 (file)
@@ -120,65 +120,12 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 /* Annotate a C jump table to allow objtool to follow the code flow */
 #define __annotate_jump_table __section(.rodata..c_jump_table)
 
-#ifdef CONFIG_DEBUG_ENTRY
-/* Begin/end of an instrumentation safe region */
-#define instrumentation_begin() ({                                     \
-       asm volatile("%c0: nop\n\t"                                             \
-                    ".pushsection .discard.instr_begin\n\t"            \
-                    ".long %c0b - .\n\t"                               \
-                    ".popsection\n\t" : : "i" (__COUNTER__));          \
-})
-
-/*
- * Because instrumentation_{begin,end}() can nest, objtool validation considers
- * _begin() a +1 and _end() a -1 and computes a sum over the instructions.
- * When the value is greater than 0, we consider instrumentation allowed.
- *
- * There is a problem with code like:
- *
- * noinstr void foo()
- * {
- *     instrumentation_begin();
- *     ...
- *     if (cond) {
- *             instrumentation_begin();
- *             ...
- *             instrumentation_end();
- *     }
- *     bar();
- *     instrumentation_end();
- * }
- *
- * If instrumentation_end() would be an empty label, like all the other
- * annotations, the inner _end(), which is at the end of a conditional block,
- * would land on the instruction after the block.
- *
- * If we then consider the sum of the !cond path, we'll see that the call to
- * bar() is with a 0-value, even though, we meant it to happen with a positive
- * value.
- *
- * To avoid this, have _end() be a NOP instruction, this ensures it will be
- * part of the condition block and does not escape.
- */
-#define instrumentation_end() ({                                       \
-       asm volatile("%c0: nop\n\t"                                     \
-                    ".pushsection .discard.instr_end\n\t"              \
-                    ".long %c0b - .\n\t"                               \
-                    ".popsection\n\t" : : "i" (__COUNTER__));          \
-})
-#endif /* CONFIG_DEBUG_ENTRY */
-
 #else
 #define annotate_reachable()
 #define annotate_unreachable()
 #define __annotate_jump_table
 #endif
 
-#ifndef instrumentation_begin
-#define instrumentation_begin()                do { } while(0)
-#define instrumentation_end()          do { } while(0)
-#endif
-
 #ifndef ASM_UNREACHABLE
 # define ASM_UNREACHABLE
 #endif
@@ -230,28 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
 #endif
 
-/*
- * Prevent the compiler from merging or refetching reads or writes. The
- * compiler is also forbidden from reordering successive instances of
- * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
- * particular ordering. One way to make the compiler aware of ordering is to
- * put the two invocations of READ_ONCE or WRITE_ONCE in different C
- * statements.
- *
- * These two macros will also work on aggregate data types like structs or
- * unions.
- *
- * Their two major use cases are: (1) Mediating communication between
- * process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
- * mutilate accesses that either do not require ordering or that interact
- * with an explicit memory barrier or atomic instruction that provides the
- * required ordering.
- */
-#include <asm/barrier.h>
-#include <linux/kasan-checks.h>
-#include <linux/kcsan-checks.h>
-
 /**
  * data_race - mark an expression as containing intentional data races
  *
@@ -272,65 +197,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
        __v;                                                            \
 })
 
-/*
- * Use __READ_ONCE() instead of READ_ONCE() if you do not require any
- * atomicity or dependency ordering guarantees. Note that this may result
- * in tears!
- */
-#define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x))
-
-#define __READ_ONCE_SCALAR(x)                                          \
-({                                                                     \
-       __unqual_scalar_typeof(x) __x = __READ_ONCE(x);                 \
-       smp_read_barrier_depends();                                     \
-       (typeof(x))__x;                                                 \
-})
-
-#define READ_ONCE(x)                                                   \
-({                                                                     \
-       compiletime_assert_rwonce_type(x);                              \
-       __READ_ONCE_SCALAR(x);                                          \
-})
-
-#define __WRITE_ONCE(x, val)                                           \
-do {                                                                   \
-       *(volatile typeof(x) *)&(x) = (val);                            \
-} while (0)
-
-#define WRITE_ONCE(x, val)                                             \
-do {                                                                   \
-       compiletime_assert_rwonce_type(x);                              \
-       __WRITE_ONCE(x, val);                                           \
-} while (0)
-
-static __no_sanitize_or_inline
-unsigned long __read_once_word_nocheck(const void *addr)
-{
-       return __READ_ONCE(*(unsigned long *)addr);
-}
-
-/*
- * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a
- * word from memory atomically but without telling KASAN/KCSAN. This is
- * usually used by unwinding code when walking the stack of a running process.
- */
-#define READ_ONCE_NOCHECK(x)                                           \
-({                                                                     \
-       unsigned long __x;                                              \
-       compiletime_assert(sizeof(x) == sizeof(__x),                    \
-               "Unsupported access size for READ_ONCE_NOCHECK().");    \
-       __x = __read_once_word_nocheck(&(x));                           \
-       smp_read_barrier_depends();                                     \
-       (typeof(x))__x;                                                 \
-})
-
-static __no_kasan_or_inline
-unsigned long read_word_at_a_time(const void *addr)
-{
-       kasan_check_read(addr, 1);
-       return *(unsigned long *)addr;
-}
-
 #endif /* __KERNEL__ */
 
 /*
@@ -354,57 +220,6 @@ static inline void *offset_to_ptr(const int *off)
 
 #endif /* __ASSEMBLY__ */
 
-/* Compile time object size, -1 for unknown */
-#ifndef __compiletime_object_size
-# define __compiletime_object_size(obj) -1
-#endif
-#ifndef __compiletime_warning
-# define __compiletime_warning(message)
-#endif
-#ifndef __compiletime_error
-# define __compiletime_error(message)
-#endif
-
-#ifdef __OPTIMIZE__
-# define __compiletime_assert(condition, msg, prefix, suffix)          \
-       do {                                                            \
-               extern void prefix ## suffix(void) __compiletime_error(msg); \
-               if (!(condition))                                       \
-                       prefix ## suffix();                             \
-       } while (0)
-#else
-# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
-#endif
-
-#define _compiletime_assert(condition, msg, prefix, suffix) \
-       __compiletime_assert(condition, msg, prefix, suffix)
-
-/**
- * compiletime_assert - break build and emit msg if condition is false
- * @condition: a compile-time constant condition to check
- * @msg:       a message to emit if condition is false
- *
- * In tradition of POSIX assert, this macro will break the build if the
- * supplied condition is *false*, emitting the supplied error message if the
- * compiler has support to do so.
- */
-#define compiletime_assert(condition, msg) \
-       _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
-
-#define compiletime_assert_atomic_type(t)                              \
-       compiletime_assert(__native_word(t),                            \
-               "Need native word sized stores/loads for atomicity.")
-
-/*
- * Yes, this permits 64-bit accesses on 32-bit architectures. These will
- * actually be atomic in some cases (namely Armv7 + LPAE), but for others we
- * rely on the access being split into 2x32-bit accesses for a 32-bit quantity
- * (e.g. a virtual address) and a strong prevailing wind.
- */
-#define compiletime_assert_rwonce_type(t)                                      \
-       compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),  \
-               "Unsupported access size for {READ,WRITE}_ONCE().")
-
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a)     BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 
@@ -414,4 +229,6 @@ static inline void *offset_to_ptr(const int *off)
  */
 #define prevent_tail_call_optimization()       mb()
 
+#include <asm/rwonce.h>
+
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 01dd58c..2e231ba 100644 (file)
@@ -275,6 +275,47 @@ struct ftrace_likely_data {
        (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
         sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 
+/* Compile time object size, -1 for unknown */
+#ifndef __compiletime_object_size
+# define __compiletime_object_size(obj) -1
+#endif
+#ifndef __compiletime_warning
+# define __compiletime_warning(message)
+#endif
+#ifndef __compiletime_error
+# define __compiletime_error(message)
+#endif
+
+#ifdef __OPTIMIZE__
+# define __compiletime_assert(condition, msg, prefix, suffix)          \
+       do {                                                            \
+               extern void prefix ## suffix(void) __compiletime_error(msg); \
+               if (!(condition))                                       \
+                       prefix ## suffix();                             \
+       } while (0)
+#else
+# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
+#endif
+
+#define _compiletime_assert(condition, msg, prefix, suffix) \
+       __compiletime_assert(condition, msg, prefix, suffix)
+
+/**
+ * compiletime_assert - break build and emit msg if condition is false
+ * @condition: a compile-time constant condition to check
+ * @msg:       a message to emit if condition is false
+ *
+ * In tradition of POSIX assert, this macro will break the build if the
+ * supplied condition is *false*, emitting the supplied error message if the
+ * compiler has support to do so.
+ */
+#define compiletime_assert(condition, msg) \
+       _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
+
+#define compiletime_assert_atomic_type(t)                              \
+       compiletime_assert(__native_word(t),                            \
+               "Need native word sized stores/loads for atomicity.")
+
 /* Helpers for emitting diagnostics in pragmas. */
 #ifndef __diag
 #define __diag(string)
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 981b880..d53cd33 100644 (file)
@@ -5,6 +5,8 @@
 #include <linux/sched.h>
 #include <linux/vtime.h>
 #include <linux/context_tracking_state.h>
+#include <linux/instrumentation.h>
+
 #include <asm/ptrace.h>
 
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 5269258..8aa84c0 100644 (file)
@@ -64,6 +64,7 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
                                        char *buf);
 extern ssize_t cpu_show_itlb_multihit(struct device *dev,
                                      struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 763863d..ef90e07 100644 (file)
@@ -16,9 +16,8 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/bug.h>
+#include <linux/refcount.h>
 #include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/uaccess.h>
 #include <linux/completion.h>
 
 /*
@@ -61,8 +60,8 @@
 #define CRYPTO_ALG_ASYNC               0x00000080
 
 /*
- * Set this bit if and only if the algorithm requires another algorithm of
- * the same type to handle corner cases.
+ * Set if the algorithm (or an algorithm which it uses) requires another
+ * algorithm of the same type to handle corner cases.
  */
 #define CRYPTO_ALG_NEED_FALLBACK       0x00000100
 
@@ -101,6 +100,38 @@
  */
 #define CRYPTO_NOLOAD                  0x00008000
 
+/*
+ * The algorithm may allocate memory during request processing, i.e. during
+ * encryption, decryption, or hashing.  Users can request an algorithm with this
+ * flag unset if they can't handle memory allocation failures.
+ *
+ * This flag is currently only implemented for algorithms of type "skcipher",
+ * "aead", "ahash", "shash", and "cipher".  Algorithms of other types might not
+ * have this flag set even if they allocate memory.
+ *
+ * In some edge cases, algorithms can allocate memory regardless of this flag.
+ * To avoid these cases, users must obey the following usage constraints:
+ *    skcipher:
+ *     - The IV buffer and all scatterlist elements must be aligned to the
+ *       algorithm's alignmask.
+ *     - If the data were to be divided into chunks of size
+ *       crypto_skcipher_walksize() (with any remainder going at the end), no
+ *       chunk can cross a page boundary or a scatterlist element boundary.
+ *    aead:
+ *     - The IV buffer and all scatterlist elements must be aligned to the
+ *       algorithm's alignmask.
+ *     - The first scatterlist element must contain all the associated data,
+ *       and its pages must be !PageHighMem.
+ *     - If the plaintext/ciphertext were to be divided into chunks of size
+ *       crypto_aead_walksize() (with the remainder going at the end), no chunk
+ *       can cross a page boundary or a scatterlist element boundary.
+ *    ahash:
+ *     - The result buffer must be aligned to the algorithm's alignmask.
+ *     - crypto_ahash_finup() must not be used unless the algorithm implements
+ *       ->finup() natively.
+ */
+#define CRYPTO_ALG_ALLOCATES_MEMORY    0x00010000
+
 /*
  * Transform masks and values (for crt_flags).
  */
@@ -595,6 +626,8 @@ int crypto_has_alg(const char *name, u32 type, u32 mask);
 struct crypto_tfm {
 
        u32 crt_flags;
+
+       int node;
        
        void (*exit)(struct crypto_tfm *tfm);
        
diff --git a/include/linux/dasd_mod.h b/include/linux/dasd_mod.h
index d39abad..14e6cf8 100644 (file)
@@ -4,6 +4,8 @@
 
 #include <asm/dasd.h>
 
+struct gendisk;
+
 extern int dasd_biodasdinfo(struct gendisk *disk, dasd_information2_t *info);
 
 #endif
diff --git a/include/linux/decompress/unzstd.h b/include/linux/decompress/unzstd.h
new file mode 100644 (file)
index 0000000..56d539a
--- /dev/null
+++ b/include/linux/decompress/unzstd.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_DECOMPRESS_UNZSTD_H
+#define LINUX_DECOMPRESS_UNZSTD_H
+
+int unzstd(unsigned char *inbuf, long len,
+          long (*fill)(void*, unsigned long),
+          long (*flush)(void*, unsigned long),
+          unsigned char *output,
+          long *pos,
+          void (*error_fn)(char *x));
+#endif
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 73dec4b..93096e5 100644 (file)
@@ -322,12 +322,6 @@ struct dm_target {
        bool discards_supported:1;
 };
 
-/* Each target can link one of these into the table */
-struct dm_target_callbacks {
-       struct list_head list;
-       int (*congested_fn) (struct dm_target_callbacks *, int);
-};
-
 void *dm_per_bio_data(struct bio *bio, size_t data_size);
 struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size);
 unsigned dm_bio_get_target_bio_nr(const struct bio *bio);
@@ -478,11 +472,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 int dm_table_add_target(struct dm_table *t, const char *type,
                        sector_t start, sector_t len, char *params);
 
-/*
- * Target_ctr should call this if it needs to add any callbacks.
- */
-void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb);
-
 /*
  * Target can use this to set the table's type.
  * Can only ever be called from a target's ctr.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5abba8..bd7ec3e 100644 (file)
@@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File does not contribute to nr_files count */
 #define FMODE_NOACCOUNT                ((__force fmode_t)0x20000000)
 
+/* File supports async buffered reads */
+#define FMODE_BUF_RASYNC       ((__force fmode_t)0x40000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
@@ -315,6 +318,8 @@ enum rw_hint {
 #define IOCB_SYNC              (1 << 5)
 #define IOCB_WRITE             (1 << 6)
 #define IOCB_NOWAIT            (1 << 7)
+/* iocb->ki_waitq is valid */
+#define IOCB_WAITQ             (1 << 8)
 #define IOCB_NOIO              (1 << 9)
 
 struct kiocb {
@@ -329,7 +334,10 @@ struct kiocb {
        int                     ki_flags;
        u16                     ki_hint;
        u16                     ki_ioprio; /* See linux/ioprio.h */
-       unsigned int            ki_cookie; /* for ->iopoll */
+       union {
+               unsigned int            ki_cookie; /* for ->iopoll */
+               struct wait_page_queue  *ki_waitq; /* for async buffered IO */
+       };
 
        randomized_struct_fields_end
 };
@@ -471,45 +479,6 @@ struct address_space {
         * must be enforced here for CRIS, to let the least significant bit
         * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
         */
-struct request_queue;
-
-struct block_device {
-       dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
-       int                     bd_openers;
-       struct inode *          bd_inode;       /* will die */
-       struct super_block *    bd_super;
-       struct mutex            bd_mutex;       /* open/close mutex */
-       void *                  bd_claiming;
-       void *                  bd_holder;
-       int                     bd_holders;
-       bool                    bd_write_holder;
-#ifdef CONFIG_SYSFS
-       struct list_head        bd_holder_disks;
-#endif
-       struct block_device *   bd_contains;
-       unsigned                bd_block_size;
-       u8                      bd_partno;
-       struct hd_struct *      bd_part;
-       /* number of times partitions within this device have been opened. */
-       unsigned                bd_part_count;
-       int                     bd_invalidated;
-       struct gendisk *        bd_disk;
-       struct request_queue *  bd_queue;
-       struct backing_dev_info *bd_bdi;
-       struct list_head        bd_list;
-       /*
-        * Private data.  You must have bd_claim'ed the block_device
-        * to use this.  NOTE:  bd_claim allows an owner to claim
-        * the same device multiple times, the owner must take special
-        * care to not mess up bd_private for that case.
-        */
-       unsigned long           bd_private;
-
-       /* The counter of freeze processes */
-       int                     bd_fsfreeze_count;
-       /* Mutex for freeze */
-       struct mutex            bd_fsfreeze_mutex;
-} __randomize_layout;
 
 /* XArray tags, for tagging dirty and writeback pages in the pagecache. */
 #define PAGECACHE_TAG_DIRTY    XA_MARK_0
@@ -908,8 +877,6 @@ static inline unsigned imajor(const struct inode *inode)
        return MAJOR(inode->i_rdev);
 }
 
-extern struct block_device *I_BDEV(struct inode *inode);
-
 struct fown_struct {
        rwlock_t lock;          /* protects pid, uid, euid fields */
        struct pid *pid;        /* pid or -pgrp where SIGIO should be sent */
@@ -1381,6 +1348,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_NODIRATIME  2048    /* Do not update directory access times */
 #define SB_SILENT      32768
 #define SB_POSIXACL    (1<<16) /* VFS does not apply the umask */
+#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */
 #define SB_KERNMOUNT   (1<<22) /* this is a kern_mount call */
 #define SB_I_VERSION   (1<<23) /* Update inode I_version field */
 #define SB_LAZYTIME    (1<<25) /* Update the on-disk [acm]times lazily */
@@ -1775,14 +1743,6 @@ struct dir_context {
        loff_t pos;
 };
 
-struct block_device_operations;
-
-/* These macros are for out of kernel modules to test that
- * the kernel supports the unlocked_ioctl and compat_ioctl
- * fields in struct file_operations. */
-#define HAVE_COMPAT_IOCTL 1
-#define HAVE_UNLOCKED_IOCTL 1
-
 /*
  * These flags let !MMU mmap() govern direct device mapping vs immediate
  * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
@@ -2264,18 +2224,9 @@ struct file_system_type {
 
 #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
 
-#ifdef CONFIG_BLOCK
 extern struct dentry *mount_bdev(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data,
        int (*fill_super)(struct super_block *, void *, int));
-#else
-static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *data,
-       int (*fill_super)(struct super_block *, void *, int))
-{
-       return ERR_PTR(-ENODEV);
-}
-#endif
 extern struct dentry *mount_single(struct file_system_type *fs_type,
        int flags, void *data,
        int (*fill_super)(struct super_block *, void *, int));
@@ -2284,14 +2235,7 @@ extern struct dentry *mount_nodev(struct file_system_type *fs_type,
        int (*fill_super)(struct super_block *, void *, int));
 extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
 void generic_shutdown_super(struct super_block *sb);
-#ifdef CONFIG_BLOCK
 void kill_block_super(struct super_block *sb);
-#else
-static inline void kill_block_super(struct super_block *sb)
-{
-       BUG();
-}
-#endif
 void kill_anon_super(struct super_block *sb);
 void kill_litter_super(struct super_block *sb);
 void deactivate_super(struct super_block *sb);
@@ -2581,93 +2525,16 @@ extern struct kmem_cache *names_cachep;
 #define __getname()            kmem_cache_alloc(names_cachep, GFP_KERNEL)
 #define __putname(name)                kmem_cache_free(names_cachep, (void *)(name))
 
-#ifdef CONFIG_BLOCK
-extern int register_blkdev(unsigned int, const char *);
-extern void unregister_blkdev(unsigned int, const char *);
-extern struct block_device *bdget(dev_t);
-extern struct block_device *bdgrab(struct block_device *bdev);
-extern void bd_set_size(struct block_device *, loff_t size);
-extern void bd_forget(struct inode *inode);
-extern void bdput(struct block_device *);
-extern void invalidate_bdev(struct block_device *);
-extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
-extern int sync_blockdev(struct block_device *bdev);
-extern struct super_block *freeze_bdev(struct block_device *);
-extern void emergency_thaw_all(void);
-extern void emergency_thaw_bdev(struct super_block *sb);
-extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
-extern int fsync_bdev(struct block_device *);
-
 extern struct super_block *blockdev_superblock;
-
 static inline bool sb_is_blkdev_sb(struct super_block *sb)
 {
-       return sb == blockdev_superblock;
-}
-#else
-static inline void bd_forget(struct inode *inode) {}
-static inline int sync_blockdev(struct block_device *bdev) { return 0; }
-static inline void invalidate_bdev(struct block_device *bdev) {}
-
-static inline struct super_block *freeze_bdev(struct block_device *sb)
-{
-       return NULL;
-}
-
-static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
-{
-       return 0;
+       return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
 }
 
-static inline int emergency_thaw_bdev(struct super_block *sb)
-{
-       return 0;
-}
-
-static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
-{
-}
-
-static inline bool sb_is_blkdev_sb(struct super_block *sb)
-{
-       return false;
-}
-#endif
+void emergency_thaw_all(void);
 extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
-#ifdef CONFIG_BLOCK
-extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
-extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
-extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
-                                              void *holder);
-extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
-                                             void *holder);
-extern struct block_device *bd_start_claiming(struct block_device *bdev,
-                                             void *holder);
-extern void bd_finish_claiming(struct block_device *bdev,
-                              struct block_device *whole, void *holder);
-extern void bd_abort_claiming(struct block_device *bdev,
-                             struct block_device *whole, void *holder);
-extern void blkdev_put(struct block_device *bdev, fmode_t mode);
-
-#ifdef CONFIG_SYSFS
-extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
-extern void bd_unlink_disk_holder(struct block_device *bdev,
-                                 struct gendisk *disk);
-#else
-static inline int bd_link_disk_holder(struct block_device *bdev,
-                                     struct gendisk *disk)
-{
-       return 0;
-}
-static inline void bd_unlink_disk_holder(struct block_device *bdev,
-                                        struct gendisk *disk)
-{
-}
-#endif
-#endif
 
 /* fs/char_dev.c */
 #define CHRDEV_MAJOR_MAX 512
@@ -2698,31 +2565,12 @@ static inline void unregister_chrdev(unsigned int major, const char *name)
        __unregister_chrdev(major, 0, 256, name);
 }
 
-/* fs/block_dev.c */
-#define BDEVNAME_SIZE  32      /* Largest string for a blockdev identifier */
-#define BDEVT_SIZE     10      /* Largest string for MAJ:MIN for blkdev */
-
-#ifdef CONFIG_BLOCK
-#define BLKDEV_MAJOR_MAX       512
-extern const char *bdevname(struct block_device *bdev, char *buffer);
-extern struct block_device *lookup_bdev(const char *);
-extern void blkdev_show(struct seq_file *,off_t);
-
-#else
-#define BLKDEV_MAJOR_MAX       0
-#endif
-
 extern void init_special_inode(struct inode *, umode_t, dev_t);
 
 /* Invalid inode operations -- fs/bad_inode.c */
 extern void make_bad_inode(struct inode *);
 extern bool is_bad_inode(struct inode *);
 
-#ifdef CONFIG_BLOCK
-extern int revalidate_disk(struct gendisk *);
-extern int check_disk_change(struct block_device *);
-extern int __invalidate_device(struct block_device *, bool);
-#endif
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                        pgoff_t start, pgoff_t end);
 
@@ -3123,10 +2971,6 @@ static inline void remove_inode_hash(struct inode *inode)
 
 extern void inode_sb_list_add(struct inode *inode);
 
-#ifdef CONFIG_BLOCK
-extern int bdev_read_only(struct block_device *);
-#endif
-extern int set_blocksize(struct block_device *, int);
 extern int sb_set_blocksize(struct super_block *, int);
 extern int sb_min_blocksize(struct super_block *, int);
 
@@ -3439,22 +3283,28 @@ static inline int iocb_flags(struct file *file)
 
 static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
 {
+       int kiocb_flags = 0;
+
+       if (!flags)
+               return 0;
        if (unlikely(flags & ~RWF_SUPPORTED))
                return -EOPNOTSUPP;
 
        if (flags & RWF_NOWAIT) {
                if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
                        return -EOPNOTSUPP;
-               ki->ki_flags |= IOCB_NOWAIT;
+               kiocb_flags |= IOCB_NOWAIT;
        }
        if (flags & RWF_HIPRI)
-               ki->ki_flags |= IOCB_HIPRI;
+               kiocb_flags |= IOCB_HIPRI;
        if (flags & RWF_DSYNC)
-               ki->ki_flags |= IOCB_DSYNC;
+               kiocb_flags |= IOCB_DSYNC;
        if (flags & RWF_SYNC)
-               ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+               kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC);
        if (flags & RWF_APPEND)
-               ki->ki_flags |= IOCB_APPEND;
+               kiocb_flags |= IOCB_APPEND;
+
+       ki->ki_flags |= kiocb_flags;
        return 0;
 }
 
index 2862ca5..991ff85 100644 (file)
@@ -69,12 +69,20 @@ struct fscrypt_operations {
        bool (*has_stable_inodes)(struct super_block *sb);
        void (*get_ino_and_lblk_bits)(struct super_block *sb,
                                      int *ino_bits_ret, int *lblk_bits_ret);
+       int (*get_num_devices)(struct super_block *sb);
+       void (*get_devices)(struct super_block *sb,
+                           struct request_queue **devs);
 };
 
-static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode)
 {
-       /* pairs with cmpxchg_release() in fscrypt_get_encryption_info() */
-       return READ_ONCE(inode->i_crypt_info) != NULL;
+       /*
+        * Pairs with the cmpxchg_release() in fscrypt_get_encryption_info().
+        * I.e., another task may publish ->i_crypt_info concurrently, executing
+        * a RELEASE barrier.  We need to use smp_load_acquire() here to safely
+        * ACQUIRE the memory the other task published.
+        */
+       return smp_load_acquire(&inode->i_crypt_info);
 }
 
 /**
@@ -231,9 +239,9 @@ static inline void fscrypt_set_ops(struct super_block *sb,
 }
 #else  /* !CONFIG_FS_ENCRYPTION */
 
-static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode)
 {
-       return false;
+       return NULL;
 }
 
 static inline bool fscrypt_needs_contents_encryption(const struct inode *inode)
@@ -537,6 +545,99 @@ static inline void fscrypt_set_ops(struct super_block *sb,
 
 #endif /* !CONFIG_FS_ENCRYPTION */
 
+/* inline_crypt.c */
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+
+bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode);
+
+void fscrypt_set_bio_crypt_ctx(struct bio *bio,
+                              const struct inode *inode, u64 first_lblk,
+                              gfp_t gfp_mask);
+
+void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio,
+                                 const struct buffer_head *first_bh,
+                                 gfp_t gfp_mask);
+
+bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
+                          u64 next_lblk);
+
+bool fscrypt_mergeable_bio_bh(struct bio *bio,
+                             const struct buffer_head *next_bh);
+
+#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
+
+static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
+{
+       return false;
+}
+
+static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio,
+                                            const struct inode *inode,
+                                            u64 first_lblk, gfp_t gfp_mask) { }
+
+static inline void fscrypt_set_bio_crypt_ctx_bh(
+                                        struct bio *bio,
+                                        const struct buffer_head *first_bh,
+                                        gfp_t gfp_mask) { }
+
+static inline bool fscrypt_mergeable_bio(struct bio *bio,
+                                        const struct inode *inode,
+                                        u64 next_lblk)
+{
+       return true;
+}
+
+static inline bool fscrypt_mergeable_bio_bh(struct bio *bio,
+                                           const struct buffer_head *next_bh)
+{
+       return true;
+}
+#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
+
+/**
+ * fscrypt_inode_uses_inline_crypto() - test whether an inode uses inline
+ *                                     encryption
+ * @inode: an inode. If encrypted, its key must be set up.
+ *
+ * Return: true if the inode requires file contents encryption and if the
+ *        encryption should be done in the block layer via blk-crypto rather
+ *        than in the filesystem layer.
+ */
+static inline bool fscrypt_inode_uses_inline_crypto(const struct inode *inode)
+{
+       return fscrypt_needs_contents_encryption(inode) &&
+              __fscrypt_inode_uses_inline_crypto(inode);
+}
+
+/**
+ * fscrypt_inode_uses_fs_layer_crypto() - test whether an inode uses fs-layer
+ *                                       encryption
+ * @inode: an inode. If encrypted, its key must be set up.
+ *
+ * Return: true if the inode requires file contents encryption and if the
+ *        encryption should be done in the filesystem layer rather than in the
+ *        block layer via blk-crypto.
+ */
+static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode)
+{
+       return fscrypt_needs_contents_encryption(inode) &&
+              !__fscrypt_inode_uses_inline_crypto(inode);
+}
+
+/**
+ * fscrypt_has_encryption_key() - check whether an inode has had its key set up
+ * @inode: the inode to check
+ *
+ * Return: %true if the inode has had its encryption key set up, else %false.
+ *
+ * Usually this should be preceded by fscrypt_get_encryption_info() to try to
+ * set up the key first.
+ */
+static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+{
+       return fscrypt_get_info(inode) != NULL;
+}
+
 /**
  * fscrypt_require_key() - require an inode's encryption key
  * @inode: the inode we need the key for
index 78201a6..c1144a4 100644 (file)
@@ -115,8 +115,13 @@ struct fsverity_operations {
 
 static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
 {
-       /* pairs with the cmpxchg() in fsverity_set_info() */
-       return READ_ONCE(inode->i_verity_info);
+       /*
+        * Pairs with the cmpxchg_release() in fsverity_set_info().
+        * I.e., another task may publish ->i_verity_info concurrently,
+        * executing a RELEASE barrier.  We need to use smp_load_acquire() here
+        * to safely ACQUIRE the memory the other task published.
+        */
+       return smp_load_acquire(&inode->i_verity_info);
 }
 
 /* enable.c */
index e339dac..ce2c06f 100644 (file)
@@ -58,9 +58,6 @@ struct ftrace_direct_func;
 const char *
 ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
                   unsigned long *off, char **modname, char *sym);
-int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
-                          char *type, char *name,
-                          char *module_name, int *exported);
 #else
 static inline const char *
 ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
@@ -68,6 +65,13 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
 {
        return NULL;
 }
+#endif
+
+#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
+int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
+                          char *type, char *name,
+                          char *module_name, int *exported);
+#else
 static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
                                         char *type, char *name,
                                         char *module_name, int *exported)
@@ -76,7 +80,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val
 }
 #endif
 
-
 #ifdef CONFIG_FUNCTION_TRACER
 
 extern int ftrace_enabled;
@@ -207,6 +210,7 @@ struct ftrace_ops {
        struct ftrace_ops_hash          old_hash;
        unsigned long                   trampoline;
        unsigned long                   trampoline_size;
+       struct list_head                list;
 #endif
 };
 
index 392aad5..4ab8534 100644 (file)
 #include <linux/blk_types.h>
 #include <asm/local.h>
 
-#ifdef CONFIG_BLOCK
-
 #define dev_to_disk(device)    container_of((device), struct gendisk, part0.__dev)
 #define dev_to_part(device)    container_of((device), struct hd_struct, __dev)
 #define disk_to_dev(disk)      (&(disk)->part0.__dev)
 #define part_to_dev(part)      (&((part)->__dev))
 
+extern const struct device_type disk_type;
 extern struct device_type part_type;
 extern struct class block_class;
 
@@ -337,12 +336,9 @@ static inline void set_capacity(struct gendisk *disk, sector_t size)
        disk->part0.nr_sects = size;
 }
 
-extern dev_t blk_lookup_devt(const char *name, int partno);
-
 int bdev_disk_changed(struct block_device *bdev, bool invalidate);
 int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
 int blk_drop_partitions(struct block_device *bdev);
-extern void printk_all_partitions(void);
 
 extern struct gendisk *__alloc_disk_node(int minors, int node_id);
 extern struct kobject *get_disk_and_module(struct gendisk *disk);
@@ -373,10 +369,40 @@ extern void blk_unregister_region(dev_t devt, unsigned long range);
 
 #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
 
-#else /* CONFIG_BLOCK */
+int register_blkdev(unsigned int major, const char *name);
+void unregister_blkdev(unsigned int major, const char *name);
 
-static inline void printk_all_partitions(void) { }
+int revalidate_disk(struct gendisk *disk);
+int check_disk_change(struct block_device *bdev);
+int __invalidate_device(struct block_device *bdev, bool kill_dirty);
+void bd_set_size(struct block_device *bdev, loff_t size);
 
+/* for drivers/char/raw.c: */
+int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
+long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
+
+#ifdef CONFIG_SYSFS
+int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
+#else
+static inline int bd_link_disk_holder(struct block_device *bdev,
+                                     struct gendisk *disk)
+{
+       return 0;
+}
+static inline void bd_unlink_disk_holder(struct block_device *bdev,
+                                        struct gendisk *disk)
+{
+}
+#endif /* CONFIG_SYSFS */
+
+#ifdef CONFIG_BLOCK
+void printk_all_partitions(void);
+dev_t blk_lookup_devt(const char *name, int partno);
+#else /* CONFIG_BLOCK */
+static inline void printk_all_partitions(void)
+{
+}
 static inline dev_t blk_lookup_devt(const char *name, int partno)
 {
        dev_t devt = MKDEV(0, 0);
index 03c9fec..754f67a 100644 (file)
@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
 /*
  * nmi_enter() can nest up to 15 times; see NMI_BITS.
  */
-#define nmi_enter()                                            \
+#define __nmi_enter()                                          \
        do {                                                    \
+               lockdep_off();                                  \
                arch_nmi_enter();                               \
                printk_nmi_enter();                             \
-               lockdep_off();                                  \
                BUG_ON(in_nmi() == NMI_MASK);                   \
                __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);       \
-               rcu_nmi_enter();                                \
+       } while (0)
+
+#define nmi_enter()                                            \
+       do {                                                    \
+               __nmi_enter();                                  \
                lockdep_hardirq_enter();                        \
+               rcu_nmi_enter();                                \
                instrumentation_begin();                        \
                ftrace_nmi_enter();                             \
                instrumentation_end();                          \
        } while (0)
 
+#define __nmi_exit()                                           \
+       do {                                                    \
+               BUG_ON(!in_nmi());                              \
+               __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);       \
+               printk_nmi_exit();                              \
+               arch_nmi_exit();                                \
+               lockdep_on();                                   \
+       } while (0)
+
 #define nmi_exit()                                             \
        do {                                                    \
                instrumentation_begin();                        \
                ftrace_nmi_exit();                              \
                instrumentation_end();                          \
-               lockdep_hardirq_exit();                         \
                rcu_nmi_exit();                                 \
-               BUG_ON(!in_nmi());                              \
-               __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);       \
-               lockdep_on();                                   \
-               printk_nmi_exit();                              \
-               arch_nmi_exit();                                \
+               lockdep_hardirq_exit();                         \
+               __nmi_exit();                                   \
        } while (0)
 
 #endif /* LINUX_HARDIRQ_H */
index ee328cf..4e7714c 100644 (file)
@@ -1001,7 +1001,7 @@ static inline u32 i2c_acpi_find_bus_speed(struct device *dev)
 static inline struct i2c_client *i2c_acpi_new_device(struct device *dev,
                                        int index, struct i2c_board_info *info)
 {
-       return NULL;
+       return ERR_PTR(-ENODEV);
 }
 static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
 {
diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h
new file mode 100644 (file)
index 0000000..93e2ad6
--- /dev/null
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_INSTRUMENTATION_H
+#define __LINUX_INSTRUMENTATION_H
+
+#if defined(CONFIG_DEBUG_ENTRY) && defined(CONFIG_STACK_VALIDATION)
+
+/* Begin/end of an instrumentation safe region */
+#define instrumentation_begin() ({                                     \
+       asm volatile("%c0:\n\t"                                          \
+                    ".pushsection .discard.instr_begin\n\t"            \
+                    ".long %c0b - .\n\t"                               \
+                    ".popsection\n\t" : : "i" (__COUNTER__));          \
+})
+
+/*
+ * Because instrumentation_{begin,end}() can nest, objtool validation considers
+ * _begin() a +1 and _end() a -1 and computes a sum over the instructions.
+ * When the value is greater than 0, we consider instrumentation allowed.
+ *
+ * There is a problem with code like:
+ *
+ * noinstr void foo()
+ * {
+ *     instrumentation_begin();
+ *     ...
+ *     if (cond) {
+ *             instrumentation_begin();
+ *             ...
+ *             instrumentation_end();
+ *     }
+ *     bar();
+ *     instrumentation_end();
+ * }
+ *
+ * If instrumentation_end() would be an empty label, like all the other
+ * annotations, the inner _end(), which is at the end of a conditional block,
+ * would land on the instruction after the block.
+ *
+ * If we then consider the sum of the !cond path, we'll see that the call to
+ * bar() is with a 0-value, even though, we meant it to happen with a positive
+ * value.
+ *
+ * To avoid this, have _end() be a NOP instruction, this ensures it will be
+ * part of the condition block and does not escape.
+ */
+#define instrumentation_end() ({                                       \
+       asm volatile("%c0: nop\n\t"                                     \
+                    ".pushsection .discard.instr_end\n\t"              \
+                    ".long %c0b - .\n\t"                               \
+                    ".popsection\n\t" : : "i" (__COUNTER__));          \
+})
+#else
+# define instrumentation_begin()       do { } while(0)
+# define instrumentation_end()         do { } while(0)
+#endif
+
+#endif /* __LINUX_INSTRUMENTATION_H */
index 8d5bc2c..1b7f4df 100644 (file)
@@ -213,6 +213,8 @@ struct irq_data {
  *                               required
  * IRQD_HANDLE_ENFORCE_IRQCTX  - Enforce that handle_irq_*() is only invoked
  *                               from actual interrupt context.
+ * IRQD_AFFINITY_ON_ACTIVATE   - Affinity is set on activation. Don't call
+ *                               irq_chip::irq_set_affinity() when deactivated.
  */
 enum {
        IRQD_TRIGGER_MASK               = 0xf,
@@ -237,6 +239,7 @@ enum {
        IRQD_CAN_RESERVE                = (1 << 26),
        IRQD_MSI_NOMASK_QUIRK           = (1 << 27),
        IRQD_HANDLE_ENFORCE_IRQCTX      = (1 << 28),
+       IRQD_AFFINITY_ON_ACTIVATE       = (1 << 29),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -421,6 +424,16 @@ static inline bool irqd_msi_nomask_quirk(struct irq_data *d)
        return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
 }
 
+static inline void irqd_set_affinity_on_activate(struct irq_data *d)
+{
+       __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE;
+}
+
+static inline bool irqd_affinity_on_activate(struct irq_data *d)
+{
+       return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
index 6384d28..bd5c557 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/typecheck.h>
 #include <asm/irqflags.h>
+#include <asm/percpu.h>
 
 /* Currently lockdep_softirqs_on/off is used only by lockdep */
 #ifdef CONFIG_PROVE_LOCKING
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+
+/* Per-task IRQ trace events information. */
+struct irqtrace_events {
+       unsigned int    irq_events;
+       unsigned long   hardirq_enable_ip;
+       unsigned long   hardirq_disable_ip;
+       unsigned int    hardirq_enable_event;
+       unsigned int    hardirq_disable_event;
+       unsigned long   softirq_disable_ip;
+       unsigned long   softirq_enable_ip;
+       unsigned int    softirq_disable_event;
+       unsigned int    softirq_enable_event;
+};
+
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
+
   extern void trace_hardirqs_on_prepare(void);
   extern void trace_hardirqs_off_finish(void);
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p)    ((p)->hardirq_context)
+# define lockdep_hardirq_context()     (this_cpu_read(hardirq_context))
 # define lockdep_softirq_context(p)    ((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p)   ((p)->hardirqs_enabled)
+# define lockdep_hardirqs_enabled()    (this_cpu_read(hardirqs_enabled))
 # define lockdep_softirqs_enabled(p)   ((p)->softirqs_enabled)
-# define lockdep_hardirq_enter()               \
-do {                                           \
-       if (!current->hardirq_context++)        \
-               current->hardirq_threaded = 0;  \
+# define lockdep_hardirq_enter()                       \
+do {                                                   \
+       if (this_cpu_inc_return(hardirq_context) == 1)  \
+               current->hardirq_threaded = 0;          \
 } while (0)
 # define lockdep_hardirq_threaded()            \
 do {                                           \
@@ -50,7 +68,7 @@ do {                                          \
 } while (0)
 # define lockdep_hardirq_exit()                        \
 do {                                           \
-       current->hardirq_context--;             \
+       this_cpu_dec(hardirq_context);          \
 } while (0)
 # define lockdep_softirq_enter()               \
 do {                                           \
@@ -104,9 +122,9 @@ do {                                                \
 # define trace_hardirqs_off_finish()           do { } while (0)
 # define trace_hardirqs_on()           do { } while (0)
 # define trace_hardirqs_off()          do { } while (0)
-# define lockdep_hardirq_context(p)    0
+# define lockdep_hardirq_context()     0
 # define lockdep_softirq_context(p)    0
-# define lockdep_hardirqs_enabled(p)   0
+# define lockdep_hardirqs_enabled()    0
 # define lockdep_softirqs_enabled(p)   0
 # define lockdep_hardirq_enter()       do { } while (0)
 # define lockdep_hardirq_threaded()    do { } while (0)
index d56128d..4aaa297 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/timer.h>
 #include <linux/slab.h>
 #include <linux/bit_spinlock.h>
+#include <linux/blkdev.h>
 #include <crypto/hash.h>
 #endif
 
index 6adf90f..45b8cdc 100644 (file)
@@ -242,6 +242,7 @@ struct kprobe_insn_cache {
        struct mutex mutex;
        void *(*alloc)(void);   /* allocate insn page */
        void (*free)(void *);   /* free insn page */
+       const char *sym;        /* symbol for insn pages */
        struct list_head pages; /* list of kprobe_insn_page */
        size_t insn_size;       /* size of instruction slot */
        int nr_garbage;
@@ -272,6 +273,10 @@ static inline bool is_kprobe_##__name##_slot(unsigned long addr)   \
 {                                                                      \
        return __is_insn_slot_addr(&kprobe_##__name##_slots, addr);     \
 }
+#define KPROBE_INSN_PAGE_SYM           "kprobe_insn_page"
+#define KPROBE_OPTINSN_PAGE_SYM                "kprobe_optinsn_page"
+int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
+                            unsigned long *value, char *type, char *sym);
 #else /* __ARCH_WANT_KPROBES_INSN_SLOT */
 #define DEFINE_INSN_CACHE_OPS(__name)                                  \
 static inline bool is_kprobe_##__name##_slot(unsigned long addr)       \
@@ -377,6 +382,11 @@ void dump_kprobe(struct kprobe *kp);
 void *alloc_insn_page(void);
 void free_insn_page(void *page);
 
+int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+                      char *sym);
+
+int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
+                           char *type, char *sym);
 #else /* !CONFIG_KPROBES: */
 
 static inline int kprobes_built_in(void)
@@ -439,6 +449,11 @@ static inline bool within_kprobe_blacklist(unsigned long addr)
 {
        return true;
 }
+static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value,
+                                    char *type, char *sym)
+{
+       return -ERANGE;
+}
 #endif /* CONFIG_KPROBES */
 static inline int disable_kretprobe(struct kretprobe *rp)
 {
index ee8ec2e..1db2237 100644 (file)
@@ -631,7 +631,6 @@ static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev,
        return last;
 }
 
-typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
                                int flags);
@@ -650,7 +649,7 @@ struct nvm_tgt_type {
        int flags;
 
        /* target entry points */
-       nvm_tgt_make_rq_fn *make_rq;
+       const struct block_device_operations *bops;
        nvm_tgt_capacity_fn *capacity;
 
        /* module-specific init/teardown */
index aff44d3..0d0d17a 100644 (file)
@@ -282,6 +282,24 @@ static inline int list_empty(const struct list_head *head)
        return READ_ONCE(head->next) == head;
 }
 
+/**
+ * list_del_init_careful - deletes entry from list and reinitializes it.
+ * @entry: the element to delete from the list.
+ *
+ * This is the same as list_del_init(), except designed to be used
+ * together with list_empty_careful() in a way to guarantee ordering
+ * of other memory operations.
+ *
+ * Any memory operations done before a list_del_init_careful() are
+ * guaranteed to be visible after a list_empty_careful() test.
+ */
+static inline void list_del_init_careful(struct list_head *entry)
+{
+       __list_del_entry(entry);
+       entry->prev = entry;
+       smp_store_release(&entry->next, entry);
+}
+
 /**
  * list_empty_careful - tests whether a list is empty and not being modified
  * @head: the list to test
@@ -297,7 +315,7 @@ static inline int list_empty(const struct list_head *head)
  */
 static inline int list_empty_careful(const struct list_head *head)
 {
-       struct list_head *next = head->next;
+       struct list_head *next = smp_load_acquire(&head->next);
        return (next == head) && (next == head->prev);
 }
 
index 8fce5c9..39a3569 100644 (file)
 #ifndef __LINUX_LOCKDEP_H
 #define __LINUX_LOCKDEP_H
 
+#include <linux/lockdep_types.h>
+#include <asm/percpu.h>
+
 struct task_struct;
-struct lockdep_map;
 
 /* for sysctl */
 extern int prove_locking;
 extern int lock_stat;
 
-#define MAX_LOCKDEP_SUBCLASSES         8UL
-
-#include <linux/types.h>
-
-enum lockdep_wait_type {
-       LD_WAIT_INV = 0,        /* not checked, catch all */
-
-       LD_WAIT_FREE,           /* wait free, rcu etc.. */
-       LD_WAIT_SPIN,           /* spin loops, raw_spinlock_t etc.. */
-
-#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
-       LD_WAIT_CONFIG,         /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
-#else
-       LD_WAIT_CONFIG = LD_WAIT_SPIN,
-#endif
-       LD_WAIT_SLEEP,          /* sleeping locks, mutex_t etc.. */
-
-       LD_WAIT_MAX,            /* must be last */
-};
-
 #ifdef CONFIG_LOCKDEP
 
 #include <linux/linkage.h>
@@ -44,147 +26,6 @@ enum lockdep_wait_type {
 #include <linux/debug_locks.h>
 #include <linux/stacktrace.h>
 
-/*
- * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
- * the total number of states... :-(
- */
-#define XXX_LOCK_USAGE_STATES          (1+2*4)
-
-/*
- * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
- * cached in the instance of lockdep_map
- *
- * Currently main class (subclass == 0) and signle depth subclass
- * are cached in lockdep_map. This optimization is mainly targeting
- * on rq->lock. double_rq_lock() acquires this highly competitive with
- * single depth.
- */
-#define NR_LOCKDEP_CACHING_CLASSES     2
-
-/*
- * A lockdep key is associated with each lock object. For static locks we use
- * the lock address itself as the key. Dynamically allocated lock objects can
- * have a statically or dynamically allocated key. Dynamically allocated lock
- * keys must be registered before being used and must be unregistered before
- * the key memory is freed.
- */
-struct lockdep_subclass_key {
-       char __one_byte;
-} __attribute__ ((__packed__));
-
-/* hash_entry is used to keep track of dynamically allocated keys. */
-struct lock_class_key {
-       union {
-               struct hlist_node               hash_entry;
-               struct lockdep_subclass_key     subkeys[MAX_LOCKDEP_SUBCLASSES];
-       };
-};
-
-extern struct lock_class_key __lockdep_no_validate__;
-
-struct lock_trace;
-
-#define LOCKSTAT_POINTS                4
-
-/*
- * The lock-class itself. The order of the structure members matters.
- * reinit_class() zeroes the key member and all subsequent members.
- */
-struct lock_class {
-       /*
-        * class-hash:
-        */
-       struct hlist_node               hash_entry;
-
-       /*
-        * Entry in all_lock_classes when in use. Entry in free_lock_classes
-        * when not in use. Instances that are being freed are on one of the
-        * zapped_classes lists.
-        */
-       struct list_head                lock_entry;
-
-       /*
-        * These fields represent a directed graph of lock dependencies,
-        * to every node we attach a list of "forward" and a list of
-        * "backward" graph nodes.
-        */
-       struct list_head                locks_after, locks_before;
-
-       const struct lockdep_subclass_key *key;
-       unsigned int                    subclass;
-       unsigned int                    dep_gen_id;
-
-       /*
-        * IRQ/softirq usage tracking bits:
-        */
-       unsigned long                   usage_mask;
-       const struct lock_trace         *usage_traces[XXX_LOCK_USAGE_STATES];
-
-       /*
-        * Generation counter, when doing certain classes of graph walking,
-        * to ensure that we check one node only once:
-        */
-       int                             name_version;
-       const char                      *name;
-
-       short                           wait_type_inner;
-       short                           wait_type_outer;
-
-#ifdef CONFIG_LOCK_STAT
-       unsigned long                   contention_point[LOCKSTAT_POINTS];
-       unsigned long                   contending_point[LOCKSTAT_POINTS];
-#endif
-} __no_randomize_layout;
-
-#ifdef CONFIG_LOCK_STAT
-struct lock_time {
-       s64                             min;
-       s64                             max;
-       s64                             total;
-       unsigned long                   nr;
-};
-
-enum bounce_type {
-       bounce_acquired_write,
-       bounce_acquired_read,
-       bounce_contended_write,
-       bounce_contended_read,
-       nr_bounce_types,
-
-       bounce_acquired = bounce_acquired_write,
-       bounce_contended = bounce_contended_write,
-};
-
-struct lock_class_stats {
-       unsigned long                   contention_point[LOCKSTAT_POINTS];
-       unsigned long                   contending_point[LOCKSTAT_POINTS];
-       struct lock_time                read_waittime;
-       struct lock_time                write_waittime;
-       struct lock_time                read_holdtime;
-       struct lock_time                write_holdtime;
-       unsigned long                   bounces[nr_bounce_types];
-};
-
-struct lock_class_stats lock_stats(struct lock_class *class);
-void clear_lock_stats(struct lock_class *class);
-#endif
-
-/*
- * Map the lock object (the lock instance) to the lock-class object.
- * This is embedded into specific lock instances:
- */
-struct lockdep_map {
-       struct lock_class_key           *key;
-       struct lock_class               *class_cache[NR_LOCKDEP_CACHING_CLASSES];
-       const char                      *name;
-       short                           wait_type_outer; /* can be taken in this context */
-       short                           wait_type_inner; /* presents this context */
-#ifdef CONFIG_LOCK_STAT
-       int                             cpu;
-       unsigned long                   ip;
-#endif
-};
-
 static inline void lockdep_copy_map(struct lockdep_map *to,
                                    struct lockdep_map *from)
 {
@@ -440,8 +281,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
 
 extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
 
-struct pin_cookie { unsigned int val; };
-
 #define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
 
 extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
@@ -520,10 +359,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
 # define lockdep_reset()               do { debug_locks = 1; } while (0)
 # define lockdep_free_key_range(start, size)   do { } while (0)
 # define lockdep_sys_exit()                    do { } while (0)
-/*
- * The class key takes no space if lockdep is disabled:
- */
-struct lock_class_key { };
 
 static inline void lockdep_register_key(struct lock_class_key *key)
 {
@@ -533,11 +368,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key)
 {
 }
 
-/*
- * The lockdep_map takes no space if lockdep is disabled:
- */
-struct lockdep_map { };
-
 #define lockdep_depth(tsk)     (0)
 
 #define lockdep_is_held_type(l, r)             (1)
@@ -549,8 +379,6 @@ struct lockdep_map { };
 
 #define lockdep_recursing(tsk)                 (0)
 
-struct pin_cookie { };
-
 #define NIL_COOKIE (struct pin_cookie){ }
 
 #define lockdep_pin_lock(l)                    ({ struct pin_cookie cookie = { }; cookie; })
@@ -703,38 +531,58 @@ do {                                                                      \
        lock_release(&(lock)->dep_map, _THIS_IP_);                      \
 } while (0)
 
-#define lockdep_assert_irqs_enabled()  do {                            \
-               WARN_ONCE(debug_locks && !current->lockdep_recursion && \
-                         !current->hardirqs_enabled,                   \
-                         "IRQs not enabled as expected\n");            \
-       } while (0)
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
 
-#define lockdep_assert_irqs_disabled() do {                            \
-               WARN_ONCE(debug_locks && !current->lockdep_recursion && \
-                         current->hardirqs_enabled,                    \
-                         "IRQs not disabled as expected\n");           \
-       } while (0)
+#define lockdep_assert_irqs_enabled()                                  \
+do {                                                                   \
+       WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled));  \
+} while (0)
 
-#define lockdep_assert_in_irq() do {                                   \
-               WARN_ONCE(debug_locks && !current->lockdep_recursion && \
-                         !current->hardirq_context,                    \
-                         "Not in hardirq as expected\n");              \
-       } while (0)
+#define lockdep_assert_irqs_disabled()                                 \
+do {                                                                   \
+       WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled));   \
+} while (0)
+
+#define lockdep_assert_in_irq()                                                \
+do {                                                                   \
+       WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context));   \
+} while (0)
+
+#define lockdep_assert_preemption_enabled()                            \
+do {                                                                   \
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT)   &&              \
+                    debug_locks                        &&              \
+                    (preempt_count() != 0              ||              \
+                     !this_cpu_read(hardirqs_enabled)));               \
+} while (0)
+
+#define lockdep_assert_preemption_disabled()                           \
+do {                                                                   \
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT)   &&              \
+                    debug_locks                        &&              \
+                    (preempt_count() == 0              &&              \
+                     this_cpu_read(hardirqs_enabled)));                \
+} while (0)
 
 #else
 # define might_lock(lock) do { } while (0)
 # define might_lock_read(lock) do { } while (0)
 # define might_lock_nested(lock, subclass) do { } while (0)
+
 # define lockdep_assert_irqs_enabled() do { } while (0)
 # define lockdep_assert_irqs_disabled() do { } while (0)
 # define lockdep_assert_in_irq() do { } while (0)
+
+# define lockdep_assert_preemption_enabled() do { } while (0)
+# define lockdep_assert_preemption_disabled() do { } while (0)
 #endif
 
 #ifdef CONFIG_PROVE_RAW_LOCK_NESTING
 
 # define lockdep_assert_RT_in_threaded_ctx() do {                      \
                WARN_ONCE(debug_locks && !current->lockdep_recursion && \
-                         current->hardirq_context &&                   \
+                         lockdep_hardirq_context() &&                  \
                          !(current->hardirq_threaded || current->irq_config),  \
                          "Not in threaded context on PREEMPT_RT as expected\n");       \
 } while (0)
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
new file mode 100644 (file)
index 0000000..bb35b44
--- /dev/null
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Runtime locking correctness validator
+ *
+ *  Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * see Documentation/locking/lockdep-design.rst for more details.
+ */
+#ifndef __LINUX_LOCKDEP_TYPES_H
+#define __LINUX_LOCKDEP_TYPES_H
+
+#include <linux/types.h>
+
+#define MAX_LOCKDEP_SUBCLASSES         8UL
+
+enum lockdep_wait_type {
+       LD_WAIT_INV = 0,        /* not checked, catch all */
+
+       LD_WAIT_FREE,           /* wait free, rcu etc.. */
+       LD_WAIT_SPIN,           /* spin loops, raw_spinlock_t etc.. */
+
+#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
+       LD_WAIT_CONFIG,         /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
+#else
+       LD_WAIT_CONFIG = LD_WAIT_SPIN,
+#endif
+       LD_WAIT_SLEEP,          /* sleeping locks, mutex_t etc.. */
+
+       LD_WAIT_MAX,            /* must be last */
+};
+
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
+ * the total number of states... :-(
+ */
+#define XXX_LOCK_USAGE_STATES          (1+2*4)
+
+/*
+ * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
+ * cached in the instance of lockdep_map
+ *
+ * Currently main class (subclass == 0) and single depth subclass
+ * are cached in lockdep_map. This optimization is mainly targeting
+ * on rq->lock. double_rq_lock() acquires this highly competitive with
+ * single depth.
+ */
+#define NR_LOCKDEP_CACHING_CLASSES     2
+
+/*
+ * A lockdep key is associated with each lock object. For static locks we use
+ * the lock address itself as the key. Dynamically allocated lock objects can
+ * have a statically or dynamically allocated key. Dynamically allocated lock
+ * keys must be registered before being used and must be unregistered before
+ * the key memory is freed.
+ */
+struct lockdep_subclass_key {
+       char __one_byte;
+} __attribute__ ((__packed__));
+
+/* hash_entry is used to keep track of dynamically allocated keys. */
+struct lock_class_key {
+       union {
+               struct hlist_node               hash_entry;
+               struct lockdep_subclass_key     subkeys[MAX_LOCKDEP_SUBCLASSES];
+       };
+};
+
+extern struct lock_class_key __lockdep_no_validate__;
+
+struct lock_trace;
+
+#define LOCKSTAT_POINTS                4
+
+/*
+ * The lock-class itself. The order of the structure members matters.
+ * reinit_class() zeroes the key member and all subsequent members.
+ */
+struct lock_class {
+       /*
+        * class-hash:
+        */
+       struct hlist_node               hash_entry;
+
+       /*
+        * Entry in all_lock_classes when in use. Entry in free_lock_classes
+        * when not in use. Instances that are being freed are on one of the
+        * zapped_classes lists.
+        */
+       struct list_head                lock_entry;
+
+       /*
+        * These fields represent a directed graph of lock dependencies,
+        * to every node we attach a list of "forward" and a list of
+        * "backward" graph nodes.
+        */
+       struct list_head                locks_after, locks_before;
+
+       const struct lockdep_subclass_key *key;
+       unsigned int                    subclass;
+       unsigned int                    dep_gen_id;
+
+       /*
+        * IRQ/softirq usage tracking bits:
+        */
+       unsigned long                   usage_mask;
+       const struct lock_trace         *usage_traces[XXX_LOCK_USAGE_STATES];
+
+       /*
+        * Generation counter, when doing certain classes of graph walking,
+        * to ensure that we check one node only once:
+        */
+       int                             name_version;
+       const char                      *name;
+
+       short                           wait_type_inner;
+       short                           wait_type_outer;
+
+#ifdef CONFIG_LOCK_STAT
+       unsigned long                   contention_point[LOCKSTAT_POINTS];
+       unsigned long                   contending_point[LOCKSTAT_POINTS];
+#endif
+} __no_randomize_layout;
+
+#ifdef CONFIG_LOCK_STAT
+struct lock_time {
+       s64                             min;
+       s64                             max;
+       s64                             total;
+       unsigned long                   nr;
+};
+
+enum bounce_type {
+       bounce_acquired_write,
+       bounce_acquired_read,
+       bounce_contended_write,
+       bounce_contended_read,
+       nr_bounce_types,
+
+       bounce_acquired = bounce_acquired_write,
+       bounce_contended = bounce_contended_write,
+};
+
+struct lock_class_stats {
+       unsigned long                   contention_point[LOCKSTAT_POINTS];
+       unsigned long                   contending_point[LOCKSTAT_POINTS];
+       struct lock_time                read_waittime;
+       struct lock_time                write_waittime;
+       struct lock_time                read_holdtime;
+       struct lock_time                write_holdtime;
+       unsigned long                   bounces[nr_bounce_types];
+};
+
+struct lock_class_stats lock_stats(struct lock_class *class);
+void clear_lock_stats(struct lock_class *class);
+#endif
+
+/*
+ * Map the lock object (the lock instance) to the lock-class object.
+ * This is embedded into specific lock instances:
+ */
+struct lockdep_map {
+       struct lock_class_key           *key;
+       struct lock_class               *class_cache[NR_LOCKDEP_CACHING_CLASSES];
+       const char                      *name;
+       short                           wait_type_outer; /* can be taken in this context */
+       short                           wait_type_inner; /* presents this context */
+#ifdef CONFIG_LOCK_STAT
+       int                             cpu;
+       unsigned long                   ip;
+#endif
+};
+
+struct pin_cookie { unsigned int val; };
+
+#else /* !CONFIG_LOCKDEP */
+
+/*
+ * The class key takes no space if lockdep is disabled:
+ */
+struct lock_class_key { };
+
+/*
+ * The lockdep_map takes no space if lockdep is disabled:
+ */
+struct lockdep_map { };
+
+struct pin_cookie { };
+
+#endif /* !CONFIG_LOCKDEP */
+
+#endif /* __LINUX_LOCKDEP_TYPES_H */
index 11a2674..d097119 100644 (file)
@@ -263,6 +263,8 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
 }
 #endif /* mul_u64_u32_div */
 
+u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
+
 #define DIV64_U64_ROUND_UP(ll, d)      \
        ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); })
 
index 017fae8..9d925db 100644 (file)
@@ -77,16 +77,12 @@ struct memblock_type {
  * @current_limit: physical address of the current allocation limit
  * @memory: usable memory regions
  * @reserved: reserved memory regions
- * @physmem: all physical memory
  */
 struct memblock {
        bool bottom_up;  /* is bottom up direction? */
        phys_addr_t current_limit;
        struct memblock_type memory;
        struct memblock_type reserved;
-#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
-       struct memblock_type physmem;
-#endif
 };
 
 extern struct memblock memblock;
@@ -145,6 +141,30 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
 
 void __memblock_free_late(phys_addr_t base, phys_addr_t size);
 
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
+                                       phys_addr_t *out_start,
+                                       phys_addr_t *out_end)
+{
+       extern struct memblock_type physmem;
+
+       __next_mem_range(idx, NUMA_NO_NODE, MEMBLOCK_NONE, &physmem, type,
+                        out_start, out_end, NULL);
+}
+
+/**
+ * for_each_physmem_range - iterate through physmem areas not included in type.
+ * @i: u64 used as loop variable
+ * @type: ptr to memblock_type whose regions are excluded from the iteration, can be %NULL
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ */
+#define for_each_physmem_range(i, type, p_start, p_end)                        \
+       for (i = 0, __next_physmem_range(&i, type, p_start, p_end);     \
+            i != (u64)ULLONG_MAX;                                      \
+            __next_physmem_range(&i, type, p_start, p_end))
+#endif /* CONFIG_HAVE_MEMBLOCK_PHYS_MAP */
+
 /**
  * for_each_mem_range - iterate through memblock areas from type_a and not
  * included in type_b. Or just type_a if type_b is NULL.
index 073b79e..1340e02 100644 (file)
@@ -4381,6 +4381,7 @@ struct mlx5_ifc_query_vport_state_out_bits {
 enum {
        MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT  = 0x0,
        MLX5_VPORT_STATE_OP_MOD_ESW_VPORT   = 0x1,
+       MLX5_VPORT_STATE_OP_MOD_UPLINK      = 0x2,
 };
 
 struct mlx5_ifc_arm_monitor_counter_in_bits {
index 7bd6d8a..5d906df 100644 (file)
@@ -63,6 +63,9 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod);
 int mpi_cmp_ui(MPI u, ulong v);
 int mpi_cmp(MPI u, MPI v);
 
+/*-- mpi-sub-ui.c --*/
+int mpi_sub_ui(MPI w, MPI u, unsigned long vval);
+
 /*-- mpi-bit.c --*/
 void mpi_normalize(MPI a);
 unsigned mpi_get_nbits(MPI a);
index 0c5ef54..c1e79f7 100644 (file)
@@ -5,6 +5,8 @@
 
 #ifndef _LINUX_NOSPEC_H
 #define _LINUX_NOSPEC_H
+
+#include <linux/compiler.h>
 #include <asm/barrier.h>
 
 struct task_struct;
index c669c0a..60abe3f 100644 (file)
@@ -554,7 +554,7 @@ bool of_console_check(struct device_node *dn, char *name, int index);
 
 extern int of_cpu_node_to_id(struct device_node *np);
 
-int of_map_rid(struct device_node *np, u32 rid,
+int of_map_id(struct device_node *np, u32 id,
               const char *map_name, const char *map_mask_name,
               struct device_node **target, u32 *id_out);
 
@@ -978,7 +978,7 @@ static inline int of_cpu_node_to_id(struct device_node *np)
        return -ENODEV;
 }
 
-static inline int of_map_rid(struct device_node *np, u32 rid,
+static inline int of_map_id(struct device_node *np, u32 id,
                             const char *map_name, const char *map_mask_name,
                             struct device_node **target, u32 *id_out)
 {
index 8d31e39..07ca187 100644 (file)
@@ -55,9 +55,15 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
        return of_node_get(cpu_dev->of_node);
 }
 
-int of_dma_configure(struct device *dev,
+int of_dma_configure_id(struct device *dev,
                     struct device_node *np,
-                    bool force_dma);
+                    bool force_dma, const u32 *id);
+static inline int of_dma_configure(struct device *dev,
+                                  struct device_node *np,
+                                  bool force_dma)
+{
+       return of_dma_configure_id(dev, np, force_dma, NULL);
+}
 #else /* CONFIG_OF */
 
 static inline int of_driver_match_device(struct device *dev,
@@ -106,6 +112,12 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
        return NULL;
 }
 
+static inline int of_dma_configure_id(struct device *dev,
+                                  struct device_node *np,
+                                  bool force_dma, const u32 *id)
+{
+       return 0;       /* !CONFIG_OF stub: same signature as the real prototype, nothing to do */
+}
 static inline int of_dma_configure(struct device *dev,
                                   struct device_node *np,
                                   bool force_dma)
index f3d40dd..16f4b3e 100644 (file)
@@ -13,7 +13,8 @@ extern int of_get_dma_window(struct device_node *dn, const char *prefix,
                             size_t *size);
 
 extern const struct iommu_ops *of_iommu_configure(struct device *dev,
-                                       struct device_node *master_np);
+                                       struct device_node *master_np,
+                                       const u32 *id);
 
 #else
 
@@ -25,7 +26,8 @@ static inline int of_get_dma_window(struct device_node *dn, const char *prefix,
 }
 
 static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
-                                        struct device_node *master_np)
+                                        struct device_node *master_np,
+                                        const u32 *id)
 {
        return NULL;
 }
index 1214cab..e8b7813 100644 (file)
@@ -52,9 +52,10 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev,
                                            struct device_node *np,
                                            enum irq_domain_bus_token token);
 extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
-                                                      u32 rid);
+                                                       u32 id,
+                                                       u32 bus_token);
 extern void of_msi_configure(struct device *dev, struct device_node *np);
-u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in);
+u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in);
 #else
 static inline int of_irq_count(struct device_node *dev)
 {
@@ -85,17 +86,17 @@ static inline struct irq_domain *of_msi_get_domain(struct device *dev,
        return NULL;
 }
 static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
-                                                             u32 rid)
+                                               u32 id, u32 bus_token)
 {
        return NULL;
 }
 static inline void of_msi_configure(struct device *dev, struct device_node *np)
 {
 }
-static inline u32 of_msi_map_rid(struct device *dev,
-                                struct device_node *msi_np, u32 rid_in)
+static inline u32 of_msi_map_id(struct device *dev,
+                                struct device_node *msi_np, u32 id_in)
 {
-       return rid_in;
+       return id_in;
 }
 #endif
 
index 7302eff..a433f13 100644 (file)
@@ -66,17 +66,6 @@ struct padata_serial_queue {
        struct parallel_data *pd;
 };
 
-/**
- * struct padata_parallel_queue - The percpu padata parallel queue
- *
- * @reorder: List to wait for reordering after parallel processing.
- * @num_obj: Number of objects that are processed by this cpu.
- */
-struct padata_parallel_queue {
-       struct padata_list    reorder;
-       atomic_t              num_obj;
-};
-
 /**
  * struct padata_cpumask - The cpumasks for the parallel/serial workers
  *
@@ -93,7 +82,7 @@ struct padata_cpumask {
  * that depends on the cpumask in use.
  *
  * @ps: padata_shell object.
- * @pqueue: percpu padata queues used for parallelization.
+ * @reorder_list: percpu reorder lists
  * @squeue: percpu padata queues used for serialuzation.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @seq_nr: Sequence number of the parallelized data object.
@@ -105,7 +94,7 @@ struct padata_cpumask {
  */
 struct parallel_data {
        struct padata_shell             *ps;
-       struct padata_parallel_queue    __percpu *pqueue;
+       struct padata_list              __percpu *reorder_list;
        struct padata_serial_queue      __percpu *squeue;
        atomic_t                        refcnt;
        unsigned int                    seq_nr;
@@ -167,7 +156,6 @@ struct padata_mt_job {
  * @serial_wq: The workqueue used for serial work.
  * @pslist: List of padata_shell objects attached to this instance.
  * @cpumask: User supplied cpumasks for parallel and serial works.
- * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask.
  * @kobj: padata instance kernel object.
  * @lock: padata instance lock.
  * @flags: padata flags.
@@ -179,7 +167,6 @@ struct padata_instance {
        struct workqueue_struct         *serial_wq;
        struct list_head                pslist;
        struct padata_cpumask           cpumask;
-       struct padata_cpumask           rcpumask;
        struct kobject                   kobj;
        struct mutex                     lock;
        u8                               flags;
@@ -194,7 +181,7 @@ extern void __init padata_init(void);
 static inline void __init padata_init(void) {}
 #endif
 
-extern struct padata_instance *padata_alloc_possible(const char *name);
+extern struct padata_instance *padata_alloc(const char *name);
 extern void padata_free(struct padata_instance *pinst);
 extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
 extern void padata_free_shell(struct padata_shell *ps);
@@ -204,6 +191,4 @@ extern void padata_do_serial(struct padata_priv *padata);
 extern void __init padata_do_multithreaded(struct padata_mt_job *job);
 extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
                              cpumask_var_t cpumask);
-extern int padata_start(struct padata_instance *pinst);
-extern void padata_stop(struct padata_instance *pinst);
 #endif
index cf2468d..d1f4eff 100644 (file)
@@ -496,8 +496,35 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
        return pgoff;
 }
 
+/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
+struct wait_page_key {
+       struct page *page;
+       int bit_nr;
+       int page_match;
+};
+
+struct wait_page_queue {
+       struct page *page;
+       int bit_nr;
+       wait_queue_entry_t wait;
+};
+
+static inline bool wake_page_match(struct wait_page_queue *wait_page,
+                                 struct wait_page_key *key)
+{
+       if (wait_page->page != key->page)
+              return false;
+       key->page_match = 1;
+
+       if (wait_page->bit_nr != key->bit_nr)
+               return false;
+
+       return true;
+}
+
 extern void __lock_page(struct page *page);
 extern int __lock_page_killable(struct page *page);
+extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                                unsigned int flags);
 extern void unlock_page(struct page *page);
@@ -534,6 +561,22 @@ static inline int lock_page_killable(struct page *page)
        return 0;
 }
 
+/*
+ * lock_page_async - Lock the page, unless this would block. If the page
+ * is already locked, then queue a callback when the page becomes unlocked.
+ * This callback can then retry the operation.
+ *
+ * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
+ * was already locked and the callback defined in 'wait' was queued.
+ */
+static inline int lock_page_async(struct page *page,
+                                 struct wait_page_queue *wait)
+{
+       if (!trylock_page(page))
+               return __lock_page_async(page, wait);
+       return 0;
+}
+
 /*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
index 22d9d18..87d8a38 100644 (file)
@@ -155,7 +155,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
         * between contaminating the pointer value, meaning that
         * READ_ONCE() is required when fetching it.
         *
-        * The smp_read_barrier_depends() implied by READ_ONCE() pairs
+        * The dependency ordering from the READ_ONCE() pairs
         * with smp_store_release() in __percpu_ref_switch_to_percpu().
         */
        percpu_ptr = READ_ONCE(ref->percpu_count_ptr);
index b4bb320..0edd257 100644 (file)
@@ -366,7 +366,7 @@ struct pmu {
         * ->stop() with PERF_EF_UPDATE will read the counter and update
         *  period/count values like ->read() would.
         *
-        * ->start() with PERF_EF_RELOAD will reprogram the the counter
+        * ->start() with PERF_EF_RELOAD will reprogram the counter
         *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
         */
        void (*start)                   (struct perf_event *event, int flags);
@@ -419,10 +419,11 @@ struct pmu {
         */
        void (*sched_task)              (struct perf_event_context *ctx,
                                        bool sched_in);
+
        /*
-        * PMU specific data size
+        * Kmem cache of PMU specific data
         */
-       size_t                          task_ctx_size;
+       struct kmem_cache               *task_ctx_cache;
 
        /*
         * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
@@ -1232,6 +1233,9 @@ extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_namespaces(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
+extern void perf_event_text_poke(const void *addr,
+                                const void *old_bytes, size_t old_len,
+                                const void *new_bytes, size_t new_len);
 
 /* Callchains */
 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
@@ -1479,6 +1483,11 @@ static inline void perf_event_exec(void)                         { }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
 static inline void perf_event_namespaces(struct task_struct *tsk)      { }
 static inline void perf_event_fork(struct task_struct *tsk)            { }
+static inline void perf_event_text_poke(const void *addr,
+                                       const void *old_bytes,
+                                       size_t old_len,
+                                       const void *new_bytes,
+                                       size_t new_len)                 { }
 static inline void perf_event_init(void)                               { }
 static inline int  perf_swevent_get_recursion_context(void)            { return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)                { }
index 4b72584..b95f321 100644 (file)
@@ -153,9 +153,10 @@ struct psi_group {
        unsigned long avg[NR_PSI_STATES - 1][3];
 
        /* Monitor work control */
-       atomic_t poll_scheduled;
-       struct kthread_worker __rcu *poll_kworker;
-       struct kthread_delayed_work poll_work;
+       struct task_struct __rcu *poll_task;
+       struct timer_list poll_timer;
+       wait_queue_head_t poll_wait;
+       atomic_t poll_wakeup;
 
        /* Protects data used by the monitor */
        struct mutex trigger_lock;
index 417db0a..808f9d3 100644 (file)
@@ -107,7 +107,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
                return -ENOSPC;
 
        /* Make sure the pointer we are storing points to a valid data. */
-       /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
+       /* Pairs with the dependency ordering in __ptr_ring_consume. */
        smp_wmb();
 
        WRITE_ONCE(r->queue[r->producer++], ptr);
index 45e1f8f..9ab7443 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/once.h>
+#include <asm/percpu.h>
 
 #include <uapi/linux/random.h>
 
@@ -119,6 +120,8 @@ struct rnd_state {
        __u32 s1, s2, s3, s4;
 };
 
+DECLARE_PER_CPU(struct rnd_state, net_rand_state);
+
 u32 prandom_u32_state(struct rnd_state *state);
 void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
 void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
index df587d1..7a6fc99 100644 (file)
@@ -248,6 +248,8 @@ static inline void __list_splice_init_rcu(struct list_head *list,
         */
 
        sync();
+       ASSERT_EXCLUSIVE_ACCESS(*first);
+       ASSERT_EXCLUSIVE_ACCESS(*last);
 
        /*
         * Readers are finished with the source list, so perform splice.
@@ -512,7 +514,7 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
  * @right: The hlist head on the right
  *
  * The lists start out as [@left  ][node1 ... ] and
-                          [@right ][node2 ... ]
+ *                        [@right ][node2 ... ]
  * The lists end up as    [@left  ][node2 ... ]
  *                        [@right ][node1 ... ]
  */
index 9670b54..ff3e947 100644 (file)
@@ -162,7 +162,7 @@ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n)
  * The barrier() is needed to make sure compiler doesn't cache first element [1],
  * as this loop can be restarted [2]
  * [1] Documentation/core-api/atomic_ops.rst around line 114
- * [2] Documentation/RCU/rculist_nulls.txt around line 146
+ * [2] Documentation/RCU/rculist_nulls.rst around line 146
  */
 #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member)                        \
        for (({barrier();}),                                                    \
index 659cbfa..d15d46d 100644 (file)
@@ -828,17 +828,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 
 /*
  * Does the specified offset indicate that the corresponding rcu_head
- * structure can be handled by kfree_rcu()?
+ * structure can be handled by kvfree_rcu()?
  */
-#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
+#define __is_kvfree_rcu_offset(offset) ((offset) < 4096)
 
 /*
  * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
  */
-#define __kfree_rcu(head, offset) \
+#define __kvfree_rcu(head, offset) \
        do { \
-               BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
-               kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
+               BUILD_BUG_ON(!__is_kvfree_rcu_offset(offset)); \
+               kvfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
        } while (0)
 
 /**
@@ -857,7 +857,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  * Because the functions are not allowed in the low-order 4096 bytes of
  * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
  * If the offset is larger than 4095 bytes, a compile-time error will
- * be generated in __kfree_rcu().  If this error is triggered, you can
+ * be generated in __kvfree_rcu(). If this error is triggered, you can
  * either fall back to use of call_rcu() or rearrange the structure to
  * position the rcu_head structure into the first 4096 bytes.
  *
@@ -872,7 +872,46 @@ do {                                                                       \
        typeof (ptr) ___p = (ptr);                                      \
                                                                        \
        if (___p)                                                       \
-               __kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \
+               __kvfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \
+} while (0)
+
+/**
+ * kvfree_rcu() - kvfree an object after a grace period.
+ *
+ * This macro takes one or two arguments, depending on
+ * whether the object is head-less or not. If the object
+ * has a head, the semantics stay the same as they used
+ * to be before:
+ *
+ *     kvfree_rcu(ptr, rhf);
+ *
+ * where @ptr is a pointer to kvfree(), @rhf is the name
+ * of the rcu_head structure within the type of @ptr.
+ *
+ * For the head-less variant, only one argument is
+ * passed: the pointer that has to be freed after a
+ * grace period. The semantics are therefore:
+ *
+ *     kvfree_rcu(ptr);
+ *
+ * where @ptr is a pointer to kvfree().
+ *
+ * Please note that the head-less way of freeing may only
+ * be used from a context that satisfies the might_sleep()
+ * annotation. Otherwise, please switch to embedding the
+ * rcu_head structure within the type of @ptr.
+ */
+#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__,          \
+       kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__)
+
+#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME
+#define kvfree_rcu_arg_2(ptr, rhf) kfree_rcu(ptr, rhf)
+#define kvfree_rcu_arg_1(ptr)                                  \
+do {                                                           \
+       typeof(ptr) ___p = (ptr);                               \
+                                                               \
+       if (___p)                                               \
+               kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \
 } while (0)
 
 /*
index 4c25a41..d9015aa 100644 (file)
@@ -36,8 +36,8 @@ void rcu_read_unlock_trace_special(struct task_struct *t, int nesting);
 /**
  * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
  *
- * When synchronize_rcu_trace() is invoked by one task, then that task
- * is guaranteed to block until all other tasks exit their read-side
+ * When synchronize_rcu_tasks_trace() is invoked by one task, then that
+ * task is guaranteed to block until all other tasks exit their read-side
  * critical sections.  Similarly, if call_rcu_trace() is invoked on one
  * task while other tasks are within RCU read-side critical sections,
  * invocation of the corresponding RCU callback is deferred until after
index 8512cae..5cc9637 100644 (file)
@@ -34,9 +34,25 @@ static inline void synchronize_rcu_expedited(void)
        synchronize_rcu();
 }
 
-static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
+/*
+ * Add one more declaration of kvfree() here. It is
+ * not so straightforward to just include <linux/mm.h>
+ * where it is defined due to getting many compile
+ * errors caused by that include.
+ */
+extern void kvfree(const void *addr);
+
+static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
-       call_rcu(head, func);
+       if (head) {
+               call_rcu(head, func);
+               return;
+       }
+
+       // kvfree_rcu(one_arg) call.
+       might_sleep();
+       synchronize_rcu();
+       kvfree((void *) func);
 }
 
 void rcu_qs(void);
index d5cc9d6..d2f4064 100644 (file)
@@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(int cpu)
 }
 
 void synchronize_rcu_expedited(void);
-void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
+void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 
 void rcu_barrier(void);
 bool rcu_eqs_special_set(int cpu);
index d3432ee..68dab3e 100644 (file)
@@ -84,7 +84,7 @@ struct bucket_table {
 
        struct lockdep_map      dep_map;
 
-       struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp;
+       struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
 /*
@@ -261,13 +261,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
                                 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
-struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
-                                          unsigned int hash);
-struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
-                                            unsigned int hash);
-struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
-                                                 struct bucket_table *tbl,
-                                                 unsigned int hash);
+struct rhash_lock_head __rcu **rht_bucket_nested(
+       const struct bucket_table *tbl, unsigned int hash);
+struct rhash_lock_head __rcu **__rht_bucket_nested(
+       const struct bucket_table *tbl, unsigned int hash);
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(
+       struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash);
 
 #define rht_dereference(p, ht) \
        rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
@@ -284,21 +283,21 @@ struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
 #define rht_entry(tpos, pos, member) \
        ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
-static inline struct rhash_lock_head *const *rht_bucket(
+static inline struct rhash_lock_head __rcu *const *rht_bucket(
        const struct bucket_table *tbl, unsigned int hash)
 {
        return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
                                     &tbl->buckets[hash];
 }
 
-static inline struct rhash_lock_head **rht_bucket_var(
+static inline struct rhash_lock_head __rcu **rht_bucket_var(
        struct bucket_table *tbl, unsigned int hash)
 {
        return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
                                     &tbl->buckets[hash];
 }
 
-static inline struct rhash_lock_head **rht_bucket_insert(
+static inline struct rhash_lock_head __rcu **rht_bucket_insert(
        struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
 {
        return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
@@ -325,7 +324,7 @@ static inline struct rhash_lock_head **rht_bucket_insert(
  */
 
 static inline void rht_lock(struct bucket_table *tbl,
-                           struct rhash_lock_head **bkt)
+                           struct rhash_lock_head __rcu **bkt)
 {
        local_bh_disable();
        bit_spin_lock(0, (unsigned long *)bkt);
@@ -333,7 +332,7 @@ static inline void rht_lock(struct bucket_table *tbl,
 }
 
 static inline void rht_lock_nested(struct bucket_table *tbl,
-                                  struct rhash_lock_head **bucket,
+                                  struct rhash_lock_head __rcu **bucket,
                                   unsigned int subclass)
 {
        local_bh_disable();
@@ -342,18 +341,18 @@ static inline void rht_lock_nested(struct bucket_table *tbl,
 }
 
 static inline void rht_unlock(struct bucket_table *tbl,
-                             struct rhash_lock_head **bkt)
+                             struct rhash_lock_head __rcu **bkt)
 {
        lock_map_release(&tbl->dep_map);
        bit_spin_unlock(0, (unsigned long *)bkt);
        local_bh_enable();
 }
 
-static inline struct rhash_head __rcu *__rht_ptr(
-       struct rhash_lock_head *const *bkt)
+static inline struct rhash_head *__rht_ptr(
+       struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt)
 {
-       return (struct rhash_head __rcu *)
-               ((unsigned long)*bkt & ~BIT(0) ?:
+       return (struct rhash_head *)
+               ((unsigned long)p & ~BIT(0) ?:
                 (unsigned long)RHT_NULLS_MARKER(bkt));
 }
 
@@ -365,47 +364,41 @@ static inline struct rhash_head __rcu *__rht_ptr(
  *            access is guaranteed, such as when destroying the table.
  */
 static inline struct rhash_head *rht_ptr_rcu(
-       struct rhash_lock_head *const *bkt)
+       struct rhash_lock_head __rcu *const *bkt)
 {
-       struct rhash_head __rcu *p = __rht_ptr(bkt);
-
-       return rcu_dereference(p);
+       return __rht_ptr(rcu_dereference(*bkt), bkt);
 }
 
 static inline struct rhash_head *rht_ptr(
-       struct rhash_lock_head *const *bkt,
+       struct rhash_lock_head __rcu *const *bkt,
        struct bucket_table *tbl,
        unsigned int hash)
 {
-       return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash);
+       return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt);
 }
 
 static inline struct rhash_head *rht_ptr_exclusive(
-       struct rhash_lock_head *const *bkt)
+       struct rhash_lock_head __rcu *const *bkt)
 {
-       return rcu_dereference_protected(__rht_ptr(bkt), 1);
+       return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt);
 }
 
-static inline void rht_assign_locked(struct rhash_lock_head **bkt,
+static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
                                     struct rhash_head *obj)
 {
-       struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
-
        if (rht_is_a_nulls(obj))
                obj = NULL;
-       rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0)));
+       rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0)));
 }
 
 static inline void rht_assign_unlock(struct bucket_table *tbl,
-                                    struct rhash_lock_head **bkt,
+                                    struct rhash_lock_head __rcu **bkt,
                                     struct rhash_head *obj)
 {
-       struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
-
        if (rht_is_a_nulls(obj))
                obj = NULL;
        lock_map_release(&tbl->dep_map);
-       rcu_assign_pointer(*p, obj);
+       rcu_assign_pointer(*bkt, (void *)obj);
        preempt_enable();
        __release(bitlock);
        local_bh_enable();
@@ -593,7 +586,7 @@ static inline struct rhash_head *__rhashtable_lookup(
                .ht = ht,
                .key = key,
        };
-       struct rhash_lock_head *const *bkt;
+       struct rhash_lock_head __rcu *const *bkt;
        struct bucket_table *tbl;
        struct rhash_head *he;
        unsigned int hash;
@@ -709,7 +702,7 @@ static inline void *__rhashtable_insert_fast(
                .ht = ht,
                .key = key,
        };
-       struct rhash_lock_head **bkt;
+       struct rhash_lock_head __rcu **bkt;
        struct rhash_head __rcu **pprev;
        struct bucket_table *tbl;
        struct rhash_head *head;
@@ -995,7 +988,7 @@ static inline int __rhashtable_remove_fast_one(
        struct rhash_head *obj, const struct rhashtable_params params,
        bool rhlist)
 {
-       struct rhash_lock_head **bkt;
+       struct rhash_lock_head __rcu **bkt;
        struct rhash_head __rcu **pprev;
        struct rhash_head *he;
        unsigned int hash;
@@ -1147,7 +1140,7 @@ static inline int __rhashtable_replace_fast(
        struct rhash_head *obj_old, struct rhash_head *obj_new,
        const struct rhashtable_params params)
 {
-       struct rhash_lock_head **bkt;
+       struct rhash_lock_head __rcu **bkt;
        struct rhash_head __rcu **pprev;
        struct rhash_head *he;
        unsigned int hash;
index 7e5b2a4..25e3fde 100644 (file)
@@ -60,39 +60,39 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 }
 
 #define RWSEM_UNLOCKED_VALUE           0L
-#define __RWSEM_INIT_COUNT(name)       .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
+#define __RWSEM_COUNT_INIT(name)       .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
 
 /* Common initializer macros and functions */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define __RWSEM_DEP_MAP_INIT(lockname)                        \
-       , .dep_map = {                                  \
+       .dep_map = {                                    \
                .name = #lockname,                      \
                .wait_type_inner = LD_WAIT_SLEEP,       \
-       }
+       },
 #else
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
 #ifdef CONFIG_DEBUG_RWSEMS
-# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = &lockname
+# define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname,
 #else
-# define __DEBUG_RWSEM_INITIALIZER(lockname)
+# define __RWSEM_DEBUG_INIT(lockname)
 #endif
 
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
+#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED,
 #else
 #define __RWSEM_OPT_INIT(lockname)
 #endif
 
 #define __RWSEM_INITIALIZER(name)                              \
-       { __RWSEM_INIT_COUNT(name),                             \
+       { __RWSEM_COUNT_INIT(name),                             \
          .owner = ATOMIC_LONG_INIT(0),                         \
-         .wait_list = LIST_HEAD_INIT((name).wait_list),        \
-         .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
          __RWSEM_OPT_INIT(name)                                \
-         __DEBUG_RWSEM_INITIALIZER(name)                       \
+         .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
+         .wait_list = LIST_HEAD_INIT((name).wait_list),        \
+         __RWSEM_DEBUG_INIT(name)                              \
          __RWSEM_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
index 6833729..6d6683b 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/plist.h>
 #include <linux/hrtimer.h>
+#include <linux/irqflags.h>
 #include <linux/seccomp.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
@@ -154,24 +155,24 @@ struct task_group;
  *
  *   for (;;) {
  *     set_current_state(TASK_UNINTERRUPTIBLE);
- *     if (!need_sleep)
- *             break;
+ *     if (CONDITION)
+ *        break;
  *
  *     schedule();
  *   }
  *   __set_current_state(TASK_RUNNING);
  *
  * If the caller does not need such serialisation (because, for instance, the
- * condition test and condition change and wakeup are under the same lock) then
+ * CONDITION test and condition change and wakeup are under the same lock) then
  * use __set_current_state().
  *
  * The above is typically ordered against the wakeup, which does:
  *
- *   need_sleep = false;
+ *   CONDITION = 1;
  *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
- * where wake_up_state() executes a full memory barrier before accessing the
- * task state.
+ * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
+ * accessing p->state.
  *
  * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
@@ -374,7 +375,7 @@ struct util_est {
  * For cfs_rq, they are the aggregated values of all runnable and blocked
  * sched_entities.
  *
- * The load/runnable/util_avg doesn't direcly factor frequency scaling and CPU
+ * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
  * capacity scaling. The scaling is done through the rq_clock_pelt that is used
  * for computing those signals (see update_rq_clock_pelt())
  *
@@ -686,9 +687,15 @@ struct task_struct {
        struct sched_dl_entity          dl;
 
 #ifdef CONFIG_UCLAMP_TASK
-       /* Clamp values requested for a scheduling entity */
+       /*
+        * Clamp values requested for a scheduling entity.
+        * Must be updated with task_rq_lock() held.
+        */
        struct uclamp_se                uclamp_req[UCLAMP_CNT];
-       /* Effective clamp values used for a scheduling entity */
+       /*
+        * Effective clamp values used for a scheduling entity.
+        * Must be updated with task_rq_lock() held.
+        */
        struct uclamp_se                uclamp[UCLAMP_CNT];
 #endif
 
@@ -980,19 +987,9 @@ struct task_struct {
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-       unsigned int                    irq_events;
+       struct irqtrace_events          irqtrace;
        unsigned int                    hardirq_threaded;
-       unsigned long                   hardirq_enable_ip;
-       unsigned long                   hardirq_disable_ip;
-       unsigned int                    hardirq_enable_event;
-       unsigned int                    hardirq_disable_event;
-       int                             hardirqs_enabled;
-       int                             hardirq_context;
        u64                             hardirq_chain_key;
-       unsigned long                   softirq_disable_ip;
-       unsigned long                   softirq_enable_ip;
-       unsigned int                    softirq_disable_event;
-       unsigned int                    softirq_enable_event;
        int                             softirqs_enabled;
        int                             softirq_context;
        int                             irq_config;
@@ -1193,8 +1190,12 @@ struct task_struct {
 #ifdef CONFIG_KASAN
        unsigned int                    kasan_depth;
 #endif
+
 #ifdef CONFIG_KCSAN
        struct kcsan_ctx                kcsan_ctx;
+#ifdef CONFIG_TRACE_IRQFLAGS
+       struct irqtrace_events          kcsan_save_irqtrace;
+#endif
 #endif
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -2044,6 +2045,7 @@ const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
 const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
 
 int sched_trace_rq_cpu(struct rq *rq);
+int sched_trace_rq_nr_running(struct rq *rq);
 
 const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
 
index 0fbcbac..cc9f393 100644 (file)
@@ -14,6 +14,7 @@ enum hk_flags {
        HK_FLAG_DOMAIN          = (1 << 5),
        HK_FLAG_WQ              = (1 << 6),
        HK_FLAG_MANAGED_IRQ     = (1 << 7),
+       HK_FLAG_KTHREAD         = (1 << 8),
 };
 
 #ifdef CONFIG_CPU_ISOLATION
index 4859bea..83ec54b 100644 (file)
@@ -43,6 +43,6 @@ extern unsigned long calc_load_n(unsigned long load, unsigned long exp,
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
-extern void calc_global_load(unsigned long ticks);
+extern void calc_global_load(void);
 
 #endif /* _LINUX_SCHED_LOADAVG_H */
index 480a4d1..6be66f5 100644 (file)
@@ -23,7 +23,7 @@ extern struct mm_struct *mm_alloc(void);
  * will still exist later on and mmget_not_zero() has to be used before
  * accessing it.
  *
- * This is a preferred way to to pin @mm for a longer/unbounded amount
+ * This is a preferred way to pin @mm for a longer/unbounded amount
  * of time.
  *
  * Use mmdrop() to release the reference acquired by mmgrab().
@@ -49,8 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
                __mmdrop(mm);
 }
 
-void mmdrop(struct mm_struct *mm);
-
 /*
  * This has to be called after a get_task_mm()/mmget_not_zero()
  * followed by taking the mmap_lock for writing before modifying the
@@ -234,7 +232,7 @@ static inline unsigned int memalloc_noio_save(void)
  * @flags: Flags to restore.
  *
  * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function.
- * Always make sure that that the given flags is the return value from the
+ * Always make sure that the given flags is the return value from the
  * pairing memalloc_noio_save call.
  */
 static inline void memalloc_noio_restore(unsigned int flags)
@@ -265,7 +263,7 @@ static inline unsigned int memalloc_nofs_save(void)
  * @flags: Flags to restore.
  *
  * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function.
- * Always make sure that that the given flags is the return value from the
+ * Always make sure that the given flags is the return value from the
  * pairing memalloc_nofs_save call.
  */
 static inline void memalloc_nofs_restore(unsigned int flags)
index 660ac49..3c31ba8 100644 (file)
@@ -61,9 +61,13 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
+extern unsigned int sysctl_sched_dl_period_max;
+extern unsigned int sysctl_sched_dl_period_min;
+
 #ifdef CONFIG_UCLAMP_TASK
 extern unsigned int sysctl_sched_uclamp_util_min;
 extern unsigned int sysctl_sched_uclamp_util_max;
+extern unsigned int sysctl_sched_uclamp_util_min_rt_default;
 #endif
 
 #ifdef CONFIG_CFS_BANDWIDTH
index 3835907..27b4fa4 100644 (file)
@@ -55,6 +55,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 
 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
+extern void sched_post_fork(struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 void __noreturn do_task_dead(void);
@@ -126,6 +127,12 @@ static inline void put_task_struct(struct task_struct *t)
                __put_task_struct(t);
 }
 
+static inline void put_task_struct_many(struct task_struct *t, int nr)
+{
+       if (refcount_sub_and_test(nr, &t->usage))
+               __put_task_struct(t);
+}
+
 void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
index fb11091..8205112 100644 (file)
@@ -217,6 +217,16 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
 #endif /* !CONFIG_SMP */
 
 #ifndef arch_scale_cpu_capacity
+/**
+ * arch_scale_cpu_capacity - get the capacity scale factor of a given CPU.
+ * @cpu: the CPU in question.
+ *
+ * Return: the CPU scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
+ *
+ *             max_perf(cpu)
+ *      ----------------------------- * SCHED_CAPACITY_SCALE
+ *      max(max_perf(c) : c \in CPUs)
+ */
 static __always_inline
 unsigned long arch_scale_cpu_capacity(int cpu)
 {
@@ -232,6 +242,13 @@ unsigned long arch_scale_thermal_pressure(int cpu)
 }
 #endif
 
+#ifndef arch_set_thermal_pressure
+static __always_inline
+void arch_set_thermal_pressure(const struct cpumask *cpus,
+                              unsigned long th_pressure)
+{ }
+#endif
+
 static inline int task_node(const struct task_struct *p)
 {
        return cpu_to_node(task_cpu(p));
index 0bb04a9..528718e 100644 (file)
@@ -6,6 +6,34 @@
 #define LINUX_SCHED_CLOCK
 
 #ifdef CONFIG_GENERIC_SCHED_CLOCK
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns:          sched_clock() value at last update
+ * @epoch_cyc:         Clock cycle value at last update.
+ * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
+ *                     clocks.
+ * @read_sched_clock:  Current clock source (or dummy source when suspended).
+ * @mult:              Multiplier for scaled math conversion.
+ * @shift:             Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
+       u64 epoch_ns;
+       u64 epoch_cyc;
+       u64 sched_clock_mask;
+       u64 (*read_sched_clock)(void);
+       u32 mult;
+       u32 shift;
+};
+
+extern struct clock_read_data *sched_clock_read_begin(unsigned int *seq);
+extern int sched_clock_read_retry(unsigned int seq);
+
 extern void generic_sched_clock_init(void);
 
 extern void sched_clock_register(u64 (*read)(void), int bits,
index 8b97204..54bc204 100644 (file)
@@ -1,36 +1,15 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_SEQLOCK_H
 #define __LINUX_SEQLOCK_H
+
 /*
- * Reader/writer consistent mechanism without starving writers. This type of
- * lock for data where the reader wants a consistent set of information
- * and is willing to retry if the information changes. There are two types
- * of readers:
- * 1. Sequence readers which never block a writer but they may have to retry
- *    if a writer is in progress by detecting change in sequence number.
- *    Writers do not wait for a sequence reader.
- * 2. Locking readers which will wait if a writer or another locking reader
- *    is in progress. A locking reader in progress will also block a writer
- *    from going forward. Unlike the regular rwlock, the read lock here is
- *    exclusive so that only one locking reader can get it.
- *
- * This is not as cache friendly as brlock. Also, this may not work well
- * for data that contains pointers, because any writer could
- * invalidate a pointer that a reader was following.
- *
- * Expected non-blocking reader usage:
- *     do {
- *         seq = read_seqbegin(&foo);
- *     ...
- *      } while (read_seqretry(&foo, seq));
- *
+ * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
+ * lockless readers (read-only retry loops), and no writer starvation.
  *
- * On non-SMP the spin locks disappear but the writer still needs
- * to increment the sequence variables because an interrupt routine could
- * change the state of the data.
+ * See Documentation/locking/seqlock.rst
  *
- * Based on x86_64 vsyscall gettimeofday 
- * by Keith Owens and Andrea Arcangeli
+ * Copyrights:
+ * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
  */
 
 #include <linux/spinlock.h>
@@ -41,8 +20,8 @@
 #include <asm/processor.h>
 
 /*
- * The seqlock interface does not prescribe a precise sequence of read
- * begin/retry/end. For readers, typically there is a call to
+ * The seqcount_t interface does not prescribe a precise sequence of
+ * read begin/retry/end. For readers, typically there is a call to
  * read_seqcount_begin() and read_seqcount_retry(), however, there are more
  * esoteric cases which do not follow this pattern.
  *
  * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
  * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
  * atomics; if there is a matching read_seqcount_retry() call, no following
- * memory operations are considered atomic. Usage of seqlocks via seqlock_t
- * interface is not affected.
+ * memory operations are considered atomic. Usage of the seqlock_t interface
+ * is not affected.
  */
 #define KCSAN_SEQLOCK_REGION_MAX 1000
 
 /*
- * Version using sequence counter only.
- * This can be used when code has its own mutex protecting the
- * updating starting before the write_seqcountbeqin() and ending
- * after the write_seqcount_end().
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
  */
 typedef struct seqcount {
        unsigned sequence;
@@ -82,6 +75,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name,
 # define SEQCOUNT_DEP_MAP_INIT(lockname) \
                .dep_map = { .name = #lockname } \
 
+/**
+ * seqcount_init() - runtime initializer for seqcount_t
+ * @s: Pointer to the seqcount_t instance
+ */
 # define seqcount_init(s)                              \
        do {                                            \
                static struct lock_class_key __key;     \
@@ -105,13 +102,15 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 # define seqcount_lockdep_reader_access(x)
 #endif
 
-#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
-
+/**
+ * SEQCNT_ZERO() - static initializer for seqcount_t
+ * @name: Name of the seqcount_t instance
+ */
+#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
 
 /**
- * __read_seqcount_begin - begin a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
+ * @s: Pointer to seqcount_t
  *
  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -120,6 +119,8 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  *
  * Use carefully, only in critical code, and comment how the barrier is
  * provided.
+ *
+ * Return: count to be passed to read_seqcount_retry()
  */
 static inline unsigned __read_seqcount_begin(const seqcount_t *s)
 {
@@ -136,30 +137,10 @@ repeat:
 }
 
 /**
- * raw_read_seqcount - Read the raw seqcount
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
+ * @s: Pointer to seqcount_t
  *
- * raw_read_seqcount opens a read critical section of the given
- * seqcount without any lockdep checking and without checking or
- * masking the LSB. Calling code is responsible for handling that.
- */
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
-{
-       unsigned ret = READ_ONCE(s->sequence);
-       smp_rmb();
-       kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-       return ret;
-}
-
-/**
- * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
- *
- * raw_read_seqcount_begin opens a read critical section of the given
- * seqcount, but without any lockdep checking. Validity of the critical
- * section is tested by checking read_seqcount_retry function.
+ * Return: count to be passed to read_seqcount_retry()
  */
 static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
 {
@@ -169,13 +150,10 @@ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
 }
 
 /**
- * read_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * read_seqcount_begin() - begin a seqcount_t read critical section
+ * @s: Pointer to seqcount_t
  *
- * read_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * Return: count to be passed to read_seqcount_retry()
  */
 static inline unsigned read_seqcount_begin(const seqcount_t *s)
 {
@@ -184,32 +162,54 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
 }
 
 /**
- * raw_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_read_seqcount() - read the raw seqcount_t counter value
+ * @s: Pointer to seqcount_t
  *
- * raw_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * raw_read_seqcount opens a read critical section of the given
+ * seqcount_t, without any lockdep checking, and without checking or
+ * masking the sequence counter LSB. Calling code is responsible for
+ * handling that.
  *
- * Unlike read_seqcount_begin(), this function will not wait for the count
- * to stabilize. If a writer is active when we begin, we will fail the
- * read_seqcount_retry() instead of stabilizing at the beginning of the
- * critical section.
+ * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+static inline unsigned raw_read_seqcount(const seqcount_t *s)
 {
        unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
        kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-       return ret & ~1;
+       return ret;
 }
 
 /**
- * __read_seqcount_retry - end a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
- * @start: count, from read_seqcount_begin
- * Returns: 1 if retry is required, else 0
+ * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
+ *                        lockdep and w/o counter stabilization
+ * @s: Pointer to seqcount_t
+ *
+ * raw_seqcount_begin opens a read critical section of the given
+ * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
+ * for the count to stabilize. If a writer is active when it begins, it
+ * will fail the read_seqcount_retry() at the end of the read critical
+ * section instead of stabilizing at the beginning of it.
+ *
+ * Use this only in special kernel hot paths where the read section is
+ * small and has a high probability of success through other external
+ * means. It will save a single branching instruction.
+ *
+ * Return: count to be passed to read_seqcount_retry()
+ */
+static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+{
+       /*
+        * If the counter is odd, let read_seqcount_retry() fail
+        * by decrementing the counter.
+        */
+       return raw_read_seqcount(s) & ~1;
+}
+
+/**
+ * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
+ * @s: Pointer to seqcount_t
+ * @start: count, from read_seqcount_begin()
  *
  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -218,6 +218,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
  *
  * Use carefully, only in critical code, and comment how the barrier is
  * provided.
+ *
+ * Return: true if a read section retry is required, else false
  */
 static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
@@ -226,14 +228,15 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 }
 
 /**
- * read_seqcount_retry - end a seq-read critical section
- * @s: pointer to seqcount_t
- * @start: count, from read_seqcount_begin
- * Returns: 1 if retry is required, else 0
+ * read_seqcount_retry() - end a seqcount_t read critical section
+ * @s: Pointer to seqcount_t
+ * @start: count, from read_seqcount_begin()
  *
- * read_seqcount_retry closes a read critical section of the given seqcount.
- * If the critical section was invalid, it must be ignored (and typically
- * retried).
+ * read_seqcount_retry closes the read critical section of given
+ * seqcount_t.  If the critical section was invalid, it must be ignored
+ * (and typically retried).
+ *
+ * Return: true if a read section retry is required, else false
  */
 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
@@ -241,8 +244,10 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
        return __read_seqcount_retry(s, start);
 }
 
-
-
+/**
+ * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
+ * @s: Pointer to seqcount_t
+ */
 static inline void raw_write_seqcount_begin(seqcount_t *s)
 {
        kcsan_nestable_atomic_begin();
@@ -250,6 +255,10 @@ static inline void raw_write_seqcount_begin(seqcount_t *s)
        smp_wmb();
 }
 
+/**
+ * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
+ * @s: Pointer to seqcount_t
+ */
 static inline void raw_write_seqcount_end(seqcount_t *s)
 {
        smp_wmb();
@@ -257,45 +266,104 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
        kcsan_nestable_atomic_end();
 }
 
+static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+       raw_write_seqcount_begin(s);
+       seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
 /**
- * raw_write_seqcount_barrier - do a seq write barrier
- * @s: pointer to seqcount_t
+ * write_seqcount_begin_nested() - start a seqcount_t write section with
+ *                                 custom lockdep nesting level
+ * @s: Pointer to seqcount_t
+ * @subclass: lockdep nesting level
  *
- * This can be used to provide an ordering guarantee instead of the
- * usual consistency guarantee. It is one wmb cheaper, because we can
- * collapse the two back-to-back wmb()s.
+ * See Documentation/locking/lockdep-design.rst
+ */
+static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+       lockdep_assert_preemption_disabled();
+       __write_seqcount_begin_nested(s, subclass);
+}
+
+/*
+ * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks.
+ *
+ * Use for internal seqlock.h code where it's known that preemption is
+ * already disabled. For example, seqlock_t write side functions.
+ */
+static inline void __write_seqcount_begin(seqcount_t *s)
+{
+       __write_seqcount_begin_nested(s, 0);
+}
+
+/**
+ * write_seqcount_begin() - start a seqcount_t write side critical section
+ * @s: Pointer to seqcount_t
+ *
+ * write_seqcount_begin opens a write side critical section of the given
+ * seqcount_t.
+ *
+ * Context: seqcount_t write side critical sections must be serialized and
+ * non-preemptible. If readers can be invoked from hardirq or softirq
+ * context, interrupts or bottom halves must be respectively disabled.
+ */
+static inline void write_seqcount_begin(seqcount_t *s)
+{
+       write_seqcount_begin_nested(s, 0);
+}
+
+/**
+ * write_seqcount_end() - end a seqcount_t write side critical section
+ * @s: Pointer to seqcount_t
+ *
+ * The write section must've been opened with write_seqcount_begin().
+ */
+static inline void write_seqcount_end(seqcount_t *s)
+{
+       seqcount_release(&s->dep_map, _RET_IP_);
+       raw_write_seqcount_end(s);
+}
+
+/**
+ * raw_write_seqcount_barrier() - do a seqcount_t write barrier
+ * @s: Pointer to seqcount_t
+ *
+ * This can be used to provide an ordering guarantee instead of the usual
+ * consistency guarantee. It is one wmb cheaper, because it can collapse
+ * the two back-to-back wmb()s.
  *
  * Note that writes surrounding the barrier should be declared atomic (e.g.
  * via WRITE_ONCE): a) to ensure the writes become visible to other threads
  * atomically, avoiding compiler optimizations; b) to document which writes are
  * meant to propagate to the reader critical section. This is necessary because
  * neither writes before and after the barrier are enclosed in a seq-writer
- * critical section that would ensure readers are aware of ongoing writes.
+ * critical section that would ensure readers are aware of ongoing writes::
  *
- *      seqcount_t seq;
- *      bool X = true, Y = false;
+ *     seqcount_t seq;
+ *     bool X = true, Y = false;
  *
- *      void read(void)
- *      {
- *              bool x, y;
+ *     void read(void)
+ *     {
+ *             bool x, y;
  *
- *              do {
- *                      int s = read_seqcount_begin(&seq);
+ *             do {
+ *                     int s = read_seqcount_begin(&seq);
  *
- *                      x = X; y = Y;
+ *                     x = X; y = Y;
  *
- *              } while (read_seqcount_retry(&seq, s));
+ *             } while (read_seqcount_retry(&seq, s));
  *
- *              BUG_ON(!x && !y);
+ *             BUG_ON(!x && !y);
  *      }
  *
  *      void write(void)
  *      {
- *              WRITE_ONCE(Y, true);
+ *             WRITE_ONCE(Y, true);
  *
- *              raw_write_seqcount_barrier(seq);
+ *             raw_write_seqcount_barrier(&seq);
  *
- *              WRITE_ONCE(X, false);
+ *             WRITE_ONCE(X, false);
  *      }
  */
 static inline void raw_write_seqcount_barrier(seqcount_t *s)
@@ -307,6 +375,37 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
        kcsan_nestable_atomic_end();
 }
 
+/**
+ * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
+ *                               side operations
+ * @s: Pointer to seqcount_t
+ *
+ * After write_seqcount_invalidate, no seqcount_t read side operations
+ * will complete successfully and see data older than this.
+ */
+static inline void write_seqcount_invalidate(seqcount_t *s)
+{
+       smp_wmb();
+       kcsan_nestable_atomic_begin();
+       s->sequence+=2;
+       kcsan_nestable_atomic_end();
+}
+
+/**
+ * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
+ * @s: Pointer to seqcount_t
+ *
+ * Use seqcount_t latching to switch between two storage places protected
+ * by a sequence counter. Doing so allows having interruptible, preemptible,
+ * seqcount_t write side critical sections.
+ *
+ * Check raw_write_seqcount_latch() for more details and a full reader and
+ * writer usage example.
+ *
+ * Return: sequence counter raw value. Use the lowest bit as an index for
+ * picking which data copy to read. The full counter value must then be
+ * checked with read_seqcount_retry().
+ */
 static inline int raw_read_seqcount_latch(seqcount_t *s)
 {
        /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
@@ -315,8 +414,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
 }
 
 /**
- * raw_write_seqcount_latch - redirect readers to even/odd copy
- * @s: pointer to seqcount_t
+ * raw_write_seqcount_latch() - redirect readers to even/odd copy
+ * @s: Pointer to seqcount_t
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -332,64 +431,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  * Very simply put: we first modify one copy and then the other. This ensures
  * there is always one copy in a stable state, ready to give us an answer.
  *
- * The basic form is a data structure like:
+ * The basic form is a data structure like::
  *
- * struct latch_struct {
- *     seqcount_t              seq;
- *     struct data_struct      data[2];
- * };
+ *     struct latch_struct {
+ *             seqcount_t              seq;
+ *             struct data_struct      data[2];
+ *     };
  *
  * Where a modification, which is assumed to be externally serialized, does the
- * following:
+ * following::
  *
- * void latch_modify(struct latch_struct *latch, ...)
- * {
- *     smp_wmb();      <- Ensure that the last data[1] update is visible
- *     latch->seq++;
- *     smp_wmb();      <- Ensure that the seqcount update is visible
+ *     void latch_modify(struct latch_struct *latch, ...)
+ *     {
+ *             smp_wmb();      // Ensure that the last data[1] update is visible
+ *             latch->seq++;
+ *             smp_wmb();      // Ensure that the seqcount update is visible
  *
- *     modify(latch->data[0], ...);
+ *             modify(latch->data[0], ...);
  *
- *     smp_wmb();      <- Ensure that the data[0] update is visible
- *     latch->seq++;
- *     smp_wmb();      <- Ensure that the seqcount update is visible
+ *             smp_wmb();      // Ensure that the data[0] update is visible
+ *             latch->seq++;
+ *             smp_wmb();      // Ensure that the seqcount update is visible
  *
- *     modify(latch->data[1], ...);
- * }
+ *             modify(latch->data[1], ...);
+ *     }
  *
- * The query will have a form like:
+ * The query will have a form like::
  *
- * struct entry *latch_query(struct latch_struct *latch, ...)
- * {
- *     struct entry *entry;
- *     unsigned seq, idx;
+ *     struct entry *latch_query(struct latch_struct *latch, ...)
+ *     {
+ *             struct entry *entry;
+ *             unsigned seq, idx;
  *
- *     do {
- *             seq = raw_read_seqcount_latch(&latch->seq);
+ *             do {
+ *                     seq = raw_read_seqcount_latch(&latch->seq);
  *
- *             idx = seq & 0x01;
- *             entry = data_query(latch->data[idx], ...);
+ *                     idx = seq & 0x01;
+ *                     entry = data_query(latch->data[idx], ...);
  *
- *             smp_rmb();
- *     } while (seq != latch->seq);
+ *             // read_seqcount_retry() includes needed smp_rmb()
+ *             } while (read_seqcount_retry(&latch->seq, seq));
  *
- *     return entry;
- * }
+ *             return entry;
+ *     }
  *
  * So during the modification, queries are first redirected to data[1]. Then we
  * modify data[0]. When that is complete, we redirect queries back to data[0]
  * and we can modify data[1].
  *
- * NOTE: The non-requirement for atomic modifications does _NOT_ include
- *       the publishing of new entries in the case where data is a dynamic
- *       data structure.
+ * NOTE:
  *
- *       An iteration might start in data[0] and get suspended long enough
- *       to miss an entire modification sequence, once it resumes it might
- *       observe the new entry.
+ *     The non-requirement for atomic modifications does _NOT_ include
+ *     the publishing of new entries in the case where data is a dynamic
+ *     data structure.
  *
- * NOTE: When data is a dynamic data structure; one should use regular RCU
- *       patterns to manage the lifetimes of the objects within.
+ *     An iteration might start in data[0] and get suspended long enough
+ *     to miss an entire modification sequence; once it resumes, it might
+ *     observe the new entry.
+ *
+ * NOTE:
+ *
+ *     When data is a dynamic data structure, one should use regular RCU
+ *     patterns to manage the lifetimes of the objects within.
  */
 static inline void raw_write_seqcount_latch(seqcount_t *s)
 {
@@ -399,67 +502,48 @@ static inline void raw_write_seqcount_latch(seqcount_t *s)
 }
 
 /*
- * Sequence counter only version assumes that callers are using their
- * own mutexing.
- */
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
-       raw_write_seqcount_begin(s);
-       seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
-}
-
-static inline void write_seqcount_begin(seqcount_t *s)
-{
-       write_seqcount_begin_nested(s, 0);
-}
-
-static inline void write_seqcount_end(seqcount_t *s)
-{
-       seqcount_release(&s->dep_map, _RET_IP_);
-       raw_write_seqcount_end(s);
-}
-
-/**
- * write_seqcount_invalidate - invalidate in-progress read-side seq operations
- * @s: pointer to seqcount_t
+ * Sequential locks (seqlock_t)
  *
- * After write_seqcount_invalidate, no read-side seq operations will complete
- * successfully and see data older than this.
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ *    - Comments on top of seqcount_t
+ *    - Documentation/locking/seqlock.rst
  */
-static inline void write_seqcount_invalidate(seqcount_t *s)
-{
-       smp_wmb();
-       kcsan_nestable_atomic_begin();
-       s->sequence+=2;
-       kcsan_nestable_atomic_end();
-}
-
 typedef struct {
        struct seqcount seqcount;
        spinlock_t lock;
 } seqlock_t;
 
-/*
- * These macros triggered gcc-3.x compile-time problems.  We think these are
- * OK now.  Be cautious.
- */
 #define __SEQLOCK_UNLOCKED(lockname)                   \
        {                                               \
                .seqcount = SEQCNT_ZERO(lockname),      \
                .lock = __SPIN_LOCK_UNLOCKED(lockname)  \
        }
 
-#define seqlock_init(x)                                        \
+/**
+ * seqlock_init() - dynamic initializer for seqlock_t
+ * @sl: Pointer to the seqlock_t instance
+ */
+#define seqlock_init(sl)                               \
        do {                                            \
-               seqcount_init(&(x)->seqcount);          \
-               spin_lock_init(&(x)->lock);             \
+               seqcount_init(&(sl)->seqcount);         \
+               spin_lock_init(&(sl)->lock);            \
        } while (0)
 
-#define DEFINE_SEQLOCK(x) \
-               seqlock_t x = __SEQLOCK_UNLOCKED(x)
+/**
+ * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t
+ * @sl: Name of the seqlock_t instance
+ */
+#define DEFINE_SEQLOCK(sl) \
+               seqlock_t sl = __SEQLOCK_UNLOCKED(sl)
 
-/*
- * Read side functions for starting and finalizing a read side section.
+/**
+ * read_seqbegin() - start a seqlock_t read side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * Return: count, to be passed to read_seqretry()
  */
 static inline unsigned read_seqbegin(const seqlock_t *sl)
 {
@@ -470,6 +554,17 @@ static inline unsigned read_seqbegin(const seqlock_t *sl)
        return ret;
 }
 
+/**
+ * read_seqretry() - end a seqlock_t read side section
+ * @sl: Pointer to seqlock_t
+ * @start: count, from read_seqbegin()
+ *
+ * read_seqretry closes the read side critical section of given seqlock_t.
+ * If the critical section was invalid, it must be ignored (and typically
+ * retried).
+ *
+ * Return: true if a read section retry is required, else false
+ */
 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 {
        /*
@@ -481,41 +576,85 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
        return read_seqcount_retry(&sl->seqcount, start);
 }
 
-/*
- * Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * write_seqlock() - start a seqlock_t write side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_seqlock opens a write side critical section for the given
+ * seqlock_t.  It also implicitly acquires the spinlock_t embedded inside
+ * that sequential lock. All seqlock_t write side sections are thus
+ * automatically serialized and non-preemptible.
+ *
+ * Context: if the seqlock_t read section, or other write side critical
+ * sections, can be invoked from hardirq or softirq contexts, use the
+ * _irqsave or _bh variants of this function instead.
  */
 static inline void write_seqlock(seqlock_t *sl)
 {
        spin_lock(&sl->lock);
-       write_seqcount_begin(&sl->seqcount);
+       __write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock() - end a seqlock_t write side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock closes the (serialized and non-preemptible) write side
+ * critical section of given seqlock_t.
+ */
 static inline void write_sequnlock(seqlock_t *sl)
 {
        write_seqcount_end(&sl->seqcount);
        spin_unlock(&sl->lock);
 }
 
+/**
+ * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * _bh variant of write_seqlock(). Use only if the read side section, or
+ * other write side sections, can be invoked from softirq contexts.
+ */
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
        spin_lock_bh(&sl->lock);
-       write_seqcount_begin(&sl->seqcount);
+       __write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock_bh closes the serialized, non-preemptible, and
+ * softirqs-disabled, seqlock_t write side critical section opened with
+ * write_seqlock_bh().
+ */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
        write_seqcount_end(&sl->seqcount);
        spin_unlock_bh(&sl->lock);
 }
 
+/**
+ * write_seqlock_irq() - start a non-interruptible seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * _irq variant of write_seqlock(). Use only if the read side section, or
+ * other write sections, can be invoked from hardirq contexts.
+ */
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
        spin_lock_irq(&sl->lock);
-       write_seqcount_begin(&sl->seqcount);
+       __write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock_irq closes the serialized and non-interruptible
+ * seqlock_t write side section opened with write_seqlock_irq().
+ */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
        write_seqcount_end(&sl->seqcount);
@@ -527,13 +666,32 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
        unsigned long flags;
 
        spin_lock_irqsave(&sl->lock, flags);
-       write_seqcount_begin(&sl->seqcount);
+       __write_seqcount_begin(&sl->seqcount);
        return flags;
 }
 
+/**
+ * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
+ *                           section
+ * @lock:  Pointer to seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ *         state, to be passed to write_sequnlock_irqrestore().
+ *
+ * _irqsave variant of write_seqlock(). Use it only if the read side
+ * section, or other write sections, can be invoked from hardirq context.
+ */
 #define write_seqlock_irqsave(lock, flags)                             \
        do { flags = __write_seqlock_irqsave(lock); } while (0)
 
+/**
+ * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
+ *                                section
+ * @sl:    Pointer to seqlock_t
+ * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
+ *
+ * write_sequnlock_irqrestore closes the serialized and non-interruptible
+ * seqlock_t write section previously opened with write_seqlock_irqsave().
+ */
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
@@ -541,65 +699,79 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
        spin_unlock_irqrestore(&sl->lock, flags);
 }
 
-/*
- * A locking reader exclusively locks out other writers and locking readers,
- * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * read_seqlock_excl() - begin a seqlock_t locking reader section
+ * @sl: Pointer to seqlock_t
+ *
+ * read_seqlock_excl opens a seqlock_t locking reader critical section.  A
+ * locking reader exclusively locks out *both* other writers *and* other
+ * locking readers, but it does not update the embedded sequence number.
+ *
+ * Locking readers act like a normal spin_lock()/spin_unlock().
+ *
+ * Context: if the seqlock_t write section, *or other read sections*, can
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
+ * variant of this function instead.
+ *
+ * The opened read section must be closed with read_sequnlock_excl().
  */
 static inline void read_seqlock_excl(seqlock_t *sl)
 {
        spin_lock(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl() - end a seqlock_t locking reader critical section
+ * @sl: Pointer to seqlock_t
+ */
 static inline void read_sequnlock_excl(seqlock_t *sl)
 {
        spin_unlock(&sl->lock);
 }
 
 /**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
+ * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
+ *                         softirqs disabled
+ * @sl: Pointer to seqlock_t
  *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
+ * _bh variant of read_seqlock_excl(). Use this variant only if the
+ * seqlock_t write side section, *or other read sections*, can be invoked
+ * from softirq contexts.
  */
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
-       if (!(*seq & 1))        /* Even */
-               *seq = read_seqbegin(lock);
-       else                    /* Odd */
-               read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
-       return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
-       if (seq & 1)
-               read_sequnlock_excl(lock);
-}
-
 static inline void read_seqlock_excl_bh(seqlock_t *sl)
 {
        spin_lock_bh(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
+ *                           reader section
+ * @sl: Pointer to seqlock_t
+ */
 static inline void read_sequnlock_excl_bh(seqlock_t *sl)
 {
        spin_unlock_bh(&sl->lock);
 }
 
+/**
+ * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
+ *                          reader section
+ * @sl: Pointer to seqlock_t
+ *
+ * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
+ * write side section, *or other read sections*, can be invoked from a
+ * hardirq context.
+ */
 static inline void read_seqlock_excl_irq(seqlock_t *sl)
 {
        spin_lock_irq(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
+ *                             locking reader section
+ * @sl: Pointer to seqlock_t
+ */
 static inline void read_sequnlock_excl_irq(seqlock_t *sl)
 {
        spin_unlock_irq(&sl->lock);
@@ -613,15 +785,117 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
        return flags;
 }
 
+/**
+ * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
+ *                              locking reader section
+ * @lock:  Pointer to seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ *         state, to be passed to read_sequnlock_excl_irqrestore().
+ *
+ * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
+ * write side section, *or other read sections*, can be invoked from a
+ * hardirq context.
+ */
 #define read_seqlock_excl_irqsave(lock, flags)                         \
        do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
 
+/**
+ * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
+ *                                   locking reader section
+ * @sl:    Pointer to seqlock_t
+ * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
+ */
 static inline void
 read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
 {
        spin_unlock_irqrestore(&sl->lock, flags);
 }
 
+/**
+ * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
+ * @lock: Pointer to seqlock_t
+ * @seq : Marker and return parameter. If the passed value is even, the
+ * reader will become a *lockless* seqlock_t reader as in read_seqbegin().
+ * If the passed value is odd, the reader will become a *locking* reader
+ * as in read_seqlock_excl().  In the first call to this function, the
+ * caller *must* initialize and pass an even value to @seq; this way, a
+ * lockless read can be optimistically tried first.
+ *
+ * read_seqbegin_or_lock is an API designed to optimistically try a normal
+ * lockless seqlock_t read section first.  If an odd counter is found, the
+ * lockless read trial has failed, and the next read iteration transforms
+ * itself into a full seqlock_t locking reader.
+ *
+ * This is typically used to avoid seqlock_t lockless readers starvation
+ * (too many retry loops) in the case of a sharp spike in write side
+ * activity.
+ *
+ * Context: if the seqlock_t write section, *or other read sections*, can
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
+ * variant of this function instead.
+ *
+ * Check Documentation/locking/seqlock.rst for template example code.
+ *
+ * Return: the encountered sequence counter value, through the @seq
+ * parameter, which is overloaded as a return parameter. This returned
+ * value must be checked with need_seqretry(). If the read section needs to
+ * be retried, this returned value must also be passed as the @seq
+ * parameter of the next read_seqbegin_or_lock() iteration.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+       if (!(*seq & 1))        /* Even */
+               *seq = read_seqbegin(lock);
+       else                    /* Odd */
+               read_seqlock_excl(lock);
+}
+
+/**
+ * need_seqretry() - validate seqlock_t "locking or lockless" read section
+ * @lock: Pointer to seqlock_t
+ * @seq: sequence count, from read_seqbegin_or_lock()
+ *
+ * Return: true if a read section retry is required, false otherwise
+ */
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+       return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+/**
+ * done_seqretry() - end seqlock_t "locking or lockless" reader section
+ * @lock: Pointer to seqlock_t
+ * @seq: count, from read_seqbegin_or_lock()
+ *
+ * done_seqretry finishes the seqlock_t read side critical section started
+ * with read_seqbegin_or_lock() and validated by need_seqretry().
+ */
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+       if (seq & 1)
+               read_sequnlock_excl(lock);
+}
+
+/**
+ * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
+ *                                   a non-interruptible locking reader
+ * @lock: Pointer to seqlock_t
+ * @seq:  Marker and return parameter. Check read_seqbegin_or_lock().
+ *
+ * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
+ * the seqlock_t write section, *or other read sections*, can be invoked
+ * from hardirq context.
+ *
+ * Note: Interrupts will be disabled only for "locking reader" mode.
+ *
+ * Return:
+ *
+ *   1. The saved local interrupts state in case of a locking reader, to
+ *      be passed to done_seqretry_irqrestore().
+ *
+ *   2. The encountered sequence counter value, returned through @seq
+ *      overloaded as a return parameter. Check read_seqbegin_or_lock().
+ */
 static inline unsigned long
 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
 {
@@ -635,6 +909,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
        return flags;
 }
 
+/**
+ * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
+ *                             non-interruptible locking reader section
+ * @lock:  Pointer to seqlock_t
+ * @seq:   Count, from read_seqbegin_or_lock_irqsave()
+ * @flags: Caller's saved local interrupt state in case of a locking
+ *        reader, also from read_seqbegin_or_lock_irqsave()
+ *
+ * This is the _irqrestore variant of done_seqretry(). The read section
+ * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
+ * by need_seqretry().
+ */
 static inline void
 done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
 {
index d3770b3..f2f12d7 100644 (file)
@@ -56,6 +56,7 @@
 #include <linux/kernel.h>
 #include <linux/stringify.h>
 #include <linux/bottom_half.h>
+#include <linux/lockdep.h>
 #include <asm/barrier.h>
 #include <asm/mmiowb.h>
 
index 6102e6b..b981caa 100644 (file)
@@ -15,7 +15,7 @@
 # include <linux/spinlock_types_up.h>
 #endif
 
-#include <linux/lockdep.h>
+#include <linux/lockdep_types.h>
 
 typedef struct raw_spinlock {
        arch_spinlock_t raw_lock;
index 629b66e..7f65bd1 100644 (file)
@@ -55,6 +55,11 @@ struct torture_random_state {
 #define DEFINE_TORTURE_RANDOM_PERCPU(name) \
        DEFINE_PER_CPU(struct torture_random_state, name)
 unsigned long torture_random(struct torture_random_state *trsp);
+static inline void torture_random_init(struct torture_random_state *trsp)
+{
+       trsp->trs_state = 0;
+       trsp->trs_count = 0;
+}
 
 /* Task shuffler, which causes CPUs to occasionally go idle. */
 void torture_shuffle_task_register(struct task_struct *tp);
index 03e9b18..8f4ff39 100644 (file)
@@ -96,6 +96,7 @@ struct tpm_space {
        u8 *context_buf;
        u32 session_tbl[3];
        u8 *session_buf;
+       u32 buf_size;
 };
 
 struct tpm_bios_log {
index 64356b1..739ba9a 100644 (file)
@@ -211,9 +211,16 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
 
        efispecid = (struct tcg_efi_specid_event_head *)event_header->event;
 
-       /* Check if event is malformed. */
+       /*
+        * Perform validation of the event in order to identify malformed
+        * events. This function may be asked to parse arbitrary byte sequences
+        * immediately following a valid event log. The caller expects this
+        * function to recognize that the byte sequence is not a valid event
+        * and to return an event size of 0.
+        */
        if (memcmp(efispecid->signature, TCG_SPECID_SIG,
-                  sizeof(TCG_SPECID_SIG)) || count > efispecid->num_algs) {
+                  sizeof(TCG_SPECID_SIG)) ||
+           !efispecid->num_algs || count != efispecid->num_algs) {
                size = 0;
                goto out;
        }
index d3021c8..a147977 100644 (file)
@@ -167,6 +167,8 @@ typedef struct {
        int counter;
 } atomic_t;
 
+#define ATOMIC_INIT(i) { (i) }
+
 #ifdef CONFIG_64BIT
 typedef struct {
        s64 counter;
index fdb0710..8418b7d 100644 (file)
@@ -274,6 +274,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex,
                      const struct in6_addr *addr);
 int ipv6_sock_ac_drop(struct sock *sk, int ifindex,
                      const struct in6_addr *addr);
+void __ipv6_sock_ac_close(struct sock *sk);
 void ipv6_sock_ac_close(struct sock *sk);
 
 int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr);
index 1df6dfe..95b0322 100644 (file)
@@ -718,6 +718,7 @@ enum devlink_trap_group_generic_id {
        DEVLINK_TRAP_GROUP_GENERIC_ID_PIM,
        DEVLINK_TRAP_GROUP_GENERIC_ID_UC_LB,
        DEVLINK_TRAP_GROUP_GENERIC_ID_LOCAL_DELIVERY,
+       DEVLINK_TRAP_GROUP_GENERIC_ID_EXTERNAL_DELIVERY,
        DEVLINK_TRAP_GROUP_GENERIC_ID_IPV6,
        DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_EVENT,
        DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_GENERAL,
@@ -915,6 +916,8 @@ enum devlink_trap_group_generic_id {
        "uc_loopback"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_LOCAL_DELIVERY \
        "local_delivery"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_EXTERNAL_DELIVERY \
+       "external_delivery"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_IPV6 \
        "ipv6"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_PTP_EVENT \
index c7d213c..51f65d2 100644 (file)
@@ -941,7 +941,7 @@ struct xfrm_dst {
 static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
 {
 #ifdef CONFIG_XFRM
-       if (dst->xfrm) {
+       if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
                const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
 
                return xdst->path;
@@ -953,7 +953,7 @@ static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
 static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
 {
 #ifdef CONFIG_XFRM
-       if (dst->xfrm) {
+       if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
                struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
                return xdst->child;
        }
@@ -1630,13 +1630,16 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
                     void *);
 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
-                                         u8 type, int dir,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net,
+                                         const struct xfrm_mark *mark,
+                                         u32 if_id, u8 type, int dir,
                                          struct xfrm_selector *sel,
                                          struct xfrm_sec_ctx *ctx, int delete,
                                          int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
-                                    int dir, u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net,
+                                    const struct xfrm_mark *mark, u32 if_id,
+                                    u8 type, int dir, u32 id, int delete,
+                                    int *err);
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
 void xfrm_policy_hash_rebuild(struct net *net);
 u32 xfrm_get_acqseq(void);
index c4369a6..2f1fc23 100644 (file)
@@ -305,6 +305,25 @@ struct rvt_rq {
        spinlock_t lock ____cacheline_aligned_in_smp;
 };
 
+/**
+ * rvt_get_rq_count - count numbers of request work queue entries
+ * in circular buffer
+ * @rq: data structure for request queue entry
+ * @head: head indices of the circular buffer
+ * @tail: tail indices of the circular buffer
+ *
+ * Return: total number of entries in the Receive Queue
+ */
+
+static inline u32 rvt_get_rq_count(struct rvt_rq *rq, u32 head, u32 tail)
+{
+       u32 count = head - tail;
+
+       if ((s32)count < 0)
+               count += rq->size;
+       return count;
+}
+
 /*
  * This structure holds the information that the send tasklet needs
  * to send a RDMA read response or atomic operation.
index 93b1142..34d64ca 100644 (file)
@@ -211,6 +211,21 @@ DEFINE_EVENT(block_rq, block_rq_issue,
        TP_ARGS(q, rq)
 );
 
+/**
+ * block_rq_merge - merge request with another one in the elevator
+ * @q: queue holding operation
+ * @rq: block IO operation request
+ *
+ * Called when block operation request @rq from queue @q is merged to another
+ * request queued in the elevator.
+ */
+DEFINE_EVENT(block_rq, block_rq_merge,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+);
+
 /**
  * block_bio_bounce - used bounce buffer when processing block operation
  * @q: queue holding the block operation
index 360b0f9..863335e 100644 (file)
@@ -31,13 +31,6 @@ struct extent_io_tree;
 struct prelim_ref;
 struct btrfs_space_info;
 
-TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
-TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS);
-TRACE_DEFINE_ENUM(FLUSH_DELALLOC);
-TRACE_DEFINE_ENUM(FLUSH_DELALLOC_WAIT);
-TRACE_DEFINE_ENUM(ALLOC_CHUNK);
-TRACE_DEFINE_ENUM(COMMIT_TRANS);
-
 #define show_ref_type(type)                                            \
        __print_symbolic(type,                                          \
                { BTRFS_TREE_BLOCK_REF_KEY,     "TREE_BLOCK_REF" },     \
@@ -67,30 +60,72 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
              (obj >= BTRFS_ROOT_TREE_OBJECTID &&                       \
               obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
 
-#define show_fi_type(type)                                             \
-       __print_symbolic(type,                                          \
-                { BTRFS_FILE_EXTENT_INLINE,    "INLINE" },             \
-                { BTRFS_FILE_EXTENT_REG,       "REG"    },             \
-                { BTRFS_FILE_EXTENT_PREALLOC,  "PREALLOC"})
+#define FLUSH_ACTIONS                                                          \
+       EM( BTRFS_RESERVE_NO_FLUSH,             "BTRFS_RESERVE_NO_FLUSH")       \
+       EM( BTRFS_RESERVE_FLUSH_LIMIT,          "BTRFS_RESERVE_FLUSH_LIMIT")    \
+       EM( BTRFS_RESERVE_FLUSH_ALL,            "BTRFS_RESERVE_FLUSH_ALL")      \
+       EMe(BTRFS_RESERVE_FLUSH_ALL_STEAL,      "BTRFS_RESERVE_FLUSH_ALL_STEAL")
+
+#define FI_TYPES                                                       \
+       EM( BTRFS_FILE_EXTENT_INLINE,           "INLINE")               \
+       EM( BTRFS_FILE_EXTENT_REG,              "REG")                  \
+       EMe(BTRFS_FILE_EXTENT_PREALLOC,         "PREALLOC")
+
+#define QGROUP_RSV_TYPES                                               \
+       EM( BTRFS_QGROUP_RSV_DATA,              "DATA")                 \
+       EM( BTRFS_QGROUP_RSV_META_PERTRANS,     "META_PERTRANS")        \
+       EMe(BTRFS_QGROUP_RSV_META_PREALLOC,     "META_PREALLOC")
+
+#define IO_TREE_OWNER                                              \
+       EM( IO_TREE_FS_PINNED_EXTENTS,    "PINNED_EXTENTS")         \
+       EM( IO_TREE_FS_EXCLUDED_EXTENTS,  "EXCLUDED_EXTENTS")       \
+       EM( IO_TREE_INODE_IO,             "INODE_IO")               \
+       EM( IO_TREE_INODE_IO_FAILURE,     "INODE_IO_FAILURE")       \
+       EM( IO_TREE_RELOC_BLOCKS,         "RELOC_BLOCKS")           \
+       EM( IO_TREE_TRANS_DIRTY_PAGES,    "TRANS_DIRTY_PAGES")      \
+       EM( IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES")   \
+       EM( IO_TREE_INODE_FILE_EXTENT,    "INODE_FILE_EXTENT")      \
+       EM( IO_TREE_LOG_CSUM_RANGE,       "LOG_CSUM_RANGE")         \
+       EMe(IO_TREE_SELFTEST,             "SELFTEST")
+
+#define FLUSH_STATES                                                   \
+       EM( FLUSH_DELAYED_ITEMS_NR,     "FLUSH_DELAYED_ITEMS_NR")       \
+       EM( FLUSH_DELAYED_ITEMS,        "FLUSH_DELAYED_ITEMS")          \
+       EM( FLUSH_DELALLOC,             "FLUSH_DELALLOC")               \
+       EM( FLUSH_DELALLOC_WAIT,        "FLUSH_DELALLOC_WAIT")          \
+       EM( FLUSH_DELAYED_REFS_NR,      "FLUSH_DELAYED_REFS_NR")        \
+       EM( FLUSH_DELAYED_REFS,         "FLUSH_DELAYED_REFS")           \
+       EM( ALLOC_CHUNK,                "ALLOC_CHUNK")                  \
+       EM( ALLOC_CHUNK_FORCE,          "ALLOC_CHUNK_FORCE")            \
+       EM( RUN_DELAYED_IPUTS,          "RUN_DELAYED_IPUTS")            \
+       EMe(COMMIT_TRANS,               "COMMIT_TRANS")
+
+/*
+ * First define the enums in the above macros to be exported to userspace via
+ * TRACE_DEFINE_ENUM().
+ */
+
+#undef EM
+#undef EMe
+#define EM(a, b)       TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)      TRACE_DEFINE_ENUM(a);
+
+FLUSH_ACTIONS
+FI_TYPES
+QGROUP_RSV_TYPES
+IO_TREE_OWNER
+FLUSH_STATES
+
+/*
+ * Now redefine the EM and EMe macros to map the enums to the strings that will
+ * be printed in the output
+ */
+
+#undef EM
+#undef EMe
+#define EM(a, b)        {a, b},
+#define EMe(a, b)       {a, b}
 
-#define show_qgroup_rsv_type(type)                                     \
-       __print_symbolic(type,                                          \
-               { BTRFS_QGROUP_RSV_DATA,          "DATA"        },      \
-               { BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" },    \
-               { BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" })
-
-#define show_extent_io_tree_owner(owner)                                      \
-       __print_symbolic(owner,                                                \
-               { IO_TREE_FS_PINNED_EXTENTS,      "PINNED_EXTENTS" },          \
-               { IO_TREE_FS_EXCLUDED_EXTENTS,    "EXCLUDED_EXTENTS" },        \
-               { IO_TREE_INODE_IO,               "INODE_IO" },                \
-               { IO_TREE_INODE_IO_FAILURE,       "INODE_IO_FAILURE" },        \
-               { IO_TREE_RELOC_BLOCKS,           "RELOC_BLOCKS" },            \
-               { IO_TREE_TRANS_DIRTY_PAGES,      "TRANS_DIRTY_PAGES" },       \
-               { IO_TREE_ROOT_DIRTY_LOG_PAGES,   "ROOT_DIRTY_LOG_PAGES" },    \
-               { IO_TREE_INODE_FILE_EXTENT,      "INODE_FILE_EXTENT" },       \
-               { IO_TREE_LOG_CSUM_RANGE,         "LOG_CSUM_RANGE" },          \
-               { IO_TREE_SELFTEST,               "SELFTEST" })
 
 #define BTRFS_GROUP_FLAGS      \
        { BTRFS_BLOCK_GROUP_DATA,       "DATA"},        \
@@ -380,7 +415,7 @@ DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
                __entry->disk_isize, __entry->extent_start,
                __entry->extent_end, __entry->num_bytes, __entry->ram_bytes,
                __entry->disk_bytenr, __entry->disk_num_bytes,
-               __entry->extent_offset, show_fi_type(__entry->extent_type),
+               __entry->extent_offset, __print_symbolic(__entry->extent_type, FI_TYPES),
                __entry->compression)
 );
 
@@ -421,7 +456,7 @@ DECLARE_EVENT_CLASS(
                "extent_type=%s compression=%u",
                show_root_type(__entry->root_obj), __entry->ino, __entry->isize,
                __entry->disk_isize, __entry->extent_start,
-               __entry->extent_end, show_fi_type(__entry->extent_type),
+               __entry->extent_end, __print_symbolic(__entry->extent_type, FI_TYPES),
                __entry->compression)
 );
 
@@ -1042,12 +1077,6 @@ TRACE_EVENT(btrfs_space_reservation,
                        __entry->bytes)
 );
 
-#define show_flush_action(action)                                              \
-       __print_symbolic(action,                                                \
-               { BTRFS_RESERVE_NO_FLUSH,       "BTRFS_RESERVE_NO_FLUSH"},      \
-               { BTRFS_RESERVE_FLUSH_LIMIT,    "BTRFS_RESERVE_FLUSH_LIMIT"},   \
-               { BTRFS_RESERVE_FLUSH_ALL,      "BTRFS_RESERVE_FLUSH_ALL"})
-
 TRACE_EVENT(btrfs_trigger_flush,
 
        TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
@@ -1071,25 +1100,13 @@ TRACE_EVENT(btrfs_trigger_flush,
 
        TP_printk_btrfs("%s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
                  __get_str(reason), __entry->flush,
-                 show_flush_action(__entry->flush),
+                 __print_symbolic(__entry->flush, FLUSH_ACTIONS),
                  __entry->flags,
                  __print_flags((unsigned long)__entry->flags, "|",
                                BTRFS_GROUP_FLAGS),
                  __entry->bytes)
 );
 
-#define show_flush_state(state)                                                        \
-       __print_symbolic(state,                                                 \
-               { FLUSH_DELAYED_ITEMS_NR,       "FLUSH_DELAYED_ITEMS_NR"},      \
-               { FLUSH_DELAYED_ITEMS,          "FLUSH_DELAYED_ITEMS"},         \
-               { FLUSH_DELALLOC,               "FLUSH_DELALLOC"},              \
-               { FLUSH_DELALLOC_WAIT,          "FLUSH_DELALLOC_WAIT"},         \
-               { FLUSH_DELAYED_REFS_NR,        "FLUSH_DELAYED_REFS_NR"},       \
-               { FLUSH_DELAYED_REFS,           "FLUSH_ELAYED_REFS"},           \
-               { ALLOC_CHUNK,                  "ALLOC_CHUNK"},                 \
-               { ALLOC_CHUNK_FORCE,            "ALLOC_CHUNK_FORCE"},           \
-               { RUN_DELAYED_IPUTS,            "RUN_DELAYED_IPUTS"},           \
-               { COMMIT_TRANS,                 "COMMIT_TRANS"})
 
 TRACE_EVENT(btrfs_flush_space,
 
@@ -1114,7 +1131,7 @@ TRACE_EVENT(btrfs_flush_space,
 
        TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
                  __entry->state,
-                 show_flush_state(__entry->state),
+                 __print_symbolic(__entry->state, FLUSH_STATES),
                  __entry->flags,
                  __print_flags((unsigned long)__entry->flags, "|",
                                BTRFS_GROUP_FLAGS),
@@ -1690,7 +1707,7 @@ TRACE_EVENT(qgroup_update_reserve,
        ),
 
        TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld",
-               __entry->qgid, show_qgroup_rsv_type(__entry->type),
+               __entry->qgid, __print_symbolic(__entry->type, QGROUP_RSV_TYPES),
                __entry->cur_reserved, __entry->diff)
 );
 
@@ -1714,7 +1731,7 @@ TRACE_EVENT(qgroup_meta_reserve,
 
        TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
                show_root_type(__entry->refroot),
-               show_qgroup_rsv_type(__entry->type), __entry->diff)
+               __print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff)
 );
 
 TRACE_EVENT(qgroup_meta_convert,
@@ -1735,8 +1752,8 @@ TRACE_EVENT(qgroup_meta_convert,
 
        TP_printk_btrfs("refroot=%llu(%s) type=%s->%s diff=%lld",
                show_root_type(__entry->refroot),
-               show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PREALLOC),
-               show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PERTRANS),
+               __print_symbolic(BTRFS_QGROUP_RSV_META_PREALLOC, QGROUP_RSV_TYPES),
+               __print_symbolic(BTRFS_QGROUP_RSV_META_PERTRANS, QGROUP_RSV_TYPES),
                __entry->diff)
 );
 
@@ -1762,7 +1779,7 @@ TRACE_EVENT(qgroup_meta_free_all_pertrans,
 
        TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
                show_root_type(__entry->refroot),
-               show_qgroup_rsv_type(__entry->type), __entry->diff)
+               __print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff)
 );
 
 DECLARE_EVENT_CLASS(btrfs__prelim_ref,
@@ -1920,7 +1937,7 @@ TRACE_EVENT(btrfs_set_extent_bit,
 
        TP_printk_btrfs(
                "io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s",
-               show_extent_io_tree_owner(__entry->owner), __entry->ino,
+               __print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
                __entry->rootid, __entry->start, __entry->len,
                __print_flags(__entry->set_bits, "|", EXTENT_FLAGS))
 );
@@ -1959,7 +1976,7 @@ TRACE_EVENT(btrfs_clear_extent_bit,
 
        TP_printk_btrfs(
                "io_tree=%s ino=%llu root=%llu start=%llu len=%llu clear_bits=%s",
-               show_extent_io_tree_owner(__entry->owner), __entry->ino,
+               __print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
                __entry->rootid, __entry->start, __entry->len,
                __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
 );
@@ -2000,7 +2017,7 @@ TRACE_EVENT(btrfs_convert_extent_bit,
 
        TP_printk_btrfs(
 "io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s clear_bits=%s",
-                 show_extent_io_tree_owner(__entry->owner), __entry->ino,
+                 __print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
                  __entry->rootid, __entry->start, __entry->len,
                  __print_flags(__entry->set_bits , "|", EXTENT_FLAGS),
                  __print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
index f9a7811..ced7123 100644 (file)
@@ -435,11 +435,12 @@ TRACE_EVENT_RCU(rcu_fqs,
 #endif /* #if defined(CONFIG_TREE_RCU) */
 
 /*
- * Tracepoint for dyntick-idle entry/exit events.  These take a string
- * as argument: "Start" for entering dyntick-idle mode, "Startirq" for
- * entering it from irq/NMI, "End" for leaving it, "Endirq" for leaving it
- * to irq/NMI, "--=" for events moving towards idle, and "++=" for events
- * moving away from idle.
+ * Tracepoint for dyntick-idle entry/exit events.  These take 2 strings
+ * as argument:
+ * polarity: "Start", "End", "StillNonIdle" for entering, exiting or still not
+ *            being in dyntick-idle mode.
+ * context: "USER" or "IDLE" or "IRQ".
+ * NMIs nested in IRQs are inferred with dynticks_nesting > 1 in IRQ context.
  *
  * These events also take a pair of numbers, which indicate the nesting
  * depth before and after the event of interest, and a third number that is
@@ -506,13 +507,13 @@ TRACE_EVENT_RCU(rcu_callback,
 
 /*
  * Tracepoint for the registration of a single RCU callback of the special
- * kfree() form.  The first argument is the RCU type, the second argument
+ * kvfree() form.  The first argument is the RCU type, the second argument
  * is a pointer to the RCU callback, the third argument is the offset
  * of the callback within the enclosing RCU-protected data structure,
  * the fourth argument is the number of lazy callbacks queued, and the
  * fifth argument is the total number of callbacks queued.
  */
-TRACE_EVENT_RCU(rcu_kfree_callback,
+TRACE_EVENT_RCU(rcu_kvfree_callback,
 
        TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset,
                 long qlen),
@@ -596,12 +597,12 @@ TRACE_EVENT_RCU(rcu_invoke_callback,
 
 /*
  * Tracepoint for the invocation of a single RCU callback of the special
- * kfree() form.  The first argument is the RCU flavor, the second
+ * kvfree() form.  The first argument is the RCU flavor, the second
  * argument is a pointer to the RCU callback, and the third argument
  * is the offset of the callback within the enclosing RCU-protected
  * data structure.
  */
-TRACE_EVENT_RCU(rcu_invoke_kfree_callback,
+TRACE_EVENT_RCU(rcu_invoke_kvfree_callback,
 
        TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset),
 
index ed168b0..fec25b9 100644 (file)
@@ -91,7 +91,7 @@ DEFINE_EVENT(sched_wakeup_template, sched_waking,
 
 /*
  * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
- * It it not always called from the waking context.
+ * It is not always called from the waking context.
  */
 DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
             TP_PROTO(struct task_struct *p),
@@ -634,6 +634,18 @@ DECLARE_TRACE(sched_overutilized_tp,
        TP_PROTO(struct root_domain *rd, bool overutilized),
        TP_ARGS(rd, overutilized));
 
+DECLARE_TRACE(sched_util_est_cfs_tp,
+       TP_PROTO(struct cfs_rq *cfs_rq),
+       TP_ARGS(cfs_rq));
+
+DECLARE_TRACE(sched_util_est_se_tp,
+       TP_PROTO(struct sched_entity *se),
+       TP_ARGS(se));
+
+DECLARE_TRACE(sched_update_nr_running_tp,
+       TP_PROTO(struct rq *rq, int change),
+       TP_ARGS(rq, change));
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
index e6b6cb0..2c39d15 100644 (file)
@@ -243,6 +243,18 @@ struct btrfs_ioctl_dev_info_args {
        __u8 path[BTRFS_DEVICE_PATH_NAME_MAX];  /* out */
 };
 
+/*
+ * Retrieve information about the filesystem
+ */
+
+/* Request information about checksum type and size */
+#define BTRFS_FS_INFO_FLAG_CSUM_INFO                   (1 << 0)
+
+/* Request information about filesystem generation */
+#define BTRFS_FS_INFO_FLAG_GENERATION                  (1 << 1)
+/* Request information about filesystem metadata UUID */
+#define BTRFS_FS_INFO_FLAG_METADATA_UUID               (1 << 2)
+
 struct btrfs_ioctl_fs_info_args {
        __u64 max_id;                           /* out */
        __u64 num_devices;                      /* out */
@@ -250,8 +262,13 @@ struct btrfs_ioctl_fs_info_args {
        __u32 nodesize;                         /* out */
        __u32 sectorsize;                       /* out */
        __u32 clone_alignment;                  /* out */
-       __u32 reserved32;
-       __u64 reserved[122];                    /* pad to 1k */
+       /* See BTRFS_FS_INFO_FLAG_* */
+       __u16 csum_type;                        /* out */
+       __u16 csum_size;                        /* out */
+       __u64 flags;                            /* in/out */
+       __u64 generation;                       /* out */
+       __u8 metadata_uuid[BTRFS_FSID_SIZE];    /* out */
+       __u8 reserved[944];                     /* pad to 1k */
 };
 
 /*
index a3f3975..9ba64ca 100644 (file)
@@ -913,9 +913,9 @@ struct btrfs_free_space_info {
 #define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
 
 #define BTRFS_QGROUP_LEVEL_SHIFT               48
-static inline __u64 btrfs_qgroup_level(__u64 qgroupid)
+static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
 {
-       return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+       return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT);
 }
 
 /*
index 7843742..d65fde7 100644 (file)
@@ -31,7 +31,8 @@ struct io_uring_sqe {
        union {
                __kernel_rwf_t  rw_flags;
                __u32           fsync_flags;
-               __u16           poll_events;
+               __u16           poll_events;    /* compatibility */
+               __u32           poll32_events;  /* word-reversed for BE */
                __u32           sync_range_flags;
                __u32           msg_flags;
                __u32           timeout_flags;
@@ -249,6 +250,7 @@ struct io_uring_params {
 #define IORING_FEAT_RW_CUR_POS         (1U << 3)
 #define IORING_FEAT_CUR_PERSONALITY    (1U << 4)
 #define IORING_FEAT_FAST_POLL          (1U << 5)
+#define IORING_FEAT_POLL_32BITS        (1U << 6)
 
 /*
  * io_uring_register(2) opcodes and arguments
index 7b2d6fc..077e7ee 100644 (file)
@@ -383,7 +383,8 @@ struct perf_event_attr {
                                bpf_event      :  1, /* include bpf events */
                                aux_output     :  1, /* generate AUX records instead of events */
                                cgroup         :  1, /* include cgroup events */
-                               __reserved_1   : 31;
+                               text_poke      :  1, /* include text poke events */
+                               __reserved_1   : 30;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -532,9 +533,10 @@ struct perf_event_mmap_page {
                                cap_bit0_is_deprecated  : 1, /* Always 1, signals that bit 0 is zero */
 
                                cap_user_rdpmc          : 1, /* The RDPMC instruction can be used to read counts */
-                               cap_user_time           : 1, /* The time_* fields are used */
+                               cap_user_time           : 1, /* The time_{shift,mult,offset} fields are used */
                                cap_user_time_zero      : 1, /* The time_zero field is used */
-                               cap_____res             : 59;
+                               cap_user_time_short     : 1, /* The time_{cycles,mask} fields are used */
+                               cap_____res             : 58;
                };
        };
 
@@ -593,13 +595,29 @@ struct perf_event_mmap_page {
         *               ((rem * time_mult) >> time_shift);
         */
        __u64   time_zero;
+
        __u32   size;                   /* Header size up to __reserved[] fields. */
+       __u32   __reserved_1;
+
+       /*
+        * If cap_user_time_short, the hardware clock is less than 64 bits wide
+        * and we must compute the 'cyc' value, as used by cap_user_time, as:
+        *
+        *   cyc = time_cycles + ((cyc - time_cycles) & time_mask)
+        *
+        * NOTE: this form is explicitly chosen such that cap_user_time_short
+        *       is a correction on top of cap_user_time, and code that doesn't
+        *       know about cap_user_time_short still works under the assumption
+        *       the counter doesn't wrap.
+        */
+       __u64   time_cycles;
+       __u64   time_mask;
 
                /*
                 * Hole for extension of the self monitor capabilities
                 */
 
-       __u8    __reserved[118*8+4];    /* align to 1k. */
+       __u8    __reserved[116*8];      /* align to 1k. */
 
        /*
         * Control data for the mmap() data buffer.
@@ -1024,12 +1042,35 @@ enum perf_event_type {
         */
        PERF_RECORD_CGROUP                      = 19,
 
+       /*
+        * Records changes to kernel text i.e. self-modified code. 'old_len' is
+        * the number of old bytes, 'new_len' is the number of new bytes. Either
+        * 'old_len' or 'new_len' may be zero to indicate, for example, the
+        * addition or removal of a trampoline. 'bytes' contains the old bytes
+        * followed immediately by the new bytes.
+        *
+        * struct {
+        *      struct perf_event_header        header;
+        *      u64                             addr;
+        *      u16                             old_len;
+        *      u16                             new_len;
+        *      u8                              bytes[];
+        *      struct sample_id                sample_id;
+        * };
+        */
+       PERF_RECORD_TEXT_POKE                   = 20,
+
        PERF_RECORD_MAX,                        /* non-ABI */
 };
 
 enum perf_record_ksymbol_type {
        PERF_RECORD_KSYMBOL_TYPE_UNKNOWN        = 0,
        PERF_RECORD_KSYMBOL_TYPE_BPF            = 1,
+       /*
+        * Out of line code such as kprobe-replaced instructions or optimized
+        * kprobes or ftrace trampolines.
+        */
+       PERF_RECORD_KSYMBOL_TYPE_OOL            = 2,
        PERF_RECORD_KSYMBOL_TYPE_MAX            /* non-ABI */
 };
 
index 7955c56..ee810ca 100644 (file)
@@ -109,6 +109,7 @@ struct vdso_data {
  * relocation, and this is what we need.
  */
 extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
+extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden")));
 
 /*
  * The generic vDSO implementation requires that gettimeofday.h
index 0498af5..f4bffef 100644 (file)
@@ -191,13 +191,16 @@ config HAVE_KERNEL_LZO
 config HAVE_KERNEL_LZ4
        bool
 
+config HAVE_KERNEL_ZSTD
+       bool
+
 config HAVE_KERNEL_UNCOMPRESSED
        bool
 
 choice
        prompt "Kernel compression mode"
        default KERNEL_GZIP
-       depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_UNCOMPRESSED
+       depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_ZSTD || HAVE_KERNEL_UNCOMPRESSED
        help
          The linux kernel is a kind of self-extracting executable.
          Several compression algorithms are available, which differ
@@ -276,6 +279,16 @@ config KERNEL_LZ4
          is about 8% bigger than LZO. But the decompression speed is
          faster than LZO.
 
+config KERNEL_ZSTD
+       bool "ZSTD"
+       depends on HAVE_KERNEL_ZSTD
+       help
+         ZSTD is a compression algorithm targeting intermediate compression
+         with fast decompression speed. It will compress better than GZIP and
+         decompress at around the same speed as LZO, but slower than LZ4. You
+         will need at least 192 KB of RAM for booting. The zstd command
+         line tool is required for compression.
+
 config KERNEL_UNCOMPRESSED
        bool "None"
        depends on HAVE_KERNEL_UNCOMPRESSED
@@ -492,8 +505,23 @@ config HAVE_SCHED_AVG_IRQ
        depends on SMP
 
 config SCHED_THERMAL_PRESSURE
-       bool "Enable periodic averaging of thermal pressure"
+       bool
+       default y if ARM && ARM_CPU_TOPOLOGY
+       default y if ARM64
        depends on SMP
+       depends on CPU_FREQ_THERMAL
+       help
+         Select this option to enable thermal pressure accounting in the
+         scheduler. Thermal pressure is the value conveyed to the scheduler
+         that reflects the reduction in CPU compute capacity resulting from
+         thermal throttling. Thermal throttling occurs when the performance of
+         a CPU is capped due to high operating temperatures.
+
+         If selected, the scheduler will be able to balance tasks accordingly,
+         i.e. put less load on throttled CPUs than on non/less throttled ones.
+
+         This requires the architecture to implement
+         arch_set_thermal_pressure() and arch_get_thermal_pressure().
 
 config BSD_PROCESS_ACCT
        bool "BSD Process Accounting"
index d72beda..53314d7 100644 (file)
@@ -45,11 +45,6 @@ static int __init early_initrdmem(char *p)
 }
 early_param("initrdmem", early_initrdmem);
 
-/*
- * This is here as the initrd keyword has been in use since 11/2018
- * on ARM, PowerPC, and MIPS.
- * It should not be; it is reserved for bootloaders.
- */
 static int __init early_initrd(char *p)
 {
        return early_initrdmem(p);
index 8c201f4..b2301bd 100644 (file)
@@ -1851,7 +1851,6 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
        }
 
        audit_get_stamp(ab->ctx, &t, &serial);
-       audit_clear_dummy(ab->ctx);
        audit_log_format(ab, "audit(%llu.%03lu:%u): ",
                         (unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial);
 
index f0233dc..ddc2287 100644 (file)
@@ -290,13 +290,6 @@ extern int audit_signal_info_syscall(struct task_struct *t);
 extern void audit_filter_inodes(struct task_struct *tsk,
                                struct audit_context *ctx);
 extern struct list_head *audit_killed_trees(void);
-
-static inline void audit_clear_dummy(struct audit_context *ctx)
-{
-       if (ctx)
-               ctx->dummy = 0;
-}
-
 #else /* CONFIG_AUDITSYSCALL */
 #define auditsc_get_stamp(c, t, s) 0
 #define audit_put_watch(w) {}
@@ -330,7 +323,6 @@ static inline int audit_signal_info_syscall(struct task_struct *t)
 }
 
 #define audit_filter_inodes(t, c) AUDIT_DISABLED
-#define audit_clear_dummy(c) {}
 #endif /* CONFIG_AUDITSYSCALL */
 
 extern char *audit_unpack_string(void **bufp, size_t *remain, size_t len);
index 468a233..fd840c4 100644 (file)
@@ -1417,6 +1417,9 @@ static void audit_log_proctitle(void)
        struct audit_context *context = audit_context();
        struct audit_buffer *ab;
 
+       if (!context || context->dummy)
+               return;
+
        ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCTITLE);
        if (!ab)
                return; /* audit_panic or being filtered */
index 9a1a98d..0443600 100644 (file)
@@ -4058,6 +4058,11 @@ static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn,
        const char *tname, *sym;
        u32 btf_id, i;
 
+       if (!btf_vmlinux) {
+               bpf_log(log, "btf_vmlinux doesn't exist\n");
+               return -EINVAL;
+       }
+
        if (IS_ERR(btf_vmlinux)) {
                bpf_log(log, "btf_vmlinux is malformed\n");
                return -EINVAL;
index b4b288a..b32cc8c 100644 (file)
@@ -779,15 +779,20 @@ static void htab_elem_free_rcu(struct rcu_head *head)
        htab_elem_free(htab, l);
 }
 
-static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
+static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
 {
        struct bpf_map *map = &htab->map;
+       void *ptr;
 
        if (map->ops->map_fd_put_ptr) {
-               void *ptr = fd_htab_map_get_ptr(map, l);
-
+               ptr = fd_htab_map_get_ptr(map, l);
                map->ops->map_fd_put_ptr(ptr);
        }
+}
+
+static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
+{
+       htab_put_fd_value(htab, l);
 
        if (htab_is_prealloc(htab)) {
                __pcpu_freelist_push(&htab->freelist, &l->fnode);
@@ -839,6 +844,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                         */
                        pl_new = this_cpu_ptr(htab->extra_elems);
                        l_new = *pl_new;
+                       htab_put_fd_value(htab, old_elem);
                        *pl_new = old_elem;
                } else {
                        struct pcpu_freelist_node *l;
index b6397a1..d51175c 100644 (file)
@@ -64,7 +64,6 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
 
        raw_spin_unlock_irqrestore(cpu_lock, flags);
 }
-EXPORT_SYMBOL_GPL(cgroup_rstat_updated);
 
 /**
  * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
index 9f1557b..1817568 100644 (file)
@@ -413,6 +413,7 @@ static int __init crash_save_vmcoreinfo_init(void)
        VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
        VMCOREINFO_STRUCT_SIZE(mem_section);
        VMCOREINFO_OFFSET(mem_section, section_mem_map);
+       VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
 #endif
        VMCOREINFO_STRUCT_SIZE(page);
        VMCOREINFO_STRUCT_SIZE(pglist_data);
index 856d98c..7c436d7 100644 (file)
@@ -394,6 +394,7 @@ static atomic_t nr_switch_events __read_mostly;
 static atomic_t nr_ksymbol_events __read_mostly;
 static atomic_t nr_bpf_events __read_mostly;
 static atomic_t nr_cgroup_events __read_mostly;
+static atomic_t nr_text_poke_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -1237,12 +1238,26 @@ static void get_ctx(struct perf_event_context *ctx)
        refcount_inc(&ctx->refcount);
 }
 
+static void *alloc_task_ctx_data(struct pmu *pmu)
+{
+       if (pmu->task_ctx_cache)
+               return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL);
+
+       return NULL;
+}
+
+static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data)
+{
+       if (pmu->task_ctx_cache && task_ctx_data)
+               kmem_cache_free(pmu->task_ctx_cache, task_ctx_data);
+}
+
 static void free_ctx(struct rcu_head *head)
 {
        struct perf_event_context *ctx;
 
        ctx = container_of(head, struct perf_event_context, rcu_head);
-       kfree(ctx->task_ctx_data);
+       free_task_ctx_data(ctx->pmu, ctx->task_ctx_data);
        kfree(ctx);
 }
 
@@ -4470,7 +4485,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
                goto errout;
 
        if (event->attach_state & PERF_ATTACH_TASK_DATA) {
-               task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL);
+               task_ctx_data = alloc_task_ctx_data(pmu);
                if (!task_ctx_data) {
                        err = -ENOMEM;
                        goto errout;
@@ -4528,11 +4543,11 @@ retry:
                }
        }
 
-       kfree(task_ctx_data);
+       free_task_ctx_data(pmu, task_ctx_data);
        return ctx;
 
 errout:
-       kfree(task_ctx_data);
+       free_task_ctx_data(pmu, task_ctx_data);
        return ERR_PTR(err);
 }
 
@@ -4575,7 +4590,7 @@ static bool is_sb_event(struct perf_event *event)
        if (attr->mmap || attr->mmap_data || attr->mmap2 ||
            attr->comm || attr->comm_exec ||
            attr->task || attr->ksymbol ||
-           attr->context_switch ||
+           attr->context_switch || attr->text_poke ||
            attr->bpf_event)
                return true;
        return false;
@@ -4651,6 +4666,8 @@ static void unaccount_event(struct perf_event *event)
                atomic_dec(&nr_ksymbol_events);
        if (event->attr.bpf_event)
                atomic_dec(&nr_bpf_events);
+       if (event->attr.text_poke)
+               atomic_dec(&nr_text_poke_events);
 
        if (dec) {
                if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -8628,6 +8645,89 @@ void perf_event_bpf_event(struct bpf_prog *prog,
        perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
 }
 
+struct perf_text_poke_event {
+       const void              *old_bytes;
+       const void              *new_bytes;
+       size_t                  pad;
+       u16                     old_len;
+       u16                     new_len;
+
+       struct {
+               struct perf_event_header        header;
+
+               u64                             addr;
+       } event_id;
+};
+
+static int perf_event_text_poke_match(struct perf_event *event)
+{
+       return event->attr.text_poke;
+}
+
+static void perf_event_text_poke_output(struct perf_event *event, void *data)
+{
+       struct perf_text_poke_event *text_poke_event = data;
+       struct perf_output_handle handle;
+       struct perf_sample_data sample;
+       u64 padding = 0;
+       int ret;
+
+       if (!perf_event_text_poke_match(event))
+               return;
+
+       perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
+
+       ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size);
+       if (ret)
+               return;
+
+       perf_output_put(&handle, text_poke_event->event_id);
+       perf_output_put(&handle, text_poke_event->old_len);
+       perf_output_put(&handle, text_poke_event->new_len);
+
+       __output_copy(&handle, text_poke_event->old_bytes, text_poke_event->old_len);
+       __output_copy(&handle, text_poke_event->new_bytes, text_poke_event->new_len);
+
+       if (text_poke_event->pad)
+               __output_copy(&handle, &padding, text_poke_event->pad);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
+       perf_output_end(&handle);
+}
+
+void perf_event_text_poke(const void *addr, const void *old_bytes,
+                         size_t old_len, const void *new_bytes, size_t new_len)
+{
+       struct perf_text_poke_event text_poke_event;
+       size_t tot, pad;
+
+       if (!atomic_read(&nr_text_poke_events))
+               return;
+
+       tot  = sizeof(text_poke_event.old_len) + old_len;
+       tot += sizeof(text_poke_event.new_len) + new_len;
+       pad  = ALIGN(tot, sizeof(u64)) - tot;
+
+       text_poke_event = (struct perf_text_poke_event){
+               .old_bytes    = old_bytes,
+               .new_bytes    = new_bytes,
+               .pad          = pad,
+               .old_len      = old_len,
+               .new_len      = new_len,
+               .event_id  = {
+                       .header = {
+                               .type = PERF_RECORD_TEXT_POKE,
+                               .misc = PERF_RECORD_MISC_KERNEL,
+                               .size = sizeof(text_poke_event.event_id) + tot + pad,
+                       },
+                       .addr = (unsigned long)addr,
+               },
+       };
+
+       perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL);
+}
+
 void perf_event_itrace_started(struct perf_event *event)
 {
        event->attach_state |= PERF_ATTACH_ITRACE;
@@ -10945,6 +11045,8 @@ static void account_event(struct perf_event *event)
                atomic_inc(&nr_ksymbol_events);
        if (event->attr.bpf_event)
                atomic_inc(&nr_bpf_events);
+       if (event->attr.text_poke)
+               atomic_inc(&nr_text_poke_events);
 
        if (inc) {
                /*
@@ -12409,8 +12511,7 @@ inherit_event(struct perf_event *parent_event,
            !child_ctx->task_ctx_data) {
                struct pmu *pmu = child_event->pmu;
 
-               child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size,
-                                                  GFP_KERNEL);
+               child_ctx->task_ctx_data = alloc_task_ctx_data(pmu);
                if (!child_ctx->task_ctx_data) {
                        free_event(child_event);
                        return ERR_PTR(-ENOMEM);
index efc5493..2a8e728 100644 (file)
@@ -359,7 +359,13 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
        struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 
        if (new) {
-               *new = *orig;
+               ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+               ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+               /*
+                * orig->shared.rb may be modified concurrently, but the clone
+                * will be reinitialized.
+                */
+               *new = data_race(*orig);
                INIT_LIST_HEAD(&new->anon_vma_chain);
                new->vm_next = new->vm_prev = NULL;
        }
@@ -1954,8 +1960,8 @@ static __latent_entropy struct task_struct *copy_process(
 
        rt_mutex_init_task(p);
 
+       lockdep_assert_irqs_enabled();
 #ifdef CONFIG_PROVE_LOCKING
-       DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
        DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
        retval = -EAGAIN;
@@ -2035,19 +2041,11 @@ static __latent_entropy struct task_struct *copy_process(
        seqcount_init(&p->mems_allowed_seq);
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
-       p->irq_events = 0;
-       p->hardirqs_enabled = 0;
-       p->hardirq_enable_ip = 0;
-       p->hardirq_enable_event = 0;
-       p->hardirq_disable_ip = _THIS_IP_;
-       p->hardirq_disable_event = 0;
-       p->softirqs_enabled = 1;
-       p->softirq_enable_ip = _THIS_IP_;
-       p->softirq_enable_event = 0;
-       p->softirq_disable_ip = 0;
-       p->softirq_disable_event = 0;
-       p->hardirq_context = 0;
-       p->softirq_context = 0;
+       memset(&p->irqtrace, 0, sizeof(p->irqtrace));
+       p->irqtrace.hardirq_disable_ip  = _THIS_IP_;
+       p->irqtrace.softirq_enable_ip   = _THIS_IP_;
+       p->softirqs_enabled             = 1;
+       p->softirq_context              = 0;
 #endif
 
        p->pagefault_disabled = 0;
@@ -2304,6 +2302,7 @@ static __latent_entropy struct task_struct *copy_process(
        write_unlock_irq(&tasklist_lock);
 
        proc_fork_connector(p);
+       sched_post_fork(p);
        cgroup_post_fork(p, args);
        perf_event_fork(p);
 
index e646661..4616d4a 100644 (file)
  *  "But they come in a choice of three flavours!"
  */
 #include <linux/compat.h>
-#include <linux/slab.h>
-#include <linux/poll.h>
-#include <linux/fs.h>
-#include <linux/file.h>
 #include <linux/jhash.h>
-#include <linux/init.h>
-#include <linux/futex.h>
-#include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
-#include <linux/signal.h>
-#include <linux/export.h>
-#include <linux/magic.h>
-#include <linux/pid.h>
-#include <linux/nsproxy.h>
-#include <linux/ptrace.h>
-#include <linux/sched/rt.h>
-#include <linux/sched/wake_q.h>
-#include <linux/sched/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/freezer.h>
 #include <linux/memblock.h>
 #include <linux/fault-inject.h>
-#include <linux/refcount.h>
 
 #include <asm/futex.h>
 
@@ -476,7 +459,7 @@ static u64 get_inode_sequence_number(struct inode *inode)
 /**
  * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr:     virtual address of the futex
- * @fshared:   0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
+ * @fshared:   false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
  * @key:       address where result is stored.
  * @rw:                mapping needs to be read/write (values: FUTEX_READ,
  *              FUTEX_WRITE)
@@ -500,8 +483,8 @@ static u64 get_inode_sequence_number(struct inode *inode)
  *
  * lock_page() might sleep, the caller should not hold a spinlock.
  */
-static int
-get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_access rw)
+static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
+                        enum futex_access rw)
 {
        unsigned long address = (unsigned long)uaddr;
        struct mm_struct *mm = current->mm;
@@ -538,7 +521,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 
 again:
        /* Ignore any VERIFY_READ mapping (futex common case) */
-       if (unlikely(should_fail_futex(fshared)))
+       if (unlikely(should_fail_futex(true)))
                return -EFAULT;
 
        err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
@@ -626,7 +609,7 @@ again:
                 * A RO anonymous page will never change and thus doesn't make
                 * sense for futex operations.
                 */
-               if (unlikely(should_fail_futex(fshared)) || ro) {
+               if (unlikely(should_fail_futex(true)) || ro) {
                        err = -EFAULT;
                        goto out;
                }
@@ -677,10 +660,6 @@ out:
        return err;
 }
 
-static inline void put_futex_key(union futex_key *key)
-{
-}
-
 /**
  * fault_in_user_writeable() - Fault in user address and verify RW access
  * @uaddr:     pointer to faulting user space address
@@ -1611,13 +1590,13 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 
        ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
        if (unlikely(ret != 0))
-               goto out;
+               return ret;
 
        hb = hash_futex(&key);
 
        /* Make sure we really have tasks to wakeup */
        if (!hb_waiters_pending(hb))
-               goto out_put_key;
+               return ret;
 
        spin_lock(&hb->lock);
 
@@ -1640,9 +1619,6 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 
        spin_unlock(&hb->lock);
        wake_up_q(&wake_q);
-out_put_key:
-       put_futex_key(&key);
-out:
        return ret;
 }
 
@@ -1709,10 +1685,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
 retry:
        ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
        if (unlikely(ret != 0))
-               goto out;
+               return ret;
        ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
        if (unlikely(ret != 0))
-               goto out_put_key1;
+               return ret;
 
        hb1 = hash_futex(&key1);
        hb2 = hash_futex(&key2);
@@ -1730,13 +1706,13 @@ retry_private:
                         * an MMU, but we might get them from range checking
                         */
                        ret = op_ret;
-                       goto out_put_keys;
+                       return ret;
                }
 
                if (op_ret == -EFAULT) {
                        ret = fault_in_user_writeable(uaddr2);
                        if (ret)
-                               goto out_put_keys;
+                               return ret;
                }
 
                if (!(flags & FLAGS_SHARED)) {
@@ -1744,8 +1720,6 @@ retry_private:
                        goto retry_private;
                }
 
-               put_futex_key(&key2);
-               put_futex_key(&key1);
                cond_resched();
                goto retry;
        }
@@ -1781,11 +1755,6 @@ retry_private:
 out_unlock:
        double_unlock_hb(hb1, hb2);
        wake_up_q(&wake_q);
-out_put_keys:
-       put_futex_key(&key2);
-out_put_key1:
-       put_futex_key(&key1);
-out:
        return ret;
 }
 
@@ -1992,20 +1961,18 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 retry:
        ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
        if (unlikely(ret != 0))
-               goto out;
+               return ret;
        ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
                            requeue_pi ? FUTEX_WRITE : FUTEX_READ);
        if (unlikely(ret != 0))
-               goto out_put_key1;
+               return ret;
 
        /*
         * The check above which compares uaddrs is not sufficient for
         * shared futexes. We need to compare the keys:
         */
-       if (requeue_pi && match_futex(&key1, &key2)) {
-               ret = -EINVAL;
-               goto out_put_keys;
-       }
+       if (requeue_pi && match_futex(&key1, &key2))
+               return -EINVAL;
 
        hb1 = hash_futex(&key1);
        hb2 = hash_futex(&key2);
@@ -2025,13 +1992,11 @@ retry_private:
 
                        ret = get_user(curval, uaddr1);
                        if (ret)
-                               goto out_put_keys;
+                               return ret;
 
                        if (!(flags & FLAGS_SHARED))
                                goto retry_private;
 
-                       put_futex_key(&key2);
-                       put_futex_key(&key1);
                        goto retry;
                }
                if (curval != *cmpval) {
@@ -2090,12 +2055,10 @@ retry_private:
                case -EFAULT:
                        double_unlock_hb(hb1, hb2);
                        hb_waiters_dec(hb2);
-                       put_futex_key(&key2);
-                       put_futex_key(&key1);
                        ret = fault_in_user_writeable(uaddr2);
                        if (!ret)
                                goto retry;
-                       goto out;
+                       return ret;
                case -EBUSY:
                case -EAGAIN:
                        /*
@@ -2106,8 +2069,6 @@ retry_private:
                         */
                        double_unlock_hb(hb1, hb2);
                        hb_waiters_dec(hb2);
-                       put_futex_key(&key2);
-                       put_futex_key(&key1);
                        /*
                         * Handle the case where the owner is in the middle of
                         * exiting. Wait for the exit to complete otherwise
@@ -2216,12 +2177,6 @@ out_unlock:
        double_unlock_hb(hb1, hb2);
        wake_up_q(&wake_q);
        hb_waiters_dec(hb2);
-
-out_put_keys:
-       put_futex_key(&key2);
-out_put_key1:
-       put_futex_key(&key1);
-out:
        return ret ? ret : task_count;
 }
 
@@ -2567,7 +2522,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
                 */
                if (q->pi_state->owner != current)
                        ret = fixup_pi_state_owner(uaddr, q, current);
-               goto out;
+               return ret ? ret : locked;
        }
 
        /*
@@ -2580,7 +2535,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
         */
        if (q->pi_state->owner == current) {
                ret = fixup_pi_state_owner(uaddr, q, NULL);
-               goto out;
+               return ret;
        }
 
        /*
@@ -2594,8 +2549,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
                                q->pi_state->owner);
        }
 
-out:
-       return ret ? ret : locked;
+       return ret;
 }
 
 /**
@@ -2692,12 +2646,11 @@ retry_private:
 
                ret = get_user(uval, uaddr);
                if (ret)
-                       goto out;
+                       return ret;
 
                if (!(flags & FLAGS_SHARED))
                        goto retry_private;
 
-               put_futex_key(&q->key);
                goto retry;
        }
 
@@ -2706,9 +2659,6 @@ retry_private:
                ret = -EWOULDBLOCK;
        }
 
-out:
-       if (ret)
-               put_futex_key(&q->key);
        return ret;
 }
 
@@ -2853,7 +2803,6 @@ retry_private:
                         * - EAGAIN: The user space value changed.
                         */
                        queue_unlock(hb);
-                       put_futex_key(&q.key);
                        /*
                         * Handle the case where the owner is in the middle of
                         * exiting. Wait for the exit to complete otherwise
@@ -2961,13 +2910,11 @@ no_block:
                put_pi_state(pi_state);
        }
 
-       goto out_put_key;
+       goto out;
 
 out_unlock_put_key:
        queue_unlock(hb);
 
-out_put_key:
-       put_futex_key(&q.key);
 out:
        if (to) {
                hrtimer_cancel(&to->timer);
@@ -2980,12 +2927,11 @@ uaddr_faulted:
 
        ret = fault_in_user_writeable(uaddr);
        if (ret)
-               goto out_put_key;
+               goto out;
 
        if (!(flags & FLAGS_SHARED))
                goto retry_private;
 
-       put_futex_key(&q.key);
        goto retry;
 }
 
@@ -3114,16 +3060,13 @@ retry:
 out_unlock:
        spin_unlock(&hb->lock);
 out_putkey:
-       put_futex_key(&key);
        return ret;
 
 pi_retry:
-       put_futex_key(&key);
        cond_resched();
        goto retry;
 
 pi_faulted:
-       put_futex_key(&key);
 
        ret = fault_in_user_writeable(uaddr);
        if (!ret)
@@ -3265,7 +3208,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
         */
        ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
        if (ret)
-               goto out_key2;
+               goto out;
 
        /*
         * The check above which compares uaddrs is not sufficient for
@@ -3274,7 +3217,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
        if (match_futex(&q.key, &key2)) {
                queue_unlock(hb);
                ret = -EINVAL;
-               goto out_put_keys;
+               goto out;
        }
 
        /* Queue the futex_q, drop the hb lock, wait for wakeup. */
@@ -3284,7 +3227,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
        ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
        spin_unlock(&hb->lock);
        if (ret)
-               goto out_put_keys;
+               goto out;
 
        /*
         * In order for us to be here, we know our q.key == key2, and since
@@ -3374,11 +3317,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                ret = -EWOULDBLOCK;
        }
 
-out_put_keys:
-       put_futex_key(&q.key);
-out_key2:
-       put_futex_key(&key2);
-
 out:
        if (to) {
                hrtimer_cancel(&to->timer);
index 4f9f844..b95ff5d 100644 (file)
@@ -112,6 +112,7 @@ static const struct irq_bit_descr irqdata_states[] = {
        BIT_MASK_DESCR(IRQD_AFFINITY_SET),
        BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
        BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
+       BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE),
        BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
        BIT_MASK_DESCR(IRQD_CAN_RESERVE),
        BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK),
@@ -120,6 +121,10 @@ static const struct irq_bit_descr irqdata_states[] = {
 
        BIT_MASK_DESCR(IRQD_WAKEUP_STATE),
        BIT_MASK_DESCR(IRQD_WAKEUP_ARMED),
+
+       BIT_MASK_DESCR(IRQD_DEFAULT_TRIGGER_SET),
+
+       BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX),
 };
 
 static const struct irq_bit_descr irqdesc_states[] = {
index 2a9fec5..48c38e0 100644 (file)
@@ -320,12 +320,16 @@ static bool irq_set_affinity_deactivated(struct irq_data *data,
        struct irq_desc *desc = irq_data_to_desc(data);
 
        /*
+        * Correctly handle irq chips which can only handle affinity in
+        * the activated state.
+        *
         * If the interrupt is not yet activated, just store the affinity
         * mask and do not call the chip driver at all. On activation the
         * driver has to make sure anyway that the interrupt is in a
         * useable state so startup works.
         */
-       if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
+       if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) ||
+           irqd_is_activated(data) || !irqd_affinity_on_activate(data))
                return false;
 
        cpumask_copy(desc->irq_common_data.affinity, mask);
index bb14e64..95cb74f 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/filter.h>
 #include <linux/ftrace.h>
+#include <linux/kprobes.h>
 #include <linux/compiler.h>
 
 /*
@@ -437,6 +438,7 @@ struct kallsym_iter {
        loff_t pos_arch_end;
        loff_t pos_mod_end;
        loff_t pos_ftrace_mod_end;
+       loff_t pos_bpf_end;
        unsigned long value;
        unsigned int nameoff; /* If iterating in core kernel symbols. */
        char type;
@@ -480,6 +482,11 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
        return 1;
 }
 
+/*
+ * ftrace_mod_get_kallsym() may also get symbols for pages allocated for ftrace
+ * purposes. In that case "__builtin__ftrace" is used as a module name, even
+ * though "__builtin__ftrace" is not a module.
+ */
 static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
 {
        int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_mod_end,
@@ -496,11 +503,33 @@ static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
 
 static int get_ksymbol_bpf(struct kallsym_iter *iter)
 {
+       int ret;
+
        strlcpy(iter->module_name, "bpf", MODULE_NAME_LEN);
        iter->exported = 0;
-       return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
-                              &iter->value, &iter->type,
-                              iter->name) < 0 ? 0 : 1;
+       ret = bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
+                             &iter->value, &iter->type,
+                             iter->name);
+       if (ret < 0) {
+               iter->pos_bpf_end = iter->pos;
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * This uses "__builtin__kprobes" as a module name for symbols for pages
+ * allocated for kprobes' purposes, even though "__builtin__kprobes" is not a
+ * module.
+ */
+static int get_ksymbol_kprobe(struct kallsym_iter *iter)
+{
+       strlcpy(iter->module_name, "__builtin__kprobes", MODULE_NAME_LEN);
+       iter->exported = 0;
+       return kprobe_get_kallsym(iter->pos - iter->pos_bpf_end,
+                                 &iter->value, &iter->type,
+                                 iter->name) < 0 ? 0 : 1;
 }
 
 /* Returns space to next name. */
@@ -527,6 +556,7 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
                iter->pos_arch_end = 0;
                iter->pos_mod_end = 0;
                iter->pos_ftrace_mod_end = 0;
+               iter->pos_bpf_end = 0;
        }
 }
 
@@ -551,7 +581,11 @@ static int update_iter_mod(struct kallsym_iter *iter, loff_t pos)
            get_ksymbol_ftrace_mod(iter))
                return 1;
 
-       return get_ksymbol_bpf(iter);
+       if ((!iter->pos_bpf_end || iter->pos_bpf_end > pos) &&
+           get_ksymbol_bpf(iter))
+               return 1;
+
+       return get_ksymbol_kprobe(iter);
 }
 
 /* Returns false if pos at or past end of file. */
index d4999b3..65ca553 100644 (file)
@@ -7,8 +7,11 @@ CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_debugfs.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE)
 
-CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \
-       $(call cc-option,-fno-stack-protector,)
+CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \
+       -fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
 obj-y := core.o debugfs.o report.o
-obj-$(CONFIG_KCSAN_SELFTEST) += test.o
+obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
+
+CFLAGS_kcsan-test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer
+obj-$(CONFIG_KCSAN_TEST) += kcsan-test.o
index be9e625..75fe701 100644 (file)
@@ -3,8 +3,7 @@
 #ifndef _KERNEL_KCSAN_ATOMIC_H
 #define _KERNEL_KCSAN_ATOMIC_H
 
-#include <linux/jiffies.h>
-#include <linux/sched.h>
+#include <linux/types.h>
 
 /*
  * Special rules for certain memory where concurrent conflicting accesses are
@@ -13,8 +12,7 @@
  */
 static bool kcsan_is_atomic_special(const volatile void *ptr)
 {
-       /* volatile globals that have been observed in data races. */
-       return ptr == &jiffies || ptr == &current->state;
+       return false;
 }
 
 #endif /* _KERNEL_KCSAN_ATOMIC_H */
index 15f6794..9147ff6 100644 (file)
@@ -291,6 +291,20 @@ static inline unsigned int get_delay(void)
                                0);
 }
 
+void kcsan_save_irqtrace(struct task_struct *task)
+{
+#ifdef CONFIG_TRACE_IRQFLAGS
+       task->kcsan_save_irqtrace = task->irqtrace;
+#endif
+}
+
+void kcsan_restore_irqtrace(struct task_struct *task)
+{
+#ifdef CONFIG_TRACE_IRQFLAGS
+       task->irqtrace = task->kcsan_save_irqtrace;
+#endif
+}
+
 /*
  * Pull everything together: check_access() below contains the performance
  * critical operations; the fast-path (including check_access) functions should
@@ -336,9 +350,11 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
        flags = user_access_save();
 
        if (consumed) {
+               kcsan_save_irqtrace(current);
                kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE,
                             KCSAN_REPORT_CONSUMED_WATCHPOINT,
                             watchpoint - watchpoints);
+               kcsan_restore_irqtrace(current);
        } else {
                /*
                 * The other thread may not print any diagnostics, as it has
@@ -396,9 +412,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
                goto out;
        }
 
+       /*
+        * Save and restore the IRQ state trace touched by KCSAN, since KCSAN's
+        * runtime is entered for every memory access, and potentially useful
+        * information is lost if dirtied by KCSAN.
+        */
+       kcsan_save_irqtrace(current);
        if (!kcsan_interrupt_watcher)
-               /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
-               raw_local_irq_save(irq_flags);
+               local_irq_save(irq_flags);
 
        watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
        if (watchpoint == NULL) {
@@ -539,7 +560,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
        kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
 out_unlock:
        if (!kcsan_interrupt_watcher)
-               raw_local_irq_restore(irq_flags);
+               local_irq_restore(irq_flags);
+       kcsan_restore_irqtrace(current);
 out:
        user_access_restore(ua_flags);
 }
@@ -754,6 +776,7 @@ EXPORT_SYMBOL(__kcsan_check_access);
  */
 
 #define DEFINE_TSAN_READ_WRITE(size)                                           \
+       void __tsan_read##size(void *ptr);                                     \
        void __tsan_read##size(void *ptr)                                      \
        {                                                                      \
                check_access(ptr, size, 0);                                    \
@@ -762,6 +785,7 @@ EXPORT_SYMBOL(__kcsan_check_access);
        void __tsan_unaligned_read##size(void *ptr)                            \
                __alias(__tsan_read##size);                                    \
        EXPORT_SYMBOL(__tsan_unaligned_read##size);                            \
+       void __tsan_write##size(void *ptr);                                    \
        void __tsan_write##size(void *ptr)                                     \
        {                                                                      \
                check_access(ptr, size, KCSAN_ACCESS_WRITE);                   \
@@ -777,12 +801,14 @@ DEFINE_TSAN_READ_WRITE(4);
 DEFINE_TSAN_READ_WRITE(8);
 DEFINE_TSAN_READ_WRITE(16);
 
+void __tsan_read_range(void *ptr, size_t size);
 void __tsan_read_range(void *ptr, size_t size)
 {
        check_access(ptr, size, 0);
 }
 EXPORT_SYMBOL(__tsan_read_range);
 
+void __tsan_write_range(void *ptr, size_t size);
 void __tsan_write_range(void *ptr, size_t size)
 {
        check_access(ptr, size, KCSAN_ACCESS_WRITE);
@@ -799,6 +825,7 @@ EXPORT_SYMBOL(__tsan_write_range);
  * the size-check of compiletime_assert_rwonce_type().
  */
 #define DEFINE_TSAN_VOLATILE_READ_WRITE(size)                                  \
+       void __tsan_volatile_read##size(void *ptr);                            \
        void __tsan_volatile_read##size(void *ptr)                             \
        {                                                                      \
                const bool is_atomic = size <= sizeof(long long) &&            \
@@ -811,6 +838,7 @@ EXPORT_SYMBOL(__tsan_write_range);
        void __tsan_unaligned_volatile_read##size(void *ptr)                   \
                __alias(__tsan_volatile_read##size);                           \
        EXPORT_SYMBOL(__tsan_unaligned_volatile_read##size);                   \
+       void __tsan_volatile_write##size(void *ptr);                           \
        void __tsan_volatile_write##size(void *ptr)                            \
        {                                                                      \
                const bool is_atomic = size <= sizeof(long long) &&            \
@@ -836,14 +864,17 @@ DEFINE_TSAN_VOLATILE_READ_WRITE(16);
  * The below are not required by KCSAN, but can still be emitted by the
  * compiler.
  */
+void __tsan_func_entry(void *call_pc);
 void __tsan_func_entry(void *call_pc)
 {
 }
 EXPORT_SYMBOL(__tsan_func_entry);
+void __tsan_func_exit(void);
 void __tsan_func_exit(void)
 {
 }
 EXPORT_SYMBOL(__tsan_func_exit);
+void __tsan_init(void);
 void __tsan_init(void)
 {
 }
diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c
new file mode 100644 (file)
index 0000000..fed6fcb
--- /dev/null
@@ -0,0 +1,1107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KCSAN test with various race scenarios to test runtime behaviour. Since the
+ * interface with which KCSAN's reports are obtained is via the console, this is
+ * the output we should verify. Each test case checks for the presence (or
+ * absence) of generated reports. Relies on 'console' tracepoint to capture
+ * reports as they appear in the kernel log.
+ *
+ * Makes use of KUnit for test organization, and the Torture framework for test
+ * thread control.
+ *
+ * Copyright (C) 2020, Google LLC.
+ * Author: Marco Elver <elver@google.com>
+ */
+
+#include <kunit/test.h>
+#include <linux/jiffies.h>
+#include <linux/kcsan-checks.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/torture.h>
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+#include <trace/events/printk.h>
+
+/* Points to current test-case memory access "kernels". */
+static void (*access_kernels[2])(void);
+
+static struct task_struct **threads; /* Lists of threads. */
+static unsigned long end_time;       /* End time of test. */
+
+/* Report as observed from console. */
+static struct {
+       spinlock_t lock;
+       int nlines;
+       char lines[3][512];
+} observed = {
+       .lock = __SPIN_LOCK_UNLOCKED(observed.lock),
+};
+
+/* Setup test checking loop. */
+static __no_kcsan inline void
+begin_test_checks(void (*func1)(void), void (*func2)(void))
+{
+       kcsan_disable_current();
+
+       /*
+        * Require at least as long as KCSAN_REPORT_ONCE_IN_MS, to ensure at
+        * least one race is reported.
+        */
+       end_time = jiffies + msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS + 500);
+
+       /* Signal start; release potential initialization of shared data. */
+       smp_store_release(&access_kernels[0], func1);
+       smp_store_release(&access_kernels[1], func2);
+}
+
+/* End test checking loop. */
+static __no_kcsan inline bool
+end_test_checks(bool stop)
+{
+       if (!stop && time_before(jiffies, end_time)) {
+               /* Continue checking */
+               might_sleep();
+               return false;
+       }
+
+       kcsan_enable_current();
+       return true;
+}
+
+/*
+ * Probe for console output: checks if a race was reported, and obtains observed
+ * lines of interest.
+ */
+__no_kcsan
+static void probe_console(void *ignore, const char *buf, size_t len)
+{
+       unsigned long flags;
+       int nlines;
+
+       /*
+        * Note that KCSAN reports under a global lock, so we do not risk the
+        * possibility of having multiple reports interleaved. If that were the
+        * case, we'd expect tests to fail.
+        */
+
+       spin_lock_irqsave(&observed.lock, flags);
+       nlines = observed.nlines;
+
+       if (strnstr(buf, "BUG: KCSAN: ", len) && strnstr(buf, "test_", len)) {
+               /*
+                * KCSAN report and related to the test.
+                *
+                * The provided @buf is not NUL-terminated; copy no more than
+                * @len bytes and let strscpy() add the missing NUL-terminator.
+                */
+               strscpy(observed.lines[0], buf, min(len + 1, sizeof(observed.lines[0])));
+               nlines = 1;
+       } else if ((nlines == 1 || nlines == 2) && strnstr(buf, "bytes by", len)) {
+               strscpy(observed.lines[nlines++], buf, min(len + 1, sizeof(observed.lines[0])));
+
+               if (strnstr(buf, "race at unknown origin", len)) {
+                       if (WARN_ON(nlines != 2))
+                               goto out;
+
+                       /* No second line of interest. */
+                       strcpy(observed.lines[nlines++], "<none>");
+               }
+       }
+
+out:
+       WRITE_ONCE(observed.nlines, nlines); /* Publish new nlines. */
+       spin_unlock_irqrestore(&observed.lock, flags);
+}
+
+/* Check if a report related to the test exists. */
+__no_kcsan
+static bool report_available(void)
+{
+       return READ_ONCE(observed.nlines) == ARRAY_SIZE(observed.lines);
+}
+
+/* Report information we expect in a report. */
+struct expect_report {
+       /* Access information of both accesses. */
+       struct {
+               void *fn;    /* Function pointer to expected function of top frame. */
+               void *addr;  /* Address of access; unchecked if NULL. */
+               size_t size; /* Size of access; unchecked if @addr is NULL. */
+               int type;    /* Access type, see KCSAN_ACCESS definitions. */
+       } access[2];
+};
+
+/* Check observed report matches information in @r. */
+__no_kcsan
+static bool report_matches(const struct expect_report *r)
+{
+       const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT;
+       bool ret = false;
+       unsigned long flags;
+       typeof(observed.lines) expect;
+       const char *end;
+       char *cur;
+       int i;
+
+       /* Double-checked locking. */
+       if (!report_available())
+               return false;
+
+       /* Generate expected report contents. */
+
+       /* Title */
+       cur = expect[0];
+       end = &expect[0][sizeof(expect[0]) - 1];
+       cur += scnprintf(cur, end - cur, "BUG: KCSAN: %s in ",
+                        is_assert ? "assert: race" : "data-race");
+       if (r->access[1].fn) {
+               char tmp[2][64];
+               int cmp;
+
+               /* Expect lexicographically sorted function names in title. */
+               scnprintf(tmp[0], sizeof(tmp[0]), "%pS", r->access[0].fn);
+               scnprintf(tmp[1], sizeof(tmp[1]), "%pS", r->access[1].fn);
+               cmp = strcmp(tmp[0], tmp[1]);
+               cur += scnprintf(cur, end - cur, "%ps / %ps",
+                                cmp < 0 ? r->access[0].fn : r->access[1].fn,
+                                cmp < 0 ? r->access[1].fn : r->access[0].fn);
+       } else {
+               scnprintf(cur, end - cur, "%pS", r->access[0].fn);
+               /* The exact offset won't match, remove it. */
+               cur = strchr(expect[0], '+');
+               if (cur)
+                       *cur = '\0';
+       }
+
+       /* Access 1 */
+       cur = expect[1];
+       end = &expect[1][sizeof(expect[1]) - 1];
+       if (!r->access[1].fn)
+               cur += scnprintf(cur, end - cur, "race at unknown origin, with ");
+
+       /* Access 1 & 2 */
+       for (i = 0; i < 2; ++i) {
+               const char *const access_type =
+                       (r->access[i].type & KCSAN_ACCESS_ASSERT) ?
+                               ((r->access[i].type & KCSAN_ACCESS_WRITE) ?
+                                        "assert no accesses" :
+                                        "assert no writes") :
+                               ((r->access[i].type & KCSAN_ACCESS_WRITE) ?
+                                        "write" :
+                                        "read");
+               const char *const access_type_aux =
+                       (r->access[i].type & KCSAN_ACCESS_ATOMIC) ?
+                               " (marked)" :
+                               ((r->access[i].type & KCSAN_ACCESS_SCOPED) ?
+                                        " (scoped)" :
+                                        "");
+
+               if (i == 1) {
+                       /* Access 2 */
+                       cur = expect[2];
+                       end = &expect[2][sizeof(expect[2]) - 1];
+
+                       if (!r->access[1].fn) {
+                               /* Dummy string if no second access is available. */
+                               strcpy(cur, "<none>");
+                               break;
+                       }
+               }
+
+               cur += scnprintf(cur, end - cur, "%s%s to ", access_type,
+                                access_type_aux);
+
+               if (r->access[i].addr) /* Address is optional. */
+                       cur += scnprintf(cur, end - cur, "0x%px of %zu bytes",
+                                        r->access[i].addr, r->access[i].size);
+       }
+
+       spin_lock_irqsave(&observed.lock, flags);
+       if (!report_available())
+               goto out; /* A new report is being captured. */
+
+       /* Finally match expected output to what we actually observed. */
+       ret = strstr(observed.lines[0], expect[0]) &&
+             /* Access info may appear in any order. */
+             ((strstr(observed.lines[1], expect[1]) &&
+               strstr(observed.lines[2], expect[2])) ||
+              (strstr(observed.lines[1], expect[2]) &&
+               strstr(observed.lines[2], expect[1])));
+out:
+       spin_unlock_irqrestore(&observed.lock, flags);
+       return ret;
+}
+
+/* ===== Test kernels ===== */
+
+static long test_sink;
+static long test_var;
+/* @test_array should be large enough to fall into multiple watchpoint slots. */
+static long test_array[3 * PAGE_SIZE / sizeof(long)];
+static struct {
+       long val[8];
+} test_struct;
+static DEFINE_SEQLOCK(test_seqlock);
+
+/*
+ * Helper to avoid compiler optimizing out reads, and to generate source values
+ * for writes.
+ */
+__no_kcsan
+static noinline void sink_value(long v) { WRITE_ONCE(test_sink, v); }
+
+static noinline void test_kernel_read(void) { sink_value(test_var); }
+
+static noinline void test_kernel_write(void)
+{
+       test_var = READ_ONCE_NOCHECK(test_sink) + 1;
+}
+
+static noinline void test_kernel_write_nochange(void) { test_var = 42; }
+
+/* Suffixed by value-change exception filter. */
+static noinline void test_kernel_write_nochange_rcu(void) { test_var = 42; }
+
+static noinline void test_kernel_read_atomic(void)
+{
+       sink_value(READ_ONCE(test_var));
+}
+
+static noinline void test_kernel_write_atomic(void)
+{
+       WRITE_ONCE(test_var, READ_ONCE_NOCHECK(test_sink) + 1);
+}
+
+__no_kcsan
+static noinline void test_kernel_write_uninstrumented(void) { test_var++; }
+
+static noinline void test_kernel_data_race(void) { data_race(test_var++); }
+
+static noinline void test_kernel_assert_writer(void)
+{
+       ASSERT_EXCLUSIVE_WRITER(test_var);
+}
+
+static noinline void test_kernel_assert_access(void)
+{
+       ASSERT_EXCLUSIVE_ACCESS(test_var);
+}
+
+#define TEST_CHANGE_BITS 0xff00ff00
+
+static noinline void test_kernel_change_bits(void)
+{
+       if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) {
+               /*
+                * Avoid race of unknown origin for this test, just pretend they
+                * are atomic.
+                */
+               kcsan_nestable_atomic_begin();
+               test_var ^= TEST_CHANGE_BITS;
+               kcsan_nestable_atomic_end();
+       } else
+               WRITE_ONCE(test_var, READ_ONCE(test_var) ^ TEST_CHANGE_BITS);
+}
+
+static noinline void test_kernel_assert_bits_change(void)
+{
+       ASSERT_EXCLUSIVE_BITS(test_var, TEST_CHANGE_BITS);
+}
+
+static noinline void test_kernel_assert_bits_nochange(void)
+{
+       ASSERT_EXCLUSIVE_BITS(test_var, ~TEST_CHANGE_BITS);
+}
+
+/* To check that scoped assertions do trigger anywhere in scope. */
+static noinline void test_enter_scope(void)
+{
+       int x = 0;
+
+       /* Unrelated accesses to scoped assert. */
+       READ_ONCE(test_sink);
+       kcsan_check_read(&x, sizeof(x));
+}
+
+static noinline void test_kernel_assert_writer_scoped(void)
+{
+       ASSERT_EXCLUSIVE_WRITER_SCOPED(test_var);
+       test_enter_scope();
+}
+
+static noinline void test_kernel_assert_access_scoped(void)
+{
+       ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_var);
+       test_enter_scope();
+}
+
+static noinline void test_kernel_rmw_array(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(test_array); ++i)
+               test_array[i]++;
+}
+
+static noinline void test_kernel_write_struct(void)
+{
+       kcsan_check_write(&test_struct, sizeof(test_struct));
+       kcsan_disable_current();
+       test_struct.val[3]++; /* induce value change */
+       kcsan_enable_current();
+}
+
+static noinline void test_kernel_write_struct_part(void)
+{
+       test_struct.val[3] = 42;
+}
+
+static noinline void test_kernel_read_struct_zero_size(void)
+{
+       kcsan_check_read(&test_struct.val[3], 0);
+}
+
+static noinline void test_kernel_jiffies_reader(void)
+{
+       sink_value((long)jiffies);
+}
+
+static noinline void test_kernel_seqlock_reader(void)
+{
+       unsigned int seq;
+
+       do {
+               seq = read_seqbegin(&test_seqlock);
+               sink_value(test_var);
+       } while (read_seqretry(&test_seqlock, seq));
+}
+
+static noinline void test_kernel_seqlock_writer(void)
+{
+       unsigned long flags;
+
+       write_seqlock_irqsave(&test_seqlock, flags);
+       test_var++;
+       write_sequnlock_irqrestore(&test_seqlock, flags);
+}
+
+/* ===== Test cases ===== */
+
+/* Simple test with normal data race. */
+__no_kcsan
+static void test_basic(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+               },
+       };
+       static const struct expect_report never = {
+               .access = {
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+               },
+       };
+       bool match_expect = false;
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_write, test_kernel_read);
+       do {
+               match_expect |= report_matches(&expect);
+               match_never = report_matches(&never);
+       } while (!end_test_checks(match_never));
+       KUNIT_EXPECT_TRUE(test, match_expect);
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/*
+ * Stress KCSAN with lots of concurrent races on different addresses until
+ * timeout.
+ */
+__no_kcsan
+static void test_concurrent_races(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       /* NULL will match any address. */
+                       { test_kernel_rmw_array, NULL, 0, KCSAN_ACCESS_WRITE },
+                       { test_kernel_rmw_array, NULL, 0, 0 },
+               },
+       };
+       static const struct expect_report never = {
+               .access = {
+                       { test_kernel_rmw_array, NULL, 0, 0 },
+                       { test_kernel_rmw_array, NULL, 0, 0 },
+               },
+       };
+       bool match_expect = false;
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_rmw_array, test_kernel_rmw_array);
+       do {
+               match_expect |= report_matches(&expect);
+               match_never |= report_matches(&never);
+       } while (!end_test_checks(false));
+       KUNIT_EXPECT_TRUE(test, match_expect); /* Sanity check matches exist. */
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/* Test the KCSAN_REPORT_VALUE_CHANGE_ONLY option. */
+__no_kcsan
+static void test_novalue_change(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_write_nochange, test_kernel_read);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect));
+       if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY))
+               KUNIT_EXPECT_FALSE(test, match_expect);
+       else
+               KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/*
+ * Test that the rules where the KCSAN_REPORT_VALUE_CHANGE_ONLY option should
+ * never apply work.
+ */
+__no_kcsan
+static void test_novalue_change_exception(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_write_nochange_rcu, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_write_nochange_rcu, test_kernel_read);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect));
+       KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/* Test that data races of unknown origin are reported. */
+__no_kcsan
+static void test_unknown_origin(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+                       { NULL },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_write_uninstrumented, test_kernel_read);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect));
+       if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN))
+               KUNIT_EXPECT_TRUE(test, match_expect);
+       else
+               KUNIT_EXPECT_FALSE(test, match_expect);
+}
+
+/* Test KCSAN_ASSUME_PLAIN_WRITES_ATOMIC if it is selected. */
+__no_kcsan
+static void test_write_write_assume_atomic(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+                       { test_kernel_write, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_write, test_kernel_write);
+       do {
+               sink_value(READ_ONCE(test_var)); /* induce value-change */
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect));
+       if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC))
+               KUNIT_EXPECT_FALSE(test, match_expect);
+       else
+               KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/*
+ * Test that data races with writes larger than word-size are always reported,
+ * even if KCSAN_ASSUME_PLAIN_WRITES_ATOMIC is selected.
+ */
+__no_kcsan
+static void test_write_write_struct(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+                       { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_write_struct, test_kernel_write_struct);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect));
+       KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/*
+ * Test that data races where only one write is larger than word-size are always
+ * reported, even if KCSAN_ASSUME_PLAIN_WRITES_ATOMIC is selected.
+ */
+__no_kcsan
+static void test_write_write_struct_part(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+                       { test_kernel_write_struct_part, &test_struct.val[3], sizeof(test_struct.val[3]), KCSAN_ACCESS_WRITE },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_write_struct, test_kernel_write_struct_part);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect));
+       KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/* Test that races with atomic accesses never result in reports. */
+__no_kcsan
+static void test_read_atomic_write_atomic(struct kunit *test)
+{
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_read_atomic, test_kernel_write_atomic);
+       do {
+               match_never = report_available();
+       } while (!end_test_checks(match_never));
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/* Test that a race with an atomic and plain access results in reports. */
+__no_kcsan
+static void test_read_plain_atomic_write(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+                       { test_kernel_write_atomic, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC },
+               },
+       };
+       bool match_expect = false;
+
+       if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))
+               return;
+
+       begin_test_checks(test_kernel_read, test_kernel_write_atomic);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect));
+       KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+/* Zero-sized accesses should never cause data race reports. */
+__no_kcsan
+static void test_zero_size_access(struct kunit *test)
+{
+       const struct expect_report expect = { /* Sanity case: two racing writes to the whole test_struct. */
+               .access = {
+                       { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+                       { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+               },
+       };
+       const struct expect_report never = { /* Must not show up: struct write vs. size-0 read at val[3]. */
+               .access = {
+                       { test_kernel_write_struct, &test_struct, sizeof(test_struct), KCSAN_ACCESS_WRITE },
+                       { test_kernel_read_struct_zero_size, &test_struct.val[3], 0, 0 },
+               },
+       };
+       bool match_expect = false;
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_write_struct, test_kernel_read_struct_zero_size);
+       do {
+               match_expect |= report_matches(&expect); /* Sticky: stays true once matched. */
+               match_never = report_matches(&never);
+       } while (!end_test_checks(match_never)); /* Only match_never is passed as the stop condition. */
+       KUNIT_EXPECT_TRUE(test, match_expect); /* Sanity check. */
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/* Test the data_race() macro: with test_kernel_data_race as both kernels, no report may appear. */
+__no_kcsan
+static void test_data_race(struct kunit *test)
+{
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_data_race, test_kernel_data_race);
+       do {
+               match_never = report_available(); /* Any available report counts as a failure. */
+       } while (!end_test_checks(match_never));
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+__no_kcsan /* Expects a report pairing test_kernel_assert_writer (ASSERT) with a plain write to test_var. */
+static void test_assert_exclusive_writer(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+                       { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_assert_writer, test_kernel_write_nochange);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect)); /* Repeat until end_test_checks() returns true. */
+       KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+__no_kcsan /* Expects a report pairing test_kernel_assert_access (ASSERT | WRITE) with a plain read of test_var. */
+static void test_assert_exclusive_access(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE },
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_assert_access, test_kernel_read);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect)); /* Repeat until end_test_checks() returns true. */
+       KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+__no_kcsan /* access-vs-writer and access-vs-access reports are expected; writer-vs-writer must never appear. */
+static void test_assert_exclusive_access_writer(struct kunit *test)
+{
+       const struct expect_report expect_access_writer = {
+               .access = {
+                       { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE },
+                       { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+               },
+       };
+       const struct expect_report expect_access_access = {
+               .access = {
+                       { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE },
+                       { test_kernel_assert_access, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE },
+               },
+       };
+       const struct expect_report never = { /* Two exclusive-writer assertions paired with each other. */
+               .access = {
+                       { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+                       { test_kernel_assert_writer, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+               },
+       };
+       bool match_expect_access_writer = false;
+       bool match_expect_access_access = false;
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_assert_access, test_kernel_assert_writer);
+       do {
+               match_expect_access_writer |= report_matches(&expect_access_writer);
+               match_expect_access_access |= report_matches(&expect_access_access);
+               match_never |= report_matches(&never); /* All three flags are sticky (|=). */
+       } while (!end_test_checks(match_never)); /* Only match_never is passed as the stop condition. */
+       KUNIT_EXPECT_TRUE(test, match_expect_access_writer);
+       KUNIT_EXPECT_TRUE(test, match_expect_access_access);
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+__no_kcsan /* Expects a report pairing test_kernel_assert_bits_change with test_kernel_change_bits on test_var. */
+static void test_assert_exclusive_bits_change(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .access = {
+                       { test_kernel_assert_bits_change, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT },
+                       { test_kernel_change_bits, &test_var, sizeof(test_var), /* ATOMIC flag expected only if atomics are not ignored. */
+                               KCSAN_ACCESS_WRITE | (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) ? 0 : KCSAN_ACCESS_ATOMIC) },
+               },
+       };
+       bool match_expect = false;
+
+       begin_test_checks(test_kernel_assert_bits_change, test_kernel_change_bits);
+       do {
+               match_expect = report_matches(&expect);
+       } while (!end_test_checks(match_expect)); /* Repeat until end_test_checks() returns true. */
+       KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
+__no_kcsan /* test_kernel_assert_bits_nochange vs. test_kernel_change_bits: no report of any kind may appear. */
+static void test_assert_exclusive_bits_nochange(struct kunit *test)
+{
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_assert_bits_nochange, test_kernel_change_bits);
+       do {
+               match_never = report_available(); /* Any available report counts as a failure. */
+       } while (!end_test_checks(match_never));
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+__no_kcsan /* Scoped exclusive-writer assertion: expects reports attributed to both functions listed below. */
+static void test_assert_exclusive_writer_scoped(struct kunit *test)
+{
+       const struct expect_report expect_start = { /* Report attributed to test_kernel_assert_writer_scoped. */
+               .access = {
+                       { test_kernel_assert_writer_scoped, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED },
+                       { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+               },
+       };
+       const struct expect_report expect_anywhere = { /* Same access, but attributed to test_enter_scope. */
+               .access = {
+                       { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_SCOPED },
+                       { test_kernel_write_nochange, &test_var, sizeof(test_var), KCSAN_ACCESS_WRITE },
+               },
+       };
+       bool match_expect_start = false;
+       bool match_expect_anywhere = false;
+
+       begin_test_checks(test_kernel_assert_writer_scoped, test_kernel_write_nochange);
+       do {
+               match_expect_start |= report_matches(&expect_start);
+               match_expect_anywhere |= report_matches(&expect_anywhere);
+       } while (!end_test_checks(match_expect_start && match_expect_anywhere)); /* Stop condition needs both matches. */
+       KUNIT_EXPECT_TRUE(test, match_expect_start);
+       KUNIT_EXPECT_TRUE(test, match_expect_anywhere);
+}
+
+__no_kcsan /* Scoped exclusive-access assertion: expects one of the "start" reports plus the in-scope report. */
+static void test_assert_exclusive_access_scoped(struct kunit *test)
+{
+       const struct expect_report expect_start1 = { /* Scoped assertion vs. plain read. */
+               .access = {
+                       { test_kernel_assert_access_scoped, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_SCOPED },
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+               },
+       };
+       const struct expect_report expect_start2 = { /* Scoped assertion paired with itself. */
+               .access = { expect_start1.access[0], expect_start1.access[0] },
+       };
+       const struct expect_report expect_inscope = { /* Same assertion, but attributed to test_enter_scope. */
+               .access = {
+                       { test_enter_scope, &test_var, sizeof(test_var), KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_SCOPED },
+                       { test_kernel_read, &test_var, sizeof(test_var), 0 },
+               },
+       };
+       bool match_expect_start = false;
+       bool match_expect_inscope = false;
+
+       begin_test_checks(test_kernel_assert_access_scoped, test_kernel_read);
+       end_time += msecs_to_jiffies(1000); /* This test requires a bit more time. */
+       do {
+               match_expect_start |= report_matches(&expect_start1) || report_matches(&expect_start2); /* Either variant counts. */
+               match_expect_inscope |= report_matches(&expect_inscope);
+       } while (!end_test_checks(match_expect_start && match_expect_inscope)); /* Stop condition needs both matches. */
+       KUNIT_EXPECT_TRUE(test, match_expect_start);
+       KUNIT_EXPECT_TRUE(test, match_expect_inscope);
+}
+
+/*
+ * jiffies is special (declared to be volatile) and its accesses are typically
+ * not marked; this test ensures that neither the compiler nor KCSAN gets
+ * confused about the declaration of jiffies on different architectures.
+ */
+__no_kcsan
+static void test_jiffies_noreport(struct kunit *test)
+{
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_jiffies_reader, test_kernel_jiffies_reader);
+       do {
+               match_never = report_available(); /* Any available report counts as a failure. */
+       } while (!end_test_checks(match_never));
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/* Test that racing accesses in seqlock critical sections are not reported. */
+__no_kcsan
+static void test_seqlock_noreport(struct kunit *test)
+{
+       bool match_never = false;
+
+       begin_test_checks(test_kernel_seqlock_reader, test_kernel_seqlock_writer);
+       do {
+               match_never = report_available(); /* Any available report counts as a failure. */
+       } while (!end_test_checks(match_never));
+       KUNIT_EXPECT_FALSE(test, match_never);
+}
+
+/*
+ * Each test case is run with different numbers of threads. Until KUnit supports
+ * passing arguments for each test case, we encode #threads in the test case
+ * name as a two-digit suffix (read by get_num_threads()). [The '-' was chosen
+ * as a stylistic preference to separate test name and #threads.]
+ *
+ * The thread counts are chosen to cover potentially interesting boundaries and
+ * corner cases (range 2-5), and then stress the system with larger counts.
+ */
+#define KCSAN_KUNIT_CASE(test_name)                                            \
+       { .run_case = test_name, .name = #test_name "-02" },                   \
+       { .run_case = test_name, .name = #test_name "-03" },                   \
+       { .run_case = test_name, .name = #test_name "-04" },                   \
+       { .run_case = test_name, .name = #test_name "-05" },                   \
+       { .run_case = test_name, .name = #test_name "-08" },                   \
+       { .run_case = test_name, .name = #test_name "-16" }
+
+static struct kunit_case kcsan_test_cases[] = { /* Each KCSAN_KUNIT_CASE() expands to six entries (02..16 threads). */
+       KCSAN_KUNIT_CASE(test_basic),
+       KCSAN_KUNIT_CASE(test_concurrent_races),
+       KCSAN_KUNIT_CASE(test_novalue_change),
+       KCSAN_KUNIT_CASE(test_novalue_change_exception),
+       KCSAN_KUNIT_CASE(test_unknown_origin),
+       KCSAN_KUNIT_CASE(test_write_write_assume_atomic),
+       KCSAN_KUNIT_CASE(test_write_write_struct),
+       KCSAN_KUNIT_CASE(test_write_write_struct_part),
+       KCSAN_KUNIT_CASE(test_read_atomic_write_atomic),
+       KCSAN_KUNIT_CASE(test_read_plain_atomic_write),
+       KCSAN_KUNIT_CASE(test_zero_size_access),
+       KCSAN_KUNIT_CASE(test_data_race),
+       KCSAN_KUNIT_CASE(test_assert_exclusive_writer),
+       KCSAN_KUNIT_CASE(test_assert_exclusive_access),
+       KCSAN_KUNIT_CASE(test_assert_exclusive_access_writer),
+       KCSAN_KUNIT_CASE(test_assert_exclusive_bits_change),
+       KCSAN_KUNIT_CASE(test_assert_exclusive_bits_nochange),
+       KCSAN_KUNIT_CASE(test_assert_exclusive_writer_scoped),
+       KCSAN_KUNIT_CASE(test_assert_exclusive_access_scoped),
+       KCSAN_KUNIT_CASE(test_jiffies_noreport),
+       KCSAN_KUNIT_CASE(test_seqlock_noreport),
+       {}, /* Empty trailing entry; presumably the KUnit end-of-list marker (confirm). */
+};
+
+/* ===== End test cases ===== */
+
+/* Get number of threads encoded in test name; returns false (after a WARN) on a bad name. */
+static bool __no_kcsan
+get_num_threads(const char *test, int *nthreads)
+{
+       int len = strlen(test);
+
+       if (WARN_ON(len < 3)) /* Names shorter than three characters are rejected. */
+               return false;
+
+       *nthreads = test[len - 1] - '0'; /* Last character: ones digit. */
+       *nthreads += (test[len - 2] - '0') * 10; /* Second-to-last character: tens digit. */
+
+       if (WARN_ON(*nthreads < 0)) /* Only a negative result is rejected; other non-digits are not detected. */
+               return false;
+
+       return true;
+}
+
+/* Concurrent accesses from interrupts: timer callback that runs one access_kernels[] entry per expiry. */
+__no_kcsan
+static void access_thread_timer(struct timer_list *timer)
+{
+       static atomic_t cnt = ATOMIC_INIT(0); /* Shared by every invocation of this callback. */
+       unsigned int idx;
+       void (*func)(void);
+
+       idx = (unsigned int)atomic_inc_return(&cnt) % ARRAY_SIZE(access_kernels); /* Cycle through the slots. */
+       /* Acquire potential initialization. */
+       func = smp_load_acquire(&access_kernels[idx]);
+       if (func) /* Empty (NULL) slots are skipped. */
+               func();
+}
+
+/* The main loop for each thread: keeps a 1-jiffy timer armed and otherwise runs the access kernels. */
+__no_kcsan
+static int access_thread(void *arg)
+{
+       struct timer_list timer;
+       unsigned int cnt = 0;
+       unsigned int idx;
+       void (*func)(void);
+
+       timer_setup_on_stack(&timer, access_thread_timer, 0);
+       do {
+               might_sleep();
+
+               if (!timer_pending(&timer))
+                       mod_timer(&timer, jiffies + 1); /* Re-arm; this iteration runs no kernel itself. */
+               else {
+                       /* Iterate through all kernels. */
+                       idx = cnt++ % ARRAY_SIZE(access_kernels);
+                       /* Acquire potential initialization. */
+                       func = smp_load_acquire(&access_kernels[idx]);
+                       if (func) /* Empty (NULL) slots are skipped. */
+                               func();
+               }
+       } while (!torture_must_stop());
+       del_timer_sync(&timer); /* The timer lives on this stack frame, so stop it before returning. */
+       destroy_timer_on_stack(&timer);
+
+       torture_kthread_stopping("access_thread");
+       return 0;
+}
+
+__no_kcsan /* Per-test-case setup: clears observed output, checks global state, and starts the access threads. */
+static int test_init(struct kunit *test)
+{
+       unsigned long flags;
+       int nthreads;
+       int i;
+
+       spin_lock_irqsave(&observed.lock, flags);
+       for (i = 0; i < ARRAY_SIZE(observed.lines); ++i)
+               observed.lines[i][0] = '\0'; /* Reset every captured line to the empty string. */
+       observed.nlines = 0;
+       spin_unlock_irqrestore(&observed.lock, flags);
+
+       if (!torture_init_begin((char *)test->name, 1))
+               return -EBUSY;
+
+       if (!get_num_threads(test->name, &nthreads)) /* Thread count comes from the test name suffix. */
+               goto err;
+
+       if (WARN_ON(threads)) /* A non-NULL array here means a previous case was not cleaned up. */
+               goto err;
+
+       for (i = 0; i < ARRAY_SIZE(access_kernels); ++i) {
+               if (WARN_ON(access_kernels[i])) /* All kernel slots must start out empty. */
+                       goto err;
+       }
+
+       if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
+               /*
+                * Without any preemption, keep 2 CPUs free for other tasks, one
+                * of which is the main test case function checking for
+                * completion or failure.
+                */
+               const int min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0;
+               const int min_required_cpus = 2 + min_unused_cpus;
+
+               if (num_online_cpus() < min_required_cpus) {
+                       pr_err("%s: too few online CPUs (%u < %d) for test\n",
+                              test->name, num_online_cpus(), min_required_cpus);
+                       goto err;
+               } else if (nthreads > num_online_cpus() - min_unused_cpus) {
+                       nthreads = num_online_cpus() - min_unused_cpus; /* Clamp to the CPUs left for the test. */
+                       pr_warn("%s: limiting number of threads to %d\n",
+                               test->name, nthreads);
+               }
+       }
+
+       if (nthreads) {
+               threads = kcalloc(nthreads + 1, sizeof(struct task_struct *), /* One extra slot for the NULL terminator. */
+                                 GFP_KERNEL);
+               if (WARN_ON(!threads))
+                       goto err;
+
+               threads[nthreads] = NULL;
+               for (i = 0; i < nthreads; ++i) {
+                       if (torture_create_kthread(access_thread, NULL,
+                                                  threads[i]))
+                               goto err;
+               }
+       }
+
+       torture_init_end();
+
+       return 0;
+
+err: /* NOTE(review): kthreads created by earlier loop iterations are not stopped on this path; confirm. */
+       kfree(threads);
+       threads = NULL;
+       torture_init_end();
+       return -EINVAL;
+}
+
+__no_kcsan /* Per-test-case teardown: clears the access kernels, stops all access threads, frees the array. */
+static void test_exit(struct kunit *test)
+{
+       struct task_struct **stop_thread;
+       int i;
+
+       if (torture_cleanup_begin())
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(access_kernels); ++i)
+               WRITE_ONCE(access_kernels[i], NULL); /* Threads and timers skip NULL slots. */
+
+       if (threads) {
+               for (stop_thread = threads; *stop_thread; stop_thread++) /* Array is NULL-terminated by test_init(). */
+                       torture_stop_kthread(access_thread, *stop_thread);
+
+               kfree(threads);
+               threads = NULL;
+       }
+
+       torture_cleanup_end();
+}
+
+static struct kunit_suite kcsan_test_suite = {
+       .name = "kcsan-test",
+       .test_cases = kcsan_test_cases,
+       .init = test_init, /* Setup hook for each test case. */
+       .exit = test_exit, /* Teardown hook for each test case. */
+};
+static struct kunit_suite *kcsan_test_suites[] = { &kcsan_test_suite, NULL }; /* NULL-terminated list used by kcsan_test_init()/kcsan_test_exit(). */
+
+__no_kcsan /* for_each_kernel_tracepoint() callback: attaches probe_console to the tracepoint named "console". */
+static void register_tracepoints(struct tracepoint *tp, void *ignore)
+{
+       check_trace_callback_type_console(probe_console); /* Presumably a type check of the probe signature (confirm). */
+       if (!strcmp(tp->name, "console"))
+               WARN_ON(tracepoint_probe_register(tp, probe_console, NULL)); /* Warn if registration fails. */
+}
+
+__no_kcsan /* for_each_kernel_tracepoint() callback: detaches probe_console from the tracepoint named "console". */
+static void unregister_tracepoints(struct tracepoint *tp, void *ignore)
+{
+       if (!strcmp(tp->name, "console"))
+               tracepoint_probe_unregister(tp, probe_console, NULL); /* Return value is not checked. */
+}
+
+/*
+ * We only want to do tracepoints setup and teardown once, therefore we have to
+ * customize the init and exit functions and cannot rely on kunit_test_suite().
+ */
+static int __init kcsan_test_init(void)
+{
+       /*
+        * Because we want to be able to build the test as a module, we need to
+        * iterate through all known tracepoints, since the static registration
+        * won't work here.
+        */
+       for_each_kernel_tracepoint(register_tracepoints, NULL);
+       return __kunit_test_suites_init(kcsan_test_suites); /* Result of suite initialization is returned as-is. */
+}
+
+static void kcsan_test_exit(void) /* Reverse of kcsan_test_init(): suites are torn down first, then the probe. */
+{
+       __kunit_test_suites_exit(kcsan_test_suites);
+       for_each_kernel_tracepoint(unregister_tracepoints, NULL);
+       tracepoint_synchronize_unregister(); /* Called once, after all probes have been unregistered. */
+}
+
+late_initcall(kcsan_test_init);
+module_exit(kcsan_test_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Marco Elver <elver@google.com>");
index 763d6d0..2948001 100644 (file)
@@ -9,6 +9,7 @@
 #define _KERNEL_KCSAN_KCSAN_H
 
 #include <linux/kcsan.h>
+#include <linux/sched.h>
 
 /* The number of adjacent watchpoints to check. */
 #define KCSAN_CHECK_ADJACENT 1
@@ -22,6 +23,12 @@ extern unsigned int kcsan_udelay_interrupt;
  */
 extern bool kcsan_enabled;
 
+/*
+ * Save/restore IRQ flags state trace dirtied by KCSAN.
+ */
+void kcsan_save_irqtrace(struct task_struct *task);
+void kcsan_restore_irqtrace(struct task_struct *task);
+
 /*
  * Initialize debugfs file.
  */
index ac5f834..9d07e17 100644 (file)
@@ -308,6 +308,9 @@ static void print_verbose_info(struct task_struct *task)
        if (!task)
                return;
 
+       /* Restore IRQ state trace for printing. */
+       kcsan_restore_irqtrace(task);
+
        pr_err("\n");
        debug_show_held_locks(task);
        print_irqtrace_events(task);
@@ -606,10 +609,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
                goto out;
 
        /*
-        * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
-        * we do not turn off lockdep here; this could happen due to recursion
-        * into lockdep via KCSAN if we detect a race in utilities used by
-        * lockdep.
+        * Because we may generate reports when we're in scheduler code, the use
+        * of printk() could deadlock. Until such time that all printing code
+        * called in print_report() is scheduler-safe, accept the risk, and just
+        * get our message out. As such, also disable lockdep to hide the
+        * warning, and avoid disabling lockdep for the rest of the kernel.
         */
        lockdep_off();
 
diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c
new file mode 100644 (file)
index 0000000..d26a052
--- /dev/null
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/types.h>
+
+#include "encoding.h"
+
+#define ITERS_PER_TEST 2000
+
+/* Test requirements: passes iff the sum of two prandom_u32() draws is non-zero. */
+static bool test_requires(void)
+{
+       /* random should be initialized for the below tests */
+       return prandom_u32() + prandom_u32() != 0;
+}
+
+/*
+ * Test watchpoint encode and decode: check that encoding some access's info,
+ * and then subsequent decode preserves the access's info.
+ */
+static bool test_encode_decode(void)
+{
+       int i;
+
+       for (i = 0; i < ITERS_PER_TEST; ++i) { /* Each iteration uses a fresh random size, direction and address. */
+               size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1; /* + 1 keeps the size non-zero. */
+               bool is_write = !!prandom_u32_max(2);
+               unsigned long addr;
+
+               prandom_bytes(&addr, sizeof(addr));
+               if (WARN_ON(!check_encodable(addr, size)))
+                       return false;
+
+               /* Encode and decode */
+               {
+                       const long encoded_watchpoint =
+                               encode_watchpoint(addr, size, is_write);
+                       unsigned long verif_masked_addr;
+                       size_t verif_size;
+                       bool verif_is_write;
+
+                       /* Check special watchpoints */
+                       if (WARN_ON(decode_watchpoint(
+                                   INVALID_WATCHPOINT, &verif_masked_addr,
+                                   &verif_size, &verif_is_write)))
+                               return false; /* Decoding INVALID_WATCHPOINT must fail. */
+                       if (WARN_ON(decode_watchpoint(
+                                   CONSUMED_WATCHPOINT, &verif_masked_addr,
+                                   &verif_size, &verif_is_write)))
+                               return false; /* Decoding CONSUMED_WATCHPOINT must fail. */
+
+                       /* Check decoding watchpoint returns same data */
+                       if (WARN_ON(!decode_watchpoint(
+                                   encoded_watchpoint, &verif_masked_addr,
+                                   &verif_size, &verif_is_write)))
+                               return false;
+                       if (WARN_ON(verif_masked_addr !=
+                                   (addr & WATCHPOINT_ADDR_MASK))) /* Only the masked address bits are compared. */
+                               goto fail;
+                       if (WARN_ON(verif_size != size))
+                               goto fail;
+                       if (WARN_ON(is_write != verif_is_write))
+                               goto fail;
+
+                       continue;
+fail: /* Reached only after a successful decode, so the verif_* values are set. */
+                       pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n",
+                              __func__, is_write ? "write" : "read", size,
+                              addr, encoded_watchpoint,
+                              verif_is_write ? "write" : "read", verif_size,
+                              verif_masked_addr);
+                       return false;
+               }
+       }
+
+       return true;
+}
+
+/* Test access matching function; arguments look like (addr1, size1, addr2, size2) - confirm in encoding.h. */
+static bool test_matching_access(void)
+{
+       if (WARN_ON(!matching_access(10, 1, 10, 1))) /* Identical arguments: must match. */
+               return false;
+       if (WARN_ON(!matching_access(10, 2, 11, 1))) /* Must match. */
+               return false;
+       if (WARN_ON(!matching_access(10, 1, 9, 2))) /* Must match. */
+               return false;
+       if (WARN_ON(matching_access(10, 1, 11, 1))) /* Must not match. */
+               return false;
+       if (WARN_ON(matching_access(9, 1, 10, 1))) /* Must not match. */
+               return false;
+
+       /*
+        * An access of size 0 could match another access, as demonstrated here.
+        * Rather than add more comparisons to 'matching_access()', which would
+        * end up in the fast-path for *all* checks, check_access() simply
+        * returns for all accesses of size 0.
+        */
+       if (WARN_ON(!matching_access(8, 8, 12, 0)))
+               return false;
+
+       return true;
+}
+
+static int __init kcsan_selftest(void) /* Runs every selftest at postcore init; panics if any of them fails. */
+{
+       int passed = 0;
+       int total = 0;
+
+#define RUN_TEST(do_test)                                                      \
+       do {                                                                   \
+               ++total;                                                       \
+               if (do_test())                                                 \
+                       ++passed;                                              \
+               else                                                           \
+                       pr_err("KCSAN selftest: " #do_test " failed\n");       \
+       } while (0)
+
+       RUN_TEST(test_requires);
+       RUN_TEST(test_encode_decode);
+       RUN_TEST(test_matching_access);
+
+       pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total);
+       if (passed != total) /* Any failure is fatal. */
+               panic("KCSAN selftests failed");
+       return 0;
+}
+postcore_initcall(kcsan_selftest);
diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c
deleted file mode 100644 (file)
index d26a052..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/printk.h>
-#include <linux/random.h>
-#include <linux/types.h>
-
-#include "encoding.h"
-
-#define ITERS_PER_TEST 2000
-
-/* Test requirements. */
-static bool test_requires(void)
-{
-       /* random should be initialized for the below tests */
-       return prandom_u32() + prandom_u32() != 0;
-}
-
-/*
- * Test watchpoint encode and decode: check that encoding some access's info,
- * and then subsequent decode preserves the access's info.
- */
-static bool test_encode_decode(void)
-{
-       int i;
-
-       for (i = 0; i < ITERS_PER_TEST; ++i) {
-               size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1;
-               bool is_write = !!prandom_u32_max(2);
-               unsigned long addr;
-
-               prandom_bytes(&addr, sizeof(addr));
-               if (WARN_ON(!check_encodable(addr, size)))
-                       return false;
-
-               /* Encode and decode */
-               {
-                       const long encoded_watchpoint =
-                               encode_watchpoint(addr, size, is_write);
-                       unsigned long verif_masked_addr;
-                       size_t verif_size;
-                       bool verif_is_write;
-
-                       /* Check special watchpoints */
-                       if (WARN_ON(decode_watchpoint(
-                                   INVALID_WATCHPOINT, &verif_masked_addr,
-                                   &verif_size, &verif_is_write)))
-                               return false;
-                       if (WARN_ON(decode_watchpoint(
-                                   CONSUMED_WATCHPOINT, &verif_masked_addr,
-                                   &verif_size, &verif_is_write)))
-                               return false;
-
-                       /* Check decoding watchpoint returns same data */
-                       if (WARN_ON(!decode_watchpoint(
-                                   encoded_watchpoint, &verif_masked_addr,
-                                   &verif_size, &verif_is_write)))
-                               return false;
-                       if (WARN_ON(verif_masked_addr !=
-                                   (addr & WATCHPOINT_ADDR_MASK)))
-                               goto fail;
-                       if (WARN_ON(verif_size != size))
-                               goto fail;
-                       if (WARN_ON(is_write != verif_is_write))
-                               goto fail;
-
-                       continue;
-fail:
-                       pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n",
-                              __func__, is_write ? "write" : "read", size,
-                              addr, encoded_watchpoint,
-                              verif_is_write ? "write" : "read", verif_size,
-                              verif_masked_addr);
-                       return false;
-               }
-       }
-
-       return true;
-}
-
-/* Test access matching function. */
-static bool test_matching_access(void)
-{
-       if (WARN_ON(!matching_access(10, 1, 10, 1)))
-               return false;
-       if (WARN_ON(!matching_access(10, 2, 11, 1)))
-               return false;
-       if (WARN_ON(!matching_access(10, 1, 9, 2)))
-               return false;
-       if (WARN_ON(matching_access(10, 1, 11, 1)))
-               return false;
-       if (WARN_ON(matching_access(9, 1, 10, 1)))
-               return false;
-
-       /*
-        * An access of size 0 could match another access, as demonstrated here.
-        * Rather than add more comparisons to 'matching_access()', which would
-        * end up in the fast-path for *all* checks, check_access() simply
-        * returns for all accesses of size 0.
-        */
-       if (WARN_ON(!matching_access(8, 8, 12, 0)))
-               return false;
-
-       return true;
-}
-
-static int __init kcsan_selftest(void)
-{
-       int passed = 0;
-       int total = 0;
-
-#define RUN_TEST(do_test)                                                      \
-       do {                                                                   \
-               ++total;                                                       \
-               if (do_test())                                                 \
-                       ++passed;                                              \
-               else                                                           \
-                       pr_err("KCSAN selftest: " #do_test " failed");         \
-       } while (0)
-
-       RUN_TEST(test_requires);
-       RUN_TEST(test_encode_decode);
-       RUN_TEST(test_matching_access);
-
-       pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total);
-       if (passed != total)
-               panic("KCSAN selftests failed");
-       return 0;
-}
-postcore_initcall(kcsan_selftest);
index 2e97feb..e87679a 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
 #include <linux/jump_label.h>
+#include <linux/perf_event.h>
 
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
@@ -123,6 +124,7 @@ struct kprobe_insn_cache kprobe_insn_slots = {
        .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
        .alloc = alloc_insn_page,
        .free = free_insn_page,
+       .sym = KPROBE_INSN_PAGE_SYM,
        .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
        .insn_size = MAX_INSN_SIZE,
        .nr_garbage = 0,
@@ -188,6 +190,10 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
        kip->cache = c;
        list_add_rcu(&kip->list, &c->pages);
        slot = kip->insns;
+
+       /* Record the perf ksymbol register event after adding the page */
+       perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
+                          PAGE_SIZE, false, c->sym);
 out:
        mutex_unlock(&c->mutex);
        return slot;
@@ -206,6 +212,13 @@ static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
                 * next time somebody inserts a probe.
                 */
                if (!list_is_singular(&kip->list)) {
+                       /*
+                        * Record perf ksymbol unregister event before removing
+                        * the page.
+                        */
+                       perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
+                                          (unsigned long)kip->insns, PAGE_SIZE, true,
+                                          kip->cache->sym);
                        list_del_rcu(&kip->list);
                        synchronize_rcu();
                        kip->cache->free(kip->insns);
@@ -295,12 +308,34 @@ bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
        return ret;
 }
 
+int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, /* 0 on a hit, -ERANGE otherwise. */
+                            unsigned long *value, char *type, char *sym)
+{
+       struct kprobe_insn_page *kip;
+       int ret = -ERANGE;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(kip, &c->pages, list) {
+               if ((*symnum)--) /* Consumes one index per page, including the matching page (0 wraps around). */
+                       continue;
+               strlcpy(sym, c->sym, KSYM_NAME_LEN); /* Every page of a cache reports the cache-wide name. */
+               *type = 't';
+               *value = (unsigned long)kip->insns; /* Symbol value is the address of the page's insns. */
+               ret = 0;
+               break;
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
 #ifdef CONFIG_OPTPROBES
 /* For optimized_kprobe buffer */
 struct kprobe_insn_cache kprobe_optinsn_slots = {
        .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
        .alloc = alloc_insn_page,
        .free = free_insn_page,
+       .sym = KPROBE_OPTINSN_PAGE_SYM,
        .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
        /* .insn_size is initialized later */
        .nr_garbage = 0,
@@ -563,8 +598,6 @@ static void kprobe_optimizer(struct work_struct *work)
        mutex_lock(&kprobe_mutex);
        cpus_read_lock();
        mutex_lock(&text_mutex);
-       /* Lock modules while optimizing kprobes */
-       mutex_lock(&module_mutex);
 
        /*
         * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -589,7 +622,6 @@ static void kprobe_optimizer(struct work_struct *work)
        /* Step 4: Free cleaned kprobes after quiescence period */
        do_free_cleaned_kprobes();
 
-       mutex_unlock(&module_mutex);
        mutex_unlock(&text_mutex);
        cpus_read_unlock();
 
@@ -2232,6 +2264,28 @@ static void kprobe_remove_ksym_blacklist(unsigned long entry)
        kprobe_remove_area_blacklist(entry, entry + 1);
 }
 
+int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
+                                  char *type, char *sym)
+{
+       return -ERANGE;
+}
+
+int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+                      char *sym)
+{
+#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
+       if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
+               return 0;
+#ifdef CONFIG_OPTPROBES
+       if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
+               return 0;
+#endif
+#endif
+       if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
+               return 0;
+       return -ERANGE;
+}
+
 int __init __weak arch_populate_kprobe_blacklist(void)
 {
        return 0;
index 132f84a..1d9e2fd 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
 #include <linux/numa.h>
+#include <linux/sched/isolation.h>
 #include <trace/events/sched.h>
 
 
@@ -383,7 +384,8 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
                 * The kernel thread should not inherit these properties.
                 */
                sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
-               set_cpus_allowed_ptr(task, cpu_all_mask);
+               set_cpus_allowed_ptr(task,
+                                    housekeeping_cpumask(HK_FLAG_KTHREAD));
        }
        kfree(create);
        return task;
@@ -608,7 +610,7 @@ int kthreadd(void *unused)
        /* Setup a clean context for our children to inherit. */
        set_task_comm(tsk, "kthreadd");
        ignore_signals(tsk);
-       set_cpus_allowed_ptr(tsk, cpu_all_mask);
+       set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_FLAG_KTHREAD));
        set_mems_allowed(node_states[N_MEMORY]);
 
        current->flags |= PF_NOFREEZE;
index 29a8de4..f361d75 100644 (file)
@@ -395,7 +395,7 @@ void lockdep_init_task(struct task_struct *task)
 
 static __always_inline void lockdep_recursion_finish(void)
 {
-       if (WARN_ON_ONCE(--current->lockdep_recursion))
+       if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK))
                current->lockdep_recursion = 0;
 }
 
@@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_struct *curr,
        pr_warn("-----------------------------------------------------\n");
        pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
                curr->comm, task_pid_nr(curr),
-               curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
+               lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
                curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
-               curr->hardirqs_enabled,
+               lockdep_hardirqs_enabled(),
                curr->softirqs_enabled);
        print_lock(next);
 
@@ -3331,9 +3331,9 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 
        pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
                curr->comm, task_pid_nr(curr),
-               lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+               lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT,
                lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
-               lockdep_hardirqs_enabled(curr),
+               lockdep_hardirqs_enabled(),
                lockdep_softirqs_enabled(curr));
        print_lock(this);
 
@@ -3484,19 +3484,21 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
 
 void print_irqtrace_events(struct task_struct *curr)
 {
-       printk("irq event stamp: %u\n", curr->irq_events);
+       const struct irqtrace_events *trace = &curr->irqtrace;
+
+       printk("irq event stamp: %u\n", trace->irq_events);
        printk("hardirqs last  enabled at (%u): [<%px>] %pS\n",
-               curr->hardirq_enable_event, (void *)curr->hardirq_enable_ip,
-               (void *)curr->hardirq_enable_ip);
+               trace->hardirq_enable_event, (void *)trace->hardirq_enable_ip,
+               (void *)trace->hardirq_enable_ip);
        printk("hardirqs last disabled at (%u): [<%px>] %pS\n",
-               curr->hardirq_disable_event, (void *)curr->hardirq_disable_ip,
-               (void *)curr->hardirq_disable_ip);
+               trace->hardirq_disable_event, (void *)trace->hardirq_disable_ip,
+               (void *)trace->hardirq_disable_ip);
        printk("softirqs last  enabled at (%u): [<%px>] %pS\n",
-               curr->softirq_enable_event, (void *)curr->softirq_enable_ip,
-               (void *)curr->softirq_enable_ip);
+               trace->softirq_enable_event, (void *)trace->softirq_enable_ip,
+               (void *)trace->softirq_enable_ip);
        printk("softirqs last disabled at (%u): [<%px>] %pS\n",
-               curr->softirq_disable_event, (void *)curr->softirq_disable_ip,
-               (void *)curr->softirq_disable_ip);
+               trace->softirq_disable_event, (void *)trace->softirq_disable_ip,
+               (void *)trace->softirq_disable_ip);
 }
 
 static int HARDIRQ_verbose(struct lock_class *class)
@@ -3646,10 +3648,19 @@ static void __trace_hardirqs_on_caller(void)
  */
 void lockdep_hardirqs_on_prepare(unsigned long ip)
 {
-       if (unlikely(!debug_locks || current->lockdep_recursion))
+       if (unlikely(!debug_locks))
+               return;
+
+       /*
+        * NMIs do not (and cannot) track lock dependencies, nothing to do.
+        */
+       if (unlikely(in_nmi()))
+               return;
+
+       if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
                return;
 
-       if (unlikely(current->hardirqs_enabled)) {
+       if (unlikely(lockdep_hardirqs_enabled())) {
                /*
                 * Neither irq nor preemption are disabled here
                 * so this is racy by nature but losing one hit
@@ -3677,7 +3688,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
         * Can't allow enabling interrupts while in an interrupt handler,
         * that's general bad form and such. Recursion, limited stack etc..
         */
-       if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+       if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context()))
                return;
 
        current->hardirq_chain_key = current->curr_chain_key;
@@ -3690,12 +3701,35 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_on_prepare);
 
 void noinstr lockdep_hardirqs_on(unsigned long ip)
 {
-       struct task_struct *curr = current;
+       struct irqtrace_events *trace = &current->irqtrace;
+
+       if (unlikely(!debug_locks))
+               return;
+
+       /*
+        * NMIs can happen in the middle of local_irq_{en,dis}able() where the
+        * tracking state and hardware state are out of sync.
+        *
+        * NMIs must save lockdep_hardirqs_enabled() to restore IRQ state from,
+        * and not rely on hardware state like normal interrupts.
+        */
+       if (unlikely(in_nmi())) {
+               if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI))
+                       return;
+
+               /*
+                * Skip:
+                *  - recursion check, because NMI can hit lockdep;
+                *  - hardware state check, because above;
+                *  - chain_key check, see lockdep_hardirqs_on_prepare().
+                */
+               goto skip_checks;
+       }
 
-       if (unlikely(!debug_locks || curr->lockdep_recursion))
+       if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
                return;
 
-       if (curr->hardirqs_enabled) {
+       if (lockdep_hardirqs_enabled()) {
                /*
                 * Neither irq nor preemption are disabled here
                 * so this is racy by nature but losing one hit
@@ -3720,10 +3754,11 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
        DEBUG_LOCKS_WARN_ON(current->hardirq_chain_key !=
                            current->curr_chain_key);
 
+skip_checks:
        /* we'll do an OFF -> ON transition: */
-       curr->hardirqs_enabled = 1;
-       curr->hardirq_enable_ip = ip;
-       curr->hardirq_enable_event = ++curr->irq_events;
+       this_cpu_write(hardirqs_enabled, 1);
+       trace->hardirq_enable_ip = ip;
+       trace->hardirq_enable_event = ++trace->irq_events;
        debug_atomic_inc(hardirqs_on_events);
 }
 EXPORT_SYMBOL_GPL(lockdep_hardirqs_on);
@@ -3733,9 +3768,18 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_on);
  */
 void noinstr lockdep_hardirqs_off(unsigned long ip)
 {
-       struct task_struct *curr = current;
+       if (unlikely(!debug_locks))
+               return;
 
-       if (unlikely(!debug_locks || curr->lockdep_recursion))
+       /*
+        * Matching lockdep_hardirqs_on(), allow NMIs in the middle of lockdep;
+        * they will restore the software state. This ensures the software
+        * state is consistent inside NMIs as well.
+        */
+       if (in_nmi()) {
+               if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI))
+                       return;
+       } else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)
                return;
 
        /*
@@ -3745,13 +3789,15 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return;
 
-       if (curr->hardirqs_enabled) {
+       if (lockdep_hardirqs_enabled()) {
+               struct irqtrace_events *trace = &current->irqtrace;
+
                /*
                 * We have done an ON -> OFF transition:
                 */
-               curr->hardirqs_enabled = 0;
-               curr->hardirq_disable_ip = ip;
-               curr->hardirq_disable_event = ++curr->irq_events;
+               this_cpu_write(hardirqs_enabled, 0);
+               trace->hardirq_disable_ip = ip;
+               trace->hardirq_disable_event = ++trace->irq_events;
                debug_atomic_inc(hardirqs_off_events);
        } else {
                debug_atomic_inc(redundant_hardirqs_off);
@@ -3764,7 +3810,7 @@ EXPORT_SYMBOL_GPL(lockdep_hardirqs_off);
  */
 void lockdep_softirqs_on(unsigned long ip)
 {
-       struct task_struct *curr = current;
+       struct irqtrace_events *trace = &current->irqtrace;
 
        if (unlikely(!debug_locks || current->lockdep_recursion))
                return;
@@ -3776,7 +3822,7 @@ void lockdep_softirqs_on(unsigned long ip)
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return;
 
-       if (curr->softirqs_enabled) {
+       if (current->softirqs_enabled) {
                debug_atomic_inc(redundant_softirqs_on);
                return;
        }
@@ -3785,17 +3831,17 @@ void lockdep_softirqs_on(unsigned long ip)
        /*
         * We'll do an OFF -> ON transition:
         */
-       curr->softirqs_enabled = 1;
-       curr->softirq_enable_ip = ip;
-       curr->softirq_enable_event = ++curr->irq_events;
+       current->softirqs_enabled = 1;
+       trace->softirq_enable_ip = ip;
+       trace->softirq_enable_event = ++trace->irq_events;
        debug_atomic_inc(softirqs_on_events);
        /*
         * We are going to turn softirqs on, so set the
         * usage bit for all held locks, if hardirqs are
         * enabled too:
         */
-       if (curr->hardirqs_enabled)
-               mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
+       if (lockdep_hardirqs_enabled())
+               mark_held_locks(current, LOCK_ENABLED_SOFTIRQ);
        lockdep_recursion_finish();
 }
 
@@ -3804,8 +3850,6 @@ void lockdep_softirqs_on(unsigned long ip)
  */
 void lockdep_softirqs_off(unsigned long ip)
 {
-       struct task_struct *curr = current;
-
        if (unlikely(!debug_locks || current->lockdep_recursion))
                return;
 
@@ -3815,13 +3859,15 @@ void lockdep_softirqs_off(unsigned long ip)
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return;
 
-       if (curr->softirqs_enabled) {
+       if (current->softirqs_enabled) {
+               struct irqtrace_events *trace = &current->irqtrace;
+
                /*
                 * We have done an ON -> OFF transition:
                 */
-               curr->softirqs_enabled = 0;
-               curr->softirq_disable_ip = ip;
-               curr->softirq_disable_event = ++curr->irq_events;
+               current->softirqs_enabled = 0;
+               trace->softirq_disable_ip = ip;
+               trace->softirq_disable_event = ++trace->irq_events;
                debug_atomic_inc(softirqs_off_events);
                /*
                 * Whoops, we wanted softirqs off, so why aren't they?
@@ -3843,7 +3889,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
         */
        if (!hlock->trylock) {
                if (hlock->read) {
-                       if (curr->hardirq_context)
+                       if (lockdep_hardirq_context())
                                if (!mark_lock(curr, hlock,
                                                LOCK_USED_IN_HARDIRQ_READ))
                                        return 0;
@@ -3852,7 +3898,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
                                                LOCK_USED_IN_SOFTIRQ_READ))
                                        return 0;
                } else {
-                       if (curr->hardirq_context)
+                       if (lockdep_hardirq_context())
                                if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ))
                                        return 0;
                        if (curr->softirq_context)
@@ -3890,7 +3936,7 @@ lock_used:
 
 static inline unsigned int task_irq_context(struct task_struct *task)
 {
-       return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context +
+       return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context() +
               LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context;
 }
 
@@ -3983,7 +4029,7 @@ static inline short task_wait_context(struct task_struct *curr)
         * Set appropriate wait type for the context; for IRQs we have to take
         * into account force_irqthread as that is implied by PREEMPT_RT.
         */
-       if (curr->hardirq_context) {
+       if (lockdep_hardirq_context()) {
                /*
                 * Check if force_irqthreads will run us threaded.
                 */
@@ -4826,11 +4872,11 @@ static void check_flags(unsigned long flags)
                return;
 
        if (irqs_disabled_flags(flags)) {
-               if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) {
+               if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())) {
                        printk("possible reason: unannotated irqs-off.\n");
                }
        } else {
-               if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) {
+               if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled())) {
                        printk("possible reason: unannotated irqs-on.\n");
                }
        }
@@ -5851,9 +5897,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
        pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
               !rcu_lockdep_current_cpu_online()
                        ? "RCU used illegally from offline CPU!\n"
-                       : !rcu_is_watching()
-                               ? "RCU used illegally from idle CPU!\n"
-                               : "",
+                       : "",
               rcu_scheduler_active, debug_locks);
 
        /*
index 5efbfc6..8ff6f50 100644 (file)
@@ -631,13 +631,13 @@ static int lock_torture_writer(void *arg)
                cxt.cur_ops->writelock();
                if (WARN_ON_ONCE(lock_is_write_held))
                        lwsp->n_lock_fail++;
-               lock_is_write_held = 1;
+               lock_is_write_held = true;
                if (WARN_ON_ONCE(lock_is_read_held))
                        lwsp->n_lock_fail++; /* rare, but... */
 
                lwsp->n_lock_acquired++;
                cxt.cur_ops->write_delay(&rand);
-               lock_is_write_held = 0;
+               lock_is_write_held = false;
                cxt.cur_ops->writeunlock();
 
                stutter_wait("lock_torture_writer");
@@ -665,13 +665,13 @@ static int lock_torture_reader(void *arg)
                        schedule_timeout_uninterruptible(1);
 
                cxt.cur_ops->readlock();
-               lock_is_read_held = 1;
+               lock_is_read_held = true;
                if (WARN_ON_ONCE(lock_is_write_held))
                        lrsp->n_lock_fail++; /* rare, but... */
 
                lrsp->n_lock_acquired++;
                cxt.cur_ops->read_delay(&rand);
-               lock_is_read_held = 0;
+               lock_is_read_held = false;
                cxt.cur_ops->readunlock();
 
                stutter_wait("lock_torture_reader");
@@ -686,7 +686,7 @@ static int lock_torture_reader(void *arg)
 static void __torture_print_stats(char *page,
                                  struct lock_stress_stats *statp, bool write)
 {
-       bool fail = 0;
+       bool fail = false;
        int i, n_stress;
        long max = 0, min = statp ? statp[0].n_lock_acquired : 0;
        long long sum = 0;
@@ -904,7 +904,7 @@ static int __init lock_torture_init(void)
 
        /* Initialize the statistics so that each run gets its own numbers. */
        if (nwriters_stress) {
-               lock_is_write_held = 0;
+               lock_is_write_held = false;
                cxt.lwsa = kmalloc_array(cxt.nrealwriters_stress,
                                         sizeof(*cxt.lwsa),
                                         GFP_KERNEL);
@@ -935,7 +935,7 @@ static int __init lock_torture_init(void)
                }
 
                if (nreaders_stress) {
-                       lock_is_read_held = 0;
+                       lock_is_read_held = false;
                        cxt.lrsa = kmalloc_array(cxt.nrealreaders_stress,
                                                 sizeof(*cxt.lrsa),
                                                 GFP_KERNEL);
index 1f77349..1de006e 100644 (file)
@@ -154,7 +154,11 @@ bool osq_lock(struct optimistic_spin_queue *lock)
         */
 
        for (;;) {
-               if (prev->next == node &&
+               /*
+                * cpu_relax() below implies a compiler barrier which would
+                * prevent this comparison being optimized away.
+                */
+               if (data_race(prev->next) == node &&
                    cmpxchg(&prev->next, node, NULL) == node)
                        break;
 
index 4373f7a..16cb894 100644 (file)
@@ -250,13 +250,11 @@ EXPORT_SYMBOL(padata_do_parallel);
 static struct padata_priv *padata_find_next(struct parallel_data *pd,
                                            bool remove_object)
 {
-       struct padata_parallel_queue *next_queue;
        struct padata_priv *padata;
        struct padata_list *reorder;
        int cpu = pd->cpu;
 
-       next_queue = per_cpu_ptr(pd->pqueue, cpu);
-       reorder = &next_queue->reorder;
+       reorder = per_cpu_ptr(pd->reorder_list, cpu);
 
        spin_lock(&reorder->lock);
        if (list_empty(&reorder->list)) {
@@ -291,7 +289,7 @@ static void padata_reorder(struct parallel_data *pd)
        int cb_cpu;
        struct padata_priv *padata;
        struct padata_serial_queue *squeue;
-       struct padata_parallel_queue *next_queue;
+       struct padata_list *reorder;
 
        /*
         * We need to ensure that only one cpu can work on dequeueing of
@@ -339,9 +337,8 @@ static void padata_reorder(struct parallel_data *pd)
         */
        smp_mb();
 
-       next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
-       if (!list_empty(&next_queue->reorder.list) &&
-           padata_find_next(pd, false))
+       reorder = per_cpu_ptr(pd->reorder_list, pd->cpu);
+       if (!list_empty(&reorder->list) && padata_find_next(pd, false))
                queue_work(pinst->serial_wq, &pd->reorder_work);
 }
 
@@ -401,17 +398,16 @@ void padata_do_serial(struct padata_priv *padata)
 {
        struct parallel_data *pd = padata->pd;
        int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
-       struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
-                                                          hashed_cpu);
+       struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
        struct padata_priv *cur;
 
-       spin_lock(&pqueue->reorder.lock);
+       spin_lock(&reorder->lock);
        /* Sort in ascending order of sequence number. */
-       list_for_each_entry_reverse(cur, &pqueue->reorder.list, list)
+       list_for_each_entry_reverse(cur, &reorder->list, list)
                if (cur->seq_nr < padata->seq_nr)
                        break;
        list_add(&padata->list, &cur->list);
-       spin_unlock(&pqueue->reorder.lock);
+       spin_unlock(&reorder->lock);
 
        /*
         * Ensure the addition to the reorder list is ordered correctly
@@ -441,28 +437,6 @@ static int padata_setup_cpumasks(struct padata_instance *pinst)
        return err;
 }
 
-static int pd_setup_cpumasks(struct parallel_data *pd,
-                            const struct cpumask *pcpumask,
-                            const struct cpumask *cbcpumask)
-{
-       int err = -ENOMEM;
-
-       if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
-               goto out;
-       if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
-               goto free_pcpu_mask;
-
-       cpumask_copy(pd->cpumask.pcpu, pcpumask);
-       cpumask_copy(pd->cpumask.cbcpu, cbcpumask);
-
-       return 0;
-
-free_pcpu_mask:
-       free_cpumask_var(pd->cpumask.pcpu);
-out:
-       return err;
-}
-
 static void __init padata_mt_helper(struct work_struct *w)
 {
        struct padata_work *pw = container_of(w, struct padata_work, pw_work);
@@ -575,17 +549,15 @@ static void padata_init_squeues(struct parallel_data *pd)
        }
 }
 
-/* Initialize all percpu queues used by parallel workers */
-static void padata_init_pqueues(struct parallel_data *pd)
+/* Initialize per-CPU reorder lists */
+static void padata_init_reorder_list(struct parallel_data *pd)
 {
        int cpu;
-       struct padata_parallel_queue *pqueue;
+       struct padata_list *list;
 
        for_each_cpu(cpu, pd->cpumask.pcpu) {
-               pqueue = per_cpu_ptr(pd->pqueue, cpu);
-
-               __padata_list_init(&pqueue->reorder);
-               atomic_set(&pqueue->num_obj, 0);
+               list = per_cpu_ptr(pd->reorder_list, cpu);
+               __padata_list_init(list);
        }
 }
 
@@ -593,30 +565,31 @@ static void padata_init_pqueues(struct parallel_data *pd)
 static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
 {
        struct padata_instance *pinst = ps->pinst;
-       const struct cpumask *cbcpumask;
-       const struct cpumask *pcpumask;
        struct parallel_data *pd;
 
-       cbcpumask = pinst->rcpumask.cbcpu;
-       pcpumask = pinst->rcpumask.pcpu;
-
        pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
        if (!pd)
                goto err;
 
-       pd->pqueue = alloc_percpu(struct padata_parallel_queue);
-       if (!pd->pqueue)
+       pd->reorder_list = alloc_percpu(struct padata_list);
+       if (!pd->reorder_list)
                goto err_free_pd;
 
        pd->squeue = alloc_percpu(struct padata_serial_queue);
        if (!pd->squeue)
-               goto err_free_pqueue;
+               goto err_free_reorder_list;
 
        pd->ps = ps;
-       if (pd_setup_cpumasks(pd, pcpumask, cbcpumask))
+
+       if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
                goto err_free_squeue;
+       if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
+               goto err_free_pcpu;
+
+       cpumask_and(pd->cpumask.pcpu, pinst->cpumask.pcpu, cpu_online_mask);
+       cpumask_and(pd->cpumask.cbcpu, pinst->cpumask.cbcpu, cpu_online_mask);
 
-       padata_init_pqueues(pd);
+       padata_init_reorder_list(pd);
        padata_init_squeues(pd);
        pd->seq_nr = -1;
        atomic_set(&pd->refcnt, 1);
@@ -626,10 +599,12 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
 
        return pd;
 
+err_free_pcpu:
+       free_cpumask_var(pd->cpumask.pcpu);
 err_free_squeue:
        free_percpu(pd->squeue);
-err_free_pqueue:
-       free_percpu(pd->pqueue);
+err_free_reorder_list:
+       free_percpu(pd->reorder_list);
 err_free_pd:
        kfree(pd);
 err:
@@ -640,7 +615,7 @@ static void padata_free_pd(struct parallel_data *pd)
 {
        free_cpumask_var(pd->cpumask.pcpu);
        free_cpumask_var(pd->cpumask.cbcpu);
-       free_percpu(pd->pqueue);
+       free_percpu(pd->reorder_list);
        free_percpu(pd->squeue);
        kfree(pd);
 }
@@ -682,12 +657,6 @@ static int padata_replace(struct padata_instance *pinst)
 
        pinst->flags |= PADATA_RESET;
 
-       cpumask_and(pinst->rcpumask.pcpu, pinst->cpumask.pcpu,
-                   cpu_online_mask);
-
-       cpumask_and(pinst->rcpumask.cbcpu, pinst->cpumask.cbcpu,
-                   cpu_online_mask);
-
        list_for_each_entry(ps, &pinst->pslist, list) {
                err = padata_replace_one(ps);
                if (err)
@@ -789,43 +758,6 @@ out:
 }
 EXPORT_SYMBOL(padata_set_cpumask);
 
-/**
- * padata_start - start the parallel processing
- *
- * @pinst: padata instance to start
- *
- * Return: 0 on success or negative error code
- */
-int padata_start(struct padata_instance *pinst)
-{
-       int err = 0;
-
-       mutex_lock(&pinst->lock);
-
-       if (pinst->flags & PADATA_INVALID)
-               err = -EINVAL;
-
-       __padata_start(pinst);
-
-       mutex_unlock(&pinst->lock);
-
-       return err;
-}
-EXPORT_SYMBOL(padata_start);
-
-/**
- * padata_stop - stop the parallel processing
- *
- * @pinst: padata instance to stop
- */
-void padata_stop(struct padata_instance *pinst)
-{
-       mutex_lock(&pinst->lock);
-       __padata_stop(pinst);
-       mutex_unlock(&pinst->lock);
-}
-EXPORT_SYMBOL(padata_stop);
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
@@ -907,9 +839,6 @@ static void __padata_free(struct padata_instance *pinst)
 
        WARN_ON(!list_empty(&pinst->pslist));
 
-       padata_stop(pinst);
-       free_cpumask_var(pinst->rcpumask.cbcpu);
-       free_cpumask_var(pinst->rcpumask.pcpu);
        free_cpumask_var(pinst->cpumask.pcpu);
        free_cpumask_var(pinst->cpumask.cbcpu);
        destroy_workqueue(pinst->serial_wq);
@@ -1044,18 +973,12 @@ static struct kobj_type padata_attr_type = {
 };
 
 /**
- * padata_alloc - allocate and initialize a padata instance and specify
- *                cpumasks for serial and parallel workers.
- *
+ * padata_alloc - allocate and initialize a padata instance
  * @name: used to identify the instance
- * @pcpumask: cpumask that will be used for padata parallelization
- * @cbcpumask: cpumask that will be used for padata serialization
  *
  * Return: new instance on success, NULL on error
  */
-static struct padata_instance *padata_alloc(const char *name,
-                                           const struct cpumask *pcpumask,
-                                           const struct cpumask *cbcpumask)
+struct padata_instance *padata_alloc(const char *name)
 {
        struct padata_instance *pinst;
 
@@ -1081,26 +1004,16 @@ static struct padata_instance *padata_alloc(const char *name,
                free_cpumask_var(pinst->cpumask.pcpu);
                goto err_free_serial_wq;
        }
-       if (!padata_validate_cpumask(pinst, pcpumask) ||
-           !padata_validate_cpumask(pinst, cbcpumask))
-               goto err_free_masks;
-
-       if (!alloc_cpumask_var(&pinst->rcpumask.pcpu, GFP_KERNEL))
-               goto err_free_masks;
-       if (!alloc_cpumask_var(&pinst->rcpumask.cbcpu, GFP_KERNEL))
-               goto err_free_rcpumask_pcpu;
 
        INIT_LIST_HEAD(&pinst->pslist);
 
-       cpumask_copy(pinst->cpumask.pcpu, pcpumask);
-       cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
-       cpumask_and(pinst->rcpumask.pcpu, pcpumask, cpu_online_mask);
-       cpumask_and(pinst->rcpumask.cbcpu, cbcpumask, cpu_online_mask);
+       cpumask_copy(pinst->cpumask.pcpu, cpu_possible_mask);
+       cpumask_copy(pinst->cpumask.cbcpu, cpu_possible_mask);
 
        if (padata_setup_cpumasks(pinst))
-               goto err_free_rcpumask_cbcpu;
+               goto err_free_masks;
 
-       pinst->flags = 0;
+       __padata_start(pinst);
 
        kobject_init(&pinst->kobj, &padata_attr_type);
        mutex_init(&pinst->lock);
@@ -1116,10 +1029,6 @@ static struct padata_instance *padata_alloc(const char *name,
 
        return pinst;
 
-err_free_rcpumask_cbcpu:
-       free_cpumask_var(pinst->rcpumask.cbcpu);
-err_free_rcpumask_pcpu:
-       free_cpumask_var(pinst->rcpumask.pcpu);
 err_free_masks:
        free_cpumask_var(pinst->cpumask.pcpu);
        free_cpumask_var(pinst->cpumask.cbcpu);
@@ -1133,21 +1042,7 @@ err_free_inst:
 err:
        return NULL;
 }
-
-/**
- * padata_alloc_possible - Allocate and initialize padata instance.
- *                         Use the cpu_possible_mask for serial and
- *                         parallel workers.
- *
- * @name: used to identify the instance
- *
- * Return: new instance on success, NULL on error
- */
-struct padata_instance *padata_alloc_possible(const char *name)
-{
-       return padata_alloc(name, cpu_possible_mask, cpu_possible_mask);
-}
-EXPORT_SYMBOL(padata_alloc_possible);
+EXPORT_SYMBOL(padata_alloc);
 
 /**
  * padata_free - free a padata instance
index 452feae..3cf6132 100644 (file)
@@ -61,6 +61,25 @@ config RCU_TORTURE_TEST
          Say M if you want the RCU torture tests to build as a module.
          Say N if you are unsure.
 
+config RCU_REF_SCALE_TEST
+       tristate "Scalability tests for read-side synchronization (RCU and others)"
+       depends on DEBUG_KERNEL
+       select TORTURE_TEST
+       select SRCU
+       select TASKS_RCU
+       select TASKS_RUDE_RCU
+       select TASKS_TRACE_RCU
+       default n
+       help
+         This option provides a kernel module that runs performance tests
+         useful for comparing RCU with various read-side synchronization mechanisms.
+         The kernel module may be built after the fact on the running kernel to be
+         tested, if desired.
+
+         Say Y here if you want these performance tests built into the kernel.
+         Say M if you want to build it as a module instead.
+         Say N if you are unsure.
+
 config RCU_CPU_STALL_TIMEOUT
        int "RCU CPU stall timeout in seconds"
        depends on RCU_STALL_COMMON
index f91f2c2..95f5117 100644 (file)
@@ -12,6 +12,7 @@ obj-$(CONFIG_TREE_SRCU) += srcutree.o
 obj-$(CONFIG_TINY_SRCU) += srcutiny.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
+obj-$(CONFIG_RCU_REF_SCALE_TEST) += refscale.o
 obj-$(CONFIG_TREE_RCU) += tree.o
 obj-$(CONFIG_TINY_RCU) += tiny.o
 obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o
index 9eb39c2..ec903d7 100644 (file)
@@ -69,6 +69,11 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
  *     value specified by nr_cpus for a read-only test.
  *
  * Various other use cases may of course be specified.
+ *
+ * Note that this test's readers are intended only as a test load for
+ * the writers.  The reader performance statistics will be overly
+ * pessimistic due to the per-critical-section interrupt disabling,
+ * test-end checks, and the pair of calls through pointers.
  */
 
 #ifdef MODULE
@@ -309,8 +314,10 @@ static void rcu_perf_wait_shutdown(void)
 }
 
 /*
- * RCU perf reader kthread.  Repeatedly does empty RCU read-side
- * critical section, minimizing update-side interference.
+ * RCU perf reader kthread.  Repeatedly does empty RCU read-side critical
+ * section, minimizing update-side interference.  However, the point of
+ * this test is not to evaluate reader performance, but instead to serve
+ * as a test load for update-side performance testing.
  */
 static int
 rcu_perf_reader(void *arg)
@@ -576,11 +583,8 @@ static int compute_real(int n)
 static int
 rcu_perf_shutdown(void *arg)
 {
-       do {
-               wait_event(shutdown_wq,
-                          atomic_read(&n_rcu_perf_writer_finished) >=
-                          nrealwriters);
-       } while (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters);
+       wait_event(shutdown_wq,
+                  atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters);
        smp_mb(); /* Wake before output. */
        rcu_perf_cleanup();
        kernel_power_off();
@@ -693,11 +697,8 @@ kfree_perf_cleanup(void)
 static int
 kfree_perf_shutdown(void *arg)
 {
-       do {
-               wait_event(shutdown_wq,
-                          atomic_read(&n_kfree_perf_thread_ended) >=
-                          kfree_nrealthreads);
-       } while (atomic_read(&n_kfree_perf_thread_ended) < kfree_nrealthreads);
+       wait_event(shutdown_wq,
+                  atomic_read(&n_kfree_perf_thread_ended) >= kfree_nrealthreads);
 
        smp_mb(); /* Wake before output. */
 
index efb792e..d0d2653 100644 (file)
@@ -7,7 +7,7 @@
  * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
  *       Josh Triplett <josh@joshtriplett.org>
  *
- * See also:  Documentation/RCU/torture.txt
+ * See also:  Documentation/RCU/torture.rst
  */
 
 #define pr_fmt(fmt) fmt
@@ -109,6 +109,10 @@ torture_param(int, object_debug, 0,
 torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
 torture_param(int, onoff_interval, 0,
             "Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(int, read_exit_delay, 13,
+             "Delay between read-then-exit episodes (s)");
+torture_param(int, read_exit_burst, 16,
+             "# of read-then-exit bursts per episode, zero to disable");
 torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles");
 torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
 torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
@@ -146,6 +150,7 @@ static struct task_struct *stall_task;
 static struct task_struct *fwd_prog_task;
 static struct task_struct **barrier_cbs_tasks;
 static struct task_struct *barrier_task;
+static struct task_struct *read_exit_task;
 
 #define RCU_TORTURE_PIPE_LEN 10
 
@@ -177,6 +182,7 @@ static long n_rcu_torture_boosts;
 static atomic_long_t n_rcu_torture_timers;
 static long n_barrier_attempts;
 static long n_barrier_successes; /* did rcu_barrier test succeed? */
+static unsigned long n_read_exits;
 static struct list_head rcu_torture_removed;
 static unsigned long shutdown_jiffies;
 
@@ -1166,6 +1172,7 @@ rcu_torture_writer(void *arg)
                                        WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
                                }
        } while (!torture_must_stop());
+       rcu_torture_current = NULL;  // Let stats task know that we are done.
        /* Reset expediting back to unexpedited. */
        if (expediting > 0)
                expediting = -expediting;
@@ -1370,6 +1377,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
        struct rt_read_seg *rtrsp1;
        unsigned long long ts;
 
+       WARN_ON_ONCE(!rcu_is_watching());
        newstate = rcutorture_extend_mask(readstate, trsp);
        rcutorture_one_extend(&readstate, newstate, trsp, rtrsp++);
        started = cur_ops->get_gp_seq();
@@ -1539,10 +1547,11 @@ rcu_torture_stats_print(void)
                n_rcu_torture_boosts,
                atomic_long_read(&n_rcu_torture_timers));
        torture_onoff_stats();
-       pr_cont("barrier: %ld/%ld:%ld\n",
+       pr_cont("barrier: %ld/%ld:%ld ",
                data_race(n_barrier_successes),
                data_race(n_barrier_attempts),
                data_race(n_rcu_torture_barrier_error));
+       pr_cont("read-exits: %ld\n", data_race(n_read_exits));
 
        pr_alert("%s%s ", torture_type, TORTURE_FLAG);
        if (atomic_read(&n_rcu_torture_mberror) ||
@@ -1634,7 +1643,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
                 "stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d "
                 "stall_cpu_block=%d "
                 "n_barrier_cbs=%d "
-                "onoff_interval=%d onoff_holdoff=%d\n",
+                "onoff_interval=%d onoff_holdoff=%d "
+                "read_exit_delay=%d read_exit_burst=%d\n",
                 torture_type, tag, nrealreaders, nfakewriters,
                 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
                 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
@@ -1643,7 +1653,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
                 stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff,
                 stall_cpu_block,
                 n_barrier_cbs,
-                onoff_interval, onoff_holdoff);
+                onoff_interval, onoff_holdoff,
+                read_exit_delay, read_exit_burst);
 }
 
 static int rcutorture_booster_cleanup(unsigned int cpu)
@@ -2175,7 +2186,7 @@ static void rcu_torture_barrier1cb(void *rcu_void)
 static int rcu_torture_barrier_cbs(void *arg)
 {
        long myid = (long)arg;
-       bool lastphase = 0;
+       bool lastphase = false;
        bool newphase;
        struct rcu_head rcu;
 
@@ -2338,6 +2349,99 @@ static bool rcu_torture_can_boost(void)
        return true;
 }
 
+static bool read_exit_child_stop;
+static bool read_exit_child_stopped;
+static wait_queue_head_t read_exit_wq;
+
+// Child kthread which just does an rcutorture reader and exits.
+//
+// @trsp_in: pointer to the torture_random_state handed over by the
+//           creator, used for the single rcu_torture_one_read() call.
+//
+// Always returns 0.
+static int rcu_torture_read_exit_child(void *trsp_in)
+{
+       struct torture_random_state *trsp = trsp_in;
+
+       set_user_nice(current, MAX_NICE);
+       // Minimize time between reading and exiting.
+       // Sleep one jiffy at a time until kthread_stop() has been requested,
+       // so that the read below is the last thing done before returning.
+       while (!kthread_should_stop())
+               schedule_timeout_uninterruptible(1);
+       (void)rcu_torture_one_read(trsp);
+       return 0;
+}
+
+// Parent kthread which creates and destroys read-exit child kthreads.
+//
+// Each loop pass spawns one rcu_torture_read_exit_child() kthread and
+// immediately kthread_stop()s it.  After read_exit_burst children the
+// episode ends and the loop pauses for up to read_exit_delay seconds.
+// The loop terminates when read_exit_child_stop is set or when a child
+// cannot be created.  Always returns 0.
+static int rcu_torture_read_exit(void *unused)
+{
+       int count = 0;
+       bool errexit = false;
+       int i;
+       struct task_struct *tsp;
+       DEFINE_TORTURE_RANDOM(trs);
+
+       // Allocate and initialize.
+       set_user_nice(current, MAX_NICE);
+       VERBOSE_TOROUT_STRING("rcu_torture_read_exit: Start of test");
+
+       // Each pass through this loop does one read-exit episode.
+       do {
+               if (++count > read_exit_burst) {
+                       VERBOSE_TOROUT_STRING("rcu_torture_read_exit: End of episode");
+                       rcu_barrier(); // Wait for task_struct free, avoid OOM.
+                       // Sleep in one-second (HZ jiffies) steps so that a
+                       // stop request is noticed within about a second.
+                       for (i = 0; i < read_exit_delay; i++) {
+                               schedule_timeout_uninterruptible(HZ);
+                               if (READ_ONCE(read_exit_child_stop))
+                                       break;
+                       }
+                       if (!READ_ONCE(read_exit_child_stop))
+                               VERBOSE_TOROUT_STRING("rcu_torture_read_exit: Start of episode");
+                       count = 0;
+               }
+               if (READ_ONCE(read_exit_child_stop))
+                       break;
+               // Spawn child.
+               // The child is handed this thread's own random state (&trs).
+               tsp = kthread_run(rcu_torture_read_exit_child,
+                                    &trs, "%s",
+                                    "rcu_torture_read_exit_child");
+               if (IS_ERR(tsp)) {
+                       // Any kthread_run() failure is reported as "out of memory".
+                       VERBOSE_TOROUT_ERRSTRING("out of memory");
+                       errexit = true;
+                       tsp = NULL;
+                       break;
+               }
+               cond_resched();
+               kthread_stop(tsp);
+               n_read_exits ++;
+               stutter_wait("rcu_torture_read_exit");
+       } while (!errexit && !READ_ONCE(read_exit_child_stop));
+
+       // Clean up and exit.
+       // Tell rcu_torture_read_exit_cleanup(), which waits on read_exit_wq,
+       // that no further children will be created.
+       smp_store_release(&read_exit_child_stopped, true); // After reaping.
+       smp_mb(); // Store before wakeup.
+       wake_up(&read_exit_wq);
+       // Stay around until the torture framework asks this kthread to stop.
+       while (!torture_must_stop())
+               schedule_timeout_uninterruptible(1);
+       torture_kthread_stopping("rcu_torture_read_exit");
+       return 0;
+}
+
+// Set up and start the read-exit parent kthread.
+//
+// A read_exit_burst of zero (or less) disables this test, as stated in the
+// module-parameter description.  Disabling is not an error, so return 0
+// without creating the kthread; rcu_torture_read_exit_cleanup() copes with
+// that because it returns early when read_exit_task is NULL.
+//
+// Returns 0 on success or when disabled, otherwise the error returned by
+// torture_create_kthread().
+static int rcu_torture_read_exit_init(void)
+{
+       if (read_exit_burst <= 0)
+               return 0;
+       init_waitqueue_head(&read_exit_wq);
+       read_exit_child_stop = false;
+       read_exit_child_stopped = false;
+       return torture_create_kthread(rcu_torture_read_exit, NULL,
+                                     read_exit_task);
+}
+
+// Stop the read-exit parent kthread, if one was started.
+//
+// Sets read_exit_child_stop, waits until the parent reports via
+// read_exit_child_stopped that it has left its child-spawning loop, and
+// only then stops the parent kthread itself.
+static void rcu_torture_read_exit_cleanup(void)
+{
+       // Nothing to do if the kthread was never created.
+       if (!read_exit_task)
+               return;
+       WRITE_ONCE(read_exit_child_stop, true);
+       smp_mb(); // Above write before wait.
+       wait_event(read_exit_wq, smp_load_acquire(&read_exit_child_stopped));
+       // NOTE(review): the first argument is spelled rcutorture_read_exit,
+       // unlike the kthread function rcu_torture_read_exit -- confirm that
+       // torture_stop_kthread() uses it only as a label.
+       torture_stop_kthread(rcutorture_read_exit, read_exit_task);
+}
+
 static enum cpuhp_state rcutor_hp;
 
 static void
@@ -2359,6 +2463,7 @@ rcu_torture_cleanup(void)
        }
 
        show_rcu_gp_kthreads();
+       rcu_torture_read_exit_cleanup();
        rcu_torture_barrier_cleanup();
        torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
        torture_stop_kthread(rcu_torture_stall, stall_task);
@@ -2370,7 +2475,6 @@ rcu_torture_cleanup(void)
                                             reader_tasks[i]);
                kfree(reader_tasks);
        }
-       rcu_torture_current = NULL;
 
        if (fakewriter_tasks) {
                for (i = 0; i < nfakewriters; i++) {
@@ -2680,6 +2784,9 @@ rcu_torture_init(void)
        if (firsterr)
                goto unwind;
        firsterr = rcu_torture_barrier_init();
+       if (firsterr)
+               goto unwind;
+       firsterr = rcu_torture_read_exit_init();
        if (firsterr)
                goto unwind;
        if (object_debug)
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
new file mode 100644 (file)
index 0000000..d9291f8
--- /dev/null
@@ -0,0 +1,717 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Scalability test comparing RCU vs other mechanisms
+// for acquiring references on objects.
+//
+// Copyright (C) Google, 2020.
+//
+// Author: Joel Fernandes <joel@joelfernandes.org>
+
+#define pr_fmt(fmt) fmt
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/stat.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <linux/torture.h>
+#include <linux/types.h>
+
+#include "rcu.h"
+
+#define SCALE_FLAG "-ref-scale: "
+
+#define SCALEOUT(s, x...) \
+       pr_alert("%s" SCALE_FLAG s, scale_type, ## x)
+
+#define VERBOSE_SCALEOUT(s, x...) \
+       do { if (verbose) pr_alert("%s" SCALE_FLAG s, scale_type, ## x); } while (0)
+
+#define VERBOSE_SCALEOUT_ERRSTRING(s, x...) \
+       do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! " s, scale_type, ## x); } while (0)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");
+
+// Name of the ref_scale_ops implementation to test.  The description lists
+// every .name value registered in ref_scale_init()'s scale_ops[] table.
+static char *scale_type = "rcu";
+module_param(scale_type, charp, 0444);
+MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, rcu-trace, rcu-tasks, refcnt, rwlock, rwsem).");
+
+torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
+
+// Wait until there are multiple CPUs before starting test.
+torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
+             "Holdoff time before test start (s)");
+// Number of loops per experiment, all readers execute operations concurrently.
+torture_param(long, loops, 10000, "Number of loops per experiment.");
+// Number of readers, with -1 defaulting to about 75% of the CPUs.
+torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
+// Number of runs.
+torture_param(int, nruns, 30, "Number of experiments to run.");
+// Reader delay in nanoseconds, 0 for no delay.
+torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");
+
+#ifdef MODULE
+# define REFSCALE_SHUTDOWN 0
+#else
+# define REFSCALE_SHUTDOWN 1
+#endif
+
+torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
+             "Shutdown at end of scalability tests.");
+
+// Per-reader state, one array element per reader kthread.
+struct reader_task {
+       // The reader kthread, filled in by torture_create_kthread().
+       struct task_struct *task;
+       // Set to 1 by main_func() to start an experiment, cleared by the reader.
+       int start_reader;
+       // The reader sleeps here between experiments.
+       wait_queue_head_t wq;
+       // Duration of this reader's most recent timed invocation, in ns.
+       u64 last_duration_ns;
+};
+
+static struct task_struct *shutdown_task;
+static wait_queue_head_t shutdown_wq;
+
+static struct task_struct *main_task;
+static wait_queue_head_t main_wq;
+static int shutdown_start;
+
+static struct reader_task *reader_tasks;
+
+// Number of readers that are part of the current experiment.
+static atomic_t nreaders_exp;
+
+// Used to wait for all threads to start.
+static atomic_t n_init;
+static atomic_t n_started;
+static atomic_t n_warmedup;
+static atomic_t n_cooleddown;
+
+// Track which experiment is currently running.
+static int exp_idx;
+
+// Operations vector for selecting different types of tests.
+struct ref_scale_ops {
+       // Optional one-time setup, called from ref_scale_init() if non-NULL.
+       void (*init)(void);
+       // Optional teardown, called from ref_scale_cleanup() if non-NULL.
+       void (*cleanup)(void);
+       // Run a batch of empty read-side sections (used when readdelay <= 0).
+       void (*readsection)(const int nloops);
+       // Same, but delaying udl microseconds plus ndl nanoseconds in each section.
+       void (*delaysection)(const int nloops, const int udl, const int ndl);
+       // Name matched against the scale_type module parameter.
+       const char *name;
+};
+
+static struct ref_scale_ops *cur_ops;
+
+// Busy-wait for udl microseconds followed by ndl nanoseconds.  A zero
+// component is skipped rather than passed to udelay()/ndelay().
+static void un_delay(const int udl, const int ndl)
+{
+       if (udl)
+               udelay(udl);
+       if (ndl)
+               ndelay(ndl);
+}
+
+// Definitions for RCU ref scale testing.
+
+// Execute empty rcu_read_lock()/rcu_read_unlock() pairs.  The loop counts
+// from nloops down to 0 inclusive, so it runs nloops + 1 times.
+static void ref_rcu_read_section(const int nloops)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               rcu_read_lock();
+               rcu_read_unlock();
+       }
+}
+
+// Same as ref_rcu_read_section(), but with a un_delay(udl, ndl) inside
+// each read-side critical section.
+static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               rcu_read_lock();
+               un_delay(udl, ndl);
+               rcu_read_unlock();
+       }
+}
+
+// Empty ->init() function, shared by every ops vector in this file that
+// needs no setup.
+static void rcu_sync_scale_init(void)
+{
+}
+
+static struct ref_scale_ops rcu_ops = {
+       .init           = rcu_sync_scale_init,
+       .readsection    = ref_rcu_read_section,
+       .delaysection   = ref_rcu_delay_section,
+       .name           = "rcu"
+};
+
+// Definitions for SRCU ref scale testing.
+DEFINE_STATIC_SRCU(srcu_refctl_scale);
+static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale;
+
+// Execute empty srcu_read_lock()/srcu_read_unlock() pairs on srcu_ctlp.
+// The loop counts from nloops down to 0 inclusive (nloops + 1 passes).
+static void srcu_ref_scale_read_section(const int nloops)
+{
+       int i;
+       int idx;
+
+       for (i = nloops; i >= 0; i--) {
+               idx = srcu_read_lock(srcu_ctlp);
+               srcu_read_unlock(srcu_ctlp, idx);
+       }
+}
+
+// Same as srcu_ref_scale_read_section(), but with a un_delay(udl, ndl)
+// inside each read-side critical section.
+static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+{
+       int i;
+       int idx;
+
+       for (i = nloops; i >= 0; i--) {
+               idx = srcu_read_lock(srcu_ctlp);
+               un_delay(udl, ndl);
+               srcu_read_unlock(srcu_ctlp, idx);
+       }
+}
+
+static struct ref_scale_ops srcu_ops = {
+       .init           = rcu_sync_scale_init,
+       .readsection    = srcu_ref_scale_read_section,
+       .delaysection   = srcu_ref_scale_delay_section,
+       .name           = "srcu"
+};
+
+// Definitions for RCU Tasks ref scale testing: Empty read markers.
+// These definitions also work for RCU Rude readers.
+
+// Loop with an empty body; there is no read-side marker to execute.
+// The loop counts from nloops down to 0 inclusive (nloops + 1 passes).
+static void rcu_tasks_ref_scale_read_section(const int nloops)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--)
+               continue;
+}
+
+// Same loop, but each pass performs only the un_delay(udl, ndl).
+static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--)
+               un_delay(udl, ndl);
+}
+
+static struct ref_scale_ops rcu_tasks_ops = {
+       .init           = rcu_sync_scale_init,
+       .readsection    = rcu_tasks_ref_scale_read_section,
+       .delaysection   = rcu_tasks_ref_scale_delay_section,
+       .name           = "rcu-tasks"
+};
+
+// Definitions for RCU Tasks Trace ref scale testing.
+
+// Execute empty rcu_read_lock_trace()/rcu_read_unlock_trace() pairs.
+// The loop counts from nloops down to 0 inclusive (nloops + 1 passes).
+static void rcu_trace_ref_scale_read_section(const int nloops)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               rcu_read_lock_trace();
+               rcu_read_unlock_trace();
+       }
+}
+
+// Same as rcu_trace_ref_scale_read_section(), but with a
+// un_delay(udl, ndl) inside each read-side critical section.
+static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               rcu_read_lock_trace();
+               un_delay(udl, ndl);
+               rcu_read_unlock_trace();
+       }
+}
+
+static struct ref_scale_ops rcu_trace_ops = {
+       .init           = rcu_sync_scale_init,
+       .readsection    = rcu_trace_ref_scale_read_section,
+       .delaysection   = rcu_trace_ref_scale_delay_section,
+       .name           = "rcu-trace"
+};
+
+// Definitions for reference count
+// A single counter shared by all readers.
+static atomic_t refcnt;
+
+// Execute atomic_inc()/atomic_dec() pairs on the shared refcnt.
+// The loop counts from nloops down to 0 inclusive (nloops + 1 passes).
+static void ref_refcnt_section(const int nloops)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               atomic_inc(&refcnt);
+               atomic_dec(&refcnt);
+       }
+}
+
+// Same as ref_refcnt_section(), but with a un_delay(udl, ndl) between the
+// increment and the decrement.
+static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               atomic_inc(&refcnt);
+               un_delay(udl, ndl);
+               atomic_dec(&refcnt);
+       }
+}
+
+static struct ref_scale_ops refcnt_ops = {
+       .init           = rcu_sync_scale_init,
+       .readsection    = ref_refcnt_section,
+       .delaysection   = ref_refcnt_delay_section,
+       .name           = "refcnt"
+};
+
+// Definitions for rwlock
+// A single reader-writer lock shared by all readers.
+static rwlock_t test_rwlock;
+
+// ->init() hook: initialize test_rwlock before any reader uses it.
+static void ref_rwlock_init(void)
+{
+       rwlock_init(&test_rwlock);
+}
+
+// Execute empty read_lock()/read_unlock() pairs on test_rwlock.
+// The loop counts from nloops down to 0 inclusive (nloops + 1 passes).
+static void ref_rwlock_section(const int nloops)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               read_lock(&test_rwlock);
+               read_unlock(&test_rwlock);
+       }
+}
+
+// Same as ref_rwlock_section(), but with a un_delay(udl, ndl) while the
+// read lock is held.
+static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               read_lock(&test_rwlock);
+               un_delay(udl, ndl);
+               read_unlock(&test_rwlock);
+       }
+}
+
+static struct ref_scale_ops rwlock_ops = {
+       .init           = ref_rwlock_init,
+       .readsection    = ref_rwlock_section,
+       .delaysection   = ref_rwlock_delay_section,
+       .name           = "rwlock"
+};
+
+// Definitions for rwsem
+// A single reader-writer semaphore shared by all readers.
+static struct rw_semaphore test_rwsem;
+
+// ->init() hook: initialize test_rwsem before any reader uses it.
+static void ref_rwsem_init(void)
+{
+       init_rwsem(&test_rwsem);
+}
+
+// Execute empty down_read()/up_read() pairs on test_rwsem.
+// The loop counts from nloops down to 0 inclusive (nloops + 1 passes).
+static void ref_rwsem_section(const int nloops)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               down_read(&test_rwsem);
+               up_read(&test_rwsem);
+       }
+}
+
+// Same as ref_rwsem_section(), but with a un_delay(udl, ndl) while the
+// semaphore is held for reading.
+static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl)
+{
+       int i;
+
+       for (i = nloops; i >= 0; i--) {
+               down_read(&test_rwsem);
+               un_delay(udl, ndl);
+               up_read(&test_rwsem);
+       }
+}
+
+static struct ref_scale_ops rwsem_ops = {
+       .init           = ref_rwsem_init,
+       .readsection    = ref_rwsem_section,
+       .delaysection   = ref_rwsem_delay_section,
+       .name           = "rwsem"
+};
+
+// Run one batch of the selected test's read-side sections.
+//
+// With readdelay <= 0 the plain ->readsection() is used; otherwise
+// ->delaysection() is called with readdelay (nanoseconds) split into whole
+// microseconds and the remaining nanoseconds.
+// NOTE(review): loops is a long module parameter but the ops take an int
+// loop count, so very large values would be truncated -- confirm.
+static void rcu_scale_one_reader(void)
+{
+       if (readdelay <= 0)
+               cur_ops->readsection(loops);
+       else
+               cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
+}
+
+// Reader kthread.  Repeatedly does empty RCU read-side
+// critical section, minimizing update-side interference.
+//
+// @arg: this reader's index into reader_tasks[], cast to a pointer.
+//
+// For each experiment the reader waits to be started by main_func(), runs
+// a warm-up batch, a timed batch with interrupts disabled, then cool-down
+// batches, and finally decrements nreaders_exp, waking main_func() when it
+// reaches zero.  Returns 0 once the torture framework requests a stop.
+static int
+ref_scale_reader(void *arg)
+{
+       unsigned long flags;
+       long me = (long)arg;
+       struct reader_task *rt = &(reader_tasks[me]);
+       u64 start;
+       s64 duration;
+
+       VERBOSE_SCALEOUT("ref_scale_reader %ld: task started", me);
+       // Pin this reader to one CPU, wrapping around if there are more
+       // readers than CPU ids.
+       set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+       set_user_nice(current, MAX_NICE);
+       // Check in with main_func(), which waits for n_init to reach nreaders + 1.
+       atomic_inc(&n_init);
+       if (holdoff)
+               schedule_timeout_interruptible(holdoff * HZ);
+repeat:
+       VERBOSE_SCALEOUT("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, smp_processor_id());
+
+       // Wait for signal that this reader can start.
+       wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) ||
+                          torture_must_stop());
+
+       if (torture_must_stop())
+               goto end;
+
+       // Make sure that the CPU is affinitized appropriately during testing.
+       // NOTE(review): affinity was set to me % nr_cpu_ids, so this check can
+       // only hold while me < nr_cpu_ids -- confirm for larger nreaders.
+       WARN_ON_ONCE(smp_processor_id() != me);
+
+       WRITE_ONCE(rt->start_reader, 0);
+       // NOTE(review): in this and the two similar constructs below, only the
+       // reader that brings the counter to zero enters the inner loop, and by
+       // then the loop condition is already false, so as written no reader
+       // actually waits here -- confirm the intended condition.
+       if (!atomic_dec_return(&n_started))
+               while (atomic_read_acquire(&n_started))
+                       cpu_relax();
+
+       VERBOSE_SCALEOUT("ref_scale_reader %ld: experiment %d started", me, exp_idx);
+
+
+       // To reduce noise, do an initial cache-warming invocation, check
+       // in, and then keep warming until everyone has checked in.
+       rcu_scale_one_reader();
+       if (!atomic_dec_return(&n_warmedup))
+               while (atomic_read_acquire(&n_warmedup))
+                       rcu_scale_one_reader();
+       // Also keep interrupts disabled.  This also has the effect
+       // of preventing entries into slow path for rcu_read_unlock().
+       local_irq_save(flags);
+       start = ktime_get_mono_fast_ns();
+
+       rcu_scale_one_reader();
+
+       duration = ktime_get_mono_fast_ns() - start;
+       local_irq_restore(flags);
+
+       // Record the measurement; a negative duration is warned about once
+       // and recorded as zero.
+       rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
+       // To reduce runtime-skew noise, do maintain-load invocations until
+       // everyone is done.
+       if (!atomic_dec_return(&n_cooleddown))
+               while (atomic_read_acquire(&n_cooleddown))
+                       rcu_scale_one_reader();
+
+       // The last reader to finish wakes main_func(), which waits on main_wq
+       // for nreaders_exp to reach zero.
+       if (atomic_dec_and_test(&nreaders_exp))
+               wake_up(&main_wq);
+
+       VERBOSE_SCALEOUT("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)",
+                       me, exp_idx, atomic_read(&nreaders_exp));
+
+       if (!torture_must_stop())
+               goto repeat;
+end:
+       torture_kthread_stopping("ref_scale_reader");
+       return 0;
+}
+
+// Zero every reader's recorded duration ahead of the next experiment.
+static void reset_readers(void)
+{
+       int r;
+
+       for (r = 0; r < nreaders; r++)
+               reader_tasks[r].last_duration_ns = 0;
+}
+
+// Print the results of each reader and return the sum of all their durations.
+//
+// @n: number of entries of reader_tasks[] to report.
+//
+// The report is built in a single heap buffer and emitted with one
+// SCALEOUT() call.  Every write into the buffer goes through scnprintf(),
+// so an entry longer than the 32 bytes per reader budgeted for is
+// truncated instead of overrunning the allocation.
+//
+// Returns the sum of last_duration_ns over the readers visited (the walk
+// stops early if the torture test is being stopped), or 0 if the buffer
+// cannot be allocated.
+static u64 process_durations(int n)
+{
+       int i;
+       struct reader_task *rt;
+       char *buf;
+       size_t bufsz = 128 + nreaders * 32;
+       size_t len;
+       u64 sum = 0;
+
+       buf = kmalloc(bufsz, GFP_KERNEL);
+       if (!buf)
+               return 0;
+       len = scnprintf(buf, bufsz,
+                       "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
+                       exp_idx);
+
+       for (i = 0; i < n && !torture_must_stop(); i++) {
+               rt = &(reader_tasks[i]);
+
+               // Start a new output line every five readers.
+               if (i % 5 == 0)
+                       len += scnprintf(buf + len, bufsz - len, "\n");
+               len += scnprintf(buf + len, bufsz - len, "%d: %llu\t",
+                                i, rt->last_duration_ns);
+
+               sum += rt->last_duration_ns;
+       }
+       scnprintf(buf + len, bufsz - len, "\n");
+
+       SCALEOUT("%s\n", buf);
+
+       kfree(buf);
+       return sum;
+}
+
+// The main_func is the main orchestrator, it performs a bunch of
+// experiments.  For every experiment, it orders all the readers
+// involved to start and waits for them to finish the experiment. It
+// then reads their timestamps and starts the next experiment. Each
+// experiment progresses from 1 concurrent reader to N of them at which
+// point all the timestamps are printed.
+//
+// @arg: unused.
+//
+// If the result buffers cannot be allocated, no experiment is run and no
+// summary is printed (the buffers may be NULL), but the function still
+// checks in via n_init, triggers the shutdown kthread if requested, and
+// waits to be stopped.  Always returns 0.
+static int main_func(void *arg)
+{
+       bool errexit = false;
+       int exp, r;
+       char *buf;
+       size_t bufsz = 64 + nruns * 32;
+       size_t len;
+       u64 *result_avg;
+
+       set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids));
+       set_user_nice(current, MAX_NICE);
+
+       VERBOSE_SCALEOUT("main_func task started");
+       result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
+       buf = kzalloc(bufsz, GFP_KERNEL);
+       if (!result_avg || !buf) {
+               VERBOSE_SCALEOUT_ERRSTRING("out of memory");
+               errexit = true;
+       }
+       if (holdoff)
+               schedule_timeout_interruptible(holdoff * HZ);
+
+       // Wait for all threads to start.
+       atomic_inc(&n_init);
+       while (atomic_read(&n_init) < nreaders + 1)
+               schedule_timeout_uninterruptible(1);
+
+       // Start exp readers up per experiment
+       for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
+               if (errexit)
+                       break;
+               if (torture_must_stop())
+                       goto end;
+
+               // Re-arm the per-experiment counters that the readers
+               // count down.
+               reset_readers();
+               atomic_set(&nreaders_exp, nreaders);
+               atomic_set(&n_started, nreaders);
+               atomic_set(&n_warmedup, nreaders);
+               atomic_set(&n_cooleddown, nreaders);
+
+               exp_idx = exp;
+
+               for (r = 0; r < nreaders; r++) {
+                       smp_store_release(&reader_tasks[r].start_reader, 1);
+                       wake_up(&reader_tasks[r].wq);
+               }
+
+               VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers",
+                               nreaders);
+
+               wait_event(main_wq,
+                          !atomic_read(&nreaders_exp) || torture_must_stop());
+
+               VERBOSE_SCALEOUT("main_func: experiment ended");
+
+               if (torture_must_stop())
+                       goto end;
+
+               // Per-loop average, scaled by 1000 so that the summary can
+               // print three decimal places.
+               result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
+       }
+
+       // Print the average of all experiments.  This is skipped after an
+       // allocation failure because buf and/or result_avg may then be NULL.
+       if (!errexit) {
+               SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");
+
+               // scnprintf() keeps every write within the allocation.
+               len = scnprintf(buf, bufsz, "\nRuns\tTime(ns)\n");
+
+               for (exp = 0; exp < nruns; exp++) {
+                       u64 avg;
+                       u32 rem;
+
+                       avg = div_u64_rem(result_avg[exp], 1000, &rem);
+                       len += scnprintf(buf + len, bufsz - len,
+                                        "%d\t%llu.%03u\n", exp + 1, avg, rem);
+               }
+
+               SCALEOUT("%s", buf);
+       }
+
+       // This will shutdown everything including us.
+       if (shutdown) {
+               shutdown_start = 1;
+               wake_up(&shutdown_wq);
+       }
+
+       // Wait for torture to stop us
+       while (!torture_must_stop())
+               schedule_timeout_uninterruptible(1);
+
+end:
+       torture_kthread_stopping("main_func");
+       kfree(result_avg);
+       kfree(buf);
+       return 0;
+}
+
+// Log the current values of this module's parameters.
+//
+// @cur_ops: not used in the body (it shadows the file-scope cur_ops).
+// @tag:     text printed after "--- " to label the message.
+static void
+ref_scale_print_module_parms(struct ref_scale_ops *cur_ops, const char *tag)
+{
+       pr_alert("%s" SCALE_FLAG
+                "--- %s:  verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
+                verbose, shutdown, holdoff, loops, nreaders, nruns, readdelay);
+}
+
+// Stop all kthreads and release the resources set up by ref_scale_init().
+//
+// Used as the module exit handler, as the unwind path of ref_scale_init(),
+// and by the shutdown kthread.  Does nothing if torture_cleanup_begin()
+// returns true, and only ends the cleanup if no test type was selected.
+static void
+ref_scale_cleanup(void)
+{
+       int i;
+
+       if (torture_cleanup_begin())
+               return;
+
+       if (!cur_ops) {
+               torture_cleanup_end();
+               return;
+       }
+
+       if (reader_tasks) {
+               for (i = 0; i < nreaders; i++)
+                       torture_stop_kthread("ref_scale_reader",
+                                            reader_tasks[i].task);
+       }
+
+       // main_task is a kthread's task_struct obtained from
+       // torture_create_kthread(), not a kmalloc() allocation, so it is
+       // stopped here but must not be passed to kfree().
+       torture_stop_kthread("main_task", main_task);
+
+       // main_func() dereferences reader_tasks[], so free the array only
+       // after main_task has been stopped.
+       kfree(reader_tasks);
+       reader_tasks = NULL;
+
+       // Do scale-type-specific cleanup operations.
+       if (cur_ops->cleanup != NULL)
+               cur_ops->cleanup();
+
+       torture_cleanup_end();
+}
+
+// Shutdown kthread.  Just waits to be awakened, then shuts down system.
+//
+// @arg: unused.
+//
+// main_func() sets shutdown_start and wakes shutdown_wq once the test has
+// finished.  The -EINVAL return is reached only if kernel_power_off()
+// returns.
+static int
+ref_scale_shutdown(void *arg)
+{
+       wait_event(shutdown_wq, shutdown_start);
+
+       smp_mb(); // Wake before output.
+       ref_scale_cleanup();
+       kernel_power_off();
+
+       return -EINVAL;
+}
+
+// Module initialization: select the test type named by scale_type, then
+// create the optional shutdown kthread, the reader kthreads and the main
+// orchestrator kthread.
+//
+// Returns 0 on success, -EBUSY if torture_init_begin() refuses to start,
+// -EINVAL for an unknown scale_type, -ENOMEM if the reader array cannot be
+// allocated, or the error from torture_create_kthread().  On failure
+// everything already set up is torn down via ref_scale_cleanup().
+static int __init
+ref_scale_init(void)
+{
+       long i;
+       int firsterr = 0;
+       static struct ref_scale_ops *scale_ops[] = {
+               &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops,
+               &refcnt_ops, &rwlock_ops, &rwsem_ops,
+       };
+
+       if (!torture_init_begin(scale_type, verbose))
+               return -EBUSY;
+
+       for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
+               cur_ops = scale_ops[i];
+               if (strcmp(scale_type, cur_ops->name) == 0)
+                       break;
+       }
+       if (i == ARRAY_SIZE(scale_ops)) {
+               pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
+               pr_alert("rcu-scale types:");
+               for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
+                       pr_cont(" %s", scale_ops[i]->name);
+               pr_cont("\n");
+               WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
+               firsterr = -EINVAL;
+               // Tell ref_scale_cleanup() that there is nothing to tear down.
+               cur_ops = NULL;
+               goto unwind;
+       }
+       if (cur_ops->init)
+               cur_ops->init();
+
+       ref_scale_print_module_parms(cur_ops, "Start of test");
+
+       // Shutdown task
+       if (shutdown) {
+               init_waitqueue_head(&shutdown_wq);
+               firsterr = torture_create_kthread(ref_scale_shutdown, NULL,
+                                                 shutdown_task);
+               if (firsterr)
+                       goto unwind;
+               schedule_timeout_uninterruptible(1);
+       }
+
+       // Reader tasks (default to ~75% of online CPUs).
+       if (nreaders < 0)
+               nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
+       // main_func() divides by nreaders * loops, so neither may be zero or
+       // negative.  The default above yields zero on a single-CPU system.
+       if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
+               nreaders = 1;
+       if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops))
+               loops = 1;
+       reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
+                              GFP_KERNEL);
+       if (!reader_tasks) {
+               VERBOSE_SCALEOUT_ERRSTRING("out of memory");
+               firsterr = -ENOMEM;
+               goto unwind;
+       }
+
+       VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders);
+
+       for (i = 0; i < nreaders; i++) {
+               // Initialize the wait queue before creating the kthread:
+               // ref_scale_reader() waits on it as soon as it runs.
+               init_waitqueue_head(&(reader_tasks[i].wq));
+               firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
+                                                 reader_tasks[i].task);
+               if (firsterr)
+                       goto unwind;
+       }
+
+       // Main Task
+       init_waitqueue_head(&main_wq);
+       firsterr = torture_create_kthread(main_func, NULL, main_task);
+       if (firsterr)
+               goto unwind;
+
+       torture_init_end();
+       return 0;
+
+unwind:
+       torture_init_end();
+       ref_scale_cleanup();
+       return firsterr;
+}
+
+module_init(ref_scale_init);
+module_exit(ref_scale_cleanup);
index 6d3ef70..c100acf 100644 (file)
@@ -766,7 +766,7 @@ static void srcu_flip(struct srcu_struct *ssp)
  * it, if this function was preempted for enough time for the counters
  * to wrap, it really doesn't matter whether or not we expedite the grace
  * period.  The extra overhead of a needlessly expedited grace period is
- * negligible when amoritized over that time period, and the extra latency
+ * negligible when amortized over that time period, and the extra latency
  * of a needlessly non-expedited grace period is similarly negligible.
  */
 static bool srcu_might_be_idle(struct srcu_struct *ssp)
@@ -777,14 +777,15 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp)
        unsigned long t;
        unsigned long tlast;
 
+       check_init_srcu_struct(ssp);
        /* If the local srcu_data structure has callbacks, not idle.  */
-       local_irq_save(flags);
-       sdp = this_cpu_ptr(ssp->sda);
+       sdp = raw_cpu_ptr(ssp->sda);
+       spin_lock_irqsave_rcu_node(sdp, flags);
        if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
-               local_irq_restore(flags);
+               spin_unlock_irqrestore_rcu_node(sdp, flags);
                return false; /* Callbacks already present, so not idle. */
        }
-       local_irq_restore(flags);
+       spin_unlock_irqrestore_rcu_node(sdp, flags);
 
        /*
         * No local callbacks, so probabalistically probe global state.
@@ -864,9 +865,8 @@ static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
        }
        rhp->func = func;
        idx = srcu_read_lock(ssp);
-       local_irq_save(flags);
-       sdp = this_cpu_ptr(ssp->sda);
-       spin_lock_rcu_node(sdp);
+       sdp = raw_cpu_ptr(ssp->sda);
+       spin_lock_irqsave_rcu_node(sdp, flags);
        rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
        rcu_segcblist_advance(&sdp->srcu_cblist,
                              rcu_seq_current(&ssp->srcu_gp_seq));
index ce23f6c..835e2df 100644 (file)
@@ -103,6 +103,7 @@ module_param(rcu_task_stall_timeout, int, 0644);
 #define RTGS_WAIT_READERS       9
 #define RTGS_INVOKE_CBS                10
 #define RTGS_WAIT_CBS          11
+#ifndef CONFIG_TINY_RCU
 static const char * const rcu_tasks_gp_state_names[] = {
        "RTGS_INIT",
        "RTGS_WAIT_WAIT_CBS",
@@ -117,6 +118,7 @@ static const char * const rcu_tasks_gp_state_names[] = {
        "RTGS_INVOKE_CBS",
        "RTGS_WAIT_CBS",
 };
+#endif /* #ifndef CONFIG_TINY_RCU */
 
 ////////////////////////////////////////////////////////////////////////
 //
@@ -129,6 +131,7 @@ static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
        rtp->gp_jiffies = jiffies;
 }
 
+#ifndef CONFIG_TINY_RCU
 /* Return state name. */
 static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
 {
@@ -139,6 +142,7 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
                return "???";
        return rcu_tasks_gp_state_names[j];
 }
+#endif /* #ifndef CONFIG_TINY_RCU */
 
 // Enqueue a callback for the specified flavor of Tasks RCU.
 static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
@@ -205,7 +209,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
                        if (!rtp->cbs_head) {
                                WARN_ON(signal_pending(current));
                                set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS);
-                               schedule_timeout_interruptible(HZ/10);
+                               schedule_timeout_idle(HZ/10);
                        }
                        continue;
                }
@@ -227,7 +231,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
                        cond_resched();
                }
                /* Paranoid sleep to keep this from entering a tight loop */
-               schedule_timeout_uninterruptible(HZ/10);
+               schedule_timeout_idle(HZ/10);
 
                set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
        }
@@ -268,6 +272,7 @@ static void __init rcu_tasks_bootup_oddness(void)
 
 #endif /* #ifndef CONFIG_TINY_RCU */
 
+#ifndef CONFIG_TINY_RCU
 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 {
@@ -281,6 +286,7 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
                ".C"[!!data_race(rtp->cbs_head)],
                s);
 }
+#endif /* #ifndef CONFIG_TINY_RCU */
 
 static void exit_tasks_rcu_finish_trace(struct task_struct *t);
 
@@ -336,7 +342,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 
                /* Slowly back off waiting for holdouts */
                set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
-               schedule_timeout_interruptible(HZ/fract);
+               schedule_timeout_idle(HZ/fract);
 
                if (fract > 1)
                        fract--;
@@ -402,7 +408,7 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
 }
 
 /* Processing between scanning taskslist and draining the holdout list. */
-void rcu_tasks_postscan(struct list_head *hop)
+static void rcu_tasks_postscan(struct list_head *hop)
 {
        /*
         * Wait for tasks that are in the process of exiting.  This
@@ -557,10 +563,12 @@ static int __init rcu_spawn_tasks_kthread(void)
 }
 core_initcall(rcu_spawn_tasks_kthread);
 
+#ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_classic_gp_kthread(void)
 {
        show_rcu_tasks_generic_gp_kthread(&rcu_tasks, "");
 }
+#endif /* #ifndef CONFIG_TINY_RCU */
 
 /* Do the srcu_read_lock() for the above synchronize_srcu().  */
 void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
@@ -682,10 +690,12 @@ static int __init rcu_spawn_tasks_rude_kthread(void)
 }
 core_initcall(rcu_spawn_tasks_rude_kthread);
 
+#ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_rude_gp_kthread(void)
 {
        show_rcu_tasks_generic_gp_kthread(&rcu_tasks_rude, "");
 }
+#endif /* #ifndef CONFIG_TINY_RCU */
 
 #else /* #ifdef CONFIG_TASKS_RUDE_RCU */
 static void show_rcu_tasks_rude_gp_kthread(void) {}
@@ -727,8 +737,8 @@ EXPORT_SYMBOL_GPL(rcu_trace_lock_map);
 
 #ifdef CONFIG_TASKS_TRACE_RCU
 
-atomic_t trc_n_readers_need_end;       // Number of waited-for readers.
-DECLARE_WAIT_QUEUE_HEAD(trc_wait);     // List of holdout tasks.
+static atomic_t trc_n_readers_need_end;                // Number of waited-for readers.
+static DECLARE_WAIT_QUEUE_HEAD(trc_wait);      // List of holdout tasks.
 
 // Record outstanding IPIs to each CPU.  No point in sending two...
 static DEFINE_PER_CPU(bool, trc_ipi_to_cpu);
@@ -835,7 +845,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
        bool ofl = cpu_is_offline(cpu);
 
        if (task_curr(t)) {
-               WARN_ON_ONCE(ofl & !is_idle_task(t));
+               WARN_ON_ONCE(ofl && !is_idle_task(t));
 
                // If no chance of heavyweight readers, do it the hard way.
                if (!ofl && !IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
@@ -1118,11 +1128,10 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks_trace);
  * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period
  *
  * Control will return to the caller some time after a trace rcu-tasks
- * grace period has elapsed, in other words after all currently
- * executing rcu-tasks read-side critical sections have elapsed.  These
- * read-side critical sections are delimited by calls to schedule(),
- * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory,
- * anyway) cond_resched().
+ * grace period has elapsed, in other words after all currently executing
+ * rcu-tasks read-side critical sections have elapsed.  These read-side
+ * critical sections are delimited by calls to rcu_read_lock_trace()
+ * and rcu_read_unlock_trace().
  *
  * This is a very specialized primitive, intended only for a few uses in
  * tracing and other situations requiring manipulation of function preambles
@@ -1164,6 +1173,7 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
 }
 core_initcall(rcu_spawn_tasks_trace_kthread);
 
+#ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_trace_gp_kthread(void)
 {
        char buf[64];
@@ -1174,18 +1184,21 @@ static void show_rcu_tasks_trace_gp_kthread(void)
                data_race(n_heavy_reader_attempts));
        show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
 }
+#endif /* #ifndef CONFIG_TINY_RCU */
 
 #else /* #ifdef CONFIG_TASKS_TRACE_RCU */
 static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
 static inline void show_rcu_tasks_trace_gp_kthread(void) {}
 #endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
 
+#ifndef CONFIG_TINY_RCU
 void show_rcu_tasks_gp_kthreads(void)
 {
        show_rcu_tasks_classic_gp_kthread();
        show_rcu_tasks_rude_gp_kthread();
        show_rcu_tasks_trace_gp_kthread();
 }
+#endif /* #ifndef CONFIG_TINY_RCU */
 
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 static inline void rcu_tasks_bootup_oddness(void) {}
index dd572ce..aa897c3 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/cpu.h>
 #include <linux/prefetch.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 
 #include "rcu.h"
 
@@ -84,9 +85,9 @@ static inline bool rcu_reclaim_tiny(struct rcu_head *head)
        unsigned long offset = (unsigned long)head->func;
 
        rcu_lock_acquire(&rcu_callback_map);
-       if (__is_kfree_rcu_offset(offset)) {
-               trace_rcu_invoke_kfree_callback("", head, offset);
-               kfree((void *)head - offset);
+       if (__is_kvfree_rcu_offset(offset)) {
+               trace_rcu_invoke_kvfree_callback("", head, offset);
+               kvfree((void *)head - offset);
                rcu_lock_release(&rcu_callback_map);
                return true;
        }
index 6c6569e..ac7198e 100644 (file)
@@ -57,6 +57,8 @@
 #include <linux/slab.h>
 #include <linux/sched/isolation.h>
 #include <linux/sched/clock.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include "../time/tick-internal.h"
 
 #include "tree.h"
@@ -175,6 +177,15 @@ module_param(gp_init_delay, int, 0444);
 static int gp_cleanup_delay;
 module_param(gp_cleanup_delay, int, 0444);
 
+/*
+ * This rcu parameter is runtime-read-only. It reflects
+ * a minimum allowed number of objects which can be cached
+ * per-CPU. Object size is equal to one page. This value
+ * can be changed at boot time.
+ */
+static int rcu_min_cached_objs = 2;
+module_param(rcu_min_cached_objs, int, 0444);
+
 /* Retrieve RCU kthreads priority for rcutorture */
 int rcu_get_gp_kthreads_prio(void)
 {
@@ -954,7 +965,6 @@ void __rcu_irq_enter_check_tick(void)
 
 /**
  * rcu_nmi_enter - inform RCU of entry to NMI context
- * @irq: Is this call from rcu_irq_enter?
  *
  * If the CPU was idle from RCU's viewpoint, update rdp->dynticks and
  * rdp->dynticks_nmi_nesting to let the RCU grace-period handling know
@@ -990,8 +1000,11 @@ noinstr void rcu_nmi_enter(void)
                rcu_dynticks_eqs_exit();
                // ... but is watching here.
 
-               if (!in_nmi())
+               if (!in_nmi()) {
+                       instrumentation_begin();
                        rcu_cleanup_after_idle();
+                       instrumentation_end();
+               }
 
                instrumentation_begin();
                // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
@@ -1638,7 +1651,7 @@ static void rcu_gp_slow(int delay)
        if (delay > 0 &&
            !(rcu_seq_ctr(rcu_state.gp_seq) %
              (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
-               schedule_timeout_uninterruptible(delay);
+               schedule_timeout_idle(delay);
 }
 
 static unsigned long sleep_duration;
@@ -1661,7 +1674,7 @@ static void rcu_gp_torture_wait(void)
        duration = xchg(&sleep_duration, 0UL);
        if (duration > 0) {
                pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);
-               schedule_timeout_uninterruptible(duration);
+               schedule_timeout_idle(duration);
                pr_alert("%s: Wait complete\n", __func__);
        }
 }
@@ -2443,6 +2456,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
        local_irq_save(flags);
        rcu_nocb_lock(rdp);
        count = -rcl.len;
+       rdp->n_cbs_invoked += count;
        trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
                            is_idle_task(current), rcu_is_callbacks_kthread());
 
@@ -2726,7 +2740,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
        }
        *statusp = RCU_KTHREAD_YIELDING;
        trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
-       schedule_timeout_interruptible(2);
+       schedule_timeout_idle(2);
        trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
        *statusp = RCU_KTHREAD_WAITING;
 }
@@ -2894,8 +2908,8 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
                return; // Enqueued onto ->nocb_bypass, so just leave.
        // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
        rcu_segcblist_enqueue(&rdp->cblist, head);
-       if (__is_kfree_rcu_offset((unsigned long)func))
-               trace_rcu_kfree_callback(rcu_state.name, head,
+       if (__is_kvfree_rcu_offset((unsigned long)func))
+               trace_rcu_kvfree_callback(rcu_state.name, head,
                                         (unsigned long)func,
                                         rcu_segcblist_n_cbs(&rdp->cblist));
        else
@@ -2957,53 +2971,53 @@ EXPORT_SYMBOL_GPL(call_rcu);
 /* Maximum number of jiffies to wait before draining a batch. */
 #define KFREE_DRAIN_JIFFIES (HZ / 50)
 #define KFREE_N_BATCHES 2
-
-/*
- * This macro defines how many entries the "records" array
- * will contain. It is based on the fact that the size of
- * kfree_rcu_bulk_data structure becomes exactly one page.
- */
-#define KFREE_BULK_MAX_ENTR ((PAGE_SIZE / sizeof(void *)) - 3)
+#define FREE_N_CHANNELS 2
 
 /**
- * struct kfree_rcu_bulk_data - single block to store kfree_rcu() pointers
+ * struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers
  * @nr_records: Number of active pointers in the array
- * @records: Array of the kfree_rcu() pointers
  * @next: Next bulk object in the block chain
- * @head_free_debug: For debug, when CONFIG_DEBUG_OBJECTS_RCU_HEAD is set
+ * @records: Array of the kvfree_rcu() pointers
  */
-struct kfree_rcu_bulk_data {
+struct kvfree_rcu_bulk_data {
        unsigned long nr_records;
-       void *records[KFREE_BULK_MAX_ENTR];
-       struct kfree_rcu_bulk_data *next;
-       struct rcu_head *head_free_debug;
+       struct kvfree_rcu_bulk_data *next;
+       void *records[];
 };
 
+/*
+ * This macro defines how many entries the "records" array
+ * will contain. It is based on the fact that the size of
+ * kvfree_rcu_bulk_data structure becomes exactly one page.
+ */
+#define KVFREE_BULK_MAX_ENTR \
+       ((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
+
 /**
  * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
  * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
  * @head_free: List of kfree_rcu() objects waiting for a grace period
- * @bhead_free: Bulk-List of kfree_rcu() objects waiting for a grace period
+ * @bkvhead_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
  * @krcp: Pointer to @kfree_rcu_cpu structure
  */
 
 struct kfree_rcu_cpu_work {
        struct rcu_work rcu_work;
        struct rcu_head *head_free;
-       struct kfree_rcu_bulk_data *bhead_free;
+       struct kvfree_rcu_bulk_data *bkvhead_free[FREE_N_CHANNELS];
        struct kfree_rcu_cpu *krcp;
 };
 
 /**
  * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
  * @head: List of kfree_rcu() objects not yet waiting for a grace period
- * @bhead: Bulk-List of kfree_rcu() objects not yet waiting for a grace period
- * @bcached: Keeps at most one object for later reuse when build chain blocks
+ * @bkvhead: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
  * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
  * @lock: Synchronize access to this structure
  * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
  * @monitor_todo: Tracks whether a @monitor_work delayed work is pending
- * @initialized: The @lock and @rcu_work fields have been initialized
+ * @initialized: The @rcu_work fields have been initialized
+ * @count: Number of objects for which GP not started
  *
  * This is a per-CPU structure.  The reason that it is not included in
  * the rcu_data structure is to permit this code to be extracted from
@@ -3012,28 +3026,84 @@ struct kfree_rcu_cpu_work {
  */
 struct kfree_rcu_cpu {
        struct rcu_head *head;
-       struct kfree_rcu_bulk_data *bhead;
-       struct kfree_rcu_bulk_data *bcached;
+       struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS];
        struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
-       spinlock_t lock;
+       raw_spinlock_t lock;
        struct delayed_work monitor_work;
        bool monitor_todo;
        bool initialized;
-       // Number of objects for which GP not started
        int count;
+
+       /*
+        * A simple cache list that contains objects for
+        * reuse. In order to save some per-cpu space the
+        * list is singly linked. Even though the llist is
+        * lockless, access to it has to be protected by
+        * the per-cpu lock.
+        */
+       struct llist_head bkvcache;
+       int nr_bkv_objs;
 };
 
-static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
+static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {
+       .lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock),
+};
 
 static __always_inline void
-debug_rcu_head_unqueue_bulk(struct rcu_head *head)
+debug_rcu_bhead_unqueue(struct kvfree_rcu_bulk_data *bhead)
 {
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-       for (; head; head = head->next)
-               debug_rcu_head_unqueue(head);
+       int i;
+
+       for (i = 0; i < bhead->nr_records; i++)
+               debug_rcu_head_unqueue((struct rcu_head *)(bhead->records[i]));
 #endif
 }
 
+static inline struct kfree_rcu_cpu *
+krc_this_cpu_lock(unsigned long *flags)
+{
+       struct kfree_rcu_cpu *krcp;
+
+       local_irq_save(*flags); // For safely calling this_cpu_ptr().
+       krcp = this_cpu_ptr(&krc);
+       raw_spin_lock(&krcp->lock);
+
+       return krcp;
+}
+
+static inline void
+krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags)
+{
+       raw_spin_unlock(&krcp->lock);
+       local_irq_restore(flags);
+}
+
+static inline struct kvfree_rcu_bulk_data *
+get_cached_bnode(struct kfree_rcu_cpu *krcp)
+{
+       if (!krcp->nr_bkv_objs)
+               return NULL;
+
+       krcp->nr_bkv_objs--;
+       return (struct kvfree_rcu_bulk_data *)
+               llist_del_first(&krcp->bkvcache);
+}
+
+static inline bool
+put_cached_bnode(struct kfree_rcu_cpu *krcp,
+       struct kvfree_rcu_bulk_data *bnode)
+{
+       // Check the limit.
+       if (krcp->nr_bkv_objs >= rcu_min_cached_objs)
+               return false;
+
+       llist_add((struct llist_node *) bnode, &krcp->bkvcache);
+       krcp->nr_bkv_objs++;
+       return true;
+
+}
+
 /*
  * This function is invoked in workqueue context after a grace period.
  * It frees all the objects queued on ->bhead_free or ->head_free.
@@ -3041,38 +3111,63 @@ debug_rcu_head_unqueue_bulk(struct rcu_head *head)
 static void kfree_rcu_work(struct work_struct *work)
 {
        unsigned long flags;
+       struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS], *bnext;
        struct rcu_head *head, *next;
-       struct kfree_rcu_bulk_data *bhead, *bnext;
        struct kfree_rcu_cpu *krcp;
        struct kfree_rcu_cpu_work *krwp;
+       int i, j;
 
        krwp = container_of(to_rcu_work(work),
                            struct kfree_rcu_cpu_work, rcu_work);
        krcp = krwp->krcp;
-       spin_lock_irqsave(&krcp->lock, flags);
-       head = krwp->head_free;
-       krwp->head_free = NULL;
-       bhead = krwp->bhead_free;
-       krwp->bhead_free = NULL;
-       spin_unlock_irqrestore(&krcp->lock, flags);
-
-       /* "bhead" is now private, so traverse locklessly. */
-       for (; bhead; bhead = bnext) {
-               bnext = bhead->next;
 
-               debug_rcu_head_unqueue_bulk(bhead->head_free_debug);
+       raw_spin_lock_irqsave(&krcp->lock, flags);
+       // Channels 1 and 2.
+       for (i = 0; i < FREE_N_CHANNELS; i++) {
+               bkvhead[i] = krwp->bkvhead_free[i];
+               krwp->bkvhead_free[i] = NULL;
+       }
 
-               rcu_lock_acquire(&rcu_callback_map);
-               trace_rcu_invoke_kfree_bulk_callback(rcu_state.name,
-                       bhead->nr_records, bhead->records);
+       // Channel 3.
+       head = krwp->head_free;
+       krwp->head_free = NULL;
+       raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+       // Handle the first two channels.
+       for (i = 0; i < FREE_N_CHANNELS; i++) {
+               for (; bkvhead[i]; bkvhead[i] = bnext) {
+                       bnext = bkvhead[i]->next;
+                       debug_rcu_bhead_unqueue(bkvhead[i]);
+
+                       rcu_lock_acquire(&rcu_callback_map);
+                       if (i == 0) { // kmalloc() / kfree().
+                               trace_rcu_invoke_kfree_bulk_callback(
+                                       rcu_state.name, bkvhead[i]->nr_records,
+                                       bkvhead[i]->records);
+
+                               kfree_bulk(bkvhead[i]->nr_records,
+                                       bkvhead[i]->records);
+                       } else { // vmalloc() / vfree().
+                               for (j = 0; j < bkvhead[i]->nr_records; j++) {
+                                       trace_rcu_invoke_kvfree_callback(
+                                               rcu_state.name,
+                                               bkvhead[i]->records[j], 0);
+
+                                       vfree(bkvhead[i]->records[j]);
+                               }
+                       }
+                       rcu_lock_release(&rcu_callback_map);
 
-               kfree_bulk(bhead->nr_records, bhead->records);
-               rcu_lock_release(&rcu_callback_map);
+                       krcp = krc_this_cpu_lock(&flags);
+                       if (put_cached_bnode(krcp, bkvhead[i]))
+                               bkvhead[i] = NULL;
+                       krc_this_cpu_unlock(krcp, flags);
 
-               if (cmpxchg(&krcp->bcached, NULL, bhead))
-                       free_page((unsigned long) bhead);
+                       if (bkvhead[i])
+                               free_page((unsigned long) bkvhead[i]);
 
-               cond_resched_tasks_rcu_qs();
+                       cond_resched_tasks_rcu_qs();
+               }
        }
 
        /*
@@ -3082,14 +3177,15 @@ static void kfree_rcu_work(struct work_struct *work)
         */
        for (; head; head = next) {
                unsigned long offset = (unsigned long)head->func;
+               void *ptr = (void *)head - offset;
 
                next = head->next;
-               debug_rcu_head_unqueue(head);
+               debug_rcu_head_unqueue((struct rcu_head *)ptr);
                rcu_lock_acquire(&rcu_callback_map);
-               trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
+               trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset);
 
-               if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset)))
-                       kfree((void *)head - offset);
+               if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
+                       kvfree(ptr);
 
                rcu_lock_release(&rcu_callback_map);
                cond_resched_tasks_rcu_qs();
@@ -3105,8 +3201,8 @@ static void kfree_rcu_work(struct work_struct *work)
 static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
 {
        struct kfree_rcu_cpu_work *krwp;
-       bool queued = false;
-       int i;
+       bool repeat = false;
+       int i, j;
 
        lockdep_assert_held(&krcp->lock);
 
@@ -3114,21 +3210,25 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
                krwp = &(krcp->krw_arr[i]);
 
                /*
-                * Try to detach bhead or head and attach it over any
+                * Try to detach bkvhead or head and attach it over any
                 * available corresponding free channel. It can be that
                 * a previous RCU batch is in progress, it means that
                 * immediately to queue another one is not possible so
                 * return false to tell caller to retry.
                 */
-               if ((krcp->bhead && !krwp->bhead_free) ||
+               if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||
+                       (krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||
                                (krcp->head && !krwp->head_free)) {
-                       /* Channel 1. */
-                       if (!krwp->bhead_free) {
-                               krwp->bhead_free = krcp->bhead;
-                               krcp->bhead = NULL;
+                       // Channel 1 corresponds to SLAB ptrs.
+                       // Channel 2 corresponds to vmalloc ptrs.
+                       for (j = 0; j < FREE_N_CHANNELS; j++) {
+                               if (!krwp->bkvhead_free[j]) {
+                                       krwp->bkvhead_free[j] = krcp->bkvhead[j];
+                                       krcp->bkvhead[j] = NULL;
+                               }
                        }
 
-                       /* Channel 2. */
+                       // Channel 3 corresponds to emergency path.
                        if (!krwp->head_free) {
                                krwp->head_free = krcp->head;
                                krcp->head = NULL;
@@ -3137,17 +3237,21 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
                        WRITE_ONCE(krcp->count, 0);
 
                        /*
-                        * One work is per one batch, so there are two "free channels",
-                        * "bhead_free" and "head_free" the batch can handle. It can be
-                        * that the work is in the pending state when two channels have
-                        * been detached following each other, one by one.
+                        * There is one work item per batch, so there are
+                        * three "free channels" that the batch can handle.
+                        * It can be that the work is in the pending state
+                        * when channels have been detached one after
+                        * another.
                         */
                        queue_rcu_work(system_wq, &krwp->rcu_work);
-                       queued = true;
                }
+
+               // Repeat if any "free" corresponding channel is still busy.
+               if (krcp->bkvhead[0] || krcp->bkvhead[1] || krcp->head)
+                       repeat = true;
        }
 
-       return queued;
+       return !repeat;
 }
 
 static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,
@@ -3157,14 +3261,14 @@ static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,
        krcp->monitor_todo = false;
        if (queue_kfree_rcu_work(krcp)) {
                // Success! Our job is done here.
-               spin_unlock_irqrestore(&krcp->lock, flags);
+               raw_spin_unlock_irqrestore(&krcp->lock, flags);
                return;
        }
 
        // Previous RCU batch still in progress, try again later.
        krcp->monitor_todo = true;
        schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
-       spin_unlock_irqrestore(&krcp->lock, flags);
+       raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
 /*
@@ -3177,32 +3281,50 @@ static void kfree_rcu_monitor(struct work_struct *work)
        struct kfree_rcu_cpu *krcp = container_of(work, struct kfree_rcu_cpu,
                                                 monitor_work.work);
 
-       spin_lock_irqsave(&krcp->lock, flags);
+       raw_spin_lock_irqsave(&krcp->lock, flags);
        if (krcp->monitor_todo)
                kfree_rcu_drain_unlock(krcp, flags);
        else
-               spin_unlock_irqrestore(&krcp->lock, flags);
+               raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
 static inline bool
-kfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp,
-       struct rcu_head *head, rcu_callback_t func)
+kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 {
-       struct kfree_rcu_bulk_data *bnode;
+       struct kvfree_rcu_bulk_data *bnode;
+       int idx;
 
        if (unlikely(!krcp->initialized))
                return false;
 
        lockdep_assert_held(&krcp->lock);
+       idx = !!is_vmalloc_addr(ptr);
 
        /* Check if a new block is required. */
-       if (!krcp->bhead ||
-                       krcp->bhead->nr_records == KFREE_BULK_MAX_ENTR) {
-               bnode = xchg(&krcp->bcached, NULL);
+       if (!krcp->bkvhead[idx] ||
+                       krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
+               bnode = get_cached_bnode(krcp);
                if (!bnode) {
-                       WARN_ON_ONCE(sizeof(struct kfree_rcu_bulk_data) > PAGE_SIZE);
+                       /*
+                        * To keep this path working on raw non-preemptible
+                        * sections, prevent the optional entry into the
+                        * allocator as it uses sleeping locks. In fact, even
+                        * if the caller of kfree_rcu() is preemptible, this
+                        * path still is not, as krcp->lock is a raw spinlock.
+                        * With additional page pre-allocation in the works,
+                        * hitting this return is going to be much less likely.
+                        */
+                       if (IS_ENABLED(CONFIG_PREEMPT_RT))
+                               return false;
 
-                       bnode = (struct kfree_rcu_bulk_data *)
+                       /*
+                        * NOTE: For one argument of kvfree_rcu() we can
+                        * drop the lock and get the page in sleepable
+                        * context. That would allow maintaining an array
+                        * for CONFIG_PREEMPT_RT as well if no cached
+                        * pages are available.
+                        */
+                       bnode = (struct kvfree_rcu_bulk_data *)
                                __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                }
 
@@ -3212,53 +3334,62 @@ kfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp,
 
                /* Initialize the new block. */
                bnode->nr_records = 0;
-               bnode->next = krcp->bhead;
-               bnode->head_free_debug = NULL;
+               bnode->next = krcp->bkvhead[idx];
 
                /* Attach it to the head. */
-               krcp->bhead = bnode;
+               krcp->bkvhead[idx] = bnode;
        }
 
-#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-       head->func = func;
-       head->next = krcp->bhead->head_free_debug;
-       krcp->bhead->head_free_debug = head;
-#endif
-
        /* Finally insert. */
-       krcp->bhead->records[krcp->bhead->nr_records++] =
-               (void *) head - (unsigned long) func;
+       krcp->bkvhead[idx]->records
+               [krcp->bkvhead[idx]->nr_records++] = ptr;
 
        return true;
 }
 
 /*
- * Queue a request for lazy invocation of kfree_bulk()/kfree() after a grace
- * period. Please note there are two paths are maintained, one is the main one
- * that uses kfree_bulk() interface and second one is emergency one, that is
- * used only when the main path can not be maintained temporary, due to memory
- * pressure.
+ * Queue a request for lazy invocation of the appropriate free routine after a
+ * grace period. Please note that three paths are maintained: two are the main
+ * ones that use the array-of-pointers interface, and the third is an emergency
+ * one that is used only when the main paths cannot be maintained temporarily,
+ * due to memory pressure.
  *
- * Each kfree_call_rcu() request is added to a batch. The batch will be drained
+ * Each kvfree_call_rcu() request is added to a batch. The batch will be drained
  * every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch will
  * be free'd in workqueue context. This allows us to: batch requests together to
- * reduce the number of grace periods during heavy kfree_rcu() load.
+ * reduce the number of grace periods during heavy kfree_rcu()/kvfree_rcu() load.
  */
-void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
+void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
        unsigned long flags;
        struct kfree_rcu_cpu *krcp;
+       bool success;
+       void *ptr;
 
-       local_irq_save(flags);  // For safely calling this_cpu_ptr().
-       krcp = this_cpu_ptr(&krc);
-       if (krcp->initialized)
-               spin_lock(&krcp->lock);
+       if (head) {
+               ptr = (void *) head - (unsigned long) func;
+       } else {
+               /*
+                * Please note there is a limitation for the head-less
+                * variant, that is why there is a clear rule for such
+                * objects: it can be used from might_sleep() context
+                * only. For other places please embed an rcu_head to
+                * your data.
+                */
+               might_sleep();
+               ptr = (unsigned long *) func;
+       }
+
+       krcp = krc_this_cpu_lock(&flags);
 
        // Queue the object but don't yet schedule the batch.
-       if (debug_rcu_head_queue(head)) {
+       if (debug_rcu_head_queue(ptr)) {
                // Probable double kfree_rcu(), just leak.
                WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
                          __func__, head);
+
+               // Mark as success and leave.
+               success = true;
                goto unlock_return;
        }
 
@@ -3266,10 +3397,16 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
         * Under high memory pressure GFP_NOWAIT can fail,
         * in that case the emergency path is maintained.
         */
-       if (unlikely(!kfree_call_rcu_add_ptr_to_bulk(krcp, head, func))) {
+       success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
+       if (!success) {
+               if (head == NULL)
+                       // Inline if kvfree_rcu(one_arg) call.
+                       goto unlock_return;
+
                head->func = func;
                head->next = krcp->head;
                krcp->head = head;
+               success = true;
        }
 
        WRITE_ONCE(krcp->count, krcp->count + 1);
@@ -3282,11 +3419,20 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
        }
 
 unlock_return:
-       if (krcp->initialized)
-               spin_unlock(&krcp->lock);
-       local_irq_restore(flags);
+       krc_this_cpu_unlock(krcp, flags);
+
+       /*
+        * Inline kvfree() after synchronize_rcu(). We can do
+        * it from might_sleep() context only, so the current
+        * CPU can pass the QS state.
+        */
+       if (!success) {
+               debug_rcu_head_unqueue((struct rcu_head *) ptr);
+               synchronize_rcu();
+               kvfree(ptr);
+       }
 }
-EXPORT_SYMBOL_GPL(kfree_call_rcu);
+EXPORT_SYMBOL_GPL(kvfree_call_rcu);
 
 static unsigned long
 kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
@@ -3315,11 +3461,11 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
 
                count = krcp->count;
-               spin_lock_irqsave(&krcp->lock, flags);
+               raw_spin_lock_irqsave(&krcp->lock, flags);
                if (krcp->monitor_todo)
                        kfree_rcu_drain_unlock(krcp, flags);
                else
-                       spin_unlock_irqrestore(&krcp->lock, flags);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
                sc->nr_to_scan -= count;
                freed += count;
@@ -3328,7 +3474,7 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                        break;
        }
 
-       return freed;
+       return freed == 0 ? SHRINK_STOP : freed;
 }
 
 static struct shrinker kfree_rcu_shrinker = {
@@ -3346,15 +3492,15 @@ void __init kfree_rcu_scheduler_running(void)
        for_each_online_cpu(cpu) {
                struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
 
-               spin_lock_irqsave(&krcp->lock, flags);
+               raw_spin_lock_irqsave(&krcp->lock, flags);
                if (!krcp->head || krcp->monitor_todo) {
-                       spin_unlock_irqrestore(&krcp->lock, flags);
+                       raw_spin_unlock_irqrestore(&krcp->lock, flags);
                        continue;
                }
                krcp->monitor_todo = true;
                schedule_delayed_work_on(cpu, &krcp->monitor_work,
                                         KFREE_DRAIN_JIFFIES);
-               spin_unlock_irqrestore(&krcp->lock, flags);
+               raw_spin_unlock_irqrestore(&krcp->lock, flags);
        }
 }
 
@@ -3842,10 +3988,9 @@ void rcu_cpu_starting(unsigned int cpu)
 {
        unsigned long flags;
        unsigned long mask;
-       int nbits;
-       unsigned long oldmask;
        struct rcu_data *rdp;
        struct rcu_node *rnp;
+       bool newcpu;
 
        if (per_cpu(rcu_cpu_started, cpu))
                return;
@@ -3857,12 +4002,10 @@ void rcu_cpu_starting(unsigned int cpu)
        mask = rdp->grpmask;
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
-       oldmask = rnp->expmaskinitnext;
+       newcpu = !(rnp->expmaskinitnext & mask);
        rnp->expmaskinitnext |= mask;
-       oldmask ^= rnp->expmaskinitnext;
-       nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
        /* Allow lockless access for expedited grace periods. */
-       smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + nbits); /* ^^^ */
+       smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + newcpu); /* ^^^ */
        ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);
        rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
        rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4249,13 +4392,23 @@ static void __init kfree_rcu_batch_init(void)
 
        for_each_possible_cpu(cpu) {
                struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+               struct kvfree_rcu_bulk_data *bnode;
 
-               spin_lock_init(&krcp->lock);
                for (i = 0; i < KFREE_N_BATCHES; i++) {
                        INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
                        krcp->krw_arr[i].krcp = krcp;
                }
 
+               for (i = 0; i < rcu_min_cached_objs; i++) {
+                       bnode = (struct kvfree_rcu_bulk_data *)
+                               __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+
+                       if (bnode)
+                               put_cached_bnode(krcp, bnode);
+                       else
+                               pr_err("Failed to preallocate for %d CPU!\n", cpu);
+               }
+
                INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
                krcp->initialized = true;
        }
index 43991a4..c96ae35 100644 (file)
@@ -41,7 +41,7 @@ struct rcu_node {
        raw_spinlock_t __private lock;  /* Root rcu_node's lock protects */
                                        /*  some rcu_state fields as well as */
                                        /*  following. */
-       unsigned long gp_seq;   /* Track rsp->rcu_gp_seq. */
+       unsigned long gp_seq;   /* Track rsp->gp_seq. */
        unsigned long gp_seq_needed; /* Track furthest future GP request. */
        unsigned long completedqs; /* All QSes done for this node. */
        unsigned long qsmask;   /* CPUs or groups that need to switch in */
@@ -73,9 +73,9 @@ struct rcu_node {
        unsigned long ffmask;   /* Fully functional CPUs. */
        unsigned long grpmask;  /* Mask to apply to parent qsmask. */
                                /*  Only one bit will be set in this mask. */
-       int     grplo;          /* lowest-numbered CPU or group here. */
-       int     grphi;          /* highest-numbered CPU or group here. */
-       u8      grpnum;         /* CPU/group number for next level up. */
+       int     grplo;          /* lowest-numbered CPU here. */
+       int     grphi;          /* highest-numbered CPU here. */
+       u8      grpnum;         /* group number for next level up. */
        u8      level;          /* root is at level 0. */
        bool    wait_blkd_tasks;/* Necessary to wait for blocked tasks to */
                                /*  exit RCU read-side critical sections */
@@ -149,7 +149,7 @@ union rcu_noqs {
 /* Per-CPU data for read-copy update. */
 struct rcu_data {
        /* 1) quiescent-state and grace-period handling : */
-       unsigned long   gp_seq;         /* Track rsp->rcu_gp_seq counter. */
+       unsigned long   gp_seq;         /* Track rsp->gp_seq counter. */
        unsigned long   gp_seq_needed;  /* Track furthest future GP request. */
        union rcu_noqs  cpu_no_qs;      /* No QSes yet for this CPU. */
        bool            core_needs_qs;  /* Core waits for quiesc state. */
@@ -171,6 +171,7 @@ struct rcu_data {
                                        /* different grace periods. */
        long            qlen_last_fqs_check;
                                        /* qlen at last check for QS forcing */
+       unsigned long   n_cbs_invoked;  /* # callbacks invoked since boot. */
        unsigned long   n_force_qs_snap;
                                        /* did other CPU force QS recently? */
        long            blimit;         /* Upper limit on a processed batch */
@@ -301,6 +302,8 @@ struct rcu_state {
        u8      boost ____cacheline_internodealigned_in_smp;
                                                /* Subject to priority boost. */
        unsigned long gp_seq;                   /* Grace-period sequence #. */
+       unsigned long gp_max;                   /* Maximum GP duration in */
+                                               /*  jiffies. */
        struct task_struct *gp_kthread;         /* Task for grace periods. */
        struct swait_queue_head gp_wq;          /* Where GP task waits. */
        short gp_flags;                         /* Commands for GP task. */
@@ -346,8 +349,6 @@ struct rcu_state {
                                                /*  a reluctant CPU. */
        unsigned long n_force_qs_gpstart;       /* Snapshot of n_force_qs at */
                                                /*  GP start. */
-       unsigned long gp_max;                   /* Maximum GP duration in */
-                                               /*  jiffies. */
        const char *name;                       /* Name of structure. */
        char abbr;                              /* Abbreviated name. */
 
index 72952ed..1888c0e 100644 (file)
@@ -403,7 +403,7 @@ retry_ipi:
                        /* Online, so delay for a bit and try again. */
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("selectofl"));
-                       schedule_timeout_uninterruptible(1);
+                       schedule_timeout_idle(1);
                        goto retry_ipi;
                }
                /* CPU really is offline, so we must report its QS. */
index 3522236..982fc5b 100644 (file)
@@ -1033,7 +1033,7 @@ static int rcu_boost_kthread(void *arg)
                if (spincnt > 10) {
                        WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
                        trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
-                       schedule_timeout_interruptible(2);
+                       schedule_timeout_idle(2);
                        trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
                        spincnt = 0;
                }
@@ -2005,7 +2005,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
                /* Polling, so trace if first poll in the series. */
                if (gotcbs)
                        trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
-               schedule_timeout_interruptible(1);
+               schedule_timeout_idle(1);
        } else if (!needwait_gp) {
                /* Wait for callbacks to appear. */
                trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
index 54a6dba..b5d3b47 100644 (file)
@@ -237,14 +237,12 @@ struct rcu_stall_chk_rdr {
  */
 static bool check_slow_task(struct task_struct *t, void *arg)
 {
-       struct rcu_node *rnp;
        struct rcu_stall_chk_rdr *rscrp = arg;
 
        if (task_curr(t))
                return false; // It is running, so decline to inspect it.
        rscrp->nesting = t->rcu_read_lock_nesting;
        rscrp->rs = t->rcu_read_unlock_special;
-       rnp = t->rcu_blocked_node;
        rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry);
        return true;
 }
@@ -468,7 +466,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
 
        /*
         * OK, time to rat on our buddy...
-        * See Documentation/RCU/stallwarn.txt for info on how to debug
+        * See Documentation/RCU/stallwarn.rst for info on how to debug
         * RCU CPU stall warnings.
         */
        pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
@@ -535,7 +533,7 @@ static void print_cpu_stall(unsigned long gps)
 
        /*
         * OK, time to rat on ourselves...
-        * See Documentation/RCU/stallwarn.txt for info on how to debug
+        * See Documentation/RCU/stallwarn.rst for info on how to debug
         * RCU CPU stall warnings.
         */
        pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);
@@ -649,6 +647,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
  */
 void show_rcu_gp_kthreads(void)
 {
+       unsigned long cbs = 0;
        int cpu;
        unsigned long j;
        unsigned long ja;
@@ -690,9 +689,11 @@ void show_rcu_gp_kthreads(void)
        }
        for_each_possible_cpu(cpu) {
                rdp = per_cpu_ptr(&rcu_data, cpu);
+               cbs += data_race(rdp->n_cbs_invoked);
                if (rcu_segcblist_is_offloaded(&rdp->cblist))
                        show_rcu_nocb_state(rdp);
        }
+       pr_info("RCU callbacks invoked since boot: %lu\n", cbs);
        show_rcu_tasks_gp_kthreads();
 }
 EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
index 84843ad..2de49b5 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/kprobes.h>
 #include <linux/slab.h>
 #include <linux/irq_work.h>
+#include <linux/rcupdate_trace.h>
 
 #define CREATE_TRACE_POINTS
 
@@ -207,7 +208,7 @@ void rcu_end_inkernel_boot(void)
        rcu_unexpedite_gp();
        if (rcu_normal_after_boot)
                WRITE_ONCE(rcu_normal, 1);
-       rcu_boot_ended = 1;
+       rcu_boot_ended = true;
 }
 
 /*
@@ -279,6 +280,7 @@ struct lockdep_map rcu_sched_lock_map = {
 };
 EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
 
+// Tell lockdep when RCU callbacks are being invoked.
 static struct lock_class_key rcu_callback_key;
 struct lockdep_map rcu_callback_map =
        STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
@@ -390,13 +392,14 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
                        might_sleep();
                        continue;
                }
-               init_rcu_head_on_stack(&rs_array[i].head);
-               init_completion(&rs_array[i].completion);
                for (j = 0; j < i; j++)
                        if (crcu_array[j] == crcu_array[i])
                                break;
-               if (j == i)
+               if (j == i) {
+                       init_rcu_head_on_stack(&rs_array[i].head);
+                       init_completion(&rs_array[i].completion);
                        (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
+               }
        }
 
        /* Wait for all callbacks to be invoked. */
@@ -407,9 +410,10 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
                for (j = 0; j < i; j++)
                        if (crcu_array[j] == crcu_array[i])
                                break;
-               if (j == i)
+               if (j == i) {
                        wait_for_completion(&rs_array[i].completion);
-               destroy_rcu_head_on_stack(&rs_array[i].head);
+                       destroy_rcu_head_on_stack(&rs_array[i].head);
+               }
        }
 }
 EXPORT_SYMBOL_GPL(__wait_rcu_gp);
index 491f134..e7b78d5 100644 (file)
@@ -26,7 +26,7 @@ int C_A_D = 1;
 struct pid *cad_pid;
 EXPORT_SYMBOL(cad_pid);
 
-#if defined(CONFIG_ARM) || defined(CONFIG_UNICORE32)
+#if defined(CONFIG_ARM)
 #define DEFAULT_REBOOT_MODE            = REBOOT_HARD
 #else
 #define DEFAULT_REBOOT_MODE
index 2142c67..4a0e7b4 100644 (file)
@@ -6,6 +6,10 @@
  *
  *  Copyright (C) 1991-2002  Linus Torvalds
  */
+#define CREATE_TRACE_POINTS
+#include <trace/events/sched.h>
+#undef CREATE_TRACE_POINTS
+
 #include "sched.h"
 
 #include <linux/nospec.h>
@@ -23,9 +27,6 @@
 #include "pelt.h"
 #include "smp.h"
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/sched.h>
-
 /*
  * Export tracepoints that act as a bare tracehook (ie: have no trace event
  * associated with them) to allow external modules to probe them.
@@ -36,6 +37,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
 EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
 EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
@@ -75,6 +79,100 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
+
+/*
+ * Serialization rules:
+ *
+ * Lock order:
+ *
+ *   p->pi_lock
+ *     rq->lock
+ *       hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls)
+ *
+ *  rq1->lock
+ *    rq2->lock  where: rq1 < rq2
+ *
+ * Regular state:
+ *
+ * Normal scheduling state is serialized by rq->lock. __schedule() takes the
+ * local CPU's rq->lock, it optionally removes the task from the runqueue and
+ * always looks at the local rq data structures to find the most eligible task
+ * to run next.
+ *
+ * Task enqueue is also under rq->lock, possibly taken from another CPU.
+ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to
+ * the local CPU to avoid bouncing the runqueue state around [ see
+ * ttwu_queue_wakelist() ]
+ *
+ * Task wakeup, specifically wakeups that involve migration, are horribly
+ * complicated to avoid having to take two rq->locks.
+ *
+ * Special state:
+ *
+ * System-calls and anything external will use task_rq_lock() which acquires
+ * both p->pi_lock and rq->lock. As a consequence the state they change is
+ * stable while holding either lock:
+ *
+ *  - sched_setaffinity()/
+ *    set_cpus_allowed_ptr():  p->cpus_ptr, p->nr_cpus_allowed
+ *  - set_user_nice():         p->se.load, p->*prio
+ *  - __sched_setscheduler():  p->sched_class, p->policy, p->*prio,
+ *                             p->se.load, p->rt_priority,
+ *                             p->dl.dl_{runtime, deadline, period, flags, bw, density}
+ *  - sched_setnuma():         p->numa_preferred_nid
+ *  - sched_move_task()/
+ *    cpu_cgroup_fork():       p->sched_task_group
+ *  - uclamp_update_active()   p->uclamp*
+ *
+ * p->state <- TASK_*:
+ *
+ *   is changed locklessly using set_current_state(), __set_current_state() or
+ *   set_special_state(), see their respective comments, or by
+ *   try_to_wake_up(). This latter uses p->pi_lock to serialize against
+ *   concurrent self.
+ *
+ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }:
+ *
+ *   is set by activate_task() and cleared by deactivate_task(), under
+ *   rq->lock. Non-zero indicates the task is runnable, the special
+ *   ON_RQ_MIGRATING state is used for migration without holding both
+ *   rq->locks. It indicates task_cpu() is not stable, see task_rq_lock().
+ *
+ * p->on_cpu <- { 0, 1 }:
+ *
+ *   is set by prepare_task() and cleared by finish_task() such that it will be
+ *   set before p is scheduled-in and cleared after p is scheduled-out, both
+ *   under rq->lock. Non-zero indicates the task is running on its CPU.
+ *
+ *   [ The astute reader will observe that it is possible for two tasks on one
+ *     CPU to have ->on_cpu = 1 at the same time. ]
+ *
+ * task_cpu(p): is changed by set_task_cpu(), the rules are:
+ *
+ *  - Don't call set_task_cpu() on a blocked task:
+ *
+ *    We don't care what CPU we're not running on, this simplifies hotplug,
+ *    the CPU assignment of blocked tasks isn't required to be valid.
+ *
+ *  - for try_to_wake_up(), called under p->pi_lock:
+ *
+ *    This allows try_to_wake_up() to only take one rq->lock, see its comment.
+ *
+ *  - for migration called under rq->lock:
+ *    [ see task_on_rq_migrating() in task_rq_lock() ]
+ *
+ *    o move_queued_task()
+ *    o detach_task()
+ *
+ *  - for migration called under double_rq_lock():
+ *
+ *    o __migrate_swap_task()
+ *    o push_rt_task() / pull_rt_task()
+ *    o push_dl_task() / pull_dl_task()
+ *    o dl_task_offline_migration()
+ *
+ */
+
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */
@@ -791,9 +889,46 @@ unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
 /* Max allowed maximum utilization */
 unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
 
+/*
+ * By default RT tasks run at the maximum performance point/capacity of the
+ * system. Uclamp enforces this by always setting UCLAMP_MIN of RT tasks to
+ * SCHED_CAPACITY_SCALE.
+ *
+ * This knob allows admins to change the default behavior when uclamp is being
+ * used. In battery powered devices, particularly, running at the maximum
+ * capacity and frequency will increase energy consumption and shorten the
+ * battery life.
+ *
+ * This knob only affects RT tasks whose uclamp_se->user_defined == false.
+ *
+ * This knob will not override the system default sched_util_clamp_min defined
+ * above.
+ */
+unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE;
+
 /* All clamps are required to be less or equal than these values */
 static struct uclamp_se uclamp_default[UCLAMP_CNT];
 
+/*
+ * This static key is used to reduce the uclamp overhead in the fast path. It
+ * primarily disables the call to uclamp_rq_{inc, dec}() in
+ * enqueue/dequeue_task().
+ *
+ * This allows users to continue to enable uclamp in their kernel config with
+ * minimum uclamp overhead in the fast path.
+ *
+ * As soon as userspace modifies any of the uclamp knobs, the static key is
+ * enabled, since we have actual users that make use of uclamp
+ * functionality.
+ *
+ * The knobs that would enable this static key are:
+ *
+ *   * A task modifying its uclamp value with sched_setattr().
+ *   * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs.
+ *   * An admin modifying the cgroup cpu.uclamp.{min, max}
+ */
+DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
+
 /* Integer rounded range for each bucket */
 #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
 
@@ -873,6 +1008,64 @@ unsigned int uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
        return uclamp_idle_value(rq, clamp_id, clamp_value);
 }
 
+static void __uclamp_update_util_min_rt_default(struct task_struct *p)
+{
+       unsigned int default_util_min;
+       struct uclamp_se *uc_se;
+
+       lockdep_assert_held(&p->pi_lock);
+
+       uc_se = &p->uclamp_req[UCLAMP_MIN];
+
+       /* Only sync if user didn't override the default */
+       if (uc_se->user_defined)
+               return;
+
+       default_util_min = sysctl_sched_uclamp_util_min_rt_default;
+       uclamp_se_set(uc_se, default_util_min, false);
+}
+
+static void uclamp_update_util_min_rt_default(struct task_struct *p)
+{
+       struct rq_flags rf;
+       struct rq *rq;
+
+       if (!rt_task(p))
+               return;
+
+       /* Protect updates to p->uclamp_* */
+       rq = task_rq_lock(p, &rf);
+       __uclamp_update_util_min_rt_default(p);
+       task_rq_unlock(rq, p, &rf);
+}
+
+static void uclamp_sync_util_min_rt_default(void)
+{
+       struct task_struct *g, *p;
+
+       /*
+        * copy_process()                       sysctl_uclamp
+        *                                        uclamp_min_rt = X;
+        *   write_lock(&tasklist_lock)           read_lock(&tasklist_lock)
+        *   // link thread                       smp_mb__after_spinlock()
+        *   write_unlock(&tasklist_lock)         read_unlock(&tasklist_lock);
+        *   sched_post_fork()                    for_each_process_thread()
+        *     __uclamp_sync_rt()                   __uclamp_sync_rt()
+        *
+        * Ensures that either sched_post_fork() will observe the new
+        * uclamp_min_rt or for_each_process_thread() will observe the new
+        * task.
+        */
+       read_lock(&tasklist_lock);
+       smp_mb__after_spinlock();
+       read_unlock(&tasklist_lock);
+
+       rcu_read_lock();
+       for_each_process_thread(g, p)
+               uclamp_update_util_min_rt_default(p);
+       rcu_read_unlock();
+}
+
 static inline struct uclamp_se
 uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
 {
@@ -990,10 +1183,38 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
 
        lockdep_assert_held(&rq->lock);
 
+       /*
+        * If sched_uclamp_used was enabled after task @p was enqueued,
+        * we could end up with unbalanced call to uclamp_rq_dec_id().
+        *
+        * In this case the uc_se->active flag should be false since no uclamp
+        * accounting was performed at enqueue time and we can just return
+        * here.
+        *
+        * Need to be careful of the following enqueue/dequeue ordering
+        * problem too
+        *
+        *      enqueue(taskA)
+        *      // sched_uclamp_used gets enabled
+        *      enqueue(taskB)
+        *      dequeue(taskA)
+        *      // Must not decrement bucket->tasks here
+        *      dequeue(taskB)
+        *
+        * where we could end up with stale data in uc_se and
+        * bucket[uc_se->bucket_id].
+        *
+        * The following check here eliminates the possibility of such race.
+        */
+       if (unlikely(!uc_se->active))
+               return;
+
        bucket = &uc_rq->bucket[uc_se->bucket_id];
+
        SCHED_WARN_ON(!bucket->tasks);
        if (likely(bucket->tasks))
                bucket->tasks--;
+
        uc_se->active = false;
 
        /*
@@ -1021,6 +1242,15 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
 {
        enum uclamp_id clamp_id;
 
+       /*
+        * Avoid any overhead until uclamp is actually used by the userspace.
+        *
+        * The condition is constructed such that a NOP is generated when
+        * sched_uclamp_used is disabled.
+        */
+       if (!static_branch_unlikely(&sched_uclamp_used))
+               return;
+
        if (unlikely(!p->sched_class->uclamp_enabled))
                return;
 
@@ -1036,6 +1266,15 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
 {
        enum uclamp_id clamp_id;
 
+       /*
+        * Avoid any overhead until uclamp is actually used by the userspace.
+        *
+        * The condition is constructed such that a NOP is generated when
+        * sched_uclamp_used is disabled.
+        */
+       if (!static_branch_unlikely(&sched_uclamp_used))
+               return;
+
        if (unlikely(!p->sched_class->uclamp_enabled))
                return;
 
@@ -1114,12 +1353,13 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
                                void *buffer, size_t *lenp, loff_t *ppos)
 {
        bool update_root_tg = false;
-       int old_min, old_max;
+       int old_min, old_max, old_min_rt;
        int result;
 
        mutex_lock(&uclamp_mutex);
        old_min = sysctl_sched_uclamp_util_min;
        old_max = sysctl_sched_uclamp_util_max;
+       old_min_rt = sysctl_sched_uclamp_util_min_rt_default;
 
        result = proc_dointvec(table, write, buffer, lenp, ppos);
        if (result)
@@ -1128,7 +1368,9 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
                goto done;
 
        if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
-           sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) {
+           sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE ||
+           sysctl_sched_uclamp_util_min_rt_default > SCHED_CAPACITY_SCALE) {
+
                result = -EINVAL;
                goto undo;
        }
@@ -1144,8 +1386,15 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
                update_root_tg = true;
        }
 
-       if (update_root_tg)
+       if (update_root_tg) {
+               static_branch_enable(&sched_uclamp_used);
                uclamp_update_root_tg();
+       }
+
+       if (old_min_rt != sysctl_sched_uclamp_util_min_rt_default) {
+               static_branch_enable(&sched_uclamp_used);
+               uclamp_sync_util_min_rt_default();
+       }
 
        /*
         * We update all RUNNABLE tasks only when task groups are in use.
@@ -1158,6 +1407,7 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
 undo:
        sysctl_sched_uclamp_util_min = old_min;
        sysctl_sched_uclamp_util_max = old_max;
+       sysctl_sched_uclamp_util_min_rt_default = old_min_rt;
 done:
        mutex_unlock(&uclamp_mutex);
 
@@ -1180,6 +1430,15 @@ static int uclamp_validate(struct task_struct *p,
        if (upper_bound > SCHED_CAPACITY_SCALE)
                return -EINVAL;
 
+       /*
+        * We have valid uclamp attributes; make sure uclamp is enabled.
+        *
+        * We need to do that here, because enabling static branches is a
+        * blocking operation which obviously cannot be done while holding
+        * scheduler locks.
+        */
+       static_branch_enable(&sched_uclamp_used);
+
        return 0;
 }
 
@@ -1194,17 +1453,20 @@ static void __setscheduler_uclamp(struct task_struct *p,
         */
        for_each_clamp_id(clamp_id) {
                struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
-               unsigned int clamp_value = uclamp_none(clamp_id);
 
                /* Keep using defined clamps across class changes */
                if (uc_se->user_defined)
                        continue;
 
-               /* By default, RT tasks always get 100% boost */
+               /*
+                * RT by default have a 100% boost value that could be modified
+                * at runtime.
+                */
                if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
-                       clamp_value = uclamp_none(UCLAMP_MAX);
+                       __uclamp_update_util_min_rt_default(p);
+               else
+                       uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
 
-               uclamp_se_set(uc_se, clamp_value, false);
        }
 
        if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
@@ -1225,6 +1487,10 @@ static void uclamp_fork(struct task_struct *p)
 {
        enum uclamp_id clamp_id;
 
+       /*
+        * We don't need to hold task_rq_lock() when updating p->uclamp_* here
+        * as the task is still at its early fork stages.
+        */
        for_each_clamp_id(clamp_id)
                p->uclamp[clamp_id].active = false;
 
@@ -1237,19 +1503,33 @@ static void uclamp_fork(struct task_struct *p)
        }
 }
 
+static void uclamp_post_fork(struct task_struct *p)
+{
+       uclamp_update_util_min_rt_default(p);
+}
+
+static void __init init_uclamp_rq(struct rq *rq)
+{
+       enum uclamp_id clamp_id;
+       struct uclamp_rq *uc_rq = rq->uclamp;
+
+       for_each_clamp_id(clamp_id) {
+               uc_rq[clamp_id] = (struct uclamp_rq) {
+                       .value = uclamp_none(clamp_id)
+               };
+       }
+
+       rq->uclamp_flags = 0;
+}
+
 static void __init init_uclamp(void)
 {
        struct uclamp_se uc_max = {};
        enum uclamp_id clamp_id;
        int cpu;
 
-       mutex_init(&uclamp_mutex);
-
-       for_each_possible_cpu(cpu) {
-               memset(&cpu_rq(cpu)->uclamp, 0,
-                               sizeof(struct uclamp_rq)*UCLAMP_CNT);
-               cpu_rq(cpu)->uclamp_flags = 0;
-       }
+       for_each_possible_cpu(cpu)
+               init_uclamp_rq(cpu_rq(cpu));
 
        for_each_clamp_id(clamp_id) {
                uclamp_se_set(&init_task.uclamp_req[clamp_id],
@@ -1278,6 +1558,7 @@ static inline int uclamp_validate(struct task_struct *p,
 static void __setscheduler_uclamp(struct task_struct *p,
                                  const struct sched_attr *attr) { }
 static inline void uclamp_fork(struct task_struct *p) { }
+static inline void uclamp_post_fork(struct task_struct *p) { }
 static inline void init_uclamp(void) { }
 #endif /* CONFIG_UCLAMP_TASK */
 
@@ -1404,20 +1685,10 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 
 void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
-       const struct sched_class *class;
-
-       if (p->sched_class == rq->curr->sched_class) {
+       if (p->sched_class == rq->curr->sched_class)
                rq->curr->sched_class->check_preempt_curr(rq, p, flags);
-       } else {
-               for_each_class(class) {
-                       if (class == rq->curr->sched_class)
-                               break;
-                       if (class == p->sched_class) {
-                               resched_curr(rq);
-                               break;
-                       }
-               }
-       }
+       else if (p->sched_class > rq->curr->sched_class)
+               resched_curr(rq);
 
        /*
         * A queue event has occurred, and we're going to schedule.  In
@@ -1468,8 +1739,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
 {
        lockdep_assert_held(&rq->lock);
 
-       WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
-       dequeue_task(rq, p, DEQUEUE_NOCLOCK);
+       deactivate_task(rq, p, DEQUEUE_NOCLOCK);
        set_task_cpu(p, new_cpu);
        rq_unlock(rq, rf);
 
@@ -1477,8 +1747,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
 
        rq_lock(rq, rf);
        BUG_ON(task_cpu(p) != new_cpu);
-       enqueue_task(rq, p, 0);
-       p->on_rq = TASK_ON_RQ_QUEUED;
+       activate_task(rq, p, 0);
        check_preempt_curr(rq, p, 0);
 
        return rq;
@@ -2243,12 +2512,31 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 }
 
 /*
- * Called in case the task @p isn't fully descheduled from its runqueue,
- * in this case we must do a remote wakeup. Its a 'light' wakeup though,
- * since all we need to do is flip p->state to TASK_RUNNING, since
- * the task is still ->on_rq.
+ * Consider @p being inside a wait loop:
+ *
+ *   for (;;) {
+ *      set_current_state(TASK_UNINTERRUPTIBLE);
+ *
+ *      if (CONDITION)
+ *         break;
+ *
+ *      schedule();
+ *   }
+ *   __set_current_state(TASK_RUNNING);
+ *
+ * between set_current_state() and schedule(). In this case @p is still
+ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in
+ * an atomic manner.
+ *
+ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq
+ * then schedule() must still happen and p->state can be changed to
+ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we
+ * need to do a full wakeup with enqueue.
+ *
+ * Returns: %true when the wakeup is done,
+ *          %false otherwise.
  */
-static int ttwu_remote(struct task_struct *p, int wake_flags)
+static int ttwu_runnable(struct task_struct *p, int wake_flags)
 {
        struct rq_flags rf;
        struct rq *rq;
@@ -2389,6 +2677,14 @@ static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
 
        return false;
 }
+
+#else /* !CONFIG_SMP */
+
+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
+{
+       return false;
+}
+
 #endif /* CONFIG_SMP */
 
 static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
@@ -2396,10 +2692,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
        struct rq *rq = cpu_rq(cpu);
        struct rq_flags rf;
 
-#if defined(CONFIG_SMP)
        if (ttwu_queue_wakelist(p, cpu, wake_flags))
                return;
-#endif
 
        rq_lock(rq, &rf);
        update_rq_clock(rq);
@@ -2455,8 +2749,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * migration. However the means are completely different as there is no lock
  * chain to provide order. Instead we do:
  *
- *   1) smp_store_release(X->on_cpu, 0)
- *   2) smp_cond_load_acquire(!X->on_cpu)
+ *   1) smp_store_release(X->on_cpu, 0)   -- finish_task()
+ *   2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up()
  *
  * Example:
  *
@@ -2496,15 +2790,33 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * @state: the mask of task states that can be woken
  * @wake_flags: wake modifier flags (WF_*)
  *
- * If (@state & @p->state) @p->state = TASK_RUNNING.
+ * Conceptually does:
+ *
+ *   If (@state & @p->state) @p->state = TASK_RUNNING.
  *
  * If the task was not queued/runnable, also place it back on a runqueue.
  *
- * Atomic against schedule() which would dequeue a task, also see
- * set_current_state().
+ * This function is atomic against schedule() which would dequeue the task.
+ *
+ * It issues a full memory barrier before accessing @p->state, see the comment
+ * with set_current_state().
  *
- * This function executes a full memory barrier before accessing the task
- * state; see set_current_state().
+ * Uses p->pi_lock to serialize against concurrent wake-ups.
+ *
+ * Relies on p->pi_lock stabilizing:
+ *  - p->sched_class
+ *  - p->cpus_ptr
+ *  - p->sched_task_group
+ * in order to do migration, see its use of select_task_rq()/set_task_cpu().
+ *
+ * Tries really hard to only take one task_rq(p)->lock for performance.
+ * Takes rq->lock in:
+ *  - ttwu_runnable()    -- old rq, unavoidable, see comment there;
+ *  - ttwu_queue()       -- new rq, for enqueue of the task;
+ *  - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us.
+ *
+ * As a consequence we race really badly with just about everything. See the
+ * many memory barriers and their comments for details.
  *
  * Return: %true if @p->state changes (an actual wakeup was done),
  *        %false otherwise.
@@ -2520,7 +2832,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                /*
                 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
                 * == smp_processor_id()'. Together this means we can special
-                * case the whole 'p->on_rq && ttwu_remote()' case below
+                * case the whole 'p->on_rq && ttwu_runnable()' case below
                 * without taking any locks.
                 *
                 * In particular:
@@ -2541,8 +2853,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        /*
         * If we are going to wake up a thread waiting for CONDITION we
         * need to ensure that CONDITION=1 done by the caller can not be
-        * reordered with p->state check below. This pairs with mb() in
-        * set_current_state() the waiting thread does.
+        * reordered with p->state check below. This pairs with smp_store_mb()
+        * in set_current_state() that the waiting thread does.
         */
        raw_spin_lock_irqsave(&p->pi_lock, flags);
        smp_mb__after_spinlock();
@@ -2577,7 +2889,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          * A similar smp_rmb() lives in try_invoke_on_locked_down_task().
         */
        smp_rmb();
-       if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags))
+       if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
                goto unlock;
 
        if (p->in_iowait) {
@@ -2990,6 +3302,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
        return 0;
 }
 
+void sched_post_fork(struct task_struct *p)
+{
+       uclamp_post_fork(p);
+}
+
 unsigned long to_ratio(u64 period, u64 runtime)
 {
        if (runtime == RUNTIME_INF)
@@ -3147,8 +3464,10 @@ static inline void prepare_task(struct task_struct *next)
        /*
         * Claim the task as running, we do this before switching to it
         * such that any running task will have this set.
+        *
+        * See the ttwu() WF_ON_CPU case and its ordering comment.
         */
-       next->on_cpu = 1;
+       WRITE_ONCE(next->on_cpu, 1);
 #endif
 }
 
@@ -3156,8 +3475,9 @@ static inline void finish_task(struct task_struct *prev)
 {
 #ifdef CONFIG_SMP
        /*
-        * After ->on_cpu is cleared, the task can be moved to a different CPU.
-        * We must ensure this doesn't happen until the switch is completely
+        * This must be the very last reference to @prev from this CPU. After
+        * p->on_cpu is cleared, the task can be moved to a different CPU. We
+        * must ensure this doesn't happen until the switch is completely
         * finished.
         *
         * In particular, the load of prev->state in finish_task_switch() must
@@ -3656,17 +3976,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
        return ns;
 }
 
-DEFINE_PER_CPU(unsigned long, thermal_pressure);
-
-void arch_set_thermal_pressure(struct cpumask *cpus,
-                              unsigned long th_pressure)
-{
-       int cpu;
-
-       for_each_cpu(cpu, cpus)
-               WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
-}
-
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -4029,8 +4338,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
          * higher scheduling class, because otherwise those lose the
         * opportunity to pull in more work from other CPUs.
         */
-       if (likely((prev->sched_class == &idle_sched_class ||
-                   prev->sched_class == &fair_sched_class) &&
+       if (likely(prev->sched_class <= &fair_sched_class &&
                   rq->nr_running == rq->cfs.h_nr_running)) {
 
                p = pick_next_task_fair(rq, prev, rf);
@@ -5519,6 +5827,11 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
                kattr.sched_nice = task_nice(p);
 
 #ifdef CONFIG_UCLAMP_TASK
+       /*
+        * This could race with another potential updater, but this is fine
+        * because it'll correctly read the old or the new value. We don't need
+        * to guarantee who wins the race as long as it doesn't return garbage.
+        */
        kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
        kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
 #endif
@@ -5876,7 +6189,7 @@ again:
        if (task_running(p_rq, p) || p->state)
                goto out_unlock;
 
-       yielded = curr->sched_class->yield_to_task(rq, p, preempt);
+       yielded = curr->sched_class->yield_to_task(rq, p);
        if (yielded) {
                schedstat_inc(rq->yld_count);
                /*
@@ -6710,6 +7023,14 @@ void __init sched_init(void)
        unsigned long ptr = 0;
        int i;
 
+       /* Make sure the linker didn't screw up */
+       BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
+              &fair_sched_class + 1 != &rt_sched_class ||
+              &rt_sched_class + 1   != &dl_sched_class);
+#ifdef CONFIG_SMP
+       BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
+#endif
+
        wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -7431,6 +7752,8 @@ static ssize_t cpu_uclamp_write(struct kernfs_open_file *of, char *buf,
        if (req.ret)
                return req.ret;
 
+       static_branch_enable(&sched_uclamp_used);
+
        mutex_lock(&uclamp_mutex);
        rcu_read_lock();
 
@@ -8118,4 +8441,7 @@ const u32 sched_prio_to_wmult[40] = {
  /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
 };
 
-#undef CREATE_TRACE_POINTS
+void call_trace_sched_update_nr_running(struct rq *rq, int count)
+{
+        trace_sched_update_nr_running_tp(rq, count);
+}
index 5cc4012..8cb06c8 100644 (file)
@@ -121,6 +121,30 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 
        if (later_mask &&
            cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
+               unsigned long cap, max_cap = 0;
+               int cpu, max_cpu = -1;
+
+               if (!static_branch_unlikely(&sched_asym_cpucapacity))
+                       return 1;
+
+               /* Ensure the capacity of the CPUs fits the task. */
+               for_each_cpu(cpu, later_mask) {
+                       if (!dl_task_fits_capacity(p, cpu)) {
+                               cpumask_clear_cpu(cpu, later_mask);
+
+                               cap = capacity_orig_of(cpu);
+
+                               if (cap > max_cap ||
+                                   (cpu == task_cpu(p) && cap == max_cap)) {
+                                       max_cap = cap;
+                                       max_cpu = cpu;
+                               }
+                       }
+               }
+
+               if (cpumask_empty(later_mask))
+                       cpumask_set_cpu(max_cpu, later_mask);
+
                return 1;
        } else {
                int best_cpu = cpudl_maximum(cp);
index 7fbaee2..dc6835b 100644 (file)
@@ -210,7 +210,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
        unsigned long dl_util, util, irq;
        struct rq *rq = cpu_rq(cpu);
 
-       if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) &&
+       if (!uclamp_is_used() &&
            type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
                return max;
        }
index ff9435d..5a55d23 100644 (file)
@@ -519,50 +519,6 @@ void account_idle_ticks(unsigned long ticks)
        account_idle_time(cputime);
 }
 
-/*
- * Perform (stime * rtime) / total, but avoid multiplication overflow by
- * losing precision when the numbers are big.
- */
-static u64 scale_stime(u64 stime, u64 rtime, u64 total)
-{
-       u64 scaled;
-
-       for (;;) {
-               /* Make sure "rtime" is the bigger of stime/rtime */
-               if (stime > rtime)
-                       swap(rtime, stime);
-
-               /* Make sure 'total' fits in 32 bits */
-               if (total >> 32)
-                       goto drop_precision;
-
-               /* Does rtime (and thus stime) fit in 32 bits? */
-               if (!(rtime >> 32))
-                       break;
-
-               /* Can we just balance rtime/stime rather than dropping bits? */
-               if (stime >> 31)
-                       goto drop_precision;
-
-               /* We can grow stime and shrink rtime and try to make them both fit */
-               stime <<= 1;
-               rtime >>= 1;
-               continue;
-
-drop_precision:
-               /* We drop from rtime, it has more bits than stime */
-               rtime >>= 1;
-               total >>= 1;
-       }
-
-       /*
-        * Make sure gcc understands that this is a 32x32->64 multiply,
-        * followed by a 64/32->64 divide.
-        */
-       scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
-       return scaled;
-}
-
 /*
  * Adjust tick based cputime random precision against scheduler runtime
  * accounting.
@@ -622,7 +578,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
                goto update;
        }
 
-       stime = scale_stime(stime, rtime, stime + utime);
+       stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);
 
 update:
        /*
index f63f337..3862a28 100644 (file)
@@ -54,15 +54,49 @@ static inline struct dl_bw *dl_bw_of(int i)
 static inline int dl_bw_cpus(int i)
 {
        struct root_domain *rd = cpu_rq(i)->rd;
-       int cpus = 0;
+       int cpus;
 
        RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
                         "sched RCU must be held");
+
+       if (cpumask_subset(rd->span, cpu_active_mask))
+               return cpumask_weight(rd->span);
+
+       cpus = 0;
+
        for_each_cpu_and(i, rd->span, cpu_active_mask)
                cpus++;
 
        return cpus;
 }
+
+static inline unsigned long __dl_bw_capacity(int i)
+{
+       struct root_domain *rd = cpu_rq(i)->rd;
+       unsigned long cap = 0;
+
+       RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+                        "sched RCU must be held");
+
+       for_each_cpu_and(i, rd->span, cpu_active_mask)
+               cap += capacity_orig_of(i);
+
+       return cap;
+}
+
+/*
+ * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
+ * of the CPU the task is running on rather than rd's \Sum CPU capacity.
+ */
+static inline unsigned long dl_bw_capacity(int i)
+{
+       if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
+           capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
+               return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
+       } else {
+               return __dl_bw_capacity(i);
+       }
+}
 #else
 static inline struct dl_bw *dl_bw_of(int i)
 {
@@ -73,6 +107,11 @@ static inline int dl_bw_cpus(int i)
 {
        return 1;
 }
+
+static inline unsigned long dl_bw_capacity(int i)
+{
+       return SCHED_CAPACITY_SCALE;
+}
 #endif
 
 static inline
@@ -1098,7 +1137,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
  * cannot use the runtime, and so it replenishes the task. This rule
  * works fine for implicit deadline tasks (deadline == period), and the
  * CBS was designed for implicit deadline tasks. However, a task with
- * constrained deadline (deadine < period) might be awakened after the
+ * constrained deadline (deadline < period) might be awakened after the
  * deadline, but before the next period. In this case, replenishing the
  * task would allow it to run for runtime / deadline. As in this case
  * deadline < period, CBS enables a task to run for more than the
@@ -1604,6 +1643,7 @@ static int
 select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
        struct task_struct *curr;
+       bool select_rq;
        struct rq *rq;
 
        if (sd_flag != SD_BALANCE_WAKE)
@@ -1623,10 +1663,19 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
         * other hand, if it has a shorter deadline, we
         * try to make it stay here, it might be important.
         */
-       if (unlikely(dl_task(curr)) &&
-           (curr->nr_cpus_allowed < 2 ||
-            !dl_entity_preempt(&p->dl, &curr->dl)) &&
-           (p->nr_cpus_allowed > 1)) {
+       select_rq = unlikely(dl_task(curr)) &&
+                   (curr->nr_cpus_allowed < 2 ||
+                    !dl_entity_preempt(&p->dl, &curr->dl)) &&
+                   p->nr_cpus_allowed > 1;
+
+       /*
+        * Take the capacity of the CPU into account to
+        * ensure it fits the requirement of the task.
+        */
+       if (static_branch_unlikely(&sched_asym_cpucapacity))
+               select_rq |= !dl_task_fits_capacity(p, cpu);
+
+       if (select_rq) {
                int target = find_later_rq(p);
 
                if (target != -1 &&
@@ -2430,8 +2479,8 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
        }
 }
 
-const struct sched_class dl_sched_class = {
-       .next                   = &rt_sched_class,
+const struct sched_class dl_sched_class
+       __attribute__((section("__dl_sched_class"))) = {
        .enqueue_task           = enqueue_task_dl,
        .dequeue_task           = dequeue_task_dl,
        .yield_task             = yield_task_dl,
@@ -2551,11 +2600,12 @@ void sched_dl_do_global(void)
 int sched_dl_overflow(struct task_struct *p, int policy,
                      const struct sched_attr *attr)
 {
-       struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
        u64 period = attr->sched_period ?: attr->sched_deadline;
        u64 runtime = attr->sched_runtime;
        u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
-       int cpus, err = -1;
+       int cpus, err = -1, cpu = task_cpu(p);
+       struct dl_bw *dl_b = dl_bw_of(cpu);
+       unsigned long cap;
 
        if (attr->sched_flags & SCHED_FLAG_SUGOV)
                return 0;
@@ -2570,15 +2620,17 @@ int sched_dl_overflow(struct task_struct *p, int policy,
         * allocated bandwidth of the container.
         */
        raw_spin_lock(&dl_b->lock);
-       cpus = dl_bw_cpus(task_cpu(p));
+       cpus = dl_bw_cpus(cpu);
+       cap = dl_bw_capacity(cpu);
+
        if (dl_policy(policy) && !task_has_dl_policy(p) &&
-           !__dl_overflow(dl_b, cpus, 0, new_bw)) {
+           !__dl_overflow(dl_b, cap, 0, new_bw)) {
                if (hrtimer_active(&p->dl.inactive_timer))
                        __dl_sub(dl_b, p->dl.dl_bw, cpus);
                __dl_add(dl_b, new_bw, cpus);
                err = 0;
        } else if (dl_policy(policy) && task_has_dl_policy(p) &&
-                  !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+                  !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
                /*
                 * XXX this is slightly incorrect: when the task
                 * utilization decreases, we should delay the total
@@ -2634,6 +2686,14 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
        attr->sched_flags = dl_se->flags;
 }
 
+/*
+ * Default limits for DL period; on the top end we guard against small util
+ * tasks still getting ridiculously long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100;     /* 100 us */
+
 /*
  * This function validates the new parameters of a -deadline task.
  * We ask for the deadline not being zero, and greater or equal
@@ -2646,6 +2706,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
  */
 bool __checkparam_dl(const struct sched_attr *attr)
 {
+       u64 period, max, min;
+
        /* special dl tasks don't actually use any parameter */
        if (attr->sched_flags & SCHED_FLAG_SUGOV)
                return true;
@@ -2669,12 +2731,21 @@ bool __checkparam_dl(const struct sched_attr *attr)
            attr->sched_period & (1ULL << 63))
                return false;
 
+       period = attr->sched_period;
+       if (!period)
+               period = attr->sched_deadline;
+
        /* runtime <= deadline <= period (if period != 0) */
-       if ((attr->sched_period != 0 &&
-            attr->sched_period < attr->sched_deadline) ||
+       if (period < attr->sched_deadline ||
            attr->sched_deadline < attr->sched_runtime)
                return false;
 
+       max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+       min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+       if (period < min || period > max)
+               return false;
+
        return true;
 }
 
@@ -2715,19 +2786,19 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
 #ifdef CONFIG_SMP
 int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
 {
+       unsigned long flags, cap;
        unsigned int dest_cpu;
        struct dl_bw *dl_b;
        bool overflow;
-       int cpus, ret;
-       unsigned long flags;
+       int ret;
 
        dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
 
        rcu_read_lock_sched();
        dl_b = dl_bw_of(dest_cpu);
        raw_spin_lock_irqsave(&dl_b->lock, flags);
-       cpus = dl_bw_cpus(dest_cpu);
-       overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
+       cap = dl_bw_capacity(dest_cpu);
+       overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
        if (overflow) {
                ret = -EBUSY;
        } else {
@@ -2737,6 +2808,8 @@ int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allo
                 * We will free resources in the source root_domain
                 * later on (see set_cpus_allowed_dl()).
                 */
+               int cpus = dl_bw_cpus(dest_cpu);
+
                __dl_add(dl_b, p->dl.dl_bw, cpus);
                ret = 0;
        }
@@ -2769,16 +2842,15 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
 
 bool dl_cpu_busy(unsigned int cpu)
 {
-       unsigned long flags;
+       unsigned long flags, cap;
        struct dl_bw *dl_b;
        bool overflow;
-       int cpus;
 
        rcu_read_lock_sched();
        dl_b = dl_bw_of(cpu);
        raw_spin_lock_irqsave(&dl_b->lock, flags);
-       cpus = dl_bw_cpus(cpu);
-       overflow = __dl_overflow(dl_b, cpus, 0, 0);
+       cap = dl_bw_capacity(cpu);
+       overflow = __dl_overflow(dl_b, cap, 0, 0);
        raw_spin_unlock_irqrestore(&dl_b->lock, flags);
        rcu_read_unlock_sched();
 
index 04fa8db..2ba8f23 100644 (file)
@@ -22,8 +22,6 @@
  */
 #include "sched.h"
 
-#include <trace/events/sched.h>
-
 /*
  * Targeted preemption latency for CPU-bound tasks:
  *
@@ -3094,7 +3092,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 
 #ifdef CONFIG_SMP
        do {
-               u32 divider = LOAD_AVG_MAX - 1024 + se->avg.period_contrib;
+               u32 divider = get_pelt_divider(&se->avg);
 
                se->avg.load_avg = div_u64(se_weight(se) * se->avg.load_sum, divider);
        } while (0);
@@ -3440,16 +3438,18 @@ static inline void
 update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
        long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
-       /*
-        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
-        * See ___update_load_avg() for details.
-        */
-       u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+       u32 divider;
 
        /* Nothing to update */
        if (!delta)
                return;
 
+       /*
+        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+        * See ___update_load_avg() for details.
+        */
+       divider = get_pelt_divider(&cfs_rq->avg);
+
        /* Set new sched_entity's utilization */
        se->avg.util_avg = gcfs_rq->avg.util_avg;
        se->avg.util_sum = se->avg.util_avg * divider;
@@ -3463,16 +3463,18 @@ static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
        long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
-       /*
-        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
-        * See ___update_load_avg() for details.
-        */
-       u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+       u32 divider;
 
        /* Nothing to update */
        if (!delta)
                return;
 
+       /*
+        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+        * See ___update_load_avg() for details.
+        */
+       divider = get_pelt_divider(&cfs_rq->avg);
+
        /* Set new sched_entity's runnable */
        se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
        se->avg.runnable_sum = se->avg.runnable_avg * divider;
@@ -3500,7 +3502,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
         * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
         * See ___update_load_avg() for details.
         */
-       divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+       divider = get_pelt_divider(&cfs_rq->avg);
 
        if (runnable_sum >= 0) {
                /*
@@ -3646,7 +3648,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
        if (cfs_rq->removed.nr) {
                unsigned long r;
-               u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
+               u32 divider = get_pelt_divider(&cfs_rq->avg);
 
                raw_spin_lock(&cfs_rq->removed.lock);
                swap(cfs_rq->removed.util_avg, removed_util);
@@ -3701,7 +3703,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
         * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
         * See ___update_load_avg() for details.
         */
-       u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+       u32 divider = get_pelt_divider(&cfs_rq->avg);
 
        /*
         * When we attach the @se to the @cfs_rq, we must align the decay
@@ -3922,6 +3924,8 @@ static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
        enqueued  = cfs_rq->avg.util_est.enqueued;
        enqueued += _task_util_est(p);
        WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
+
+       trace_sched_util_est_cfs_tp(cfs_rq);
 }
 
 /*
@@ -3952,6 +3956,8 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
        ue.enqueued -= min_t(unsigned int, ue.enqueued, _task_util_est(p));
        WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
 
+       trace_sched_util_est_cfs_tp(cfs_rq);
+
        /*
         * Skip update of task's estimated utilization when the task has not
         * yet completed an activation, e.g. being migrated.
@@ -4017,6 +4023,8 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
        ue.ewma >>= UTIL_EST_WEIGHT_SHIFT;
 done:
        WRITE_ONCE(p->se.avg.util_est, ue);
+
+       trace_sched_util_est_se_tp(&p->se);
 }
 
 static inline int task_fits_capacity(struct task_struct *p, long capacity)
@@ -5618,14 +5626,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
        }
 
-dequeue_throttle:
-       if (!se)
-               sub_nr_running(rq, 1);
+       /* At this point se is NULL and we are at root level */
+       sub_nr_running(rq, 1);
 
        /* balance early to pull high priority tasks */
        if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
                rq->next_balance = jiffies;
 
+dequeue_throttle:
        util_est_dequeue(&rq->cfs, p, task_sleep);
        hrtick_update(rq);
 }
@@ -7161,7 +7169,7 @@ static void yield_task_fair(struct rq *rq)
        set_skip_buddy(se);
 }
 
-static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
+static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
 {
        struct sched_entity *se = &p->se;
 
@@ -8049,7 +8057,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
        };
 }
 
-static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
+static unsigned long scale_rt_capacity(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
        unsigned long max = arch_scale_cpu_capacity(cpu);
@@ -8081,7 +8089,7 @@ static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
 
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
-       unsigned long capacity = scale_rt_capacity(sd, cpu);
+       unsigned long capacity = scale_rt_capacity(cpu);
        struct sched_group *sdg = sd->groups;
 
        cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
@@ -8703,8 +8711,14 @@ static bool update_pick_idlest(struct sched_group *idlest,
 
        case group_has_spare:
                /* Select group with most idle CPUs */
-               if (idlest_sgs->idle_cpus >= sgs->idle_cpus)
+               if (idlest_sgs->idle_cpus > sgs->idle_cpus)
+                       return false;
+
+               /* Select group with lowest group_util */
+               if (idlest_sgs->idle_cpus == sgs->idle_cpus &&
+                       idlest_sgs->group_util <= sgs->group_util)
                        return false;
+
                break;
        }
 
@@ -10027,7 +10041,12 @@ static void kick_ilb(unsigned int flags)
 {
        int ilb_cpu;
 
-       nohz.next_balance++;
+       /*
+        * Increase nohz.next_balance only if a full ilb is triggered, but
+        * not if we only update stats.
+        */
+       if (flags & NOHZ_BALANCE_KICK)
+               nohz.next_balance = jiffies+1;
 
        ilb_cpu = find_new_ilb();
 
@@ -10348,6 +10367,14 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
                }
        }
 
+       /*
+        * next_balance will be updated only when there is a need.
+        * When the CPU is attached to null domain for ex, it will not be
+        * updated.
+        */
+       if (likely(update_next_balance))
+               nohz.next_balance = next_balance;
+
        /* Newly idle CPU doesn't need an update */
        if (idle != CPU_NEWLY_IDLE) {
                update_blocked_averages(this_cpu);
@@ -10368,14 +10395,6 @@ abort:
        if (has_blocked_load)
                WRITE_ONCE(nohz.has_blocked, 1);
 
-       /*
-        * next_balance will be updated only when there is a need.
-        * When the CPU is attached to null domain for ex, it will not be
-        * updated.
-        */
-       if (likely(update_next_balance))
-               nohz.next_balance = next_balance;
-
        return ret;
 }
 
@@ -11118,8 +11137,8 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
 /*
  * All the scheduling class methods:
  */
-const struct sched_class fair_sched_class = {
-       .next                   = &idle_sched_class,
+const struct sched_class fair_sched_class
+       __attribute__((section("__fair_sched_class"))) = {
        .enqueue_task           = enqueue_task_fair,
        .dequeue_task           = dequeue_task_fair,
        .yield_task             = yield_task_fair,
@@ -11292,3 +11311,9 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
 #endif
 }
 EXPORT_SYMBOL_GPL(sched_trace_rd_span);
+
+int sched_trace_rq_nr_running(struct rq *rq)
+{
+        return rq ? rq->nr_running : -1;
+}
+EXPORT_SYMBOL_GPL(sched_trace_rq_nr_running);
index 1ae95b9..6bf3498 100644 (file)
@@ -453,11 +453,6 @@ prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
        BUG();
 }
 
-static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
-{
-       return 0;
-}
-
 static void update_curr_idle(struct rq *rq)
 {
 }
@@ -465,8 +460,8 @@ static void update_curr_idle(struct rq *rq)
 /*
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
-const struct sched_class idle_sched_class = {
-       /* .next is NULL */
+const struct sched_class idle_sched_class
+       __attribute__((section("__idle_sched_class"))) = {
        /* no enqueue/yield_task for idle tasks */
 
        /* dequeue is not valid, we print a debug message there: */
@@ -486,8 +481,6 @@ const struct sched_class idle_sched_class = {
 
        .task_tick              = task_tick_idle,
 
-       .get_rr_interval        = get_rr_interval_idle,
-
        .prio_changed           = prio_changed_idle,
        .switched_to            = switched_to_idle,
        .update_curr            = update_curr_idle,
index 808244f..5a6ea03 100644 (file)
@@ -140,7 +140,8 @@ static int __init housekeeping_nohz_full_setup(char *str)
 {
        unsigned int flags;
 
-       flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
+       flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU |
+               HK_FLAG_MISC | HK_FLAG_KTHREAD;
 
        return housekeeping_setup(str, flags);
 }
index de22da6..d2a6556 100644 (file)
@@ -347,7 +347,7 @@ static inline void calc_global_nohz(void) { }
  *
  * Called from the global timer code.
  */
-void calc_global_load(unsigned long ticks)
+void calc_global_load(void)
 {
        unsigned long sample_window;
        long active, delta;
index b4b1ff9..2c613e1 100644 (file)
@@ -28,8 +28,6 @@
 #include "sched.h"
 #include "pelt.h"
 
-#include <trace/events/sched.h>
-
 /*
  * Approximate:
  *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
@@ -83,8 +81,6 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
        return c1 + c2 + c3;
 }
 
-#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
-
 /*
  * Accumulate the three separate parts of the sum; d1 the remainder
  * of the last (incomplete) period, d2 the span of full periods and d3
@@ -264,7 +260,7 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
 static __always_inline void
 ___update_load_avg(struct sched_avg *sa, unsigned long load)
 {
-       u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
+       u32 divider = get_pelt_divider(sa);
 
        /*
         * Step 2: update *_avg.
index eb034d9..795e43e 100644 (file)
@@ -37,6 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
 }
 #endif
 
+static inline u32 get_pelt_divider(struct sched_avg *avg)
+{
+       return LOAD_AVG_MAX - 1024 + avg->period_contrib;
+}
+
 /*
  * When a task is dequeued, its estimated utilization should not be update if
  * its util_avg has not been updated at least once.
index 8f45cdb..e53b711 100644 (file)
@@ -190,7 +190,6 @@ static void group_init(struct psi_group *group)
        INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
        mutex_init(&group->avgs_lock);
        /* Init trigger-related members */
-       atomic_set(&group->poll_scheduled, 0);
        mutex_init(&group->trigger_lock);
        INIT_LIST_HEAD(&group->triggers);
        memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
@@ -199,7 +198,7 @@ static void group_init(struct psi_group *group)
        memset(group->polling_total, 0, sizeof(group->polling_total));
        group->polling_next_update = ULLONG_MAX;
        group->polling_until = 0;
-       rcu_assign_pointer(group->poll_kworker, NULL);
+       rcu_assign_pointer(group->poll_task, NULL);
 }
 
 void __init psi_init(void)
@@ -547,47 +546,38 @@ static u64 update_triggers(struct psi_group *group, u64 now)
        return now + group->poll_min_period;
 }
 
-/*
- * Schedule polling if it's not already scheduled. It's safe to call even from
- * hotpath because even though kthread_queue_delayed_work takes worker->lock
- * spinlock that spinlock is never contended due to poll_scheduled atomic
- * preventing such competition.
- */
+/* Schedule polling if it's not already scheduled. */
 static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
 {
-       struct kthread_worker *kworker;
+       struct task_struct *task;
 
-       /* Do not reschedule if already scheduled */
-       if (atomic_cmpxchg(&group->poll_scheduled, 0, 1) != 0)
+       /*
+        * Do not reschedule if already scheduled.
+        * Possible race with a timer scheduled after this check but before
+        * mod_timer below can be tolerated because group->polling_next_update
+        * will keep updates on schedule.
+        */
+       if (timer_pending(&group->poll_timer))
                return;
 
        rcu_read_lock();
 
-       kworker = rcu_dereference(group->poll_kworker);
+       task = rcu_dereference(group->poll_task);
        /*
         * kworker might be NULL in case psi_trigger_destroy races with
         * psi_task_change (hotpath) which can't use locks
         */
-       if (likely(kworker))
-               kthread_queue_delayed_work(kworker, &group->poll_work, delay);
-       else
-               atomic_set(&group->poll_scheduled, 0);
+       if (likely(task))
+               mod_timer(&group->poll_timer, jiffies + delay);
 
        rcu_read_unlock();
 }
 
-static void psi_poll_work(struct kthread_work *work)
+static void psi_poll_work(struct psi_group *group)
 {
-       struct kthread_delayed_work *dwork;
-       struct psi_group *group;
        u32 changed_states;
        u64 now;
 
-       dwork = container_of(work, struct kthread_delayed_work, work);
-       group = container_of(dwork, struct psi_group, poll_work);
-
-       atomic_set(&group->poll_scheduled, 0);
-
        mutex_lock(&group->trigger_lock);
 
        now = sched_clock();
@@ -623,6 +613,35 @@ out:
        mutex_unlock(&group->trigger_lock);
 }
 
+static int psi_poll_worker(void *data)
+{
+       struct psi_group *group = (struct psi_group *)data;
+       struct sched_param param = {
+               .sched_priority = 1,
+       };
+
+       sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+
+       while (true) {
+               wait_event_interruptible(group->poll_wait,
+                               atomic_cmpxchg(&group->poll_wakeup, 1, 0) ||
+                               kthread_should_stop());
+               if (kthread_should_stop())
+                       break;
+
+               psi_poll_work(group);
+       }
+       return 0;
+}
+
+static void poll_timer_fn(struct timer_list *t)
+{
+       struct psi_group *group = from_timer(group, t, poll_timer);
+
+       atomic_set(&group->poll_wakeup, 1);
+       wake_up_interruptible(&group->poll_wait);
+}
+
 static void record_times(struct psi_group_cpu *groupc, int cpu,
                         bool memstall_tick)
 {
@@ -1099,22 +1118,20 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 
        mutex_lock(&group->trigger_lock);
 
-       if (!rcu_access_pointer(group->poll_kworker)) {
-               struct sched_param param = {
-                       .sched_priority = 1,
-               };
-               struct kthread_worker *kworker;
+       if (!rcu_access_pointer(group->poll_task)) {
+               struct task_struct *task;
 
-               kworker = kthread_create_worker(0, "psimon");
-               if (IS_ERR(kworker)) {
+               task = kthread_create(psi_poll_worker, group, "psimon");
+               if (IS_ERR(task)) {
                        kfree(t);
                        mutex_unlock(&group->trigger_lock);
-                       return ERR_CAST(kworker);
+                       return ERR_CAST(task);
                }
-               sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
-               kthread_init_delayed_work(&group->poll_work,
-                               psi_poll_work);
-               rcu_assign_pointer(group->poll_kworker, kworker);
+               atomic_set(&group->poll_wakeup, 0);
+               init_waitqueue_head(&group->poll_wait);
+               wake_up_process(task);
+               timer_setup(&group->poll_timer, poll_timer_fn, 0);
+               rcu_assign_pointer(group->poll_task, task);
        }
 
        list_add(&t->node, &group->triggers);
@@ -1132,7 +1149,7 @@ static void psi_trigger_destroy(struct kref *ref)
 {
        struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
        struct psi_group *group = t->group;
-       struct kthread_worker *kworker_to_destroy = NULL;
+       struct task_struct *task_to_destroy = NULL;
 
        if (static_branch_likely(&psi_disabled))
                return;
@@ -1158,13 +1175,13 @@ static void psi_trigger_destroy(struct kref *ref)
                        period = min(period, div_u64(tmp->win.size,
                                        UPDATES_PER_WINDOW));
                group->poll_min_period = period;
-               /* Destroy poll_kworker when the last trigger is destroyed */
+               /* Destroy poll_task when the last trigger is destroyed */
                if (group->poll_states == 0) {
                        group->polling_until = 0;
-                       kworker_to_destroy = rcu_dereference_protected(
-                                       group->poll_kworker,
+                       task_to_destroy = rcu_dereference_protected(
+                                       group->poll_task,
                                        lockdep_is_held(&group->trigger_lock));
-                       rcu_assign_pointer(group->poll_kworker, NULL);
+                       rcu_assign_pointer(group->poll_task, NULL);
                }
        }
 
@@ -1172,25 +1189,23 @@ static void psi_trigger_destroy(struct kref *ref)
 
        /*
         * Wait for both *trigger_ptr from psi_trigger_replace and
-        * poll_kworker RCUs to complete their read-side critical sections
-        * before destroying the trigger and optionally the poll_kworker
+        * poll_task RCUs to complete their read-side critical sections
+        * before destroying the trigger and optionally the poll_task
         */
        synchronize_rcu();
        /*
         * Destroy the kworker after releasing trigger_lock to prevent a
         * deadlock while waiting for psi_poll_work to acquire trigger_lock
         */
-       if (kworker_to_destroy) {
+       if (task_to_destroy) {
                /*
                 * After the RCU grace period has expired, the worker
-                * can no longer be found through group->poll_kworker.
+                * can no longer be found through group->poll_task.
                 * But it might have been already scheduled before
                 * that - deschedule it cleanly before destroying it.
                 */
-               kthread_cancel_delayed_work_sync(&group->poll_work);
-               atomic_set(&group->poll_scheduled, 0);
-
-               kthread_destroy_worker(kworker_to_destroy);
+               del_timer_sync(&group->poll_timer);
+               kthread_stop(task_to_destroy);
        }
        kfree(t);
 }
index f395ddb..f215eea 100644 (file)
@@ -2429,8 +2429,8 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
                return 0;
 }
 
-const struct sched_class rt_sched_class = {
-       .next                   = &fair_sched_class,
+const struct sched_class rt_sched_class
+       __attribute__((section("__rt_sched_class"))) = {
        .enqueue_task           = enqueue_task_rt,
        .dequeue_task           = dequeue_task_rt,
        .yield_task             = yield_task_rt,
index 877fb08..3fd2838 100644 (file)
@@ -67,6 +67,7 @@
 #include <linux/tsacct_kern.h>
 
 #include <asm/tlb.h>
+#include <asm-generic/vmlinux.lds.h>
 
 #ifdef CONFIG_PARAVIRT
 # include <asm/paravirt.h>
@@ -75,6 +76,8 @@
 #include "cpupri.h"
 #include "cpudeadline.h"
 
+#include <trace/events/sched.h>
+
 #ifdef CONFIG_SCHED_DEBUG
 # define SCHED_WARN_ON(x)      WARN_ONCE(x, #x)
 #else
@@ -96,6 +99,7 @@ extern atomic_long_t calc_load_tasks;
 extern void calc_global_load_tick(struct rq *this_rq);
 extern long calc_load_fold_active(struct rq *this_rq, long adjust);
 
+extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
 /*
  * Helpers for converting nanosecond timing to jiffy resolution
  */
@@ -310,11 +314,26 @@ void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
        __dl_update(dl_b, -((s32)tsk_bw / cpus));
 }
 
-static inline
-bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
+static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap,
+                                u64 old_bw, u64 new_bw)
 {
        return dl_b->bw != -1 &&
-              dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
+              cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
+}
+
+/*
+ * Verify the fitness of task @p to run on @cpu taking into account the
+ * CPU original capacity and the runtime/deadline ratio of the task.
+ *
+ * The function will return true if the CPU original capacity of the
+ * @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the
+ * task and false otherwise.
+ */
+static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
+{
+       unsigned long cap = arch_scale_cpu_capacity(cpu);
+
+       return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
 }
 
 extern void init_dl_bw(struct dl_bw *dl_b);
@@ -862,6 +881,8 @@ struct uclamp_rq {
        unsigned int value;
        struct uclamp_bucket bucket[UCLAMP_BUCKETS];
 };
+
+DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
 #endif /* CONFIG_UCLAMP_TASK */
 
 /*
@@ -1182,6 +1203,16 @@ struct rq_flags {
 #endif
 };
 
+/*
+ * Lockdep annotation that avoids accidental unlocks; it's like a
+ * sticky/continuous lockdep_assert_held().
+ *
+ * This avoids code that has access to 'struct rq *rq' (basically everything in
+ * the scheduler) from accidentally unlocking the rq if they do not also have a
+ * copy of the (on-stack) 'struct rq_flags rf'.
+ *
+ * Also see Documentation/locking/lockdep-design.rst.
+ */
 static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
 {
        rf->cookie = lockdep_pin_lock(&rq->lock);
@@ -1739,7 +1770,6 @@ extern const u32          sched_prio_to_wmult[40];
 #define RETRY_TASK             ((void *)-1UL)
 
 struct sched_class {
-       const struct sched_class *next;
 
 #ifdef CONFIG_UCLAMP_TASK
        int uclamp_enabled;
@@ -1748,7 +1778,7 @@ struct sched_class {
        void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
        void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
        void (*yield_task)   (struct rq *rq);
-       bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
+       bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
 
        void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
 
@@ -1796,7 +1826,7 @@ struct sched_class {
 #ifdef CONFIG_FAIR_GROUP_SCHED
        void (*task_change_group)(struct task_struct *p, int type);
 #endif
-};
+} __aligned(STRUCT_ALIGNMENT); /* STRUCT_ALIGN(), vmlinux.lds.h */
 
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
@@ -1810,17 +1840,18 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
        next->sched_class->set_next_task(rq, next, false);
 }
 
-#ifdef CONFIG_SMP
-#define sched_class_highest (&stop_sched_class)
-#else
-#define sched_class_highest (&dl_sched_class)
-#endif
+/* Defined in include/asm-generic/vmlinux.lds.h */
+extern struct sched_class __begin_sched_classes[];
+extern struct sched_class __end_sched_classes[];
+
+#define sched_class_highest (__end_sched_classes - 1)
+#define sched_class_lowest  (__begin_sched_classes - 1)
 
 #define for_class_range(class, _from, _to) \
-       for (class = (_from); class != (_to); class = class->next)
+       for (class = (_from); class != (_to); class--)
 
 #define for_each_class(class) \
-       for_class_range(class, sched_class_highest, NULL)
+       for_class_range(class, sched_class_highest, sched_class_lowest)
 
 extern const struct sched_class stop_sched_class;
 extern const struct sched_class dl_sched_class;
@@ -1930,12 +1961,7 @@ extern int __init sched_tick_offload_init(void);
  */
 static inline void sched_update_tick_dependency(struct rq *rq)
 {
-       int cpu;
-
-       if (!tick_nohz_full_enabled())
-               return;
-
-       cpu = cpu_of(rq);
+       int cpu = cpu_of(rq);
 
        if (!tick_nohz_full_cpu(cpu))
                return;
@@ -1955,6 +1981,9 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
        unsigned prev_nr = rq->nr_running;
 
        rq->nr_running = prev_nr + count;
+       if (trace_sched_update_nr_running_tp_enabled()) {
+               call_trace_sched_update_nr_running(rq, count);
+       }
 
 #ifdef CONFIG_SMP
        if (prev_nr < 2 && rq->nr_running >= 2) {
@@ -1969,6 +1998,10 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
        rq->nr_running -= count;
+       if (trace_sched_update_nr_running_tp_enabled()) {
+               call_trace_sched_update_nr_running(rq, count);
+       }
+
        /* Check if we still need preemption */
        sched_update_tick_dependency(rq);
 }
@@ -2016,6 +2049,16 @@ void arch_scale_freq_tick(void)
 #endif
 
 #ifndef arch_scale_freq_capacity
+/**
+ * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
+ * @cpu: the CPU in question.
+ *
+ * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
+ *
+ *     f_curr
+ *     ------ * SCHED_CAPACITY_SCALE
+ *     f_max
+ */
 static __always_inline
 unsigned long arch_scale_freq_capacity(int cpu)
 {
@@ -2349,12 +2392,35 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #ifdef CONFIG_UCLAMP_TASK
 unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
 
+/**
+ * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
+ * @rq:                The rq to clamp against. Must not be NULL.
+ * @util:      The util value to clamp.
+ * @p:         The task to clamp against. Can be NULL if you want to clamp
+ *             against @rq only.
+ *
+ * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
+ *
+ * If sched_uclamp_used static key is disabled, then just return the util
+ * without any clamping since uclamp aggregation at the rq level in the fast
+ * path is disabled, rendering this operation a NOP.
+ *
+ * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
+ * will return the correct effective uclamp value of the task even if the
+ * static key is disabled.
+ */
 static __always_inline
 unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
                                  struct task_struct *p)
 {
-       unsigned long min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
-       unsigned long max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
+       unsigned long min_util;
+       unsigned long max_util;
+
+       if (!static_branch_likely(&sched_uclamp_used))
+               return util;
+
+       min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
+       max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
 
        if (p) {
                min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
@@ -2371,6 +2437,19 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 
        return clamp(util, min_util, max_util);
 }
+
+/*
+ * When uclamp is compiled in, the aggregation at rq level is 'turned off'
+ * by default in the fast path and only gets turned on once userspace performs
+ * an operation that requires it.
+ *
+ * Returns true if userspace opted-in to use uclamp and aggregation at rq level
+ * hence is active.
+ */
+static inline bool uclamp_is_used(void)
+{
+       return static_branch_likely(&sched_uclamp_used);
+}
 #else /* CONFIG_UCLAMP_TASK */
 static inline
 unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
@@ -2378,6 +2457,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 {
        return util;
 }
+
+static inline bool uclamp_is_used(void)
+{
+       return false;
+}
 #endif /* CONFIG_UCLAMP_TASK */
 
 #ifdef arch_scale_freq_capacity
index 4c9e997..394bc81 100644 (file)
@@ -102,12 +102,6 @@ prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
        BUG(); /* how!?, what priority? */
 }
 
-static unsigned int
-get_rr_interval_stop(struct rq *rq, struct task_struct *task)
-{
-       return 0;
-}
-
 static void update_curr_stop(struct rq *rq)
 {
 }
@@ -115,8 +109,8 @@ static void update_curr_stop(struct rq *rq)
 /*
  * Simple, special scheduling class for the per-CPU stop tasks:
  */
-const struct sched_class stop_sched_class = {
-       .next                   = &dl_sched_class,
+const struct sched_class stop_sched_class
+       __attribute__((section("__stop_sched_class"))) = {
 
        .enqueue_task           = enqueue_task_stop,
        .dequeue_task           = dequeue_task_stop,
@@ -136,8 +130,6 @@ const struct sched_class stop_sched_class = {
 
        .task_tick              = task_tick_stop,
 
-       .get_rr_interval        = get_rr_interval_stop,
-
        .prio_changed           = prio_changed_stop,
        .switched_to            = switched_to_stop,
        .update_curr            = update_curr_stop,
index ba81187..9079d86 100644 (file)
@@ -1328,7 +1328,7 @@ sd_init(struct sched_domain_topology_level *tl,
                sd_flags = (*tl->sd_flags)();
        if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
                        "wrong sd_flags in topology description\n"))
-               sd_flags &= ~TOPOLOGY_SD_FLAGS;
+               sd_flags &= TOPOLOGY_SD_FLAGS;
 
        /* Apply detected topology flags */
        sd_flags |= dflags;
index ba059fb..01f5d30 100644 (file)
@@ -389,7 +389,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
        int ret = default_wake_function(wq_entry, mode, sync, key);
 
        if (ret)
-               list_del_init(&wq_entry->entry);
+               list_del_init_careful(&wq_entry->entry);
 
        return ret;
 }
index ee22ec7..6f16f7c 100644 (file)
@@ -719,7 +719,7 @@ static int dequeue_synchronous_signal(kernel_siginfo_t *info)
         * Return the first synchronous signal in the queue.
         */
        list_for_each_entry(q, &pending->list, list) {
-               /* Synchronous signals have a postive si_code */
+               /* Synchronous signals have a positive si_code */
                if ((q->info.si_code > SI_USER) &&
                    (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) {
                        sync = q;
index aa17eed..d0ae8eb 100644 (file)
@@ -634,8 +634,7 @@ static int __init nrcpus(char *str)
 {
        int nr_cpus;
 
-       get_option(&str, &nr_cpus);
-       if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
+       if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
                nr_cpu_ids = nr_cpus;
 
        return 0;
index c4201b7..5e9aaa6 100644 (file)
@@ -107,6 +107,12 @@ static bool ksoftirqd_running(unsigned long pending)
  * where hardirqs are disabled legitimately:
  */
 #ifdef CONFIG_TRACE_IRQFLAGS
+
+DEFINE_PER_CPU(int, hardirqs_enabled);
+DEFINE_PER_CPU(int, hardirq_context);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
+
 void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 {
        unsigned long flags;
@@ -224,7 +230,7 @@ static inline bool lockdep_softirq_start(void)
 {
        bool in_hardirq = false;
 
-       if (lockdep_hardirq_context(current)) {
+       if (lockdep_hardirq_context()) {
                in_hardirq = true;
                lockdep_hardirq_exit();
        }
index db1ce7a..1b4d2dc 100644 (file)
@@ -1779,6 +1779,20 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = sched_rt_handler,
        },
+       {
+               .procname       = "sched_deadline_period_max_us",
+               .data           = &sysctl_sched_dl_period_max,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "sched_deadline_period_min_us",
+               .data           = &sysctl_sched_dl_period_min,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        {
                .procname       = "sched_rr_timeslice_ms",
                .data           = &sysctl_sched_rr_timeslice,
@@ -1801,6 +1815,13 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = sysctl_sched_uclamp_handler,
        },
+       {
+               .procname       = "sched_util_clamp_min_rt_default",
+               .data           = &sysctl_sched_uclamp_util_min_rt_default,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = sysctl_sched_uclamp_handler,
+       },
 #endif
 #ifdef CONFIG_SCHED_AUTOGROUP
        {
index fa3f800..0deaf4b 100644 (file)
 
 #include "timekeeping.h"
 
-/**
- * struct clock_read_data - data required to read from sched_clock()
- *
- * @epoch_ns:          sched_clock() value at last update
- * @epoch_cyc:         Clock cycle value at last update.
- * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
- *                     clocks.
- * @read_sched_clock:  Current clock source (or dummy source when suspended).
- * @mult:              Multipler for scaled math conversion.
- * @shift:             Shift value for scaled math conversion.
- *
- * Care must be taken when updating this structure; it is read by
- * some very hot code paths. It occupies <=40 bytes and, when combined
- * with the seqcount used to synchronize access, comfortably fits into
- * a 64 byte cache line.
- */
-struct clock_read_data {
-       u64 epoch_ns;
-       u64 epoch_cyc;
-       u64 sched_clock_mask;
-       u64 (*read_sched_clock)(void);
-       u32 mult;
-       u32 shift;
-};
-
 /**
  * struct clock_data - all data needed for sched_clock() (including
  *                     registration of a new clock source)
@@ -93,6 +68,17 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
        return (cyc * mult) >> shift;
 }
 
+struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
+{
+       *seq = raw_read_seqcount_latch(&cd.seq);
+       return cd.read_data + (*seq & 1);
+}
+
+int sched_clock_read_retry(unsigned int seq)
+{
+       return read_seqcount_retry(&cd.seq, seq);
+}
+
 unsigned long long notrace sched_clock(void)
 {
        u64 cyc, res;
@@ -100,13 +86,12 @@ unsigned long long notrace sched_clock(void)
        struct clock_read_data *rd;
 
        do {
-               seq = raw_read_seqcount(&cd.seq);
-               rd = cd.read_data + (seq & 1);
+               rd = sched_clock_read_begin(&seq);
 
                cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
                      rd->sched_clock_mask;
                res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
-       } while (read_seqcount_retry(&cd.seq, seq));
+       } while (sched_clock_read_retry(seq));
 
        return res;
 }
index 3e2dc9b..f0199a4 100644 (file)
@@ -351,16 +351,24 @@ void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
 EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
 
 /*
- * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
- * per task timers.
+ * Set a per-task tick dependency. RCU needs this, as do posix CPU timers
+ * in order to elapse per-task timers.
  */
 void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
-       /*
-        * We could optimize this with just kicking the target running the task
-        * if that noise matters for nohz full users.
-        */
-       tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+       if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
+               if (tsk == current) {
+                       preempt_disable();
+                       tick_nohz_full_kick();
+                       preempt_enable();
+               } else {
+                       /*
+                        * Some future tick_nohz_full_kick_task()
+                        * should optimize this.
+                        */
+                       tick_nohz_full_kick_all();
+               }
+       }
 }
 EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
 
index d20d489..63a632f 100644 (file)
@@ -2193,7 +2193,7 @@ EXPORT_SYMBOL(ktime_get_coarse_ts64);
 void do_timer(unsigned long ticks)
 {
        jiffies_64 += ticks;
-       calc_global_load(ticks);
+       calc_global_load();
 }
 
 /**
index df1ff80..026ac01 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/sched/debug.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
+#include <linux/random.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -1742,6 +1743,13 @@ void update_process_times(int user_tick)
        scheduler_tick();
        if (IS_ENABLED(CONFIG_POSIX_TIMERS))
                run_posix_cpu_timers();
+
+       /* The current CPU might make use of net randoms without receiving IRQs
+        * to renew them often enough. Let's update the net_rand_state from a
+        * non-constant value that's not affine to the number of calls to make
+        * sure it's updated when there's some activity (we don't care in idle).
+        */
+       this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
 }
 
 /**
index a1a4148..1061492 100644 (file)
@@ -45,6 +45,9 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
 static bool disable_onoff_at_boot;
 module_param(disable_onoff_at_boot, bool, 0444);
 
+static bool ftrace_dump_at_shutdown;
+module_param(ftrace_dump_at_shutdown, bool, 0444);
+
 static char *torture_type;
 static int verbose;
 
@@ -527,7 +530,8 @@ static int torture_shutdown(void *arg)
                torture_shutdown_hook();
        else
                VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping.");
-       rcu_ftrace_dump(DUMP_ALL);
+       if (ftrace_dump_at_shutdown)
+               rcu_ftrace_dump(DUMP_ALL);
        kernel_power_off();     /* Shut down the system. */
        return 0;
 }
index 5ef0484..7ba62d6 100644 (file)
@@ -348,7 +348,7 @@ static int __blk_trace_remove(struct request_queue *q)
        struct blk_trace *bt;
 
        bt = rcu_replace_pointer(q->blk_trace, NULL,
-                                lockdep_is_held(&q->blk_trace_mutex));
+                                lockdep_is_held(&q->debugfs_mutex));
        if (!bt)
                return -EINVAL;
 
@@ -362,9 +362,9 @@ int blk_trace_remove(struct request_queue *q)
 {
        int ret;
 
-       mutex_lock(&q->blk_trace_mutex);
+       mutex_lock(&q->debugfs_mutex);
        ret = __blk_trace_remove(q);
-       mutex_unlock(&q->blk_trace_mutex);
+       mutex_unlock(&q->debugfs_mutex);
 
        return ret;
 }
@@ -483,12 +483,11 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
        struct dentry *dir = NULL;
        int ret;
 
+       lockdep_assert_held(&q->debugfs_mutex);
+
        if (!buts->buf_size || !buts->buf_nr)
                return -EINVAL;
 
-       if (!blk_debugfs_root)
-               return -ENOENT;
-
        strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
        buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
 
@@ -503,7 +502,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
         * we can be.
         */
        if (rcu_dereference_protected(q->blk_trace,
-                                     lockdep_is_held(&q->blk_trace_mutex))) {
+                                     lockdep_is_held(&q->debugfs_mutex))) {
                pr_warn("Concurrent blktraces are not allowed on %s\n",
                        buts->name);
                return -EBUSY;
@@ -522,12 +521,29 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
        if (!bt->msg_data)
                goto err;
 
-       ret = -ENOENT;
-
-       dir = debugfs_lookup(buts->name, blk_debugfs_root);
-       if (!dir)
+       /*
+        * When tracing the whole disk, reuse the existing debugfs directory
+        * created by the block layer on init. For partition block devices
+        * and scsi-generic block devices, we create a temporary new debugfs
+        * directory that will be removed once the trace ends.
+        */
+       if (bdev && bdev == bdev->bd_contains)
+               dir = q->debugfs_dir;
+       else
                bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
 
+       /*
+        * As blktrace relies on debugfs for its interface the debugfs directory
+        * is required, contrary to the usual mantra of not checking for debugfs
+        * files or directories.
+        */
+       if (IS_ERR_OR_NULL(dir)) {
+               pr_warn("debugfs_dir not present for %s so skipping\n",
+                       buts->name);
+               ret = -ENOENT;
+               goto err;
+       }
+
        bt->dev = dev;
        atomic_set(&bt->dropped, 0);
        INIT_LIST_HEAD(&bt->running_list);
@@ -563,8 +579,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 
        ret = 0;
 err:
-       if (dir && !bt->dir)
-               dput(dir);
        if (ret)
                blk_trace_free(bt);
        return ret;
@@ -597,9 +611,9 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 {
        int ret;
 
-       mutex_lock(&q->blk_trace_mutex);
+       mutex_lock(&q->debugfs_mutex);
        ret = __blk_trace_setup(q, name, dev, bdev, arg);
-       mutex_unlock(&q->blk_trace_mutex);
+       mutex_unlock(&q->debugfs_mutex);
 
        return ret;
 }
@@ -645,7 +659,7 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
        struct blk_trace *bt;
 
        bt = rcu_dereference_protected(q->blk_trace,
-                                      lockdep_is_held(&q->blk_trace_mutex));
+                                      lockdep_is_held(&q->debugfs_mutex));
        if (bt == NULL)
                return -EINVAL;
 
@@ -685,9 +699,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
 {
        int ret;
 
-       mutex_lock(&q->blk_trace_mutex);
+       mutex_lock(&q->debugfs_mutex);
        ret = __blk_trace_startstop(q, start);
-       mutex_unlock(&q->blk_trace_mutex);
+       mutex_unlock(&q->debugfs_mutex);
 
        return ret;
 }
@@ -716,7 +730,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
        if (!q)
                return -ENXIO;
 
-       mutex_lock(&q->blk_trace_mutex);
+       mutex_lock(&q->debugfs_mutex);
 
        switch (cmd) {
        case BLKTRACESETUP:
@@ -743,7 +757,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
                break;
        }
 
-       mutex_unlock(&q->blk_trace_mutex);
+       mutex_unlock(&q->debugfs_mutex);
        return ret;
 }
 
@@ -754,14 +768,14 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
  **/
 void blk_trace_shutdown(struct request_queue *q)
 {
-       mutex_lock(&q->blk_trace_mutex);
+       mutex_lock(&q->debugfs_mutex);
        if (rcu_dereference_protected(q->blk_trace,
-                                     lockdep_is_held(&q->blk_trace_mutex))) {
+                                     lockdep_is_held(&q->debugfs_mutex))) {
                __blk_trace_startstop(q, 0);
                __blk_trace_remove(q);
        }
 
-       mutex_unlock(&q->blk_trace_mutex);
+       mutex_unlock(&q->debugfs_mutex);
 }
 
 #ifdef CONFIG_BLK_CGROUP
@@ -846,6 +860,13 @@ static void blk_add_trace_rq_issue(void *ignore,
                         blk_trace_request_get_cgid(q, rq));
 }
 
+static void blk_add_trace_rq_merge(void *ignore,
+                                  struct request_queue *q, struct request *rq)
+{
+       blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE,
+                        blk_trace_request_get_cgid(q, rq));
+}
+
 static void blk_add_trace_rq_requeue(void *ignore,
                                     struct request_queue *q,
                                     struct request *rq)
@@ -1130,6 +1151,8 @@ static void blk_register_tracepoints(void)
        WARN_ON(ret);
        ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
        WARN_ON(ret);
+       ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
+       WARN_ON(ret);
        ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
        WARN_ON(ret);
        ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
@@ -1176,6 +1199,7 @@ static void blk_unregister_tracepoints(void)
        unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
        unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
        unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
+       unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
        unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
        unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
 
@@ -1642,7 +1666,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
        struct blk_trace *bt;
 
        bt = rcu_replace_pointer(q->blk_trace, NULL,
-                                lockdep_is_held(&q->blk_trace_mutex));
+                                lockdep_is_held(&q->debugfs_mutex));
        if (bt == NULL)
                return -EINVAL;
 
@@ -1817,10 +1841,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
        if (q == NULL)
                goto out_bdput;
 
-       mutex_lock(&q->blk_trace_mutex);
+       mutex_lock(&q->debugfs_mutex);
 
        bt = rcu_dereference_protected(q->blk_trace,
-                                      lockdep_is_held(&q->blk_trace_mutex));
+                                      lockdep_is_held(&q->debugfs_mutex));
        if (attr == &dev_attr_enable) {
                ret = sprintf(buf, "%u\n", !!bt);
                goto out_unlock_bdev;
@@ -1838,7 +1862,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
                ret = sprintf(buf, "%llu\n", bt->end_lba);
 
 out_unlock_bdev:
-       mutex_unlock(&q->blk_trace_mutex);
+       mutex_unlock(&q->debugfs_mutex);
 out_bdput:
        bdput(bdev);
 out:
@@ -1881,10 +1905,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
        if (q == NULL)
                goto out_bdput;
 
-       mutex_lock(&q->blk_trace_mutex);
+       mutex_lock(&q->debugfs_mutex);
 
        bt = rcu_dereference_protected(q->blk_trace,
-                                      lockdep_is_held(&q->blk_trace_mutex));
+                                      lockdep_is_held(&q->debugfs_mutex));
        if (attr == &dev_attr_enable) {
                if (!!value == !!bt) {
                        ret = 0;
@@ -1901,7 +1925,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
        if (bt == NULL) {
                ret = blk_trace_setup_queue(q, bdev);
                bt = rcu_dereference_protected(q->blk_trace,
-                               lockdep_is_held(&q->blk_trace_mutex));
+                               lockdep_is_held(&q->debugfs_mutex));
        }
 
        if (ret == 0) {
@@ -1916,7 +1940,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
        }
 
 out_unlock_bdev:
-       mutex_unlock(&q->blk_trace_mutex);
+       mutex_unlock(&q->debugfs_mutex);
 out_bdput:
        bdput(bdev);
 out:
index 1903b80..7206454 100644 (file)
@@ -2764,6 +2764,50 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops)
 {
 }
 
+/* List of ftrace_ops that have allocated trampolines */
+static LIST_HEAD(ftrace_ops_trampoline_list);
+
+static void ftrace_add_trampoline_to_kallsyms(struct ftrace_ops *ops)
+{
+       lockdep_assert_held(&ftrace_lock);
+       list_add_rcu(&ops->list, &ftrace_ops_trampoline_list);
+}
+
+static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops)
+{
+       lockdep_assert_held(&ftrace_lock);
+       list_del_rcu(&ops->list);
+}
+
+/*
+ * "__builtin__ftrace" is used as a module name in /proc/kallsyms for symbols
+ * for pages allocated for ftrace purposes, even though "__builtin__ftrace" is
+ * not a module.
+ */
+#define FTRACE_TRAMPOLINE_MOD "__builtin__ftrace"
+#define FTRACE_TRAMPOLINE_SYM "ftrace_trampoline"
+
+static void ftrace_trampoline_free(struct ftrace_ops *ops)
+{
+       if (ops && (ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP) &&
+           ops->trampoline) {
+               /*
+                * Record the text poke event before the ksymbol unregister
+                * event.
+                */
+               perf_event_text_poke((void *)ops->trampoline,
+                                    (void *)ops->trampoline,
+                                    ops->trampoline_size, NULL, 0);
+               perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
+                                  ops->trampoline, ops->trampoline_size,
+                                  true, FTRACE_TRAMPOLINE_SYM);
+               /* Remove from kallsyms after the perf events */
+               ftrace_remove_trampoline_from_kallsyms(ops);
+       }
+
+       arch_ftrace_trampoline_free(ops);
+}
+
 static void ftrace_startup_enable(int command)
 {
        if (saved_ftrace_func != ftrace_trace_function) {
@@ -2934,7 +2978,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
                        synchronize_rcu_tasks();
 
  free_ops:
-               arch_ftrace_trampoline_free(ops);
+               ftrace_trampoline_free(ops);
        }
 
        return 0;
@@ -6178,6 +6222,27 @@ struct ftrace_mod_map {
        unsigned int            num_funcs;
 };
 
+static int ftrace_get_trampoline_kallsym(unsigned int symnum,
+                                        unsigned long *value, char *type,
+                                        char *name, char *module_name,
+                                        int *exported)
+{
+       struct ftrace_ops *op;
+
+       list_for_each_entry_rcu(op, &ftrace_ops_trampoline_list, list) {
+               if (!op->trampoline || symnum--)
+                       continue;
+               *value = op->trampoline;
+               *type = 't';
+               strlcpy(name, FTRACE_TRAMPOLINE_SYM, KSYM_NAME_LEN);
+               strlcpy(module_name, FTRACE_TRAMPOLINE_MOD, MODULE_NAME_LEN);
+               *exported = 0;
+               return 0;
+       }
+
+       return -ERANGE;
+}
+
 #ifdef CONFIG_MODULES
 
 #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
@@ -6514,6 +6579,7 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
 {
        struct ftrace_mod_map *mod_map;
        struct ftrace_mod_func *mod_func;
+       int ret;
 
        preempt_disable();
        list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) {
@@ -6540,8 +6606,10 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
                WARN_ON(1);
                break;
        }
+       ret = ftrace_get_trampoline_kallsym(symnum, value, type, name,
+                                           module_name, exported);
        preempt_enable();
-       return -ERANGE;
+       return ret;
 }
 
 #else
@@ -6553,6 +6621,18 @@ allocate_ftrace_mod_map(struct module *mod,
 {
        return NULL;
 }
+int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
+                          char *type, char *name, char *module_name,
+                          int *exported)
+{
+       int ret;
+
+       preempt_disable();
+       ret = ftrace_get_trampoline_kallsym(symnum, value, type, name,
+                                           module_name, exported);
+       preempt_enable();
+       return ret;
+}
 #endif /* CONFIG_MODULES */
 
 struct ftrace_init_func {
@@ -6733,7 +6813,24 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 
 static void ftrace_update_trampoline(struct ftrace_ops *ops)
 {
+       unsigned long trampoline = ops->trampoline;
+
        arch_ftrace_update_trampoline(ops);
+       if (ops->trampoline && ops->trampoline != trampoline &&
+           (ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) {
+               /* Add to kallsyms before the perf events */
+               ftrace_add_trampoline_to_kallsyms(ops);
+               perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
+                                  ops->trampoline, ops->trampoline_size, false,
+                                  FTRACE_TRAMPOLINE_SYM);
+               /*
+                * Record the perf text poke event after the ksymbol register
+                * event.
+                */
+               perf_event_text_poke((void *)ops->trampoline, NULL, 0,
+                                    (void *)ops->trampoline,
+                                    ops->trampoline_size);
+       }
 }
 
 void ftrace_init_trace_array(struct trace_array *tr)
index df3f3da..a5d6f23 100644 (file)
@@ -342,6 +342,10 @@ config DECOMPRESS_LZ4
        select LZ4_DECOMPRESS
        tristate
 
+config DECOMPRESS_ZSTD
+       select ZSTD_DECOMPRESS
+       tristate
+
 #
 # Generic allocator support is selected if needed
 #
index 9ad9210..1da566b 100644 (file)
@@ -1117,6 +1117,7 @@ config PROVE_LOCKING
        select DEBUG_RWSEMS
        select DEBUG_WW_MUTEX_SLOWPATH
        select DEBUG_LOCK_ALLOC
+       select PREEMPT_COUNT if !ARCH_NO_PREEMPT
        select TRACE_IRQFLAGS
        default n
        help
@@ -1325,11 +1326,17 @@ config WW_MUTEX_SELFTEST
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
+       depends on TRACE_IRQFLAGS_SUPPORT
        bool
        help
          Enables hooks to interrupt enabling and disabling for
          either tracing or lock debugging.
 
+config TRACE_IRQFLAGS_NMI
+       def_bool y
+       depends on TRACE_IRQFLAGS
+       depends on TRACE_IRQFLAGS_NMI_SUPPORT
+
 config STACKTRACE
        bool "Stack backtrace support"
        depends on STACKTRACE_SUPPORT
index 5ee88e5..3d282d5 100644 (file)
@@ -4,7 +4,8 @@ config HAVE_ARCH_KCSAN
        bool
 
 config HAVE_KCSAN_COMPILER
-       def_bool CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-distinguish-volatile=1)
+       def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-distinguish-volatile=1)) || \
+                (CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-distinguish-volatile=1))
        help
          For the list of compilers that support KCSAN, please see
          <file:Documentation/dev-tools/kcsan.rst>.
@@ -59,7 +60,28 @@ config KCSAN_SELFTEST
        bool "Perform short selftests on boot"
        default y
        help
-         Run KCSAN selftests on boot. On test failure, causes the kernel to panic.
+         Run KCSAN selftests on boot. On test failure, causes the kernel to
+         panic. Recommended to be enabled, ensuring critical functionality
+         works as intended.
+
+config KCSAN_TEST
+       tristate "KCSAN test for integrated runtime behaviour"
+       depends on TRACEPOINTS && KUNIT
+       select TORTURE_TEST
+       help
+         KCSAN test focusing on behaviour of the integrated runtime. Tests
+         various race scenarios, and verifies the reports generated to
+         console. Makes use of KUnit for test organization, and the Torture
+         framework for test thread control.
+
+         Each test case may run at least up to KCSAN_REPORT_ONCE_IN_MS
+         milliseconds. Test run duration may be optimized by building the
+         kernel and KCSAN test with KCSAN_REPORT_ONCE_IN_MS set to a lower
+         than default value.
+
+         Say Y here if you want the test to be built into the kernel and run
+         during boot; say M if you want the test to build as a module; say N
+         if you are unsure.
 
 config KCSAN_EARLY_ENABLE
        bool "Early enable during boot"
index b1c42c1..2ba9642 100644 (file)
@@ -170,6 +170,7 @@ lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
 lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o
 lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
 lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o
+lib-$(CONFIG_DECOMPRESS_ZSTD) += decompress_unzstd.o
 
 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
 obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
index fb22fb2..85da6ab 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/export.h>
 #include <linux/memblock.h>
 #include <linux/numa.h>
+#include <linux/sched/isolation.h>
 
 /**
  * cpumask_next - get the next cpu in a cpumask
@@ -205,22 +206,27 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
  */
 unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-       int cpu;
+       int cpu, hk_flags;
+       const struct cpumask *mask;
 
+       hk_flags = HK_FLAG_DOMAIN | HK_FLAG_MANAGED_IRQ;
+       mask = housekeeping_cpumask(hk_flags);
        /* Wrap: we always want a cpu. */
-       i %= num_online_cpus();
+       i %= cpumask_weight(mask);
 
        if (node == NUMA_NO_NODE) {
-               for_each_cpu(cpu, cpu_online_mask)
+               for_each_cpu(cpu, mask) {
                        if (i-- == 0)
                                return cpu;
+               }
        } else {
                /* NUMA first. */
-               for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
+               for_each_cpu_and(cpu, cpumask_of_node(node), mask) {
                        if (i-- == 0)
                                return cpu;
+               }
 
-               for_each_cpu(cpu, cpu_online_mask) {
+               for_each_cpu(cpu, mask) {
                        /* Skip NUMA nodes, done above. */
                        if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
                                continue;
index 8cc01a6..1ed2ed4 100644 (file)
 #include <linux/notifier.h>
 
 static struct crypto_shash __rcu *crct10dif_tfm;
-static struct static_key crct10dif_fallback __read_mostly;
+static DEFINE_STATIC_KEY_TRUE(crct10dif_fallback);
 static DEFINE_MUTEX(crc_t10dif_mutex);
+static struct work_struct crct10dif_rehash_work;
 
-static int crc_t10dif_rehash(struct notifier_block *self, unsigned long val, void *data)
+static int crc_t10dif_notify(struct notifier_block *self, unsigned long val, void *data)
 {
        struct crypto_alg *alg = data;
-       struct crypto_shash *new, *old;
 
        if (val != CRYPTO_MSG_ALG_LOADED ||
-           static_key_false(&crct10dif_fallback) ||
-           strncmp(alg->cra_name, CRC_T10DIF_STRING, strlen(CRC_T10DIF_STRING)))
-               return 0;
+           strcmp(alg->cra_name, CRC_T10DIF_STRING))
+               return NOTIFY_DONE;
+
+       schedule_work(&crct10dif_rehash_work);
+       return NOTIFY_OK;
+}
+
+static void crc_t10dif_rehash(struct work_struct *work)
+{
+       struct crypto_shash *new, *old;
 
        mutex_lock(&crc_t10dif_mutex);
        old = rcu_dereference_protected(crct10dif_tfm,
                                        lockdep_is_held(&crc_t10dif_mutex));
-       if (!old) {
-               mutex_unlock(&crc_t10dif_mutex);
-               return 0;
-       }
-       new = crypto_alloc_shash("crct10dif", 0, 0);
+       new = crypto_alloc_shash(CRC_T10DIF_STRING, 0, 0);
        if (IS_ERR(new)) {
                mutex_unlock(&crc_t10dif_mutex);
-               return 0;
+               return;
        }
        rcu_assign_pointer(crct10dif_tfm, new);
        mutex_unlock(&crc_t10dif_mutex);
 
-       synchronize_rcu();
-       crypto_free_shash(old);
-       return 0;
+       if (old) {
+               synchronize_rcu();
+               crypto_free_shash(old);
+       } else {
+               static_branch_disable(&crct10dif_fallback);
+       }
 }
 
 static struct notifier_block crc_t10dif_nb = {
-       .notifier_call = crc_t10dif_rehash,
+       .notifier_call = crc_t10dif_notify,
 };
 
 __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
 {
        struct {
                struct shash_desc shash;
-               char ctx[2];
+               __u16 crc;
        } desc;
        int err;
 
-       if (static_key_false(&crct10dif_fallback))
+       if (static_branch_unlikely(&crct10dif_fallback))
                return crc_t10dif_generic(crc, buffer, len);
 
        rcu_read_lock();
        desc.shash.tfm = rcu_dereference(crct10dif_tfm);
-       *(__u16 *)desc.ctx = crc;
-
+       desc.crc = crc;
        err = crypto_shash_update(&desc.shash, buffer, len);
        rcu_read_unlock();
 
        BUG_ON(err);
 
-       return *(__u16 *)desc.ctx;
+       return desc.crc;
 }
 EXPORT_SYMBOL(crc_t10dif_update);
 
@@ -86,19 +91,17 @@ EXPORT_SYMBOL(crc_t10dif);
 
 static int __init crc_t10dif_mod_init(void)
 {
+       INIT_WORK(&crct10dif_rehash_work, crc_t10dif_rehash);
        crypto_register_notifier(&crc_t10dif_nb);
-       crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
-       if (IS_ERR(crct10dif_tfm)) {
-               static_key_slow_inc(&crct10dif_fallback);
-               crct10dif_tfm = NULL;
-       }
+       crc_t10dif_rehash(&crct10dif_rehash_work);
        return 0;
 }
 
 static void __exit crc_t10dif_mod_fini(void)
 {
        crypto_unregister_notifier(&crc_t10dif_nb);
-       crypto_free_shash(crct10dif_tfm);
+       cancel_work_sync(&crct10dif_rehash_work);
+       crypto_free_shash(rcu_dereference_protected(crct10dif_tfm, 1));
 }
 
 module_init(crc_t10dif_mod_init);
@@ -106,15 +109,23 @@ module_exit(crc_t10dif_mod_fini);
 
 static int crc_t10dif_transform_show(char *buffer, const struct kernel_param *kp)
 {
-       if (static_key_false(&crct10dif_fallback))
+       struct crypto_shash *tfm;
+       int len;
+
+       if (static_branch_unlikely(&crct10dif_fallback))
                return sprintf(buffer, "fallback\n");
 
-       return sprintf(buffer, "%s\n",
-               crypto_tfm_alg_driver_name(crypto_shash_tfm(crct10dif_tfm)));
+       rcu_read_lock();
+       tfm = rcu_dereference(crct10dif_tfm);
+       len = snprintf(buffer, PAGE_SIZE, "%s\n",
+                      crypto_shash_driver_name(tfm));
+       rcu_read_unlock();
+
+       return len;
 }
 
-module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0644);
+module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0444);
 
-MODULE_DESCRIPTION("T10 DIF CRC calculation");
+MODULE_DESCRIPTION("T10 DIF CRC calculation (library API)");
 MODULE_LICENSE("GPL");
 MODULE_SOFTDEP("pre: crct10dif");
index ad0699c..431e042 100644 (file)
@@ -21,8 +21,6 @@
 
 #define CHACHA_KEY_WORDS       (CHACHA_KEY_SIZE / sizeof(u32))
 
-bool __init chacha20poly1305_selftest(void);
-
 static void chacha_load_key(u32 *k, const u8 *in)
 {
        k[0] = get_unaligned_le32(in);
index 2e62169..2321f6c 100644 (file)
@@ -280,4 +280,14 @@ void sha224_final(struct sha256_state *sctx, u8 *out)
 }
 EXPORT_SYMBOL(sha224_final);
 
+void sha256(const u8 *data, unsigned int len, u8 *out)
+{
+       struct sha256_state sctx;
+
+       sha256_init(&sctx);
+       sha256_update(&sctx, data, len);
+       sha256_final(&sctx, out);
+}
+EXPORT_SYMBOL(sha256);
+
 MODULE_LICENSE("GPL");
index 48054db..fe45579 100644 (file)
@@ -1022,18 +1022,7 @@ static int debug_stats_show(struct seq_file *m, void *v)
        seq_printf(m, "objs_freed    :%d\n", debug_objects_freed);
        return 0;
 }
-
-static int debug_stats_open(struct inode *inode, struct file *filp)
-{
-       return single_open(filp, debug_stats_show, NULL);
-}
-
-static const struct file_operations debug_stats_fops = {
-       .open           = debug_stats_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(debug_stats);
 
 static int __init debug_objects_init_debugfs(void)
 {
index 857ab1a..ab3fc90 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/decompress/inflate.h>
 #include <linux/decompress/unlzo.h>
 #include <linux/decompress/unlz4.h>
+#include <linux/decompress/unzstd.h>
 
 #include <linux/types.h>
 #include <linux/string.h>
@@ -37,6 +38,9 @@
 #ifndef CONFIG_DECOMPRESS_LZ4
 # define unlz4 NULL
 #endif
+#ifndef CONFIG_DECOMPRESS_ZSTD
+# define unzstd NULL
+#endif
 
 struct compress_format {
        unsigned char magic[2];
@@ -52,6 +56,7 @@ static const struct compress_format compressed_formats[] __initconst = {
        { {0xfd, 0x37}, "xz", unxz },
        { {0x89, 0x4c}, "lzo", unlzo },
        { {0x02, 0x21}, "lz4", unlz4 },
+       { {0x28, 0xb5}, "zstd", unzstd },
        { {0, 0}, NULL, NULL }
 };
 
diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c
new file mode 100644 (file)
index 0000000..0ad2c15
--- /dev/null
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Important notes about in-place decompression
+ *
+ * At least on x86, the kernel is decompressed in place: the compressed data
+ * is placed at the end of the output buffer, and the decompressor overwrites
+ * most of the compressed data. There must be enough safety margin to
+ * guarantee that the write position is always behind the read position.
+ *
+ * The safety margin for ZSTD with a 128 KB block size is calculated below.
+ * Note that the margin with ZSTD is bigger than with GZIP or XZ!
+ *
+ * The worst case for in-place decompression is that the beginning of
+ * the file is compressed extremely well, and the rest of the file is
+ * uncompressible. Thus, we must look for worst-case expansion when the
+ * compressor is encoding uncompressible data.
+ *
+ * The structure of the .zst file in case of a compressed kernel is as follows.
+ * Maximum sizes (in bytes) of the fields are in parentheses.
+ *
+ *    Frame Header: (18)
+ *    Blocks: (N)
+ *    Checksum: (4)
+ *
+ * The frame header and checksum overhead is at most 22 bytes.
+ *
+ * ZSTD stores the data in blocks. Each block has a header whose size is
+ * 3 bytes. After the block header, there is up to 128 KB of payload.
+ * The maximum uncompressed size of the payload is 128 KB. The minimum
+ * uncompressed size of the payload is never less than the payload size
+ * (excluding the block header).
+ *
+ * The assumption, that the uncompressed size of the payload is never
+ * smaller than the payload itself, is valid only when talking about
+ * the payload as a whole. It is possible that the payload has parts where
+ * the decompressor consumes more input than it produces output. Calculating
+ * the worst case for this would be tricky. Instead of trying to do that,
+ * let's simply make sure that the decompressor never overwrites any bytes
+ * of the payload which it is currently reading.
+ *
+ * Now we have enough information to calculate the safety margin. We need
+ *   - 22 bytes for the .zst file format headers;
+ *   - 3 bytes per every 128 KiB of uncompressed size (one block header per
+ *     block); and
+ *   - 128 KiB (biggest possible zstd block size) to make sure that the
+ *     decompressor never overwrites anything from the block it is currently
+ *     reading.
+ *
+ * We get the following formula:
+ *
+ *    safety_margin = 22 + uncompressed_size * 3 / 131072 + 131072
+ *                 <= 22 + (uncompressed_size >> 15) + 131072
+ */
+
+/*
+ * Preboot environments #include "path/to/decompress_unzstd.c".
+ * All of the source files we depend on must be #included.
+ * zstd's only source dependency is xxhash, which has no source
+ * dependencies.
+ *
+ * When UNZSTD_PREBOOT is defined we declare __decompress(), which is
+ * used for kernel decompression, instead of unzstd().
+ *
+ * Define __DISABLE_EXPORTS in preboot environments to prevent symbols
+ * from xxhash and zstd from being exported by the EXPORT_SYMBOL macro.
+ */
+#ifdef STATIC
+# define UNZSTD_PREBOOT
+# include "xxhash.c"
+# include "zstd/entropy_common.c"
+# include "zstd/fse_decompress.c"
+# include "zstd/huf_decompress.c"
+# include "zstd/zstd_common.c"
+# include "zstd/decompress.c"
+#endif
+
+#include <linux/decompress/mm.h>
+#include <linux/kernel.h>
+#include <linux/zstd.h>
+
+/* 128MB is the maximum window size supported by zstd. */
+#define ZSTD_WINDOWSIZE_MAX    (1 << ZSTD_WINDOWLOG_MAX)
+/*
+ * Size of the input and output buffers in multi-call mode.
+ * Pick a larger size because it isn't used during kernel decompression,
+ * since that is single pass, and we have to allocate a large buffer for
+ * zstd's window anyway. The larger size speeds up initramfs decompression.
+ */
+#define ZSTD_IOBUF_SIZE                (1 << 17)
+
+static int INIT handle_zstd_error(size_t ret, void (*error)(char *x))
+{
+       const int err = ZSTD_getErrorCode(ret);
+
+       if (!ZSTD_isError(ret))
+               return 0;
+
+       switch (err) {
+       case ZSTD_error_memory_allocation:
+               error("ZSTD decompressor ran out of memory");
+               break;
+       case ZSTD_error_prefix_unknown:
+               error("Input is not in the ZSTD format (wrong magic bytes)");
+               break;
+       case ZSTD_error_dstSize_tooSmall:
+       case ZSTD_error_corruption_detected:
+       case ZSTD_error_checksum_wrong:
+               error("ZSTD-compressed data is corrupt");
+               break;
+       default:
+               error("ZSTD-compressed data is probably corrupt");
+               break;
+       }
+       return -1;
+}
+
+/*
+ * Handle the case where we have the entire input and output in one segment.
+ * We can allocate less memory (no circular buffer for the sliding window),
+ * and avoid some memcpy() calls.
+ */
+static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf,
+                                 long out_len, long *in_pos,
+                                 void (*error)(char *x))
+{
+       const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+       void *wksp = large_malloc(wksp_size);
+       ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size);
+       int err;
+       size_t ret;
+
+       if (dctx == NULL) {
+               error("Out of memory while allocating ZSTD_DCtx");
+               err = -1;
+               goto out;
+       }
+       /*
+        * Find out how large the frame actually is, there may be junk at
+        * the end of the frame that ZSTD_decompressDCtx() can't handle.
+        */
+       ret = ZSTD_findFrameCompressedSize(in_buf, in_len);
+       err = handle_zstd_error(ret, error);
+       if (err)
+               goto out;
+       in_len = (long)ret;
+
+       ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len);
+       err = handle_zstd_error(ret, error);
+       if (err)
+               goto out;
+
+       if (in_pos != NULL)
+               *in_pos = in_len;
+
+       err = 0;
+out:
+       if (wksp != NULL)
+               large_free(wksp);
+       return err;
+}
+
+static int INIT __unzstd(unsigned char *in_buf, long in_len,
+                        long (*fill)(void*, unsigned long),
+                        long (*flush)(void*, unsigned long),
+                        unsigned char *out_buf, long out_len,
+                        long *in_pos,
+                        void (*error)(char *x))
+{
+       ZSTD_inBuffer in;
+       ZSTD_outBuffer out;
+       ZSTD_frameParams params;
+       void *in_allocated = NULL;
+       void *out_allocated = NULL;
+       void *wksp = NULL;
+       size_t wksp_size;
+       ZSTD_DStream *dstream;
+       int err;
+       size_t ret;
+
+       if (out_len == 0)
+               out_len = LONG_MAX; /* no limit */
+
+       if (fill == NULL && flush == NULL)
+               /*
+                * We can decompress faster and with less memory when we have a
+                * single chunk.
+                */
+               return decompress_single(in_buf, in_len, out_buf, out_len,
+                                        in_pos, error);
+
+       /*
+        * If in_buf is not provided, we must be using fill(), so allocate
+        * a large enough buffer. If it is provided, it must be at least
+        * ZSTD_IOBUF_SIZE large.
+        */
+       if (in_buf == NULL) {
+               in_allocated = large_malloc(ZSTD_IOBUF_SIZE);
+               if (in_allocated == NULL) {
+                       error("Out of memory while allocating input buffer");
+                       err = -1;
+                       goto out;
+               }
+               in_buf = in_allocated;
+               in_len = 0;
+       }
+       /* Read the first chunk, since we need to decode the frame header. */
+       if (fill != NULL)
+               in_len = fill(in_buf, ZSTD_IOBUF_SIZE);
+       if (in_len < 0) {
+               error("ZSTD-compressed data is truncated");
+               err = -1;
+               goto out;
+       }
+       /* Set the first non-empty input buffer. */
+       in.src = in_buf;
+       in.pos = 0;
+       in.size = in_len;
+       /* Allocate the output buffer if we are using flush(). */
+       if (flush != NULL) {
+               out_allocated = large_malloc(ZSTD_IOBUF_SIZE);
+               if (out_allocated == NULL) {
+                       error("Out of memory while allocating output buffer");
+                       err = -1;
+                       goto out;
+               }
+               out_buf = out_allocated;
+               out_len = ZSTD_IOBUF_SIZE;
+       }
+       /* Set the output buffer. */
+       out.dst = out_buf;
+       out.pos = 0;
+       out.size = out_len;
+
+       /*
+        * We need to know the window size to allocate the ZSTD_DStream.
+        * Since we are streaming, we need to allocate a buffer for the sliding
+        * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX
+        * (128 MB), so it is important to use the actual value so as not to
+        * waste memory when it is smaller.
+        */
+       ret = ZSTD_getFrameParams(&params, in.src, in.size);
+       err = handle_zstd_error(ret, error);
+       if (err)
+               goto out;
+       if (ret != 0) {
+               error("ZSTD-compressed data has an incomplete frame header");
+               err = -1;
+               goto out;
+       }
+       if (params.windowSize > ZSTD_WINDOWSIZE_MAX) {
+               error("ZSTD-compressed data has too large a window size");
+               err = -1;
+               goto out;
+       }
+
+       /*
+        * Allocate the ZSTD_DStream now that we know how much memory is
+        * required.
+        */
+       wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize);
+       wksp = large_malloc(wksp_size);
+       dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size);
+       if (dstream == NULL) {
+               error("Out of memory while allocating ZSTD_DStream");
+               err = -1;
+               goto out;
+       }
+
+       /*
+        * Decompression loop:
+        * Read more data if necessary (error if no more data can be read).
+        * Call the decompression function, which returns 0 when finished.
+        * Flush any data produced if using flush().
+        */
+       if (in_pos != NULL)
+               *in_pos = 0;
+       do {
+               /*
+                * If we need to reload data, either we have fill() and can
+                * try to get more data, or we don't and the input is truncated.
+                */
+               if (in.pos == in.size) {
+                       if (in_pos != NULL)
+                               *in_pos += in.pos;
+                       in_len = fill ? fill(in_buf, ZSTD_IOBUF_SIZE) : -1;
+                       if (in_len < 0) {
+                               error("ZSTD-compressed data is truncated");
+                               err = -1;
+                               goto out;
+                       }
+                       in.pos = 0;
+                       in.size = in_len;
+               }
+               /* Returns zero when the frame is complete. */
+               ret = ZSTD_decompressStream(dstream, &out, &in);
+               err = handle_zstd_error(ret, error);
+               if (err)
+                       goto out;
+               /* Flush all of the data produced if using flush(). */
+               if (flush != NULL && out.pos > 0) {
+                       if (out.pos != flush(out.dst, out.pos)) {
+                               error("Failed to flush()");
+                               err = -1;
+                               goto out;
+                       }
+                       out.pos = 0;
+               }
+       } while (ret != 0);
+
+       if (in_pos != NULL)
+               *in_pos += in.pos;
+
+       err = 0;
+out:
+       if (in_allocated != NULL)
+               large_free(in_allocated);
+       if (out_allocated != NULL)
+               large_free(out_allocated);
+       if (wksp != NULL)
+               large_free(wksp);
+       return err;
+}
+
+#ifndef UNZSTD_PREBOOT
+STATIC int INIT unzstd(unsigned char *buf, long len,
+                      long (*fill)(void*, unsigned long),
+                      long (*flush)(void*, unsigned long),
+                      unsigned char *out_buf,
+                      long *pos,
+                      void (*error)(char *x))
+{
+       return __unzstd(buf, len, fill, flush, out_buf, 0, pos, error);
+}
+#else
+STATIC int INIT __decompress(unsigned char *buf, long len,
+                            long (*fill)(void*, unsigned long),
+                            long (*flush)(void*, unsigned long),
+                            unsigned char *out_buf, long out_len,
+                            long *pos,
+                            void (*error)(char *x))
+{
+       return __unzstd(buf, len, fill, flush, out_buf, out_len, pos, error);
+}
+#endif
index 368ca7f..3952a07 100644 (file)
@@ -190,3 +190,44 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
        return __iter_div_u64_rem(dividend, divisor, remainder);
 }
 EXPORT_SYMBOL(iter_div_u64_rem);
+
+#ifndef mul_u64_u64_div_u64
+u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
+{
+       u64 res = 0, div, rem;
+       int shift;
+
+       /* can a * b overflow ? */
+       if (ilog2(a) + ilog2(b) > 62) {
+               /*
+                * (b * a) / c is equal to
+                *
+                *      (b / c) * a +
+                *      (b % c) * a / c
+                *
+                * if nothing overflows. Can the 1st multiplication
+                * overflow? Yes, but we do not care: this can only
+                * happen if the end result can't fit in u64 anyway.
+                *
+                * So the code below does
+                *
+                *      res = (b / c) * a;
+                *      b = b % c;
+                */
+               div = div64_u64_rem(b, c, &rem);
+               res = div * a;
+               b = rem;
+
+               shift = ilog2(a) + ilog2(b) - 62;
+               if (shift > 0) {
+                       /* drop precision */
+                       b >>= shift;
+                       c >>= shift;
+                       if (!c)
+                               return res;
+               }
+       }
+
+       return res + div64_u64(a * b, c);
+}
+#endif
index d5874a7..43b8fce 100644 (file)
@@ -16,6 +16,7 @@ mpi-y = \
        mpicoder.o                      \
        mpi-bit.o                       \
        mpi-cmp.o                       \
+       mpi-sub-ui.o                    \
        mpih-cmp.o                      \
        mpih-div.o                      \
        mpih-mul.o                      \
diff --git a/lib/mpi/mpi-sub-ui.c b/lib/mpi/mpi-sub-ui.c
new file mode 100644 (file)
index 0000000..b41b082
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mpi-sub-ui.c - Subtract an unsigned integer from an MPI.
+ *
+ * Copyright 1991, 1993, 1994, 1996, 1999-2002, 2004, 2012, 2013, 2015
+ * Free Software Foundation, Inc.
+ *
+ * This file was based on the GNU MP Library source file:
+ * https://gmplib.org/repo/gmp-6.2/file/510b83519d1c/mpz/aors_ui.h
+ *
+ * The GNU MP Library is free software; you can redistribute it and/or modify
+ * it under the terms of either:
+ *
+ *   * the GNU Lesser General Public License as published by the Free
+ *     Software Foundation; either version 3 of the License, or (at your
+ *     option) any later version.
+ *
+ * or
+ *
+ *   * the GNU General Public License as published by the Free Software
+ *     Foundation; either version 2 of the License, or (at your option) any
+ *     later version.
+ *
+ * or both in parallel, as here.
+ *
+ * The GNU MP Library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received copies of the GNU General Public License and the
+ * GNU Lesser General Public License along with the GNU MP Library.  If not,
+ * see https://www.gnu.org/licenses/.
+ */
+
+#include "mpi-internal.h"
+
+int mpi_sub_ui(MPI w, MPI u, unsigned long vval)
+{
+       if (u->nlimbs == 0) {
+               if (mpi_resize(w, 1) < 0)
+                       return -ENOMEM;
+               w->d[0] = vval;
+               w->nlimbs = (vval != 0);
+               w->sign = (vval != 0);
+               return 0;
+       }
+
+       /* If W lacks space for the result (and a possible carry), grow it. */
+       if (mpi_resize(w, u->nlimbs + 1))
+               return -ENOMEM;
+
+       if (u->sign) {
+               mpi_limb_t cy;
+
+               cy = mpihelp_add_1(w->d, u->d, u->nlimbs, (mpi_limb_t) vval);
+               w->d[u->nlimbs] = cy;
+               w->nlimbs = u->nlimbs + cy;
+               w->sign = 1;
+       } else {
+               /* The signs are different.  Need exact comparison to determine
+                * which operand to subtract from which.
+                */
+               if (u->nlimbs == 1 && u->d[0] < vval) {
+                       w->d[0] = vval - u->d[0];
+                       w->nlimbs = 1;
+                       w->sign = 1;
+               } else {
+                       mpihelp_sub_1(w->d, u->d, u->nlimbs, (mpi_limb_t) vval);
+                       /* Size can decrease by at most one limb. */
+                       w->nlimbs = (u->nlimbs - (w->d[u->nlimbs - 1] == 0));
+                       w->sign = 0;
+               }
+       }
+
+       mpi_normalize(w);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_sub_ui);
index 763b920..3d749ab 100644 (file)
@@ -48,7 +48,7 @@ static inline void prandom_state_selftest(void)
 }
 #endif
 
-static DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy;
+DEFINE_PER_CPU(struct rnd_state, net_rand_state);
 
 /**
  *     prandom_u32_state - seeded pseudo-random number generator.
index 9f6890a..c949c1e 100644 (file)
@@ -31,7 +31,7 @@
 
 union nested_table {
        union nested_table __rcu *table;
-       struct rhash_lock_head *bucket;
+       struct rhash_lock_head __rcu *bucket;
 };
 
 static u32 head_hashfn(struct rhashtable *ht,
@@ -222,7 +222,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 }
 
 static int rhashtable_rehash_one(struct rhashtable *ht,
-                                struct rhash_lock_head **bkt,
+                                struct rhash_lock_head __rcu **bkt,
                                 unsigned int old_hash)
 {
        struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
@@ -275,7 +275,7 @@ static int rhashtable_rehash_chain(struct rhashtable *ht,
                                    unsigned int old_hash)
 {
        struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-       struct rhash_lock_head **bkt = rht_bucket_var(old_tbl, old_hash);
+       struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
        int err;
 
        if (!bkt)
@@ -485,7 +485,7 @@ fail:
 }
 
 static void *rhashtable_lookup_one(struct rhashtable *ht,
-                                  struct rhash_lock_head **bkt,
+                                  struct rhash_lock_head __rcu **bkt,
                                   struct bucket_table *tbl, unsigned int hash,
                                   const void *key, struct rhash_head *obj)
 {
@@ -535,12 +535,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
        return ERR_PTR(-ENOENT);
 }
 
-static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
-                                                 struct rhash_lock_head **bkt,
-                                                 struct bucket_table *tbl,
-                                                 unsigned int hash,
-                                                 struct rhash_head *obj,
-                                                 void *data)
+static struct bucket_table *rhashtable_insert_one(
+       struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
+       struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
+       void *data)
 {
        struct bucket_table *new_tbl;
        struct rhash_head *head;
@@ -591,7 +589,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
 {
        struct bucket_table *new_tbl;
        struct bucket_table *tbl;
-       struct rhash_lock_head **bkt;
+       struct rhash_lock_head __rcu **bkt;
        unsigned int hash;
        void *data;
 
@@ -1173,8 +1171,8 @@ void rhashtable_destroy(struct rhashtable *ht)
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
 
-struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
-                                            unsigned int hash)
+struct rhash_lock_head __rcu **__rht_bucket_nested(
+       const struct bucket_table *tbl, unsigned int hash)
 {
        const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
        unsigned int index = hash & ((1 << tbl->nest) - 1);
@@ -1202,10 +1200,10 @@ struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
 }
 EXPORT_SYMBOL_GPL(__rht_bucket_nested);
 
-struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
-                                          unsigned int hash)
+struct rhash_lock_head __rcu **rht_bucket_nested(
+       const struct bucket_table *tbl, unsigned int hash)
 {
-       static struct rhash_lock_head *rhnull;
+       static struct rhash_lock_head __rcu *rhnull;
 
        if (!rhnull)
                INIT_RHT_NULLS_HEAD(rhnull);
@@ -1213,9 +1211,8 @@ struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
 }
 EXPORT_SYMBOL_GPL(rht_bucket_nested);
 
-struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
-                                                 struct bucket_table *tbl,
-                                                 unsigned int hash)
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(
+       struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
 {
        const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
        unsigned int index = hash & ((1 << tbl->nest) - 1);
index af88d13..267aa77 100644 (file)
@@ -292,8 +292,11 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
 
        for (i = 0; i < sb->map_nr; i++) {
                unsigned long word = READ_ONCE(sb->map[i].word);
+               unsigned long cleared = READ_ONCE(sb->map[i].cleared);
                unsigned int word_bits = READ_ONCE(sb->map[i].depth);
 
+               word &= ~cleared;
+
                while (word_bits > 0) {
                        unsigned int bits = min(8 - byte_bits, word_bits);
 
index ddc9685..5cf2fe9 100644 (file)
@@ -15,6 +15,8 @@
 #include <linux/delay.h>
 #include <linux/rwsem.h>
 #include <linux/mm.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
 
 #define __param(type, name, init, msg)         \
        static type name = init;                                \
@@ -35,14 +37,18 @@ __param(int, test_loop_count, 1000000,
 
 __param(int, run_test_mask, INT_MAX,
        "Set tests specified in the mask.\n\n"
-               "\t\tid: 1,   name: fix_size_alloc_test\n"
-               "\t\tid: 2,   name: full_fit_alloc_test\n"
-               "\t\tid: 4,   name: long_busy_list_alloc_test\n"
-               "\t\tid: 8,   name: random_size_alloc_test\n"
-               "\t\tid: 16,  name: fix_align_alloc_test\n"
-               "\t\tid: 32,  name: random_size_align_alloc_test\n"
-               "\t\tid: 64,  name: align_shift_alloc_test\n"
-               "\t\tid: 128, name: pcpu_alloc_test\n"
+               "\t\tid: 1,    name: fix_size_alloc_test\n"
+               "\t\tid: 2,    name: full_fit_alloc_test\n"
+               "\t\tid: 4,    name: long_busy_list_alloc_test\n"
+               "\t\tid: 8,    name: random_size_alloc_test\n"
+               "\t\tid: 16,   name: fix_align_alloc_test\n"
+               "\t\tid: 32,   name: random_size_align_alloc_test\n"
+               "\t\tid: 64,   name: align_shift_alloc_test\n"
+               "\t\tid: 128,  name: pcpu_alloc_test\n"
+               "\t\tid: 256,  name: kvfree_rcu_1_arg_vmalloc_test\n"
+               "\t\tid: 512,  name: kvfree_rcu_2_arg_vmalloc_test\n"
+               "\t\tid: 1024, name: kvfree_rcu_1_arg_slab_test\n"
+               "\t\tid: 2048, name: kvfree_rcu_2_arg_slab_test\n"
                /* Add a new test case description here. */
 );
 
@@ -316,6 +322,83 @@ pcpu_alloc_test(void)
        return rv;
 }
 
+struct test_kvfree_rcu {
+       struct rcu_head rcu;
+       unsigned char array[20];
+};
+
+static int
+kvfree_rcu_1_arg_vmalloc_test(void)
+{
+       struct test_kvfree_rcu *p;
+       int i;
+
+       for (i = 0; i < test_loop_count; i++) {
+               p = vmalloc(1 * PAGE_SIZE);
+               if (!p)
+                       return -1;
+
+               p->array[0] = 'a';
+               kvfree_rcu(p);
+       }
+
+       return 0;
+}
+
+static int
+kvfree_rcu_2_arg_vmalloc_test(void)
+{
+       struct test_kvfree_rcu *p;
+       int i;
+
+       for (i = 0; i < test_loop_count; i++) {
+               p = vmalloc(1 * PAGE_SIZE);
+               if (!p)
+                       return -1;
+
+               p->array[0] = 'a';
+               kvfree_rcu(p, rcu);
+       }
+
+       return 0;
+}
+
+static int
+kvfree_rcu_1_arg_slab_test(void)
+{
+       struct test_kvfree_rcu *p;
+       int i;
+
+       for (i = 0; i < test_loop_count; i++) {
+               p = kmalloc(sizeof(*p), GFP_KERNEL);
+               if (!p)
+                       return -1;
+
+               p->array[0] = 'a';
+               kvfree_rcu(p);
+       }
+
+       return 0;
+}
+
+static int
+kvfree_rcu_2_arg_slab_test(void)
+{
+       struct test_kvfree_rcu *p;
+       int i;
+
+       for (i = 0; i < test_loop_count; i++) {
+               p = kmalloc(sizeof(*p), GFP_KERNEL);
+               if (!p)
+                       return -1;
+
+               p->array[0] = 'a';
+               kvfree_rcu(p, rcu);
+       }
+
+       return 0;
+}
+
 struct test_case_desc {
        const char *test_name;
        int (*test_func)(void);
@@ -330,6 +413,10 @@ static struct test_case_desc test_case_array[] = {
        { "random_size_align_alloc_test", random_size_align_alloc_test },
        { "align_shift_alloc_test", align_shift_alloc_test },
        { "pcpu_alloc_test", pcpu_alloc_test },
+       { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test },
+       { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test },
+       { "kvfree_rcu_1_arg_slab_test", kvfree_rcu_1_arg_slab_test },
+       { "kvfree_rcu_2_arg_slab_test", kvfree_rcu_2_arg_slab_test },
        /* Add a new test case here. */
 };
 
index a84300e..0b35353 100644 (file)
@@ -47,6 +47,7 @@
 ****************************************************************/
 #include "bitstream.h"
 #include "fse.h"
+#include "zstd_internal.h"
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/string.h> /* memcpy, memset */
                enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
        } /* use only *after* variable declarations */
 
-/* check and forward error code */
-#define CHECK_F(f)                  \
-       {                           \
-               size_t const e = f; \
-               if (FSE_isError(e)) \
-                       return e;   \
-       }
-
 /* **************************************************************
 *  Templates
 ****************************************************************/
index 1a79fab..dac7533 100644 (file)
@@ -127,7 +127,14 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 *  Shared functions to include for inlining
 *********************************************/
 ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
-       memcpy(dst, src, 8);
+       /*
+        * zstd relies heavily on gcc being able to analyze and inline this
+        * memcpy() call, since it is called in a tight loop. Preboot mode
+        * is compiled in freestanding mode, which stops gcc from analyzing
+        * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a
+        * regular memcpy().
+        */
+       __builtin_memcpy(dst, src, 8);
 }
 /*! ZSTD_wildcopy() :
 *   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
@@ -137,13 +144,16 @@ ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length)
        const BYTE* ip = (const BYTE*)src;
        BYTE* op = (BYTE*)dst;
        BYTE* const oend = op + length;
-       /* Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
+#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200
+       /*
+        * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
         * Avoid the bad case where the loop only runs once by handling the
         * special case separately. This doesn't trigger the bug because it
         * doesn't involve pointer/integer overflow.
         */
        if (length <= 8)
                return ZSTD_copy8(dst, src);
+#endif
        do {
                ZSTD_copy8(op, ip);
                op += 8;
index d382272..8e8b006 100644 (file)
@@ -281,7 +281,7 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
 #define INIT_BW                (100 << (20 - PAGE_SHIFT))
 
 static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
-                  int blkcg_id, gfp_t gfp)
+                  gfp_t gfp)
 {
        int i, err;
 
@@ -308,15 +308,9 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
        INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
        wb->dirty_sleep = jiffies;
 
-       wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
-       if (!wb->congested) {
-               err = -ENOMEM;
-               goto out_put_bdi;
-       }
-
        err = fprop_local_init_percpu(&wb->completions, gfp);
        if (err)
-               goto out_put_cong;
+               goto out_put_bdi;
 
        for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
                err = percpu_counter_init(&wb->stat[i], 0, gfp);
@@ -330,8 +324,6 @@ out_destroy_stat:
        while (i--)
                percpu_counter_destroy(&wb->stat[i]);
        fprop_local_destroy_percpu(&wb->completions);
-out_put_cong:
-       wb_congested_put(wb->congested);
 out_put_bdi:
        if (wb != &bdi->wb)
                bdi_put(bdi);
@@ -374,7 +366,6 @@ static void wb_exit(struct bdi_writeback *wb)
                percpu_counter_destroy(&wb->stat[i]);
 
        fprop_local_destroy_percpu(&wb->completions);
-       wb_congested_put(wb->congested);
        if (wb != &wb->bdi->wb)
                bdi_put(wb->bdi);
 }
@@ -384,99 +375,12 @@ static void wb_exit(struct bdi_writeback *wb)
 #include <linux/memcontrol.h>
 
 /*
- * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
- * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_tree is also RCU
- * protected.
+ * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, and memcg->cgwb_list.
+ * bdi->cgwb_tree is also RCU protected.
  */
 static DEFINE_SPINLOCK(cgwb_lock);
 static struct workqueue_struct *cgwb_release_wq;
 
-/**
- * wb_congested_get_create - get or create a wb_congested
- * @bdi: associated bdi
- * @blkcg_id: ID of the associated blkcg
- * @gfp: allocation mask
- *
- * Look up the wb_congested for @blkcg_id on @bdi.  If missing, create one.
- * The returned wb_congested has its reference count incremented.  Returns
- * NULL on failure.
- */
-struct bdi_writeback_congested *
-wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
-{
-       struct bdi_writeback_congested *new_congested = NULL, *congested;
-       struct rb_node **node, *parent;
-       unsigned long flags;
-retry:
-       spin_lock_irqsave(&cgwb_lock, flags);
-
-       node = &bdi->cgwb_congested_tree.rb_node;
-       parent = NULL;
-
-       while (*node != NULL) {
-               parent = *node;
-               congested = rb_entry(parent, struct bdi_writeback_congested,
-                                    rb_node);
-               if (congested->blkcg_id < blkcg_id)
-                       node = &parent->rb_left;
-               else if (congested->blkcg_id > blkcg_id)
-                       node = &parent->rb_right;
-               else
-                       goto found;
-       }
-
-       if (new_congested) {
-               /* !found and storage for new one already allocated, insert */
-               congested = new_congested;
-               rb_link_node(&congested->rb_node, parent, node);
-               rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
-               spin_unlock_irqrestore(&cgwb_lock, flags);
-               return congested;
-       }
-
-       spin_unlock_irqrestore(&cgwb_lock, flags);
-
-       /* allocate storage for new one and retry */
-       new_congested = kzalloc(sizeof(*new_congested), gfp);
-       if (!new_congested)
-               return NULL;
-
-       refcount_set(&new_congested->refcnt, 1);
-       new_congested->__bdi = bdi;
-       new_congested->blkcg_id = blkcg_id;
-       goto retry;
-
-found:
-       refcount_inc(&congested->refcnt);
-       spin_unlock_irqrestore(&cgwb_lock, flags);
-       kfree(new_congested);
-       return congested;
-}
-
-/**
- * wb_congested_put - put a wb_congested
- * @congested: wb_congested to put
- *
- * Put @congested and destroy it if the refcnt reaches zero.
- */
-void wb_congested_put(struct bdi_writeback_congested *congested)
-{
-       unsigned long flags;
-
-       if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
-               return;
-
-       /* bdi might already have been destroyed leaving @congested unlinked */
-       if (congested->__bdi) {
-               rb_erase(&congested->rb_node,
-                        &congested->__bdi->cgwb_congested_tree);
-               congested->__bdi = NULL;
-       }
-
-       spin_unlock_irqrestore(&cgwb_lock, flags);
-       kfree(congested);
-}
-
 static void cgwb_release_workfn(struct work_struct *work)
 {
        struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@@ -558,7 +462,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
                goto out_put;
        }
 
-       ret = wb_init(wb, bdi, blkcg_css->id, gfp);
+       ret = wb_init(wb, bdi, gfp);
        if (ret)
                goto err_free;
 
@@ -696,11 +600,10 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
        int ret;
 
        INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
-       bdi->cgwb_congested_tree = RB_ROOT;
        mutex_init(&bdi->cgwb_release_mutex);
        init_rwsem(&bdi->wb_switch_rwsem);
 
-       ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
+       ret = wb_init(&bdi->wb, bdi, GFP_KERNEL);
        if (!ret) {
                bdi->wb.memcg_css = &root_mem_cgroup->css;
                bdi->wb.blkcg_css = blkcg_root_css;
@@ -769,21 +672,6 @@ void wb_blkcg_offline(struct blkcg *blkcg)
        spin_unlock_irq(&cgwb_lock);
 }
 
-static void cgwb_bdi_exit(struct backing_dev_info *bdi)
-{
-       struct rb_node *rbn;
-
-       spin_lock_irq(&cgwb_lock);
-       while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
-               struct bdi_writeback_congested *congested =
-                       rb_entry(rbn, struct bdi_writeback_congested, rb_node);
-
-               rb_erase(rbn, &bdi->cgwb_congested_tree);
-               congested->__bdi = NULL;        /* mark @congested unlinked */
-       }
-       spin_unlock_irq(&cgwb_lock);
-}
-
 static void cgwb_bdi_register(struct backing_dev_info *bdi)
 {
        spin_lock_irq(&cgwb_lock);
@@ -810,29 +698,11 @@ subsys_initcall(cgwb_init);
 
 static int cgwb_bdi_init(struct backing_dev_info *bdi)
 {
-       int err;
-
-       bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
-       if (!bdi->wb_congested)
-               return -ENOMEM;
-
-       refcount_set(&bdi->wb_congested->refcnt, 1);
-
-       err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
-       if (err) {
-               wb_congested_put(bdi->wb_congested);
-               return err;
-       }
-       return 0;
+       return wb_init(&bdi->wb, bdi, GFP_KERNEL);
 }
 
 static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }
 
-static void cgwb_bdi_exit(struct backing_dev_info *bdi)
-{
-       wb_congested_put(bdi->wb_congested);
-}
-
 static void cgwb_bdi_register(struct backing_dev_info *bdi)
 {
        list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
@@ -1023,7 +893,6 @@ static void release_bdi(struct kref *ref)
                bdi_unregister(bdi);
        WARN_ON_ONCE(bdi->dev);
        wb_exit(&bdi->wb);
-       cgwb_bdi_exit(bdi);
        kfree(bdi);
 }
 
@@ -1047,29 +916,29 @@ static wait_queue_head_t congestion_wqh[2] = {
        };
 static atomic_t nr_wb_congested[2];
 
-void clear_wb_congested(struct bdi_writeback_congested *congested, int sync)
+void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
 {
        wait_queue_head_t *wqh = &congestion_wqh[sync];
        enum wb_congested_state bit;
 
        bit = sync ? WB_sync_congested : WB_async_congested;
-       if (test_and_clear_bit(bit, &congested->state))
+       if (test_and_clear_bit(bit, &bdi->wb.congested))
                atomic_dec(&nr_wb_congested[sync]);
        smp_mb__after_atomic();
        if (waitqueue_active(wqh))
                wake_up(wqh);
 }
-EXPORT_SYMBOL(clear_wb_congested);
+EXPORT_SYMBOL(clear_bdi_congested);
 
-void set_wb_congested(struct bdi_writeback_congested *congested, int sync)
+void set_bdi_congested(struct backing_dev_info *bdi, int sync)
 {
        enum wb_congested_state bit;
 
        bit = sync ? WB_sync_congested : WB_async_congested;
-       if (!test_and_set_bit(bit, &congested->state))
+       if (!test_and_set_bit(bit, &bdi->wb.congested))
                atomic_inc(&nr_wb_congested[sync]);
 }
-EXPORT_SYMBOL(set_wb_congested);
+EXPORT_SYMBOL(set_bdi_congested);
 
 /**
  * congestion_wait - wait for a backing_dev to become uncongested
index 385759c..9f131f1 100644 (file)
@@ -987,44 +987,46 @@ void __init pagecache_init(void)
        page_writeback_init();
 }
 
-/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
-struct wait_page_key {
-       struct page *page;
-       int bit_nr;
-       int page_match;
-};
-
-struct wait_page_queue {
-       struct page *page;
-       int bit_nr;
-       wait_queue_entry_t wait;
-};
-
 static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
+       int ret;
        struct wait_page_key *key = arg;
        struct wait_page_queue *wait_page
                = container_of(wait, struct wait_page_queue, wait);
 
-       if (wait_page->page != key->page)
-              return 0;
-       key->page_match = 1;
-
-       if (wait_page->bit_nr != key->bit_nr)
+       if (!wake_page_match(wait_page, key))
                return 0;
 
        /*
-        * Stop walking if it's locked.
-        * Is this safe if put_and_wait_on_page_locked() is in use?
-        * Yes: the waker must hold a reference to this page, and if PG_locked
-        * has now already been set by another task, that task must also hold
-        * a reference to the *same usage* of this page; so there is no need
-        * to walk on to wake even the put_and_wait_on_page_locked() callers.
+        * If it's an exclusive wait, we get the bit for it, and
+        * stop walking if we can't.
+        *
+        * If it's a non-exclusive wait, then the fact that this
+        * wake function was called means that the bit already
+        * was cleared, and we don't care if somebody then
+        * re-took it.
         */
-       if (test_bit(key->bit_nr, &key->page->flags))
-               return -1;
+       ret = 0;
+       if (wait->flags & WQ_FLAG_EXCLUSIVE) {
+               if (test_and_set_bit(key->bit_nr, &key->page->flags))
+                       return -1;
+               ret = 1;
+       }
+       wait->flags |= WQ_FLAG_WOKEN;
 
-       return autoremove_wake_function(wait, mode, sync, key);
+       wake_up_state(wait->private, mode);
+
+       /*
+        * Ok, we have successfully done what we're waiting for,
+        * and we can unconditionally remove the wait entry.
+        *
+        * Note that this has to be the absolute last thing we do,
+        * since after list_del_init_careful(&wait->entry) the wait entry
+        * might be de-allocated and the process might even have
+        * exited.
+        */
+       list_del_init_careful(&wait->entry);
+       return ret;
 }
 
 static void wake_up_page_bit(struct page *page, int bit_nr)
@@ -1103,16 +1105,31 @@ enum behavior {
                         */
 };
 
+/*
+ * Attempt to check (or get) the page bit, and mark the
+ * waiter woken if successful.
+ */
+static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
+                                       struct wait_queue_entry *wait)
+{
+       if (wait->flags & WQ_FLAG_EXCLUSIVE) {
+               if (test_and_set_bit(bit_nr, &page->flags))
+                       return false;
+       } else if (test_bit(bit_nr, &page->flags))
+               return false;
+
+       wait->flags |= WQ_FLAG_WOKEN;
+       return true;
+}
+
 static inline int wait_on_page_bit_common(wait_queue_head_t *q,
        struct page *page, int bit_nr, int state, enum behavior behavior)
 {
        struct wait_page_queue wait_page;
        wait_queue_entry_t *wait = &wait_page.wait;
-       bool bit_is_set;
        bool thrashing = false;
        bool delayacct = false;
        unsigned long pflags;
-       int ret = 0;
 
        if (bit_nr == PG_locked &&
            !PageUptodate(page) && PageWorkingset(page)) {
@@ -1130,48 +1147,47 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
        wait_page.page = page;
        wait_page.bit_nr = bit_nr;
 
-       for (;;) {
-               spin_lock_irq(&q->lock);
+       /*
+        * Do one last check whether we can get the
+        * page bit synchronously.
+        *
+        * Do the SetPageWaiters() marking before that
+        * to let any waker we _just_ missed know they
+        * need to wake us up (otherwise they'll never
+        * even go to the slow case that looks at the
+        * page queue), and add ourselves to the wait
+        * queue if we need to sleep.
+        *
+        * This part needs to be done under the queue
+        * lock to avoid races.
+        */
+       spin_lock_irq(&q->lock);
+       SetPageWaiters(page);
+       if (!trylock_page_bit_common(page, bit_nr, wait))
+               __add_wait_queue_entry_tail(q, wait);
+       spin_unlock_irq(&q->lock);
 
-               if (likely(list_empty(&wait->entry))) {
-                       __add_wait_queue_entry_tail(q, wait);
-                       SetPageWaiters(page);
-               }
+       /*
+        * From now on, all the logic will be based on
+        * the WQ_FLAG_WOKEN flag, and the page
+        * bit testing (and setting) will be - or has
+        * already been - done by the wake function.
+        *
+        * We can drop our reference to the page.
+        */
+       if (behavior == DROP)
+               put_page(page);
 
+       for (;;) {
                set_current_state(state);
 
-               spin_unlock_irq(&q->lock);
-
-               bit_is_set = test_bit(bit_nr, &page->flags);
-               if (behavior == DROP)
-                       put_page(page);
-
-               if (likely(bit_is_set))
-                       io_schedule();
-
-               if (behavior == EXCLUSIVE) {
-                       if (!test_and_set_bit_lock(bit_nr, &page->flags))
-                               break;
-               } else if (behavior == SHARED) {
-                       if (!test_bit(bit_nr, &page->flags))
-                               break;
-               }
-
-               if (signal_pending_state(state, current)) {
-                       ret = -EINTR;
+               if (signal_pending_state(state, current))
                        break;
-               }
 
-               if (behavior == DROP) {
-                       /*
-                        * We can no longer safely access page->flags:
-                        * even if CONFIG_MEMORY_HOTREMOVE is not enabled,
-                        * there is a risk of waiting forever on a page reused
-                        * for something that keeps it locked indefinitely.
-                        * But best check for -EINTR above before breaking.
-                        */
+               if (wait->flags & WQ_FLAG_WOKEN)
                        break;
-               }
+
+               io_schedule();
        }
 
        finish_wait(q, wait);
@@ -1190,7 +1206,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
         * bother with signals either.
         */
 
-       return ret;
+       return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
 }
 
 void wait_on_page_bit(struct page *page, int bit_nr)
@@ -1207,6 +1223,44 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit_killable);
 
+static int __wait_on_page_locked_async(struct page *page,
+                                      struct wait_page_queue *wait, bool set)
+{
+       struct wait_queue_head *q = page_waitqueue(page);
+       int ret = 0;
+
+       wait->page = page;
+       wait->bit_nr = PG_locked;
+
+       spin_lock_irq(&q->lock);
+       __add_wait_queue_entry_tail(q, &wait->wait);
+       SetPageWaiters(page);
+       if (set)
+               ret = !trylock_page(page);
+       else
+               ret = PageLocked(page);
+       /*
+        * If we were successful now, we know we're still on the
+        * waitqueue as we're still under the lock. This means it's
+        * safe to remove and return success, we know the callback
+        * isn't going to trigger.
+        */
+       if (!ret)
+               __remove_wait_queue(q, &wait->wait);
+       else
+               ret = -EIOCBQUEUED;
+       spin_unlock_irq(&q->lock);
+       return ret;
+}
+
+static int wait_on_page_locked_async(struct page *page,
+                                    struct wait_page_queue *wait)
+{
+       if (!PageLocked(page))
+               return 0;
+       return __wait_on_page_locked_async(compound_head(page), wait, false);
+}
+
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -1369,6 +1423,11 @@ int __lock_page_killable(struct page *__page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+int __lock_page_async(struct page *page, struct wait_page_queue *wait)
+{
+       return __wait_on_page_locked_async(page, wait, true);
+}
+
 /*
  * Return values:
  * 1 - page is locked; mmap_lock is still held.
@@ -2028,7 +2087,7 @@ find_page:
 
                page = find_get_page(mapping, index);
                if (!page) {
-                       if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
+                       if (iocb->ki_flags & IOCB_NOIO)
                                goto would_block;
                        page_cache_sync_readahead(mapping,
                                        ra, filp,
@@ -2047,17 +2106,25 @@ find_page:
                                        index, last_index - index);
                }
                if (!PageUptodate(page)) {
-                       if (iocb->ki_flags & IOCB_NOWAIT) {
-                               put_page(page);
-                               goto would_block;
-                       }
-
                        /*
                         * See comment in do_read_cache_page on why
                         * wait_on_page_locked is used to avoid unnecessarily
                         * serialisations and why it's safe.
                         */
-                       error = wait_on_page_locked_killable(page);
+                       if (iocb->ki_flags & IOCB_WAITQ) {
+                               if (written) {
+                                       put_page(page);
+                                       goto out;
+                               }
+                               error = wait_on_page_locked_async(page,
+                                                               iocb->ki_waitq);
+                       } else {
+                               if (iocb->ki_flags & IOCB_NOWAIT) {
+                                       put_page(page);
+                                       goto would_block;
+                               }
+                               error = wait_on_page_locked_killable(page);
+                       }
                        if (unlikely(error))
                                goto readpage_error;
                        if (PageUptodate(page))
@@ -2145,7 +2212,10 @@ page_ok:
 
 page_not_up_to_date:
                /* Get exclusive access to the page ... */
-               error = lock_page_killable(page);
+               if (iocb->ki_flags & IOCB_WAITQ)
+                       error = lock_page_async(page, iocb->ki_waitq);
+               else
+                       error = lock_page_killable(page);
                if (unlikely(error))
                        goto readpage_error;
 
@@ -2164,7 +2234,7 @@ page_not_up_to_date_locked:
                }
 
 readpage:
-               if (iocb->ki_flags & IOCB_NOIO) {
+               if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
                        unlock_page(page);
                        put_page(page);
                        goto would_block;
index 9222910..e825804 100644 (file)
@@ -373,14 +373,14 @@ static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
        struct list_lru_memcg *memcg_lrus;
        /*
         * This is called when shrinker has already been unregistered,
-        * and nobody can use it. So, there is no need to use kvfree_rcu().
+        * and nobody can use it. So, there is no need to use kvfree_rcu_local().
         */
        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
        __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
        kvfree(memcg_lrus);
 }
 
-static void kvfree_rcu(struct rcu_head *head)
+static void kvfree_rcu_local(struct rcu_head *head)
 {
        struct list_lru_memcg *mlru;
 
@@ -419,7 +419,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
        rcu_assign_pointer(nlru->memcg_lrus, new);
        spin_unlock_irq(&nlru->lock);
 
-       call_rcu(&old->rcu, kvfree_rcu);
+       call_rcu(&old->rcu, kvfree_rcu_local);
        return 0;
 }
 
index 39aceaf..45f1987 100644 (file)
  *   in the system, for instance when the memory is restricted with
  *   ``mem=`` command line parameter
  * * ``reserved`` - describes the regions that were allocated
- * * ``physmap`` - describes the actual physical memory regardless of
- *   the possible restrictions; the ``physmap`` type is only available
- *   on some architectures.
+ * * ``physmem`` - describes the actual physical memory available during
+ *   boot regardless of the possible restrictions and memory hot(un)plug;
+ *   the ``physmem`` type is only available on some architectures.
  *
  * Each region is represented by :c:type:`struct memblock_region` that
  * defines the region extents, its attributes and NUMA node id on NUMA
  * systems. Every memory type is described by the :c:type:`struct
  * memblock_type` which contains an array of memory regions along with
- * the allocator metadata. The memory types are nicely wrapped with
- * :c:type:`struct memblock`. This structure is statically initialzed
- * at build time. The region arrays for the "memory" and "reserved"
- * types are initially sized to %INIT_MEMBLOCK_REGIONS and for the
- * "physmap" type to %INIT_PHYSMEM_REGIONS.
+ * the allocator metadata. The "memory" and "reserved" types are nicely
+ * wrapped with :c:type:`struct memblock`. This structure is statically
+ * initialized at build time. The region arrays are initially sized to
+ * %INIT_MEMBLOCK_REGIONS for "memory" and %INIT_MEMBLOCK_RESERVED_REGIONS
+ * for "reserved". The region array for "physmem" is initially sized to
+ * %INIT_PHYSMEM_REGIONS.
  * The memblock_allow_resize() enables automatic resizing of the region
  * arrays during addition of new regions. This feature should be used
  * with care so that memory allocated for the region array will not
@@ -87,8 +88,8 @@
  * function frees all the memory to the buddy page allocator.
  *
  * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
- * memblock data structures will be discarded after the system
- * initialization completes.
+ * memblock data structures (except "physmem") will be discarded after the
+ * system initialization completes.
  */
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -104,7 +105,7 @@ unsigned long long max_possible_pfn;
 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
-static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
 #endif
 
 struct memblock memblock __initdata_memblock = {
@@ -118,17 +119,19 @@ struct memblock memblock __initdata_memblock = {
        .reserved.max           = INIT_MEMBLOCK_RESERVED_REGIONS,
        .reserved.name          = "reserved",
 
-#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
-       .physmem.regions        = memblock_physmem_init_regions,
-       .physmem.cnt            = 1,    /* empty dummy entry */
-       .physmem.max            = INIT_PHYSMEM_REGIONS,
-       .physmem.name           = "physmem",
-#endif
-
        .bottom_up              = false,
        .current_limit          = MEMBLOCK_ALLOC_ANYWHERE,
 };
 
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+struct memblock_type physmem = {
+       .regions                = memblock_physmem_init_regions,
+       .cnt                    = 1,    /* empty dummy entry */
+       .max                    = INIT_PHYSMEM_REGIONS,
+       .name                   = "physmem",
+};
+#endif
+
 int memblock_debug __initdata_memblock;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
@@ -838,7 +841,7 @@ int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
        memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
                     &base, &end, (void *)_RET_IP_);
 
-       return memblock_add_range(&memblock.physmem, base, size, MAX_NUMNODES, 0);
+       return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0);
 }
 #endif
 
@@ -1019,12 +1022,10 @@ static bool should_skip_region(struct memblock_region *m, int nid, int flags)
  * As both region arrays are sorted, the function advances the two indices
  * in lockstep and returns each intersection.
  */
-void __init_memblock __next_mem_range(u64 *idx, int nid,
-                                     enum memblock_flags flags,
-                                     struct memblock_type *type_a,
-                                     struct memblock_type *type_b,
-                                     phys_addr_t *out_start,
-                                     phys_addr_t *out_end, int *out_nid)
+void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
+                     struct memblock_type *type_a,
+                     struct memblock_type *type_b, phys_addr_t *out_start,
+                     phys_addr_t *out_end, int *out_nid)
 {
        int idx_a = *idx & 0xffffffff;
        int idx_b = *idx >> 32;
@@ -1924,7 +1925,7 @@ void __init_memblock __memblock_dump_all(void)
        memblock_dump(&memblock.memory);
        memblock_dump(&memblock.reserved);
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
-       memblock_dump(&memblock.physmem);
+       memblock_dump(&physmem);
 #endif
 }
 
@@ -2064,8 +2065,8 @@ static int __init memblock_init_debugfs(void)
        debugfs_create_file("reserved", 0444, root,
                            &memblock.reserved, &memblock_debug_fops);
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
-       debugfs_create_file("physmem", 0444, root,
-                           &memblock.physmem, &memblock_debug_fops);
+       debugfs_create_file("physmem", 0444, root, &physmem,
+                           &memblock_debug_fops);
 #endif
 
        return 0;
index 3ecad55..6e9903d 100644 (file)
@@ -437,7 +437,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
         * of a chain of data-dependent loads, meaning most CPUs (alpha
         * being the notable exception) will already guarantee loads are
         * seen in-order. See the alpha page table accessors for the
-        * smp_read_barrier_depends() barriers in page table walking code.
+        * smp_rmb() barriers in page table walking code.
         */
        smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
index 8c7ca73..dcdab26 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3171,6 +3171,7 @@ void exit_mmap(struct mm_struct *mm)
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += vma_pages(vma);
                vma = remove_vma(vma);
+               cond_resched();
        }
        vm_unacct_memory(nr_accounted);
 }
index e8726f3..ccda767 100644 (file)
@@ -277,6 +277,23 @@ static inline void count_swpout_vm_event(struct page *page)
        count_vm_events(PSWPOUT, hpage_nr_pages(page));
 }
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+{
+       struct cgroup_subsys_state *css;
+
+       if (!page->mem_cgroup)
+               return;
+
+       rcu_read_lock();
+       css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+       bio_associate_blkg_from_css(bio, css);
+       rcu_read_unlock();
+}
+#else
+#define bio_associate_blkg_from_page(bio, page)                do { } while (0)
+#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
+
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
                bio_end_io_t end_write_func)
 {
index 987276c..6c26916 100644 (file)
@@ -2929,7 +2929,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
                 * write only restriction.  Hence zoned block devices are not
                 * suitable for swapping.  Disallow them here.
                 */
-               if (blk_queue_is_zoned(p->bdev->bd_queue))
+               if (blk_queue_is_zoned(p->bdev->bd_disk->queue))
                        return -EINVAL;
                p->flags |= SWP_BLKDEV;
        } else if (S_ISREG(inode->i_mode)) {
index 13cd683..12ecacf 100644 (file)
@@ -362,6 +362,10 @@ static void p9_read_work(struct work_struct *work)
                if (m->rreq->status == REQ_STATUS_SENT) {
                        list_del(&m->rreq->req_list);
                        p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
+               } else if (m->rreq->status == REQ_STATUS_FLSHD) {
+                       /* Ignore replies associated with a cancelled request. */
+                       p9_debug(P9_DEBUG_TRANS,
+                                "Ignore replies associated with a cancelled request\n");
                } else {
                        spin_unlock(&m->client->lock);
                        p9_debug(P9_DEBUG_ERROR,
@@ -703,11 +707,20 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
 {
        p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
 
+       spin_lock(&client->lock);
+       /* Ignore cancelled request if message has been received
+        * before lock.
+        */
+       if (req->status == REQ_STATUS_RCVD) {
+               spin_unlock(&client->lock);
+               return 0;
+       }
+
        /* we haven't received a response for oldreq,
         * remove it from the list.
         */
-       spin_lock(&client->lock);
        list_del(&req->req_list);
+       req->status = REQ_STATUS_FLSHD;
        spin_unlock(&client->lock);
        p9_req_put(req);
 
@@ -803,20 +816,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
                return -ENOMEM;
 
        ts->rd = fget(rfd);
+       if (!ts->rd)
+               goto out_free_ts;
+       if (!(ts->rd->f_mode & FMODE_READ))
+               goto out_put_rd;
        ts->wr = fget(wfd);
-       if (!ts->rd || !ts->wr) {
-               if (ts->rd)
-                       fput(ts->rd);
-               if (ts->wr)
-                       fput(ts->wr);
-               kfree(ts);
-               return -EIO;
-       }
+       if (!ts->wr)
+               goto out_put_rd;
+       if (!(ts->wr->f_mode & FMODE_WRITE))
+               goto out_put_wr;
 
        client->trans = ts;
        client->status = Connected;
 
        return 0;
+
+out_put_wr:
+       fput(ts->wr);
+out_put_rd:
+       fput(ts->rd);
+out_free_ts:
+       kfree(ts);
+       return -EIO;
 }
 
 static int p9_socket_open(struct p9_client *client, struct socket *csocket)
index cfeaee3..af9d7f2 100644 (file)
@@ -1338,6 +1338,9 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
 {
        struct discovery_state *d = &hdev->discovery;
 
+       if (len > HCI_MAX_AD_LENGTH)
+               return;
+
        bacpy(&d->last_adv_addr, bdaddr);
        d->last_adv_addr_type = bdaddr_type;
        d->last_adv_rssi = rssi;
@@ -5355,7 +5358,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 
 static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
                               u8 bdaddr_type, bdaddr_t *direct_addr,
-                              u8 direct_addr_type, s8 rssi, u8 *data, u8 len)
+                              u8 direct_addr_type, s8 rssi, u8 *data, u8 len,
+                              bool ext_adv)
 {
        struct discovery_state *d = &hdev->discovery;
        struct smp_irk *irk;
@@ -5377,6 +5381,11 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
                return;
        }
 
+       if (!ext_adv && len > HCI_MAX_AD_LENGTH) {
+               bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes");
+               return;
+       }
+
        /* Find the end of the data in case the report contains padded zero
         * bytes at the end causing an invalid length value.
         *
@@ -5437,7 +5446,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
         */
        conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
                                                                direct_addr);
-       if (conn && type == LE_ADV_IND) {
+       if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) {
                /* Store report for later inclusion by
                 * mgmt_device_connected
                 */
@@ -5491,7 +5500,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
         * event or send an immediate device found event if the data
         * should not be stored for later.
         */
-       if (!has_pending_adv_report(hdev)) {
+       if (!ext_adv && !has_pending_adv_report(hdev)) {
                /* If the report will trigger a SCAN_REQ store it for
                 * later merging.
                 */
@@ -5526,7 +5535,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
                /* If the new report will trigger a SCAN_REQ store it for
                 * later merging.
                 */
-               if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+               if (!ext_adv && (type == LE_ADV_IND ||
+                                type == LE_ADV_SCAN_IND)) {
                        store_pending_adv_report(hdev, bdaddr, bdaddr_type,
                                                 rssi, flags, data, len);
                        return;
@@ -5566,7 +5576,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
                        rssi = ev->data[ev->length];
                        process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
                                           ev->bdaddr_type, NULL, 0, rssi,
-                                          ev->data, ev->length);
+                                          ev->data, ev->length, false);
                } else {
                        bt_dev_err(hdev, "Dropping invalid advertising data");
                }
@@ -5638,7 +5648,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
                if (legacy_evt_type != LE_ADV_INVALID) {
                        process_adv_report(hdev, legacy_evt_type, &ev->bdaddr,
                                           ev->bdaddr_type, NULL, 0, ev->rssi,
-                                          ev->data, ev->length);
+                                          ev->data, ev->length,
+                                          !(evt_type & LE_EXT_ADV_LEGACY_PDU));
                }
 
                ptr += sizeof(*ev) + ev->length;
@@ -5836,7 +5847,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev,
 
                process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
                                   ev->bdaddr_type, &ev->direct_addr,
-                                  ev->direct_addr_type, ev->rssi, NULL, 0);
+                                  ev->direct_addr_type, ev->rssi, NULL, 0,
+                                  false);
 
                ptr += sizeof(*ev);
        }
index 1905e01..4494ea6 100644 (file)
@@ -39,7 +39,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 {
        struct mbox_request req;
        struct mbox_reply reply;
-       loff_t pos;
+       loff_t pos = 0;
        ssize_t n;
        int ret = -EFAULT;
 
index 5e3041a..434838b 100644 (file)
@@ -202,7 +202,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 
                /* Advance. */
                kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp);
-               ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
+               ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, cmsg.cmsg_len);
        }
 
        /*
index 2cafbc8..47f14a2 100644 (file)
@@ -1065,7 +1065,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
                                                   devlink_sb,
                                                   NETLINK_CB(cb->skb).portid,
                                                   cb->nlh->nlmsg_seq);
-                       if (err && err != -EOPNOTSUPP) {
+                       if (err == -EOPNOTSUPP) {
+                               err = 0;
+                       } else if (err) {
                                mutex_unlock(&devlink->lock);
                                goto out;
                        }
@@ -1266,7 +1268,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
                                                        devlink, devlink_sb,
                                                        NETLINK_CB(cb->skb).portid,
                                                        cb->nlh->nlmsg_seq);
-                       if (err && err != -EOPNOTSUPP) {
+                       if (err == -EOPNOTSUPP) {
+                               err = 0;
+                       } else if (err) {
                                mutex_unlock(&devlink->lock);
                                goto out;
                        }
@@ -1498,7 +1502,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
                                                           devlink_sb,
                                                           NETLINK_CB(cb->skb).portid,
                                                           cb->nlh->nlmsg_seq);
-                       if (err && err != -EOPNOTSUPP) {
+                       if (err == -EOPNOTSUPP) {
+                               err = 0;
+                       } else if (err) {
                                mutex_unlock(&devlink->lock);
                                goto out;
                        }
@@ -3299,7 +3305,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
                                                    NETLINK_CB(cb->skb).portid,
                                                    cb->nlh->nlmsg_seq,
                                                    NLM_F_MULTI);
-                       if (err && err != -EOPNOTSUPP) {
+                       if (err == -EOPNOTSUPP) {
+                               err = 0;
+                       } else if (err) {
                                mutex_unlock(&devlink->lock);
                                goto out;
                        }
@@ -3569,7 +3577,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
                                                NETLINK_CB(cb->skb).portid,
                                                cb->nlh->nlmsg_seq,
                                                NLM_F_MULTI);
-                               if (err && err != -EOPNOTSUPP) {
+                               if (err == -EOPNOTSUPP) {
+                                       err = 0;
+                               } else if (err) {
                                        mutex_unlock(&devlink->lock);
                                        goto out;
                                }
@@ -4518,7 +4528,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           cb->extack);
                mutex_unlock(&devlink->lock);
-               if (err && err != -EOPNOTSUPP)
+               if (err == -EOPNOTSUPP)
+                       err = 0;
+               else if (err)
                        break;
                idx++;
        }
@@ -8567,6 +8579,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = {
        DEVLINK_TRAP_GROUP(PIM),
        DEVLINK_TRAP_GROUP(UC_LB),
        DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
+       DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY),
        DEVLINK_TRAP_GROUP(IPV6),
        DEVLINK_TRAP_GROUP(PTP_EVENT),
        DEVLINK_TRAP_GROUP(PTP_GENERAL),
index 7bd6440..9de33b5 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/if_arp.h>
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/isolation.h>
 #include <linux/nsproxy.h>
 #include <net/sock.h>
 #include <net/net_namespace.h>
@@ -741,7 +742,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 {
        struct rps_map *old_map, *map;
        cpumask_var_t mask;
-       int err, cpu, i;
+       int err, cpu, i, hk_flags;
        static DEFINE_MUTEX(rps_map_mutex);
 
        if (!capable(CAP_NET_ADMIN))
@@ -756,6 +757,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
                return err;
        }
 
+       hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
+       cpumask_and(mask, mask, housekeeping_cpumask(hk_flags));
+       if (cpumask_empty(mask)) {
+               free_cpumask_var(mask);
+               return -EINVAL;
+       }
+
        map = kzalloc(max_t(unsigned int,
                            RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
                      GFP_KERNEL);
index 2e5b787..b8ac834 100644 (file)
@@ -1973,7 +1973,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 
                /*
                 * Before updating sk_refcnt, we must commit prior changes to memory
-                * (Documentation/RCU/rculist_nulls.txt for details)
+                * (Documentation/RCU/rculist_nulls.rst for details)
                 */
                smp_wmb();
                refcount_set(&newsk->sk_refcnt, 2);
@@ -3035,7 +3035,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk_rx_queue_clear(sk);
        /*
         * Before updating sk_refcnt, we must commit prior changes to memory
-        * (Documentation/RCU/rculist_nulls.txt for details)
+        * (Documentation/RCU/rculist_nulls.rst for details)
         */
        smp_wmb();
        refcount_set(&sk->sk_refcnt, 1);
index 248f1c1..3c65f71 100644 (file)
@@ -1864,7 +1864,7 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
        while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
                struct key_vector *local_l = NULL, *local_tp;
 
-               hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+               hlist_for_each_entry(fa, &l->leaf, fa_list) {
                        struct fib_alias *new_fa;
 
                        if (local_tb->tb_id != fa->tb_id)
index 8932612..dacdea7 100644 (file)
@@ -183,7 +183,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
        return 0;
 }
 
-void ipv6_sock_ac_close(struct sock *sk)
+void __ipv6_sock_ac_close(struct sock *sk)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net_device *dev = NULL;
@@ -191,10 +191,7 @@ void ipv6_sock_ac_close(struct sock *sk)
        struct net *net = sock_net(sk);
        int     prev_index;
 
-       if (!np->ipv6_ac_list)
-               return;
-
-       rtnl_lock();
+       ASSERT_RTNL();
        pac = np->ipv6_ac_list;
        np->ipv6_ac_list = NULL;
 
@@ -211,6 +208,16 @@ void ipv6_sock_ac_close(struct sock *sk)
                sock_kfree_s(sk, pac, sizeof(*pac));
                pac = next;
        }
+}
+
+void ipv6_sock_ac_close(struct sock *sk)
+{
+       struct ipv6_pinfo *np = inet6_sk(sk);
+
+       if (!np->ipv6_ac_list)
+               return;
+       rtnl_lock();
+       __ipv6_sock_ac_close(sk);
        rtnl_unlock();
 }
 
index c435927..52c2f06 100644 (file)
@@ -805,10 +805,17 @@ int esp6_input_done2(struct sk_buff *skb, int err)
 
        if (x->encap) {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+               int offset = skb_network_offset(skb) + sizeof(*ip6h);
                struct xfrm_encap_tmpl *encap = x->encap;
-               struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len);
-               struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len);
-               __be16 source;
+               u8 nexthdr = ip6h->nexthdr;
+               __be16 frag_off, source;
+               struct udphdr *uh;
+               struct tcphdr *th;
+
+               offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+               uh = (void *)(skb->data + offset);
+               th = (void *)(skb->data + offset);
+               hdr_len += offset;
 
                switch (x->encap->encap_type) {
                case TCP_ENCAP_ESPINTCP:
index 20576e8..76f9e41 100644 (file)
@@ -240,6 +240,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 
                        fl6_free_socklist(sk);
                        __ipv6_sock_mc_close(sk);
+                       __ipv6_sock_ac_close(sk);
 
                        /*
                         * Sock is moving from IPv6 to IPv4 (sk_prot), so
index f327981..4c36bd0 100644 (file)
@@ -3685,14 +3685,14 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
        rt->fib6_src.plen = cfg->fc_src_len;
 #endif
        if (nh) {
-               if (!nexthop_get(nh)) {
-                       NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
-                       goto out;
-               }
                if (rt->fib6_src.plen) {
                        NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
                        goto out;
                }
+               if (!nexthop_get(nh)) {
+                       NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+                       goto out;
+               }
                rt->nh = nh;
                fib6_nh = nexthop_fib6_nh(rt->nh);
        } else {
index b67ed3a..a915bc8 100644 (file)
@@ -1849,6 +1849,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
        if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
                struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
 
+               if ((xfilter->sadb_x_filter_splen >=
+                       (sizeof(xfrm_address_t) << 3)) ||
+                   (xfilter->sadb_x_filter_dplen >=
+                       (sizeof(xfrm_address_t) << 3))) {
+                       mutex_unlock(&pfk->dump_lock);
+                       return -EINVAL;
+               }
                filter = kmalloc(sizeof(*filter), GFP_KERNEL);
                if (filter == NULL) {
                        mutex_unlock(&pfk->dump_lock);
@@ -2400,7 +2407,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
                        return err;
        }
 
-       xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
+       xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN,
                                   pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
                                   1, &err);
        security_xfrm_policy_free(pol_ctx);
@@ -2651,7 +2658,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
                return -EINVAL;
 
        delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
-       xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
+       xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN,
                              dir, pol->sadb_x_policy_id, delete, &err);
        if (xp == NULL)
                return -ENOENT;
index 9b36054..1079a07 100644 (file)
@@ -2166,6 +2166,7 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev)
        ieee80211_stop_mesh(sdata);
        mutex_lock(&sdata->local->mtx);
        ieee80211_vif_release_channel(sdata);
+       kfree(sdata->u.mesh.ie);
        mutex_unlock(&sdata->local->mtx);
 
        return 0;
index 5f1ca25..e88beb3 100644 (file)
@@ -617,6 +617,19 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
 int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
                            struct sk_buff *skb)
 {
+       struct ieee80211_supported_band *sband;
+       const struct ieee80211_sband_iftype_data *iftd;
+
+       sband = ieee80211_get_sband(sdata);
+       if (!sband)
+               return -EINVAL;
+
+       iftd = ieee80211_get_sband_iftype_data(sband,
+                                              NL80211_IFTYPE_MESH_POINT);
+       /* The device doesn't support HE in mesh mode or at all */
+       if (!iftd)
+               return 0;
+
        ieee80211_ie_build_he_6ghz_cap(sdata, skb);
        return 0;
 }
index 117519b..aca608a 100644 (file)
@@ -521,6 +521,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl,
        del_timer_sync(&mpath->timer);
        atomic_dec(&sdata->u.mesh.mpaths);
        atomic_dec(&tbl->entries);
+       mesh_path_flush_pending(mpath);
        kfree_rcu(mpath, rcu);
 }
 
index cd8487b..af4cc5f 100644 (file)
@@ -1923,9 +1923,7 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
        if (sta) {
                tx_pending = atomic_sub_return(tx_airtime,
                                               &sta->airtime[ac].aql_tx_pending);
-               if (WARN_ONCE(tx_pending < 0,
-                             "STA %pM AC %d txq pending airtime underflow: %u, %u",
-                             sta->addr, ac, tx_pending, tx_airtime))
+               if (tx_pending < 0)
                        atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending,
                                       tx_pending, 0);
        }
index 1a2941e..3529d13 100644 (file)
@@ -4230,11 +4230,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
            test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
                goto out_free;
 
+       memset(info, 0, sizeof(*info));
+
        if (unlikely(!multicast && skb->sk &&
                     skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
-               ieee80211_store_ack_skb(local, skb, &info->flags, NULL);
-
-       memset(info, 0, sizeof(*info));
+               info->ack_frame_id = ieee80211_store_ack_skb(local, skb,
+                                                            &info->flags, NULL);
 
        if (unlikely(sdata->control_port_protocol == ehdr->h_proto)) {
                if (sdata->control_port_no_encrypt)
index 21c9409..dd9f5c7 100644 (file)
@@ -2878,6 +2878,10 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
        if (WARN_ON(!iftd))
                return;
 
+       /* Check for device HE 6 GHz capability before adding element */
+       if (!iftd->he_6ghz_capa.capa)
+               return;
+
        cap = le16_to_cpu(iftd->he_6ghz_capa.capa);
        cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS;
 
index 3d98071..82bd2b5 100644 (file)
@@ -32,11 +32,8 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
 {
        __be32 mptcp_hashed_key[SHA256_DIGEST_WORDS];
        __be64 input = cpu_to_be64(key);
-       struct sha256_state state;
 
-       sha256_init(&state);
-       sha256_update(&state, (__force u8 *)&input, sizeof(input));
-       sha256_final(&state, (u8 *)mptcp_hashed_key);
+       sha256((__force u8 *)&input, sizeof(input), (u8 *)mptcp_hashed_key);
 
        if (token)
                *token = be32_to_cpu(mptcp_hashed_key[0]);
@@ -47,7 +44,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
 void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
 {
        u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE];
-       struct sha256_state state;
        u8 key1be[8];
        u8 key2be[8];
        int i;
@@ -67,13 +63,10 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
 
        memcpy(&input[SHA256_BLOCK_SIZE], msg, len);
 
-       sha256_init(&state);
-       sha256_update(&state, input, SHA256_BLOCK_SIZE + len);
-
        /* emit sha256(K1 || msg) on the second input block, so we can
         * reuse 'input' for the last hashing
         */
-       sha256_final(&state, &input[SHA256_BLOCK_SIZE]);
+       sha256(input, SHA256_BLOCK_SIZE + len, &input[SHA256_BLOCK_SIZE]);
 
        /* Prepare second part of hmac */
        memset(input, 0x5C, SHA256_BLOCK_SIZE);
@@ -82,9 +75,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
        for (i = 0; i < 8; i++)
                input[i + 8] ^= key2be[i];
 
-       sha256_init(&state);
-       sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE);
-       sha256_final(&state, (u8 *)hmac);
+       sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac);
 }
 
 #ifdef CONFIG_MPTCP_HMAC_TEST
index 3980fbb..c0abe73 100644 (file)
@@ -1833,7 +1833,7 @@ do_connect:
        /* on successful connect, the msk state will be moved to established by
         * subflow_finish_connect()
         */
-       if (!err || err == EINPROGRESS)
+       if (!err || err == -EINPROGRESS)
                mptcp_copy_inaddrs(sock->sk, ssock->sk);
        else
                inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
index c840497..aba4afe 100644 (file)
@@ -450,12 +450,13 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
 int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
 {
        struct rds_notifier *notifier;
-       struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */
+       struct rds_rdma_notify cmsg;
        unsigned int count = 0, max_messages = ~0U;
        unsigned long flags;
        LIST_HEAD(copy);
        int err = 0;
 
+       memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */
 
        /* put_cmsg copies to user space and thus may sleep. We can't do this
         * with rs_lock held, so first grab as many notifications as we can stuff
index f079702..38a4616 100644 (file)
@@ -288,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
         */
        ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
        if (ret < 0)
-               goto error;
+               goto error_attached_to_socket;
 
        trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
                         atomic_read(&call->usage), here, NULL);
@@ -308,18 +308,29 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 error_dup_user_ID:
        write_unlock(&rx->call_lock);
        release_sock(&rx->sk);
-       ret = -EEXIST;
-
-error:
        __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
-                                   RX_CALL_DEAD, ret);
+                                   RX_CALL_DEAD, -EEXIST);
        trace_rxrpc_call(call->debug_id, rxrpc_call_error,
-                        atomic_read(&call->usage), here, ERR_PTR(ret));
+                        atomic_read(&call->usage), here, ERR_PTR(-EEXIST));
        rxrpc_release_call(rx, call);
        mutex_unlock(&call->user_mutex);
        rxrpc_put_call(call, rxrpc_call_put);
-       _leave(" = %d", ret);
-       return ERR_PTR(ret);
+       _leave(" = -EEXIST");
+       return ERR_PTR(-EEXIST);
+
+       /* We got an error, but the call is attached to the socket and is in
+        * need of release.  However, we might now race with recvmsg() when
+        * completing the call queues it.  Return 0 from sys_sendmsg() and
+        * leave the error to recvmsg() to deal with.
+        */
+error_attached_to_socket:
+       trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+                        atomic_read(&call->usage), here, ERR_PTR(ret));
+       set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+       __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+                                   RX_CALL_DEAD, ret);
+       _leave(" = c=%08x [err]", call->debug_id);
+       return call;
 }
 
 /*
index 19e141e..8cbe0bf 100644 (file)
@@ -212,9 +212,11 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
 
        call->peer->cong_cwnd = call->cong_cwnd;
 
-       spin_lock_bh(&conn->params.peer->lock);
-       hlist_del_rcu(&call->error_link);
-       spin_unlock_bh(&conn->params.peer->lock);
+       if (!hlist_unhashed(&call->error_link)) {
+               spin_lock_bh(&call->peer->lock);
+               hlist_del_rcu(&call->error_link);
+               spin_unlock_bh(&call->peer->lock);
+       }
 
        if (rxrpc_is_client_call(call))
                return rxrpc_disconnect_client_call(call);
index 490b192..efecc5a 100644 (file)
@@ -620,7 +620,7 @@ try_again:
                        goto error_unlock_call;
        }
 
-       if (msg->msg_name) {
+       if (msg->msg_name && call->peer) {
                struct sockaddr_rxrpc *srx = msg->msg_name;
                size_t len = sizeof(call->peer->srx);
 
index 03a30d0..f3f6da6 100644 (file)
@@ -681,6 +681,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
                if (IS_ERR(call))
                        return PTR_ERR(call);
                /* ... and we have the call lock. */
+               ret = 0;
+               if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE)
+                       goto out_put_unlock;
        } else {
                switch (READ_ONCE(call->state)) {
                case RXRPC_CALL_UNINITIALISED:
index 5928efb..6ed1652 100644 (file)
@@ -1543,10 +1543,10 @@ static int __init ct_init_module(void)
 
        return 0;
 
-err_tbl_init:
-       destroy_workqueue(act_ct_wq);
 err_register:
        tcf_ct_flow_tables_uninit();
+err_tbl_init:
+       destroy_workqueue(act_ct_wq);
        return err;
 }
 
index 0e07fb8..7fbca08 100644 (file)
@@ -13266,13 +13266,13 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
                                if (!wdev_running(wdev))
                                        return -ENETDOWN;
                        }
-
-                       if (!vcmd->doit)
-                               return -EOPNOTSUPP;
                } else {
                        wdev = NULL;
                }
 
+               if (!vcmd->doit)
+                       return -EOPNOTSUPP;
+
                if (info->attrs[NL80211_ATTR_VENDOR_DATA]) {
                        data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]);
                        len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]);
index 100e296..827ccdf 100644 (file)
@@ -15,6 +15,7 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
 {
        if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf ||
            !sk_rmem_schedule(sk, skb, skb->truesize)) {
+               XFRM_INC_STATS(sock_net(sk), LINUX_MIB_XFRMINERROR);
                kfree_skb(skb);
                return;
        }
@@ -49,23 +50,51 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb)
        struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx,
                                                strp);
        struct strp_msg *rxm = strp_msg(skb);
+       int len = rxm->full_len - 2;
        u32 nonesp_marker;
        int err;
 
+       /* keepalive packet? */
+       if (unlikely(len == 1)) {
+               u8 data;
+
+               err = skb_copy_bits(skb, rxm->offset + 2, &data, 1);
+               if (err < 0) {
+                       XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
+                       kfree_skb(skb);
+                       return;
+               }
+
+               if (data == 0xff) {
+                       kfree_skb(skb);
+                       return;
+               }
+       }
+
+       /* drop other short messages */
+       if (unlikely(len <= sizeof(nonesp_marker))) {
+               XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
+               kfree_skb(skb);
+               return;
+       }
+
        err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker,
                            sizeof(nonesp_marker));
        if (err < 0) {
+               XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
                kfree_skb(skb);
                return;
        }
 
        /* remove header, leave non-ESP marker/SPI */
        if (!__pskb_pull(skb, rxm->offset + 2)) {
+               XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
                kfree_skb(skb);
                return;
        }
 
        if (pskb_trim(skb, rxm->full_len - 2) != 0) {
+               XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
                kfree_skb(skb);
                return;
        }
@@ -91,7 +120,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
                return err;
 
        len = be16_to_cpu(blen);
-       if (len < 6)
+       if (len < 2)
                return -EINVAL;
 
        return len;
@@ -109,8 +138,11 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
        flags |= nonblock ? MSG_DONTWAIT : 0;
 
        skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err);
-       if (!skb)
+       if (!skb) {
+               if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN)
+                       return 0;
                return err;
+       }
 
        copied = len;
        if (copied > skb->len)
@@ -213,7 +245,7 @@ retry:
        return 0;
 }
 
-static int espintcp_push_msgs(struct sock *sk)
+static int espintcp_push_msgs(struct sock *sk, int flags)
 {
        struct espintcp_ctx *ctx = espintcp_getctx(sk);
        struct espintcp_msg *emsg = &ctx->partial;
@@ -227,12 +259,12 @@ static int espintcp_push_msgs(struct sock *sk)
        ctx->tx_running = 1;
 
        if (emsg->skb)
-               err = espintcp_sendskb_locked(sk, emsg, 0);
+               err = espintcp_sendskb_locked(sk, emsg, flags);
        else
-               err = espintcp_sendskmsg_locked(sk, emsg, 0);
+               err = espintcp_sendskmsg_locked(sk, emsg, flags);
        if (err == -EAGAIN) {
                ctx->tx_running = 0;
-               return 0;
+               return flags & MSG_DONTWAIT ? -EAGAIN : 0;
        }
        if (!err)
                memset(emsg, 0, sizeof(*emsg));
@@ -257,7 +289,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
        offset = skb_transport_offset(skb);
        len = skb->len - offset;
 
-       espintcp_push_msgs(sk);
+       espintcp_push_msgs(sk, 0);
 
        if (emsg->len) {
                kfree_skb(skb);
@@ -270,7 +302,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
        emsg->len = len;
        emsg->skb = skb;
 
-       espintcp_push_msgs(sk);
+       espintcp_push_msgs(sk, 0);
 
        return 0;
 }
@@ -287,7 +319,7 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
        char buf[2] = {0};
        int err, end;
 
-       if (msg->msg_flags)
+       if (msg->msg_flags & ~MSG_DONTWAIT)
                return -EOPNOTSUPP;
 
        if (size > MAX_ESPINTCP_MSG)
@@ -298,9 +330,10 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
        lock_sock(sk);
 
-       err = espintcp_push_msgs(sk);
+       err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
        if (err < 0) {
-               err = -ENOBUFS;
+               if (err != -EAGAIN || !(msg->msg_flags & MSG_DONTWAIT))
+                       err = -ENOBUFS;
                goto unlock;
        }
 
@@ -337,10 +370,9 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
        tcp_rate_check_app_limited(sk);
 
-       err = espintcp_push_msgs(sk);
+       err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
        /* this message could be partially sent, keep it */
-       if (err < 0)
-               goto unlock;
+
        release_sock(sk);
 
        return size;
@@ -374,7 +406,7 @@ static void espintcp_tx_work(struct work_struct *work)
 
        lock_sock(sk);
        if (!ctx->tx_running)
-               espintcp_push_msgs(sk);
+               espintcp_push_msgs(sk, 0);
        release_sock(sk);
 }
 
index 564aa64..19c5e0f 100644 (file)
@@ -39,7 +39,7 @@
 #ifdef CONFIG_XFRM_STATISTICS
 #include <net/snmp.h>
 #endif
-#ifdef CONFIG_INET_ESPINTCP
+#ifdef CONFIG_XFRM_ESPINTCP
 #include <net/espintcp.h>
 #endif
 
@@ -1433,14 +1433,10 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
        spin_unlock_bh(&pq->hold_queue.lock);
 }
 
-static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
-                                  struct xfrm_policy *pol)
+static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark,
+                                         struct xfrm_policy *pol)
 {
-       if (policy->mark.v == pol->mark.v &&
-           policy->priority == pol->priority)
-               return true;
-
-       return false;
+       return mark->v == pol->mark.v && mark->m == pol->mark.m;
 }
 
 static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
@@ -1503,7 +1499,7 @@ static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
                if (pol->type == policy->type &&
                    pol->if_id == policy->if_id &&
                    !selector_cmp(&pol->selector, &policy->selector) &&
-                   xfrm_policy_mark_match(policy, pol) &&
+                   xfrm_policy_mark_match(&policy->mark, pol) &&
                    xfrm_sec_ctx_match(pol->security, policy->security) &&
                    !WARN_ON(delpol)) {
                        delpol = pol;
@@ -1538,7 +1534,7 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
                if (pol->type == policy->type &&
                    pol->if_id == policy->if_id &&
                    !selector_cmp(&pol->selector, &policy->selector) &&
-                   xfrm_policy_mark_match(policy, pol) &&
+                   xfrm_policy_mark_match(&policy->mark, pol) &&
                    xfrm_sec_ctx_match(pol->security, policy->security) &&
                    !WARN_ON(delpol)) {
                        if (excl)
@@ -1610,9 +1606,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 EXPORT_SYMBOL(xfrm_policy_insert);
 
 static struct xfrm_policy *
-__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
-                       u8 type, int dir,
-                       struct xfrm_selector *sel,
+__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark,
+                       u32 if_id, u8 type, int dir, struct xfrm_selector *sel,
                        struct xfrm_sec_ctx *ctx)
 {
        struct xfrm_policy *pol;
@@ -1623,7 +1618,7 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
        hlist_for_each_entry(pol, chain, bydst) {
                if (pol->type == type &&
                    pol->if_id == if_id &&
-                   (mark & pol->mark.m) == pol->mark.v &&
+                   xfrm_policy_mark_match(mark, pol) &&
                    !selector_cmp(sel, &pol->selector) &&
                    xfrm_sec_ctx_match(ctx, pol->security))
                        return pol;
@@ -1632,11 +1627,10 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
        return NULL;
 }
 
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
-                                         u8 type, int dir,
-                                         struct xfrm_selector *sel,
-                                         struct xfrm_sec_ctx *ctx, int delete,
-                                         int *err)
+struct xfrm_policy *
+xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+                     u8 type, int dir, struct xfrm_selector *sel,
+                     struct xfrm_sec_ctx *ctx, int delete, int *err)
 {
        struct xfrm_pol_inexact_bin *bin = NULL;
        struct xfrm_policy *pol, *ret = NULL;
@@ -1703,9 +1697,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
-                                    u8 type, int dir, u32 id, int delete,
-                                    int *err)
+struct xfrm_policy *
+xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+                u8 type, int dir, u32 id, int delete, int *err)
 {
        struct xfrm_policy *pol, *ret;
        struct hlist_head *chain;
@@ -1720,8 +1714,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
        ret = NULL;
        hlist_for_each_entry(pol, chain, byidx) {
                if (pol->type == type && pol->index == id &&
-                   pol->if_id == if_id &&
-                   (mark & pol->mark.m) == pol->mark.v) {
+                   pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) {
                        xfrm_pol_hold(pol);
                        if (delete) {
                                *err = security_xfrm_policy_delete(
@@ -4156,7 +4149,7 @@ void __init xfrm_init(void)
        seqcount_init(&xfrm_policy_hash_generation);
        xfrm_input_init();
 
-#ifdef CONFIG_INET_ESPINTCP
+#ifdef CONFIG_XFRM_ESPINTCP
        espintcp_init();
 #endif
 
index e6cfaa6..fbb7d9d 100644 (file)
@@ -1863,7 +1863,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct km_event c;
        int delete;
        struct xfrm_mark m;
-       u32 mark = xfrm_mark_get(attrs, &m);
        u32 if_id = 0;
 
        p = nlmsg_data(nlh);
@@ -1880,8 +1879,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (attrs[XFRMA_IF_ID])
                if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
 
+       xfrm_mark_get(attrs, &m);
+
        if (p->index)
-               xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err);
+               xp = xfrm_policy_byid(net, &m, if_id, type, p->dir,
+                                     p->index, delete, &err);
        else {
                struct nlattr *rt = attrs[XFRMA_SEC_CTX];
                struct xfrm_sec_ctx *ctx;
@@ -1898,8 +1900,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
                        if (err)
                                return err;
                }
-               xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel,
-                                          ctx, delete, &err);
+               xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir,
+                                          &p->sel, ctx, delete, &err);
                security_xfrm_policy_free(ctx);
        }
        if (xp == NULL)
@@ -2166,7 +2168,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
        u8 type = XFRM_POLICY_TYPE_MAIN;
        int err = -ENOENT;
        struct xfrm_mark m;
-       u32 mark = xfrm_mark_get(attrs, &m);
        u32 if_id = 0;
 
        err = copy_from_user_policy_type(&type, attrs);
@@ -2180,8 +2181,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (attrs[XFRMA_IF_ID])
                if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
 
+       xfrm_mark_get(attrs, &m);
+
        if (p->index)
-               xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err);
+               xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index,
+                                     0, &err);
        else {
                struct nlattr *rt = attrs[XFRMA_SEC_CTX];
                struct xfrm_sec_ctx *ctx;
@@ -2198,7 +2202,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
                        if (err)
                                return err;
                }
-               xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir,
+               xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir,
                                           &p->sel, ctx, 0, &err);
                security_xfrm_policy_free(ctx);
        }
index bd4da1a..dd66206 100644 (file)
@@ -6,7 +6,7 @@ ifdef CONFIG_KCSAN
 ifdef CONFIG_CC_IS_CLANG
 cc-param = -mllvm -$(1)
 else
-cc-param = --param -$(1)
+cc-param = --param $(1)
 endif
 
 # Keep most options here optional, to allow enabling more compilers if absence
index 916b2f7..54f7b7e 100644 (file)
@@ -413,6 +413,28 @@ quiet_cmd_xzkern = XZKERN  $@
 quiet_cmd_xzmisc = XZMISC  $@
       cmd_xzmisc = cat $(real-prereqs) | $(XZ) --check=crc32 --lzma2=dict=1MiB > $@
 
+# ZSTD
+# ---------------------------------------------------------------------------
+# Appends the uncompressed size of the data using size_append. The .zst
+# format has the size information available at the beginning of the file too,
+# but it's in a more complex format and it's good to avoid changing the part
+# of the boot code that reads the uncompressed size.
+#
+# Note that the bytes added by size_append will make the zstd tool think that
+# the file is corrupt. This is expected.
+#
+# zstd uses a maximum window size of 8 MB. zstd22 uses a maximum window size of
+# 128 MB. zstd22 is used for kernel compression because it is decompressed in a
+# single pass, so zstd doesn't need to allocate a window buffer. When streaming
+# decompression is used, like initramfs decompression, zstd22 should likely not
+# be used because it would require zstd to allocate a 128 MB buffer.
+
+quiet_cmd_zstd = ZSTD    $@
+      cmd_zstd = { cat $(real-prereqs) | $(ZSTD) -19; $(size_append); } > $@
+
+quiet_cmd_zstd22 = ZSTD22  $@
+      cmd_zstd22 = { cat $(real-prereqs) | $(ZSTD) -22 --ultra; $(size_append); } > $@
+
 # ASM offsets
 # ---------------------------------------------------------------------------
 
index 3651cbf..f54b6ac 100644 (file)
@@ -124,9 +124,6 @@ existing-targets := $(wildcard $(sort $(targets)))
 
 -include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
 
-PHONY += FORCE
-FORCE:
-
 endif
 
 .PHONY: $(PHONY)
index 4c82060..8032f80 100755 (executable)
@@ -5903,8 +5903,7 @@ sub process {
                my $barriers = qr{
                        mb|
                        rmb|
-                       wmb|
-                       read_barrier_depends
+                       wmb
                }x;
                my $barrier_stems = qr{
                        mb__before_atomic|
@@ -5953,12 +5952,6 @@ sub process {
                        }
                }
 
-# check for smp_read_barrier_depends and read_barrier_depends
-               if (!$file && $line =~ /\b(smp_|)read_barrier_depends\s*\(/) {
-                       WARN("READ_BARRIER_DEPENDS",
-                            "$1read_barrier_depends should only be used in READ_ONCE or DEC Alpha code\n" . $herecurr);
-               }
-
 # check of hardware specific defines
                if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
                        CHK("ARCH_DEFINES",
index 12a67fd..c3d537c 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-*.moc
+/qconf-moc.cc
 *conf-cfg
 
 #
index 426881e..52b59bf 100644 (file)
@@ -181,19 +181,22 @@ $(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/mconf-cfg
 
 # qconf: Used for the xconfig target based on Qt
 hostprogs      += qconf
-qconf-cxxobjs  := qconf.o
+qconf-cxxobjs  := qconf.o qconf-moc.o
 qconf-objs     := images.o $(common-objs)
 
 HOSTLDLIBS_qconf       = $(shell . $(obj)/qconf-cfg && echo $$libs)
 HOSTCXXFLAGS_qconf.o   = $(shell . $(obj)/qconf-cfg && echo $$cflags)
+HOSTCXXFLAGS_qconf-moc.o = $(shell . $(obj)/qconf-cfg && echo $$cflags)
 
-$(obj)/qconf.o: $(obj)/qconf-cfg $(obj)/qconf.moc
+$(obj)/qconf.o: $(obj)/qconf-cfg
 
 quiet_cmd_moc = MOC     $@
-      cmd_moc = $(shell . $(obj)/qconf-cfg && echo $$moc) -i $< -o $@
+      cmd_moc = $(shell . $(obj)/qconf-cfg && echo $$moc) $< -o $@
 
-$(obj)/%.moc: $(src)/%.h $(obj)/qconf-cfg
-       $(call cmd,moc)
+$(obj)/qconf-moc.cc: $(src)/qconf.h $(obj)/qconf-cfg FORCE
+       $(call if_changed,moc)
+
+targets += qconf-moc.cc
 
 # gconf: Used for the gconfig target based on GTK+
 hostprogs      += gconf
index 4a61612..23d1cb0 100644 (file)
@@ -23,7 +23,6 @@
 #include "lkc.h"
 #include "qconf.h"
 
-#include "qconf.moc"
 #include "images.h"
 
 
@@ -308,10 +307,7 @@ ConfigList::ConfigList(ConfigView* p, const char *name)
        setVerticalScrollMode(ScrollPerPixel);
        setHorizontalScrollMode(ScrollPerPixel);
 
-       if (mode == symbolMode)
-               setHeaderLabels(QStringList() << "Item" << "Name" << "N" << "M" << "Y" << "Value");
-       else
-               setHeaderLabels(QStringList() << "Option" << "Name" << "N" << "M" << "Y" << "Value");
+       setHeaderLabels(QStringList() << "Option" << "Name" << "N" << "M" << "Y" << "Value");
 
        connect(this, SIGNAL(itemSelectionChanged(void)),
                SLOT(updateSelection(void)));
@@ -392,11 +388,6 @@ void ConfigList::updateSelection(void)
        struct menu *menu;
        enum prop_type type;
 
-       if (mode == symbolMode)
-               setHeaderLabels(QStringList() << "Item" << "Name" << "N" << "M" << "Y" << "Value");
-       else
-               setHeaderLabels(QStringList() << "Option" << "Name" << "N" << "M" << "Y" << "Value");
-
        if (selectedItems().count() == 0)
                return;
 
@@ -437,14 +428,13 @@ void ConfigList::updateList(ConfigItem* item)
        if (rootEntry != &rootmenu && (mode == singleMode ||
            (mode == symbolMode && rootEntry->parent != &rootmenu))) {
                item = (ConfigItem *)topLevelItem(0);
-               if (!item && mode != symbolMode) {
+               if (!item)
                        item = new ConfigItem(this, 0, true);
-                       last = item;
-               }
+               last = item;
        }
        if ((mode == singleMode || (mode == symbolMode && !(rootEntry->flags & MENU_ROOT))) &&
            rootEntry->sym && rootEntry->prompt) {
-               item = last ? last->nextSibling() : firstChild();
+               item = last ? last->nextSibling() : nullptr;
                if (!item)
                        item = new ConfigItem(this, last, rootEntry, true);
                else
@@ -1239,7 +1229,7 @@ void ConfigInfoView::clicked(const QUrl &url)
 
        if (count < 1) {
                qInfo() << "Clicked link is empty";
-               delete data;
+               delete[] data;
                return;
        }
 
@@ -1252,7 +1242,7 @@ void ConfigInfoView::clicked(const QUrl &url)
        result = sym_re_search(data);
        if (!result) {
                qInfo() << "Clicked symbol is invalid:" << data;
-               delete data;
+               delete[] data;
                return;
        }
 
@@ -1735,7 +1725,6 @@ void ConfigMainWindow::listFocusChanged(void)
 
 void ConfigMainWindow::goBack(void)
 {
-qInfo() << __FUNCTION__;
        if (configList->rootEntry == &rootmenu)
                return;
 
index fb9e972..5eeab4a 100644 (file)
@@ -92,10 +92,6 @@ public:
        {
                return this;
        }
-       ConfigItem* firstChild() const
-       {
-               return (ConfigItem *)children().first();
-       }
        void addColumn(colIdx idx)
        {
                showColumn(idx);
index 45f2ab2..69341b3 100644 (file)
@@ -144,6 +144,7 @@ char *get_line(char **stringp)
        if (!orig || *orig == '\0')
                return NULL;
 
+       /* don't use strsep here, it is not available everywhere */
        next = strchr(orig, '\n');
        if (next)
                *next++ = '\0';
index 7225107..e59022b 100644 (file)
@@ -434,6 +434,11 @@ static int arm_is_fake_mcount(Elf32_Rel const *rp)
        return 1;
 }
 
+static int arm64_is_fake_mcount(Elf64_Rel const *rp)
+{
+       return ELF64_R_TYPE(w(rp->r_info)) != R_AARCH64_CALL26;
+}
+
 /* 64-bit EM_MIPS has weird ELF64_Rela.r_info.
  * http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
  * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40]
@@ -547,6 +552,7 @@ static int do_file(char const *const fname)
                make_nop = make_nop_arm64;
                rel_type_nop = R_AARCH64_NONE;
                ideal_nop = ideal_nop4_arm64;
+               is_fake_mcount64 = arm64_is_fake_mcount;
                break;
        case EM_IA_64:  reltype = R_IA64_IMM64; break;
        case EM_MIPS:   /* reltype: e_class    */ break;
index ec6b5e8..0ef3abf 100644 (file)
@@ -255,6 +255,45 @@ static void x86_sort_relative_table(char *extab_image, int image_size)
        }
 }
 
+static void s390_sort_relative_table(char *extab_image, int image_size)
+{
+       int i;
+
+       for (i = 0; i < image_size; i += 16) {
+               char *loc = extab_image + i;
+               uint64_t handler;
+
+               w(r((uint32_t *)loc) + i, (uint32_t *)loc);
+               w(r((uint32_t *)(loc + 4)) + (i + 4), (uint32_t *)(loc + 4));
+               /*
+                * 0 is a special self-relative handler value, which means that
+                * handler should be ignored. It is safe, because it means that
+                * handler field points to itself, which should never happen.
+                * When creating extable-relative values, keep it as 0, since
+                * this should never occur either: it would mean that handler
+                * field points to the first extable entry.
+                */
+               handler = r8((uint64_t *)(loc + 8));
+               if (handler)
+                       handler += i + 8;
+               w8(handler, (uint64_t *)(loc + 8));
+       }
+
+       qsort(extab_image, image_size / 16, 16, compare_relative_table);
+
+       for (i = 0; i < image_size; i += 16) {
+               char *loc = extab_image + i;
+               uint64_t handler;
+
+               w(r((uint32_t *)loc) - i, (uint32_t *)loc);
+               w(r((uint32_t *)(loc + 4)) - (i + 4), (uint32_t *)(loc + 4));
+               handler = r8((uint64_t *)(loc + 8));
+               if (handler)
+                       handler -= i + 8;
+               w8(handler, (uint64_t *)(loc + 8));
+       }
+}
+
 static int do_file(char const *const fname, void *addr)
 {
        int rc = -1;
@@ -297,6 +336,8 @@ static int do_file(char const *const fname, void *addr)
                custom_sort = x86_sort_relative_table;
                break;
        case EM_S390:
+               custom_sort = s390_sort_relative_table;
+               break;
        case EM_AARCH64:
        case EM_PARISC:
        case EM_PPC:
index ee5cb94..670a1ae 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/fs.h>
 #include <linux/lsm_hooks.h>
 #include <linux/mount.h>
+#include <linux/blkdev.h>
 #include <linux/path.h>
 #include <linux/sched.h>       /* current */
 #include <linux/string_helpers.h>
index 7e3ae45..803978d 100644 (file)
@@ -2935,6 +2935,10 @@ static int hda_codec_runtime_suspend(struct device *dev)
        struct hda_codec *codec = dev_to_hda_codec(dev);
        unsigned int state;
 
+       /* Nothing to do if card registration fails and the component driver never probes */
+       if (!codec->card)
+               return 0;
+
        cancel_delayed_work_sync(&codec->jackpoll_work);
        state = hda_call_codec_suspend(codec);
        if (codec->link_down_at_suspend ||
@@ -2949,6 +2953,10 @@ static int hda_codec_runtime_resume(struct device *dev)
 {
        struct hda_codec *codec = dev_to_hda_codec(dev);
 
+       /* Nothing to do if card registration fails and the component driver never probes */
+       if (!codec->card)
+               return 0;
+
        codec_display_power(codec, true);
        snd_hdac_codec_link_up(&codec->core);
        hda_call_codec_resume(codec);
index 82e2644..a356fb0 100644 (file)
@@ -41,7 +41,7 @@
 /* 24 unused */
 #define AZX_DCAPS_COUNT_LPIB_DELAY  (1 << 25)  /* Take LPIB as delay */
 #define AZX_DCAPS_PM_RUNTIME   (1 << 26)       /* runtime PM support */
-/* 27 unused */
+#define AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP (1 << 27) /* Workaround for spurious wakeups after suspend */
 #define AZX_DCAPS_CORBRP_SELF_CLEAR (1 << 28)  /* CORBRP clears itself after reset */
 #define AZX_DCAPS_NO_MSI64      (1 << 29)      /* Stick to 32-bit MSIs */
 #define AZX_DCAPS_SEPARATE_STREAM_TAG  (1 << 30) /* capture and playback use separate stream tag */
index 3565e2a..3fbba2e 100644 (file)
@@ -298,7 +298,8 @@ enum {
 /* PCH for HSW/BDW; with runtime PM */
 /* no i915 binding for this as HSW/BDW has another controller for HDMI */
 #define AZX_DCAPS_INTEL_PCH \
-       (AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME)
+       (AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME |\
+        AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP)
 
 /* HSW HDMI */
 #define AZX_DCAPS_INTEL_HASWELL \
@@ -1028,7 +1029,14 @@ static int azx_suspend(struct device *dev)
        chip = card->private_data;
        bus = azx_bus(chip);
        snd_power_change_state(card, SNDRV_CTL_POWER_D3hot);
-       pm_runtime_force_suspend(dev);
+       /* An ugly workaround: direct call of __azx_runtime_suspend() and
+        * __azx_runtime_resume() for old Intel platforms that suffer from
+        * spurious wakeups after S3 suspend
+        */
+       if (chip->driver_caps & AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP)
+               __azx_runtime_suspend(chip);
+       else
+               pm_runtime_force_suspend(dev);
        if (bus->irq >= 0) {
                free_irq(bus->irq, chip);
                bus->irq = -1;
@@ -1057,7 +1065,10 @@ static int azx_resume(struct device *dev)
        if (azx_acquire_irq(chip, 1) < 0)
                return -EIO;
 
-       pm_runtime_force_resume(dev);
+       if (chip->driver_caps & AZX_DCAPS_SUSPEND_SPURIOUS_WAKEUP)
+               __azx_runtime_resume(chip, false);
+       else
+               pm_runtime_force_resume(dev);
        snd_power_change_state(card, SNDRV_CTL_POWER_D0);
 
        trace_azx_resume(chip);
index 41eaa89..cd46247 100644 (file)
@@ -2440,6 +2440,7 @@ static void generic_acomp_notifier_set(struct drm_audio_component *acomp,
        mutex_lock(&spec->bind_lock);
        spec->use_acomp_notifier = use_acomp;
        spec->codec->relaxed_resume = use_acomp;
+       spec->codec->bus->keep_power = 0;
        /* reprogram each jack detection logic depending on the notifier */
        for (i = 0; i < spec->num_pins; i++)
                reprogram_jack_detect(spec->codec,
@@ -2534,7 +2535,6 @@ static void generic_acomp_init(struct hda_codec *codec,
        if (!snd_hdac_acomp_init(&codec->bus->core, &spec->drm_audio_ops,
                                 match_bound_vga, 0)) {
                spec->acomp_registered = true;
-               codec->bus->keep_power = 0;
        }
 }
 
index 1b2d8e5..29f5878 100644 (file)
@@ -5975,6 +5975,16 @@ static void alc_fixup_disable_mic_vref(struct hda_codec *codec,
                snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ);
 }
 
+static void  alc285_fixup_hp_gpio_amp_init(struct hda_codec *codec,
+                             const struct hda_fixup *fix, int action)
+{
+       if (action != HDA_FIXUP_ACT_INIT)
+               return;
+
+       msleep(100);
+       alc_write_coef_idx(codec, 0x65, 0x0);
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -6152,8 +6162,10 @@ enum {
        ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS,
        ALC269VC_FIXUP_ACER_HEADSET_MIC,
        ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE,
-       ALC289_FIXUP_ASUS_G401,
+       ALC289_FIXUP_ASUS_GA401,
+       ALC289_FIXUP_ASUS_GA502,
        ALC256_FIXUP_ACER_MIC_NO_PRESENCE,
+       ALC285_FIXUP_HP_GPIO_AMP_INIT,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -7363,7 +7375,14 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MIC
        },
-       [ALC289_FIXUP_ASUS_G401] = {
+       [ALC289_FIXUP_ASUS_GA401] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x03a11020 }, /* headset mic with jack detect */
+                       { }
+               },
+       },
+       [ALC289_FIXUP_ASUS_GA502] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
                        { 0x19, 0x03a11020 }, /* headset mic with jack detect */
@@ -7379,6 +7398,12 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
        },
+       [ALC285_FIXUP_HP_GPIO_AMP_INIT] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc285_fixup_hp_gpio_amp_init,
+               .chained = true,
+               .chain_id = ALC285_FIXUP_HP_GPIO_LED
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7529,7 +7554,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
        SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
-       SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT),
        SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
@@ -7561,7 +7586,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
-       SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_G401),
+       SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
+       SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
        SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
        SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
        SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
@@ -7581,7 +7607,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x10cf, 0x1629, "Lifebook U7x7", ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC),
        SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
        SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
-       SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC225_FIXUP_HEADSET_JACK),
+       SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
        SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
index 8d45c62..ab009c7 100644 (file)
@@ -103,28 +103,6 @@ error:
        return ret;
 }
 
-static int calculate_sha256(struct cros_ec_codec_priv *priv,
-                           uint8_t *buf, uint32_t size, uint8_t *digest)
-{
-       struct sha256_state sctx;
-
-       sha256_init(&sctx);
-       sha256_update(&sctx, buf, size);
-       sha256_final(&sctx, digest);
-
-#ifdef DEBUG
-       {
-               char digest_str[65];
-
-               bin2hex(digest_str, digest, 32);
-               digest_str[64] = 0;
-               dev_dbg(priv->dev, "hash=%s\n", digest_str);
-       }
-#endif
-
-       return 0;
-}
-
 static int dmic_get_gain(struct snd_kcontrol *kcontrol,
                         struct snd_ctl_elem_value *ucontrol)
 {
@@ -782,9 +760,8 @@ static int wov_hotword_model_put(struct snd_kcontrol *kcontrol,
        if (IS_ERR(buf))
                return PTR_ERR(buf);
 
-       ret = calculate_sha256(priv, buf, size, digest);
-       if (ret)
-               goto leave;
+       sha256(buf, size, digest);
+       dev_dbg(priv->dev, "hash=%*phN\n", SHA256_DIGEST_SIZE, digest);
 
        p.cmd = EC_CODEC_WOV_GET_LANG;
        ret = send_ec_host_command(priv->ec_device, EC_CMD_EC_CODEC_WOV,
index 40b7cd1..a69d9e7 100644 (file)
@@ -367,6 +367,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs,
                ifnum = 0;
                goto add_sync_ep_from_ifnum;
        case USB_ID(0x07fd, 0x0008): /* MOTU M Series */
+       case USB_ID(0x31e9, 0x0001): /* Solid State Logic SSL2 */
        case USB_ID(0x31e9, 0x0002): /* Solid State Logic SSL2+ */
        case USB_ID(0x0d9a, 0x00df): /* RTX6001 */
                ep = 0x81;
index 6df1850..8a69258 100644 (file)
@@ -9,7 +9,8 @@ MAKE = make
 INSTALL ?= install
 
 CFLAGS += -Wall -O2
-CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi \
+         -I$(srctree)/tools/include
 
 # This will work when bpf is built in tools env. where srctree
 # isn't set and when invoked from selftests build, where srctree
index 3c21de8..f4699f9 100644 (file)
@@ -173,7 +173,7 @@ class IocgStat:
         self.usages = []
         self.usage = 0
         for i in range(NR_USAGE_SLOTS):
-            usage = iocg.usages[(usage_idx + i) % NR_USAGE_SLOTS].value_()
+            usage = iocg.usages[(usage_idx + 1 + i) % NR_USAGE_SLOTS].value_()
             upct = usage * 100 / HWEIGHT_WHOLE
             self.usages.append(upct)
             self.usage = max(self.usage, upct)
index 67e01bb..501262a 100644 (file)
@@ -2,9 +2,9 @@
 #ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
 #define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
 
-# define lockdep_hardirq_context(p)    0
+# define lockdep_hardirq_context()    0
 # define lockdep_softirq_context(p)    0
+# define lockdep_hardirqs_enabled()   0
+# define lockdep_hardirqs_enabled(   0
 # define lockdep_softirqs_enabled(p)   0
 # define lockdep_hardirq_enter()       do { } while (0)
 # define lockdep_hardirq_exit()                do { } while (0)
diff --git a/tools/include/uapi/linux/filter.h b/tools/include/uapi/linux/filter.h
new file mode 100644 (file)
index 0000000..eaef459
--- /dev/null
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Linux Socket Filter Data Structures
+ */
+
+#ifndef __LINUX_FILTER_H__
+#define __LINUX_FILTER_H__
+
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/*
+ * Current version of the filter code architecture.
+ */
+#define BPF_MAJOR_VERSION 1
+#define BPF_MINOR_VERSION 1
+
+/*
+ *     Try and keep these values and structures similar to BSD, especially
+ *     the BPF code definitions which need to match so you can share filters
+ */
+struct sock_filter {   /* Filter block */
+       __u16   code;   /* Actual filter code */
+       __u8    jt;     /* Jump true */
+       __u8    jf;     /* Jump false */
+       __u32   k;      /* Generic multiuse field */
+};
+
+struct sock_fprog {    /* Required for SO_ATTACH_FILTER. */
+       unsigned short          len;    /* Number of filter blocks */
+       struct sock_filter *filter;
+};
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code)  ((code) & 0x18)
+#define         BPF_A           0x10
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define         BPF_TAX         0x00
+#define         BPF_TXA         0x80
+
+/*
+ * Macros for filter block array initializers.
+ */
+#ifndef BPF_STMT
+#define BPF_STMT(code, k) { (unsigned short)(code), 0, 0, k }
+#endif
+#ifndef BPF_JUMP
+#define BPF_JUMP(code, k, jt, jf) { (unsigned short)(code), jt, jf, k }
+#endif
+
+/*
+ * Number of scratch memory words for: BPF_ST and BPF_STX
+ */
+#define BPF_MEMWORDS 16
+
+/* RATIONALE. Negative offsets are invalid in BPF.
+   We use them to reference ancillary data.
+   Unlike introduction new instructions, it does not break
+   existing compilers/optimizers.
+ */
+#define SKF_AD_OFF    (-0x1000)
+#define SKF_AD_PROTOCOL 0
+#define SKF_AD_PKTTYPE         4
+#define SKF_AD_IFINDEX         8
+#define SKF_AD_NLATTR  12
+#define SKF_AD_NLATTR_NEST     16
+#define SKF_AD_MARK    20
+#define SKF_AD_QUEUE   24
+#define SKF_AD_HATYPE  28
+#define SKF_AD_RXHASH  32
+#define SKF_AD_CPU     36
+#define SKF_AD_ALU_XOR_X       40
+#define SKF_AD_VLAN_TAG        44
+#define SKF_AD_VLAN_TAG_PRESENT 48
+#define SKF_AD_PAY_OFFSET      52
+#define SKF_AD_RANDOM  56
+#define SKF_AD_VLAN_TPID       60
+#define SKF_AD_MAX     64
+
+#define SKF_NET_OFF    (-0x100000)
+#define SKF_LL_OFF     (-0x200000)
+
+#define BPF_NET_OFF    SKF_NET_OFF
+#define BPF_LL_OFF     SKF_LL_OFF
+
+#endif /* __LINUX_FILTER_H__ */
index 7b2d6fc..21a1edd 100644 (file)
@@ -532,9 +532,10 @@ struct perf_event_mmap_page {
                                cap_bit0_is_deprecated  : 1, /* Always 1, signals that bit 0 is zero */
 
                                cap_user_rdpmc          : 1, /* The RDPMC instruction can be used to read counts */
-                               cap_user_time           : 1, /* The time_* fields are used */
+                               cap_user_time           : 1, /* The time_{shift,mult,offset} fields are used */
                                cap_user_time_zero      : 1, /* The time_zero field is used */
-                               cap_____res             : 59;
+                               cap_user_time_short     : 1, /* the time_{cycles,mask} fields are used */
+                               cap_____res             : 58;
                };
        };
 
@@ -593,13 +594,29 @@ struct perf_event_mmap_page {
         *               ((rem * time_mult) >> time_shift);
         */
        __u64   time_zero;
+
        __u32   size;                   /* Header size up to __reserved[] fields. */
+       __u32   __reserved_1;
+
+       /*
+        * If cap_user_time_short, the hardware clock is less than 64 bits wide
+        * and we must compute the 'cyc' value, as used by cap_user_time, as:
+        *
+        *   cyc = time_cycles + ((cyc - time_cycles) & time_mask)
+        *
+        * NOTE: this form is explicitly chosen such that cap_user_time_short
+        *       is a correction on top of cap_user_time, and code that doesn't
+        *       know about cap_user_time_short still works under the assumption
+        *       the counter doesn't wrap.
+        */
+       __u64   time_cycles;
+       __u64   time_mask;
 
                /*
                 * Hole for extension of the self monitor capabilities
                 */
 
-       __u8    __reserved[118*8+4];    /* align to 1k. */
+       __u8    __reserved[116*8];      /* align to 1k. */
 
        /*
         * Control data for the mmap() data buffer.
index 5f305c8..28a837b 100644 (file)
@@ -10,6 +10,7 @@ extern "C" {
 #include <string.h>
 #include "../../include/uapi/linux/io_uring.h"
 #include <inttypes.h>
+#include <linux/swab.h>
 #include "barrier.h"
 
 /*
@@ -145,11 +146,14 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
 }
 
 static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
-                                         short poll_mask)
+                                         unsigned poll_mask)
 {
        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode = IORING_OP_POLL_ADD;
        sqe->fd = fd;
+#if __BYTE_ORDER == __BIG_ENDIAN
+       poll_mask = __swahw32(poll_mask);
+#endif
        sqe->poll_events = poll_mask;
 }
 
index 5b36c58..ba4f338 100644 (file)
@@ -2861,6 +2861,7 @@ process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg,
        if (read_expected(TEP_EVENT_DELIM, ")") < 0)
                goto out_err;
 
+       free_token(token);
        type = read_token(&token);
        *tok = token;
 
index 349bb81..680d883 100644 (file)
@@ -197,7 +197,7 @@ define do_generate_dynamic_list_file
        xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
        if [ "$$symbol_type" = "U W" ];then                             \
                (echo '{';                                              \
-               $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
+               $(NM) -u -D $1 | awk 'NF>1 {sub("@.*", "", $$2); print "\t"$$2";"}' | sort -u;\
                echo '};';                                              \
                ) > $2;                                                 \
        else                                                            \
index e91a2eb..f9d610d 100644 (file)
@@ -1122,12 +1122,10 @@ maintain at least the appearance of FIFO order.
 In practice, this difficulty is solved by inserting a special fence
 between P1's two loads when the kernel is compiled for the Alpha
 architecture.  In fact, as of version 4.15, the kernel automatically
-adds this fence (called smp_read_barrier_depends() and defined as
-nothing at all on non-Alpha builds) after every READ_ONCE() and atomic
-load.  The effect of the fence is to cause the CPU not to execute any
-po-later instructions until after the local cache has finished
-processing all the stores it has already received.  Thus, if the code
-was changed to:
+adds this fence after every READ_ONCE() and atomic load on Alpha.  The
+effect of the fence is to cause the CPU not to execute any po-later
+instructions until after the local cache has finished processing all
+the stores it has already received.  Thus, if the code was changed to:
 
        P1()
        {
@@ -1146,14 +1144,14 @@ READ_ONCE() or another synchronization primitive rather than accessed
 directly.
 
 The LKMM requires that smp_rmb(), acquire fences, and strong fences
-share this property with smp_read_barrier_depends(): They do not allow
-the CPU to execute any po-later instructions (or po-later loads in the
-case of smp_rmb()) until all outstanding stores have been processed by
-the local cache.  In the case of a strong fence, the CPU first has to
-wait for all of its po-earlier stores to propagate to every other CPU
-in the system; then it has to wait for the local cache to process all
-the stores received as of that time -- not just the stores received
-when the strong fence began.
+share this property: They do not allow the CPU to execute any po-later
+instructions (or po-later loads in the case of smp_rmb()) until all
+outstanding stores have been processed by the local cache.  In the
+case of a strong fence, the CPU first has to wait for all of its
+po-earlier stores to propagate to every other CPU in the system; then
+it has to wait for the local cache to process all the stores received
+as of that time -- not just the stores received when the strong fence
+began.
 
 And of course, none of this matters for any architecture other than
 Alpha.
@@ -1987,28 +1985,36 @@ outcome undefined.
 
 In technical terms, the compiler is allowed to assume that when the
 program executes, there will not be any data races.  A "data race"
-occurs when two conflicting memory accesses execute concurrently;
-two memory accesses "conflict" if:
+occurs when there are two memory accesses such that:
 
-       they access the same location,
+1.     they access the same location,
 
-       they occur on different CPUs (or in different threads on the
-       same CPU),
+2.     at least one of them is a store,
 
-       at least one of them is a plain access,
+3.     at least one of them is plain,
 
-       and at least one of them is a store.
+4.     they occur on different CPUs (or in different threads on the
+       same CPU), and
 
-The LKMM tries to determine whether a program contains two conflicting
-accesses which may execute concurrently; if it does then the LKMM says
-there is a potential data race and makes no predictions about the
-program's outcome.
+5.     they execute concurrently.
 
-Determining whether two accesses conflict is easy; you can see that
-all the concepts involved in the definition above are already part of
-the memory model.  The hard part is telling whether they may execute
-concurrently.  The LKMM takes a conservative attitude, assuming that
-accesses may be concurrent unless it can prove they cannot.
+In the literature, two accesses are said to "conflict" if they satisfy
+1 and 2 above.  We'll go a little farther and say that two accesses
+are "race candidates" if they satisfy 1 - 4.  Thus, whether or not two
+race candidates actually do race in a given execution depends on
+whether they are concurrent.
+
+The LKMM tries to determine whether a program contains race candidates
+which may execute concurrently; if it does then the LKMM says there is
+a potential data race and makes no predictions about the program's
+outcome.
+
+Determining whether two accesses are race candidates is easy; you can
+see that all the concepts involved in the definition above are already
+part of the memory model.  The hard part is telling whether they may
+execute concurrently.  The LKMM takes a conservative attitude,
+assuming that accesses may be concurrent unless it can prove they
+are not.
 
 If two memory accesses aren't concurrent then one must execute before
 the other.  Therefore the LKMM decides two accesses aren't concurrent
@@ -2171,8 +2177,8 @@ again, now using plain accesses for buf:
        }
 
 This program does not contain a data race.  Although the U and V
-accesses conflict, the LKMM can prove they are not concurrent as
-follows:
+accesses are race candidates, the LKMM can prove they are not
+concurrent as follows:
 
        The smp_wmb() fence in P0 is both a compiler barrier and a
        cumul-fence.  It guarantees that no matter what hash of
@@ -2326,12 +2332,11 @@ could now perform the load of x before the load of ptr (there might be
 a control dependency but no address dependency at the machine level).
 
 Finally, it turns out there is a situation in which a plain write does
-not need to be w-post-bounded: when it is separated from the
-conflicting access by a fence.  At first glance this may seem
-impossible.  After all, to be conflicting the second access has to be
-on a different CPU from the first, and fences don't link events on
-different CPUs.  Well, normal fences don't -- but rcu-fence can!
-Here's an example:
+not need to be w-post-bounded: when it is separated from the other
+race-candidate access by a fence.  At first glance this may seem
+impossible.  After all, to be race candidates the two accesses must
+be on different CPUs, and fences don't link events on different CPUs.
+Well, normal fences don't -- but rcu-fence can!  Here's an example:
 
        int x, y;
 
@@ -2367,7 +2372,7 @@ concurrent and there is no race, even though P1's plain store to y
 isn't w-post-bounded by any marked accesses.
 
 Putting all this material together yields the following picture.  For
-two conflicting stores W and W', where W ->co W', the LKMM says the
+race-candidate stores W and W', where W ->co W', the LKMM says the
 stores don't race if W can be linked to W' by a
 
        w-post-bounded ; vis ; w-pre-bounded
@@ -2380,8 +2385,8 @@ sequence, and if W' is plain then they also have to be linked by a
 
        w-post-bounded ; vis ; r-pre-bounded
 
-sequence.  For a conflicting load R and store W, the LKMM says the two
-accesses don't race if R can be linked to W by an
+sequence.  For race-candidate load R and store W, the LKMM says the
+two accesses don't race if R can be linked to W by an
 
        r-post-bounded ; xb* ; w-pre-bounded
 
@@ -2413,20 +2418,20 @@ is, the rules governing the memory subsystem's choice of a store to
 satisfy a load request and its determination of where a store will
 fall in the coherence order):
 
-       If R and W conflict and it is possible to link R to W by one
-       of the xb* sequences listed above, then W ->rfe R is not
-       allowed (i.e., a load cannot read from a store that it
+       If R and W are race candidates and it is possible to link R to
+       W by one of the xb* sequences listed above, then W ->rfe R is
+       not allowed (i.e., a load cannot read from a store that it
        executes before, even if one or both is plain).
 
-       If W and R conflict and it is possible to link W to R by one
-       of the vis sequences listed above, then R ->fre W is not
-       allowed (i.e., if a store is visible to a load then the load
-       must read from that store or one coherence-after it).
+       If W and R are race candidates and it is possible to link W to
+       R by one of the vis sequences listed above, then R ->fre W is
+       not allowed (i.e., if a store is visible to a load then the
+       load must read from that store or one coherence-after it).
 
-       If W and W' conflict and it is possible to link W to W' by one
-       of the vis sequences listed above, then W' ->co W is not
-       allowed (i.e., if one store is visible to a second then the
-       second must come after the first in the coherence order).
+       If W and W' are race candidates and it is possible to link W
+       to W' by one of the vis sequences listed above, then W' ->co W
+       is not allowed (i.e., if one store is visible to a second then
+       the second must come after the first in the coherence order).
 
 This is the extent to which the LKMM deals with plain accesses.
 Perhaps it could say more (for example, plain accesses might
index 7fe8d7a..63c4adf 100644 (file)
@@ -126,7 +126,7 @@ However, it is not necessarily the case that accesses ordered by
 locking will be seen as ordered by CPUs not holding that lock.
 Consider this example:
 
-       /* See Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus. */
+       /* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */
        void CPU0(void)
        {
                spin_lock(&mylock);
index b177f3e..ecbbaa5 100644 (file)
@@ -73,6 +73,18 @@ o    Christopher Pulte, Shaked Flur, Will Deacon, Jon French,
 Linux-kernel memory model
 =========================
 
+o      Jade Alglave, Will Deacon, Boqun Feng, David Howells, Daniel
+       Lustig, Luc Maranget, Paul E. McKenney, Andrea Parri, Nicholas
+       Piggin, Alan Stern, Akira Yokosawa, and Peter Zijlstra.
+       2019. "Calibrating your fear of big bad optimizing compilers"
+       Linux Weekly News.  https://lwn.net/Articles/799218/
+
+o      Jade Alglave, Will Deacon, Boqun Feng, David Howells, Daniel
+       Lustig, Luc Maranget, Paul E. McKenney, Andrea Parri, Nicholas
+       Piggin, Alan Stern, Akira Yokosawa, and Peter Zijlstra.
+       2019. "Who's afraid of a big bad optimizing compiler?"
+       Linux Weekly News.  https://lwn.net/Articles/793253/
+
 o      Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
        Alan Stern.  2018. "Frightening small children and disconcerting
        grown-ups: Concurrency in the Linux kernel". In Proceedings of
@@ -88,6 +100,11 @@ o   Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
        Alan Stern.  2017.  "A formal kernel memory-ordering model (part 2)"
        Linux Weekly News.  https://lwn.net/Articles/720550/
 
+o      Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
+       Alan Stern.  2017-2019.  "A Formal Model of Linux-Kernel Memory
+       Ordering" (backup material for the LWN articles)
+       https://mirrors.edge.kernel.org/pub/linux/kernel/people/paulmck/LWNLinuxMM/
+
 
 Memory-model tooling
 ====================
@@ -110,5 +127,5 @@ Memory-model comparisons
 ========================
 
 o      Paul E. McKenney, Ulrich Weigand, Andrea Parri, and Boqun
-       Feng. 2016. "Linux-Kernel Memory Model". (6 June 2016).
-       http://open-std.org/JTC1/SC22/WG21/docs/papers/2016/p0124r2.html.
+       Feng. 2018. "Linux-Kernel Memory Model". (27 September 2018).
+       http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0124r6.html.
index fc07b52..ecb7385 100644 (file)
@@ -28,8 +28,34 @@ downloaded separately:
 See "herdtools7/INSTALL.md" for installation instructions.
 
 Note that although these tools usually provide backwards compatibility,
-this is not absolutely guaranteed.  Therefore, if a later version does
-not work, please try using the exact version called out above.
+this is not absolutely guaranteed.
+
+For example, a future version of herd7 might not work with the model
+in this release.  A compatible model will likely be made available in
+a later release of the Linux kernel.
+
+If you absolutely need to run the model in this particular release,
+please try using the exact version called out above.
+
+klitmus7 is independent of the model provided here.  It has its own
+dependency on a target kernel release where converted code is built
+and executed.  Any change in kernel APIs essential to klitmus7 will
+necessitate an upgrade of klitmus7.
+
+If you find any compatibility issues in klitmus7, please inform the
+memory model maintainers.
+
+klitmus7 Compatibility Table
+----------------------------
+
+       ============  ==========
+       target Linux  herdtools7
+       ------------  ----------
+            -- 4.18  7.48 --
+       4.15 -- 4.19  7.49 --
+       4.20 -- 5.5   7.54 --
+       5.6  --       7.56 --
+       ============  ==========
 
 
 ==================
@@ -207,11 +233,15 @@ The Linux-kernel memory model (LKMM) has the following limitations:
                case as a store release.
 
        b.      The "unless" RMW operations are not currently modeled:
-               atomic_long_add_unless(), atomic_add_unless(),
-               atomic_inc_unless_negative(), and
-               atomic_dec_unless_positive().  These can be emulated
+               atomic_long_add_unless(), atomic_inc_unless_negative(),
+               and atomic_dec_unless_positive().  These can be emulated
                in litmus tests, for example, by using atomic_cmpxchg().
 
+               One exception to this limitation is atomic_add_unless(),
+               which is provided directly by herd7 (so there is no
+               corresponding definition in linux-kernel.def).  Because herd7
+               models atomic_add_unless(), it can be used in litmus tests.
+
        c.      The call_rcu() function is not modeled.  It can be
                emulated in litmus tests by adding another process that
                invokes synchronize_rcu() and the body of the callback
index 3c59677..2e2ce08 100644 (file)
@@ -82,7 +82,7 @@ bool arch_callee_saved_reg(unsigned char reg);
 
 unsigned long arch_jump_destination(struct instruction *insn);
 
-unsigned long arch_dest_rela_offset(int addend);
+unsigned long arch_dest_reloc_offset(int addend);
 
 const char *arch_nop_insn(int len);
 
index 9872195..1967370 100644 (file)
@@ -67,7 +67,7 @@ bool arch_callee_saved_reg(unsigned char reg)
        }
 }
 
-unsigned long arch_dest_rela_offset(int addend)
+unsigned long arch_dest_reloc_offset(int addend)
 {
        return addend + 4;
 }
index 5e0d70a..e034a8f 100644 (file)
@@ -353,7 +353,7 @@ static struct instruction *find_last_insn(struct objtool_file *file,
 static int add_dead_ends(struct objtool_file *file)
 {
        struct section *sec;
-       struct rela *rela;
+       struct reloc *reloc;
        struct instruction *insn;
 
        /*
@@ -371,24 +371,24 @@ static int add_dead_ends(struct objtool_file *file)
        if (!sec)
                goto reachable;
 
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               if (rela->sym->type != STT_SECTION) {
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               if (reloc->sym->type != STT_SECTION) {
                        WARN("unexpected relocation symbol type in %s", sec->name);
                        return -1;
                }
-               insn = find_insn(file, rela->sym->sec, rela->addend);
+               insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (insn)
                        insn = list_prev_entry(insn, list);
-               else if (rela->addend == rela->sym->sec->len) {
-                       insn = find_last_insn(file, rela->sym->sec);
+               else if (reloc->addend == reloc->sym->sec->len) {
+                       insn = find_last_insn(file, reloc->sym->sec);
                        if (!insn) {
                                WARN("can't find unreachable insn at %s+0x%x",
-                                    rela->sym->sec->name, rela->addend);
+                                    reloc->sym->sec->name, reloc->addend);
                                return -1;
                        }
                } else {
                        WARN("can't find unreachable insn at %s+0x%x",
-                            rela->sym->sec->name, rela->addend);
+                            reloc->sym->sec->name, reloc->addend);
                        return -1;
                }
 
@@ -406,24 +406,24 @@ reachable:
        if (!sec)
                return 0;
 
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               if (rela->sym->type != STT_SECTION) {
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               if (reloc->sym->type != STT_SECTION) {
                        WARN("unexpected relocation symbol type in %s", sec->name);
                        return -1;
                }
-               insn = find_insn(file, rela->sym->sec, rela->addend);
+               insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (insn)
                        insn = list_prev_entry(insn, list);
-               else if (rela->addend == rela->sym->sec->len) {
-                       insn = find_last_insn(file, rela->sym->sec);
+               else if (reloc->addend == reloc->sym->sec->len) {
+                       insn = find_last_insn(file, reloc->sym->sec);
                        if (!insn) {
                                WARN("can't find reachable insn at %s+0x%x",
-                                    rela->sym->sec->name, rela->addend);
+                                    reloc->sym->sec->name, reloc->addend);
                                return -1;
                        }
                } else {
                        WARN("can't find reachable insn at %s+0x%x",
-                            rela->sym->sec->name, rela->addend);
+                            reloc->sym->sec->name, reloc->addend);
                        return -1;
                }
 
@@ -441,26 +441,26 @@ static void add_ignores(struct objtool_file *file)
        struct instruction *insn;
        struct section *sec;
        struct symbol *func;
-       struct rela *rela;
+       struct reloc *reloc;
 
        sec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard");
        if (!sec)
                return;
 
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               switch (rela->sym->type) {
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               switch (reloc->sym->type) {
                case STT_FUNC:
-                       func = rela->sym;
+                       func = reloc->sym;
                        break;
 
                case STT_SECTION:
-                       func = find_func_by_offset(rela->sym->sec, rela->addend);
+                       func = find_func_by_offset(reloc->sym->sec, reloc->addend);
                        if (!func)
                                continue;
                        break;
 
                default:
-                       WARN("unexpected relocation symbol type in %s: %d", sec->name, rela->sym->type);
+                       WARN("unexpected relocation symbol type in %s: %d", sec->name, reloc->sym->type);
                        continue;
                }
 
@@ -580,20 +580,20 @@ static void add_uaccess_safe(struct objtool_file *file)
 static int add_ignore_alternatives(struct objtool_file *file)
 {
        struct section *sec;
-       struct rela *rela;
+       struct reloc *reloc;
        struct instruction *insn;
 
        sec = find_section_by_name(file->elf, ".rela.discard.ignore_alts");
        if (!sec)
                return 0;
 
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               if (rela->sym->type != STT_SECTION) {
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               if (reloc->sym->type != STT_SECTION) {
                        WARN("unexpected relocation symbol type in %s", sec->name);
                        return -1;
                }
 
-               insn = find_insn(file, rela->sym->sec, rela->addend);
+               insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (!insn) {
                        WARN("bad .discard.ignore_alts entry");
                        return -1;
@@ -611,7 +611,7 @@ static int add_ignore_alternatives(struct objtool_file *file)
 static int add_jump_destinations(struct objtool_file *file)
 {
        struct instruction *insn;
-       struct rela *rela;
+       struct reloc *reloc;
        struct section *dest_sec;
        unsigned long dest_off;
 
@@ -622,19 +622,19 @@ static int add_jump_destinations(struct objtool_file *file)
                if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET)
                        continue;
 
-               rela = find_rela_by_dest_range(file->elf, insn->sec,
+               reloc = find_reloc_by_dest_range(file->elf, insn->sec,
                                               insn->offset, insn->len);
-               if (!rela) {
+               if (!reloc) {
                        dest_sec = insn->sec;
                        dest_off = arch_jump_destination(insn);
-               } else if (rela->sym->type == STT_SECTION) {
-                       dest_sec = rela->sym->sec;
-                       dest_off = arch_dest_rela_offset(rela->addend);
-               } else if (rela->sym->sec->idx) {
-                       dest_sec = rela->sym->sec;
-                       dest_off = rela->sym->sym.st_value +
-                                  arch_dest_rela_offset(rela->addend);
-               } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+               } else if (reloc->sym->type == STT_SECTION) {
+                       dest_sec = reloc->sym->sec;
+                       dest_off = arch_dest_reloc_offset(reloc->addend);
+               } else if (reloc->sym->sec->idx) {
+                       dest_sec = reloc->sym->sec;
+                       dest_off = reloc->sym->sym.st_value +
+                                  arch_dest_reloc_offset(reloc->addend);
+               } else if (strstr(reloc->sym->name, "_indirect_thunk_")) {
                        /*
                         * Retpoline jumps are really dynamic jumps in
                         * disguise, so convert them accordingly.
@@ -648,7 +648,7 @@ static int add_jump_destinations(struct objtool_file *file)
                        continue;
                } else {
                        /* external sibling call */
-                       insn->call_dest = rela->sym;
+                       insn->call_dest = reloc->sym;
                        continue;
                }
 
@@ -724,15 +724,15 @@ static int add_call_destinations(struct objtool_file *file)
 {
        struct instruction *insn;
        unsigned long dest_off;
-       struct rela *rela;
+       struct reloc *reloc;
 
        for_each_insn(file, insn) {
                if (insn->type != INSN_CALL)
                        continue;
 
-               rela = find_rela_by_dest_range(file->elf, insn->sec,
+               reloc = find_reloc_by_dest_range(file->elf, insn->sec,
                                               insn->offset, insn->len);
-               if (!rela) {
+               if (!reloc) {
                        dest_off = arch_jump_destination(insn);
                        insn->call_dest = find_func_by_offset(insn->sec, dest_off);
                        if (!insn->call_dest)
@@ -752,19 +752,19 @@ static int add_call_destinations(struct objtool_file *file)
                                return -1;
                        }
 
-               } else if (rela->sym->type == STT_SECTION) {
-                       dest_off = arch_dest_rela_offset(rela->addend);
-                       insn->call_dest = find_func_by_offset(rela->sym->sec,
+               } else if (reloc->sym->type == STT_SECTION) {
+                       dest_off = arch_dest_reloc_offset(reloc->addend);
+                       insn->call_dest = find_func_by_offset(reloc->sym->sec,
                                                              dest_off);
                        if (!insn->call_dest) {
                                WARN_FUNC("can't find call dest symbol at %s+0x%lx",
                                          insn->sec, insn->offset,
-                                         rela->sym->sec->name,
+                                         reloc->sym->sec->name,
                                          dest_off);
                                return -1;
                        }
                } else
-                       insn->call_dest = rela->sym;
+                       insn->call_dest = reloc->sym;
 
                /*
                 * Many compilers cannot disable KCOV with a function attribute
@@ -773,9 +773,9 @@ static int add_call_destinations(struct objtool_file *file)
                 */
                if (insn->sec->noinstr &&
                    !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
-                       if (rela) {
-                               rela->type = R_NONE;
-                               elf_write_rela(file->elf, rela);
+                       if (reloc) {
+                               reloc->type = R_NONE;
+                               elf_write_reloc(file->elf, reloc);
                        }
 
                        elf_write_insn(file->elf, insn->sec,
@@ -890,7 +890,7 @@ static int handle_group_alt(struct objtool_file *file,
                 */
                if ((insn->offset != special_alt->new_off ||
                    (insn->type != INSN_CALL && !is_static_jump(insn))) &&
-                   find_rela_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) {
+                   find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) {
 
                        WARN_FUNC("unsupported relocation in alternatives section",
                                  insn->sec, insn->offset);
@@ -1036,34 +1036,34 @@ out:
 }
 
 static int add_jump_table(struct objtool_file *file, struct instruction *insn,
-                           struct rela *table)
+                           struct reloc *table)
 {
-       struct rela *rela = table;
+       struct reloc *reloc = table;
        struct instruction *dest_insn;
        struct alternative *alt;
        struct symbol *pfunc = insn->func->pfunc;
        unsigned int prev_offset = 0;
 
        /*
-        * Each @rela is a switch table relocation which points to the target
+        * Each @reloc is a switch table relocation which points to the target
         * instruction.
         */
-       list_for_each_entry_from(rela, &table->sec->rela_list, list) {
+       list_for_each_entry_from(reloc, &table->sec->reloc_list, list) {
 
                /* Check for the end of the table: */
-               if (rela != table && rela->jump_table_start)
+               if (reloc != table && reloc->jump_table_start)
                        break;
 
                /* Make sure the table entries are consecutive: */
-               if (prev_offset && rela->offset != prev_offset + 8)
+               if (prev_offset && reloc->offset != prev_offset + 8)
                        break;
 
                /* Detect function pointers from contiguous objects: */
-               if (rela->sym->sec == pfunc->sec &&
-                   rela->addend == pfunc->offset)
+               if (reloc->sym->sec == pfunc->sec &&
+                   reloc->addend == pfunc->offset)
                        break;
 
-               dest_insn = find_insn(file, rela->sym->sec, rela->addend);
+               dest_insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (!dest_insn)
                        break;
 
@@ -1079,7 +1079,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 
                alt->insn = dest_insn;
                list_add_tail(&alt->list, &insn->alts);
-               prev_offset = rela->offset;
+               prev_offset = reloc->offset;
        }
 
        if (!prev_offset) {
@@ -1134,11 +1134,11 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
  *
  *    NOTE: RETPOLINE made it harder still to decode dynamic jumps.
  */
-static struct rela *find_jump_table(struct objtool_file *file,
+static struct reloc *find_jump_table(struct objtool_file *file,
                                      struct symbol *func,
                                      struct instruction *insn)
 {
-       struct rela *text_rela, *table_rela;
+       struct reloc *text_reloc, *table_reloc;
        struct instruction *dest_insn, *orig_insn = insn;
        struct section *table_sec;
        unsigned long table_offset;
@@ -1163,16 +1163,16 @@ static struct rela *find_jump_table(struct objtool_file *file,
                    break;
 
                /* look for a relocation which references .rodata */
-               text_rela = find_rela_by_dest_range(file->elf, insn->sec,
+               text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
                                                    insn->offset, insn->len);
-               if (!text_rela || text_rela->sym->type != STT_SECTION ||
-                   !text_rela->sym->sec->rodata)
+               if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
+                   !text_reloc->sym->sec->rodata)
                        continue;
 
-               table_offset = text_rela->addend;
-               table_sec = text_rela->sym->sec;
+               table_offset = text_reloc->addend;
+               table_sec = text_reloc->sym->sec;
 
-               if (text_rela->type == R_X86_64_PC32)
+               if (text_reloc->type == R_X86_64_PC32)
                        table_offset += 4;
 
                /*
@@ -1189,14 +1189,14 @@ static struct rela *find_jump_table(struct objtool_file *file,
                        continue;
 
                /*
-                * Each table entry has a rela associated with it.  The rela
+                * Each table entry has a reloc associated with it.  The reloc
                 * should reference text in the same function as the original
                 * instruction.
                 */
-               table_rela = find_rela_by_dest(file->elf, table_sec, table_offset);
-               if (!table_rela)
+               table_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+               if (!table_reloc)
                        continue;
-               dest_insn = find_insn(file, table_rela->sym->sec, table_rela->addend);
+               dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend);
                if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
                        continue;
 
@@ -1205,10 +1205,10 @@ static struct rela *find_jump_table(struct objtool_file *file,
                 * indicates a rare GCC quirk/bug which can leave dead code
                 * behind.
                 */
-               if (text_rela->type == R_X86_64_PC32)
+               if (text_reloc->type == R_X86_64_PC32)
                        file->ignore_unreachables = true;
 
-               return table_rela;
+               return table_reloc;
        }
 
        return NULL;
@@ -1222,7 +1222,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
                                    struct symbol *func)
 {
        struct instruction *insn, *last = NULL;
-       struct rela *rela;
+       struct reloc *reloc;
 
        func_for_each_insn(file, func, insn) {
                if (!last)
@@ -1245,10 +1245,10 @@ static void mark_func_jump_tables(struct objtool_file *file,
                if (insn->type != INSN_JUMP_DYNAMIC)
                        continue;
 
-               rela = find_jump_table(file, func, insn);
-               if (rela) {
-                       rela->jump_table_start = true;
-                       insn->jump_table = rela;
+               reloc = find_jump_table(file, func, insn);
+               if (reloc) {
+                       reloc->jump_table_start = true;
+                       insn->jump_table = reloc;
                }
        }
 }
@@ -1302,8 +1302,8 @@ static int add_jump_table_alts(struct objtool_file *file)
 
 static int read_unwind_hints(struct objtool_file *file)
 {
-       struct section *sec, *relasec;
-       struct rela *rela;
+       struct section *sec, *relocsec;
+       struct reloc *reloc;
        struct unwind_hint *hint;
        struct instruction *insn;
        struct cfi_reg *cfa;
@@ -1313,8 +1313,8 @@ static int read_unwind_hints(struct objtool_file *file)
        if (!sec)
                return 0;
 
-       relasec = sec->rela;
-       if (!relasec) {
+       relocsec = sec->reloc;
+       if (!relocsec) {
                WARN("missing .rela.discard.unwind_hints section");
                return -1;
        }
@@ -1329,13 +1329,13 @@ static int read_unwind_hints(struct objtool_file *file)
        for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
                hint = (struct unwind_hint *)sec->data->d_buf + i;
 
-               rela = find_rela_by_dest(file->elf, sec, i * sizeof(*hint));
-               if (!rela) {
-                       WARN("can't find rela for unwind_hints[%d]", i);
+               reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint));
+               if (!reloc) {
+                       WARN("can't find reloc for unwind_hints[%d]", i);
                        return -1;
                }
 
-               insn = find_insn(file, rela->sym->sec, rela->addend);
+               insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (!insn) {
                        WARN("can't find insn for unwind_hints[%d]", i);
                        return -1;
@@ -1393,19 +1393,19 @@ static int read_retpoline_hints(struct objtool_file *file)
 {
        struct section *sec;
        struct instruction *insn;
-       struct rela *rela;
+       struct reloc *reloc;
 
        sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
        if (!sec)
                return 0;
 
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               if (rela->sym->type != STT_SECTION) {
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               if (reloc->sym->type != STT_SECTION) {
                        WARN("unexpected relocation symbol type in %s", sec->name);
                        return -1;
                }
 
-               insn = find_insn(file, rela->sym->sec, rela->addend);
+               insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (!insn) {
                        WARN("bad .discard.retpoline_safe entry");
                        return -1;
@@ -1428,19 +1428,19 @@ static int read_instr_hints(struct objtool_file *file)
 {
        struct section *sec;
        struct instruction *insn;
-       struct rela *rela;
+       struct reloc *reloc;
 
        sec = find_section_by_name(file->elf, ".rela.discard.instr_end");
        if (!sec)
                return 0;
 
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               if (rela->sym->type != STT_SECTION) {
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               if (reloc->sym->type != STT_SECTION) {
                        WARN("unexpected relocation symbol type in %s", sec->name);
                        return -1;
                }
 
-               insn = find_insn(file, rela->sym->sec, rela->addend);
+               insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (!insn) {
                        WARN("bad .discard.instr_end entry");
                        return -1;
@@ -1453,13 +1453,13 @@ static int read_instr_hints(struct objtool_file *file)
        if (!sec)
                return 0;
 
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               if (rela->sym->type != STT_SECTION) {
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               if (reloc->sym->type != STT_SECTION) {
                        WARN("unexpected relocation symbol type in %s", sec->name);
                        return -1;
                }
 
-               insn = find_insn(file, rela->sym->sec, rela->addend);
+               insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (!insn) {
                        WARN("bad .discard.instr_begin entry");
                        return -1;
@@ -1475,22 +1475,22 @@ static int read_intra_function_calls(struct objtool_file *file)
 {
        struct instruction *insn;
        struct section *sec;
-       struct rela *rela;
+       struct reloc *reloc;
 
        sec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
        if (!sec)
                return 0;
 
-       list_for_each_entry(rela, &sec->rela_list, list) {
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
                unsigned long dest_off;
 
-               if (rela->sym->type != STT_SECTION) {
+               if (reloc->sym->type != STT_SECTION) {
                        WARN("unexpected relocation symbol type in %s",
                             sec->name);
                        return -1;
                }
 
-               insn = find_insn(file, rela->sym->sec, rela->addend);
+               insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (!insn) {
                        WARN("bad .discard.intra_function_call entry");
                        return -1;
index 906b521..061aa96 100644 (file)
@@ -37,7 +37,7 @@ struct instruction {
        struct symbol *call_dest;
        struct instruction *jump_dest;
        struct instruction *first_jump_src;
-       struct rela *jump_table;
+       struct reloc *jump_table;
        struct list_head alts;
        struct symbol *func;
        struct list_head stack_ops;
index 26d11d8..3ddbd66 100644 (file)
@@ -228,26 +228,26 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
        return NULL;
 }
 
-struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
+struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
                                     unsigned long offset, unsigned int len)
 {
-       struct rela *rela, *r = NULL;
+       struct reloc *reloc, *r = NULL;
        unsigned long o;
 
-       if (!sec->rela)
+       if (!sec->reloc)
                return NULL;
 
-       sec = sec->rela;
+       sec = sec->reloc;
 
        for_offset_range(o, offset, offset + len) {
-               elf_hash_for_each_possible(elf->rela_hash, rela, hash,
+               elf_hash_for_each_possible(elf->reloc_hash, reloc, hash,
                                       sec_offset_hash(sec, o)) {
-                       if (rela->sec != sec)
+                       if (reloc->sec != sec)
                                continue;
 
-                       if (rela->offset >= offset && rela->offset < offset + len) {
-                               if (!r || rela->offset < r->offset)
-                                       r = rela;
+                       if (reloc->offset >= offset && reloc->offset < offset + len) {
+                               if (!r || reloc->offset < r->offset)
+                                       r = reloc;
                        }
                }
                if (r)
@@ -257,9 +257,9 @@ struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
        return NULL;
 }
 
-struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset)
+struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset)
 {
-       return find_rela_by_dest_range(elf, sec, offset, 1);
+       return find_reloc_by_dest_range(elf, sec, offset, 1);
 }
 
 static int read_sections(struct elf *elf)
@@ -288,7 +288,7 @@ static int read_sections(struct elf *elf)
                memset(sec, 0, sizeof(*sec));
 
                INIT_LIST_HEAD(&sec->symbol_list);
-               INIT_LIST_HEAD(&sec->rela_list);
+               INIT_LIST_HEAD(&sec->reloc_list);
 
                s = elf_getscn(elf->elf, i);
                if (!s) {
@@ -434,7 +434,13 @@ static int read_symbols(struct elf *elf)
                        size_t pnamelen;
                        if (sym->type != STT_FUNC)
                                continue;
-                       sym->pfunc = sym->cfunc = sym;
+
+                       if (sym->pfunc == NULL)
+                               sym->pfunc = sym;
+
+                       if (sym->cfunc == NULL)
+                               sym->cfunc = sym;
+
                        coldstr = strstr(sym->name, ".cold");
                        if (!coldstr)
                                continue;
@@ -482,72 +488,101 @@ err:
        return -1;
 }
 
-void elf_add_rela(struct elf *elf, struct rela *rela)
+void elf_add_reloc(struct elf *elf, struct reloc *reloc)
+{
+       struct section *sec = reloc->sec;
+
+       list_add_tail(&reloc->list, &sec->reloc_list);
+       elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+}
+
+static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
 {
-       struct section *sec = rela->sec;
+       if (!gelf_getrel(sec->data, i, &reloc->rel)) {
+               WARN_ELF("gelf_getrel");
+               return -1;
+       }
+       reloc->type = GELF_R_TYPE(reloc->rel.r_info);
+       reloc->addend = 0;
+       reloc->offset = reloc->rel.r_offset;
+       *symndx = GELF_R_SYM(reloc->rel.r_info);
+       return 0;
+}
 
-       list_add_tail(&rela->list, &sec->rela_list);
-       elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+static int read_rela_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
+{
+       if (!gelf_getrela(sec->data, i, &reloc->rela)) {
+               WARN_ELF("gelf_getrela");
+               return -1;
+       }
+       reloc->type = GELF_R_TYPE(reloc->rela.r_info);
+       reloc->addend = reloc->rela.r_addend;
+       reloc->offset = reloc->rela.r_offset;
+       *symndx = GELF_R_SYM(reloc->rela.r_info);
+       return 0;
 }
 
-static int read_relas(struct elf *elf)
+static int read_relocs(struct elf *elf)
 {
        struct section *sec;
-       struct rela *rela;
+       struct reloc *reloc;
        int i;
        unsigned int symndx;
-       unsigned long nr_rela, max_rela = 0, tot_rela = 0;
+       unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
 
        list_for_each_entry(sec, &elf->sections, list) {
-               if (sec->sh.sh_type != SHT_RELA)
+               if ((sec->sh.sh_type != SHT_RELA) &&
+                   (sec->sh.sh_type != SHT_REL))
                        continue;
 
-               sec->base = find_section_by_name(elf, sec->name + 5);
+               sec->base = find_section_by_index(elf, sec->sh.sh_info);
                if (!sec->base) {
-                       WARN("can't find base section for rela section %s",
+                       WARN("can't find base section for reloc section %s",
                             sec->name);
                        return -1;
                }
 
-               sec->base->rela = sec;
+               sec->base->reloc = sec;
 
-               nr_rela = 0;
+               nr_reloc = 0;
                for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) {
-                       rela = malloc(sizeof(*rela));
-                       if (!rela) {
+                       reloc = malloc(sizeof(*reloc));
+                       if (!reloc) {
                                perror("malloc");
                                return -1;
                        }
-                       memset(rela, 0, sizeof(*rela));
-
-                       if (!gelf_getrela(sec->data, i, &rela->rela)) {
-                               WARN_ELF("gelf_getrela");
-                               return -1;
+                       memset(reloc, 0, sizeof(*reloc));
+                       switch (sec->sh.sh_type) {
+                       case SHT_REL:
+                               if (read_rel_reloc(sec, i, reloc, &symndx))
+                                       return -1;
+                               break;
+                       case SHT_RELA:
+                               if (read_rela_reloc(sec, i, reloc, &symndx))
+                                       return -1;
+                               break;
+                       default: return -1;
                        }
 
-                       rela->type = GELF_R_TYPE(rela->rela.r_info);
-                       rela->addend = rela->rela.r_addend;
-                       rela->offset = rela->rela.r_offset;
-                       symndx = GELF_R_SYM(rela->rela.r_info);
-                       rela->sec = sec;
-                       rela->idx = i;
-                       rela->sym = find_symbol_by_index(elf, symndx);
-                       if (!rela->sym) {
-                               WARN("can't find rela entry symbol %d for %s",
+                       reloc->sec = sec;
+                       reloc->idx = i;
+                       reloc->sym = find_symbol_by_index(elf, symndx);
+                       if (!reloc->sym) {
+                               WARN("can't find reloc entry symbol %d for %s",
                                     symndx, sec->name);
                                return -1;
                        }
 
-                       elf_add_rela(elf, rela);
-                       nr_rela++;
+                       elf_add_reloc(elf, reloc);
+                       nr_reloc++;
                }
-               max_rela = max(max_rela, nr_rela);
-               tot_rela += nr_rela;
+               max_reloc = max(max_reloc, nr_reloc);
+               tot_reloc += nr_reloc;
        }
 
        if (stats) {
-               printf("max_rela: %lu\n", max_rela);
-               printf("tot_rela: %lu\n", tot_rela);
+               printf("max_reloc: %lu\n", max_reloc);
+               printf("tot_reloc: %lu\n", tot_reloc);
        }
 
        return 0;
@@ -573,7 +608,7 @@ struct elf *elf_open_read(const char *name, int flags)
        elf_hash_init(elf->symbol_name_hash);
        elf_hash_init(elf->section_hash);
        elf_hash_init(elf->section_name_hash);
-       elf_hash_init(elf->rela_hash);
+       elf_hash_init(elf->reloc_hash);
 
        elf->fd = open(name, flags);
        if (elf->fd == -1) {
@@ -606,7 +641,7 @@ struct elf *elf_open_read(const char *name, int flags)
        if (read_symbols(elf))
                goto err;
 
-       if (read_relas(elf))
+       if (read_relocs(elf))
                goto err;
 
        return elf;
@@ -632,7 +667,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
        memset(sec, 0, sizeof(*sec));
 
        INIT_LIST_HEAD(&sec->symbol_list);
-       INIT_LIST_HEAD(&sec->rela_list);
+       INIT_LIST_HEAD(&sec->reloc_list);
 
        s = elf_newscn(elf->elf);
        if (!s) {
@@ -719,28 +754,28 @@ struct section *elf_create_section(struct elf *elf, const char *name,
        return sec;
 }
 
-struct section *elf_create_rela_section(struct elf *elf, struct section *base)
+static struct section *elf_create_rel_reloc_section(struct elf *elf, struct section *base)
 {
-       char *relaname;
+       char *relocname;
        struct section *sec;
 
-       relaname = malloc(strlen(base->name) + strlen(".rela") + 1);
-       if (!relaname) {
+       relocname = malloc(strlen(base->name) + strlen(".rel") + 1);
+       if (!relocname) {
                perror("malloc");
                return NULL;
        }
-       strcpy(relaname, ".rela");
-       strcat(relaname, base->name);
+       strcpy(relocname, ".rel");
+       strcat(relocname, base->name);
 
-       sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
-       free(relaname);
+       sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0);
+       free(relocname);
        if (!sec)
                return NULL;
 
-       base->rela = sec;
+       base->reloc = sec;
        sec->base = base;
 
-       sec->sh.sh_type = SHT_RELA;
+       sec->sh.sh_type = SHT_REL;
        sec->sh.sh_addralign = 8;
        sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
        sec->sh.sh_info = base->idx;
@@ -749,42 +784,125 @@ struct section *elf_create_rela_section(struct elf *elf, struct section *base)
        return sec;
 }
 
-int elf_rebuild_rela_section(struct elf *elf, struct section *sec)
+static struct section *elf_create_rela_reloc_section(struct elf *elf, struct section *base)
 {
-       struct rela *rela;
-       int nr, idx = 0, size;
-       GElf_Rela *relas;
+       char *relocname;
+       struct section *sec;
 
-       nr = 0;
-       list_for_each_entry(rela, &sec->rela_list, list)
-               nr++;
+       relocname = malloc(strlen(base->name) + strlen(".rela") + 1);
+       if (!relocname) {
+               perror("malloc");
+               return NULL;
+       }
+       strcpy(relocname, ".rela");
+       strcat(relocname, base->name);
+
+       sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0);
+       free(relocname);
+       if (!sec)
+               return NULL;
 
-       size = nr * sizeof(*relas);
-       relas = malloc(size);
-       if (!relas) {
+       base->reloc = sec;
+       sec->base = base;
+
+       sec->sh.sh_type = SHT_RELA;
+       sec->sh.sh_addralign = 8;
+       sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
+       sec->sh.sh_info = base->idx;
+       sec->sh.sh_flags = SHF_INFO_LINK;
+
+       return sec;
+}
+
+struct section *elf_create_reloc_section(struct elf *elf,
+                                        struct section *base,
+                                        int reltype)
+{
+       switch (reltype) {
+       case SHT_REL:  return elf_create_rel_reloc_section(elf, base);
+       case SHT_RELA: return elf_create_rela_reloc_section(elf, base);
+       default:       return NULL;
+       }
+}
+
+static int elf_rebuild_rel_reloc_section(struct section *sec, int nr)
+{
+       struct reloc *reloc;
+       int idx = 0, size;
+       GElf_Rel *relocs;
+
+       /* Allocate a buffer for relocations */
+       size = nr * sizeof(*relocs);
+       relocs = malloc(size);
+       if (!relocs) {
                perror("malloc");
                return -1;
        }
 
-       sec->changed = true;
-       elf->changed = true;
+       sec->data->d_buf = relocs;
+       sec->data->d_size = size;
+
+       sec->sh.sh_size = size;
+
+       idx = 0;
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               relocs[idx].r_offset = reloc->offset;
+               relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+               idx++;
+       }
+
+       return 0;
+}
+
+static int elf_rebuild_rela_reloc_section(struct section *sec, int nr)
+{
+       struct reloc *reloc;
+       int idx = 0, size;
+       GElf_Rela *relocs;
+
+       /* Allocate a buffer for relocations with addends */
+       size = nr * sizeof(*relocs);
+       relocs = malloc(size);
+       if (!relocs) {
+               perror("malloc");
+               return -1;
+       }
 
-       sec->data->d_buf = relas;
+       sec->data->d_buf = relocs;
        sec->data->d_size = size;
 
        sec->sh.sh_size = size;
 
        idx = 0;
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               relas[idx].r_offset = rela->offset;
-               relas[idx].r_addend = rela->addend;
-               relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type);
+       list_for_each_entry(reloc, &sec->reloc_list, list) {
+               relocs[idx].r_offset = reloc->offset;
+               relocs[idx].r_addend = reloc->addend;
+               relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
                idx++;
        }
 
        return 0;
 }
 
+int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
+{
+       struct reloc *reloc;
+       int nr;
+
+       sec->changed = true;
+       elf->changed = true;
+
+       nr = 0;
+       list_for_each_entry(reloc, &sec->reloc_list, list)
+               nr++;
+
+       switch (sec->sh.sh_type) {
+       case SHT_REL:  return elf_rebuild_rel_reloc_section(sec, nr);
+       case SHT_RELA: return elf_rebuild_rela_reloc_section(sec, nr);
+       default:       return -1;
+       }
+}
+
 int elf_write_insn(struct elf *elf, struct section *sec,
                   unsigned long offset, unsigned int len,
                   const char *insn)
@@ -804,17 +922,27 @@ int elf_write_insn(struct elf *elf, struct section *sec,
        return 0;
 }
 
-int elf_write_rela(struct elf *elf, struct rela *rela)
+int elf_write_reloc(struct elf *elf, struct reloc *reloc)
 {
-       struct section *sec = rela->sec;
+       struct section *sec = reloc->sec;
 
-       rela->rela.r_info = GELF_R_INFO(rela->sym->idx, rela->type);
-       rela->rela.r_addend = rela->addend;
-       rela->rela.r_offset = rela->offset;
+       if (sec->sh.sh_type == SHT_REL) {
+               reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+               reloc->rel.r_offset = reloc->offset;
 
-       if (!gelf_update_rela(sec->data, rela->idx, &rela->rela)) {
-               WARN_ELF("gelf_update_rela");
-               return -1;
+               if (!gelf_update_rel(sec->data, reloc->idx, &reloc->rel)) {
+                       WARN_ELF("gelf_update_rel");
+                       return -1;
+               }
+       } else {
+               reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+               reloc->rela.r_addend = reloc->addend;
+               reloc->rela.r_offset = reloc->offset;
+
+               if (!gelf_update_rela(sec->data, reloc->idx, &reloc->rela)) {
+                       WARN_ELF("gelf_update_rela");
+                       return -1;
+               }
        }
 
        elf->changed = true;
@@ -862,7 +990,7 @@ void elf_close(struct elf *elf)
 {
        struct section *sec, *tmpsec;
        struct symbol *sym, *tmpsym;
-       struct rela *rela, *tmprela;
+       struct reloc *reloc, *tmpreloc;
 
        if (elf->elf)
                elf_end(elf->elf);
@@ -876,10 +1004,10 @@ void elf_close(struct elf *elf)
                        hash_del(&sym->hash);
                        free(sym);
                }
-               list_for_each_entry_safe(rela, tmprela, &sec->rela_list, list) {
-                       list_del(&rela->list);
-                       hash_del(&rela->hash);
-                       free(rela);
+               list_for_each_entry_safe(reloc, tmpreloc, &sec->reloc_list, list) {
+                       list_del(&reloc->list);
+                       hash_del(&reloc->hash);
+                       free(reloc);
                }
                list_del(&sec->list);
                free(sec);
index 7324e77..6cc80a0 100644 (file)
@@ -32,8 +32,8 @@ struct section {
        GElf_Shdr sh;
        struct rb_root symbol_tree;
        struct list_head symbol_list;
-       struct list_head rela_list;
-       struct section *base, *rela;
+       struct list_head reloc_list;
+       struct section *base, *reloc;
        struct symbol *sym;
        Elf_Data *data;
        char *name;
@@ -58,10 +58,13 @@ struct symbol {
        bool uaccess_safe;
 };
 
-struct rela {
+struct reloc {
        struct list_head list;
        struct hlist_node hash;
-       GElf_Rela rela;
+       union {
+               GElf_Rela rela;
+               GElf_Rel  rel;
+       };
        struct section *sec;
        struct symbol *sym;
        unsigned long offset;
@@ -84,7 +87,7 @@ struct elf {
        DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
        DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
        DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
-       DECLARE_HASHTABLE(rela_hash, ELF_HASH_BITS);
+       DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS);
 };
 
 #define OFFSET_STRIDE_BITS     4
@@ -111,19 +114,19 @@ static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)
        return ol;
 }
 
-static inline u32 rela_hash(struct rela *rela)
+static inline u32 reloc_hash(struct reloc *reloc)
 {
-       return sec_offset_hash(rela->sec, rela->offset);
+       return sec_offset_hash(reloc->sec, reloc->offset);
 }
 
 struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
-struct section *elf_create_rela_section(struct elf *elf, struct section *base);
-void elf_add_rela(struct elf *elf, struct rela *rela);
+struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
+void elf_add_reloc(struct elf *elf, struct reloc *reloc);
 int elf_write_insn(struct elf *elf, struct section *sec,
                   unsigned long offset, unsigned int len,
                   const char *insn);
-int elf_write_rela(struct elf *elf, struct rela *rela);
+int elf_write_reloc(struct elf *elf, struct reloc *reloc);
 int elf_write(struct elf *elf);
 void elf_close(struct elf *elf);
 
@@ -132,11 +135,11 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
 struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
+struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
+struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
                                     unsigned long offset, unsigned int len);
 struct symbol *find_func_containing(struct section *sec, unsigned long offset);
-int elf_rebuild_rela_section(struct elf *elf, struct section *sec);
+int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
 
 #define for_each_sec(file, sec)                                                \
        list_for_each_entry(sec, &file->elf->sections, list)
index 4c37f80..968f55e 100644 (file)
@@ -80,56 +80,56 @@ int create_orc(struct objtool_file *file)
        return 0;
 }
 
-static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relasec,
+static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec,
                                unsigned int idx, struct section *insn_sec,
                                unsigned long insn_off, struct orc_entry *o)
 {
        struct orc_entry *orc;
-       struct rela *rela;
+       struct reloc *reloc;
 
        /* populate ORC data */
        orc = (struct orc_entry *)u_sec->data->d_buf + idx;
        memcpy(orc, o, sizeof(*orc));
 
-       /* populate rela for ip */
-       rela = malloc(sizeof(*rela));
-       if (!rela) {
+       /* populate reloc for ip */
+       reloc = malloc(sizeof(*reloc));
+       if (!reloc) {
                perror("malloc");
                return -1;
        }
-       memset(rela, 0, sizeof(*rela));
+       memset(reloc, 0, sizeof(*reloc));
 
        if (insn_sec->sym) {
-               rela->sym = insn_sec->sym;
-               rela->addend = insn_off;
+               reloc->sym = insn_sec->sym;
+               reloc->addend = insn_off;
        } else {
                /*
                 * The Clang assembler doesn't produce section symbols, so we
                 * have to reference the function symbol instead:
                 */
-               rela->sym = find_symbol_containing(insn_sec, insn_off);
-               if (!rela->sym) {
+               reloc->sym = find_symbol_containing(insn_sec, insn_off);
+               if (!reloc->sym) {
                        /*
                         * Hack alert.  This happens when we need to reference
                         * the NOP pad insn immediately after the function.
                         */
-                       rela->sym = find_symbol_containing(insn_sec,
+                       reloc->sym = find_symbol_containing(insn_sec,
                                                           insn_off - 1);
                }
-               if (!rela->sym) {
+               if (!reloc->sym) {
                        WARN("missing symbol for insn at offset 0x%lx\n",
                             insn_off);
                        return -1;
                }
 
-               rela->addend = insn_off - rela->sym->offset;
+               reloc->addend = insn_off - reloc->sym->offset;
        }
 
-       rela->type = R_X86_64_PC32;
-       rela->offset = idx * sizeof(int);
-       rela->sec = ip_relasec;
+       reloc->type = R_X86_64_PC32;
+       reloc->offset = idx * sizeof(int);
+       reloc->sec = ip_relocsec;
 
-       elf_add_rela(elf, rela);
+       elf_add_reloc(elf, reloc);
 
        return 0;
 }
@@ -137,7 +137,7 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti
 int create_orc_sections(struct objtool_file *file)
 {
        struct instruction *insn, *prev_insn;
-       struct section *sec, *u_sec, *ip_relasec;
+       struct section *sec, *u_sec, *ip_relocsec;
        unsigned int idx;
 
        struct orc_entry empty = {
@@ -181,8 +181,8 @@ int create_orc_sections(struct objtool_file *file)
        if (!sec)
                return -1;
 
-       ip_relasec = elf_create_rela_section(file->elf, sec);
-       if (!ip_relasec)
+       ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+       if (!ip_relocsec)
                return -1;
 
        /* create .orc_unwind section */
@@ -200,7 +200,7 @@ int create_orc_sections(struct objtool_file *file)
                        if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
                                                 sizeof(struct orc_entry))) {
 
-                               if (create_orc_entry(file->elf, u_sec, ip_relasec, idx,
+                               if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
                                                     insn->sec, insn->offset,
                                                     &insn->orc))
                                        return -1;
@@ -212,7 +212,7 @@ int create_orc_sections(struct objtool_file *file)
 
                /* section terminator */
                if (prev_insn) {
-                       if (create_orc_entry(file->elf, u_sec, ip_relasec, idx,
+                       if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
                                             prev_insn->sec,
                                             prev_insn->offset + prev_insn->len,
                                             &empty))
@@ -222,7 +222,7 @@ int create_orc_sections(struct objtool_file *file)
                }
        }
 
-       if (elf_rebuild_rela_section(file->elf, ip_relasec))
+       if (elf_rebuild_reloc_section(file->elf, ip_relocsec))
                return -1;
 
        return 0;
index e74e018..e893f1e 100644 (file)
@@ -72,7 +72,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
                         struct section *sec, int idx,
                         struct special_alt *alt)
 {
-       struct rela *orig_rela, *new_rela;
+       struct reloc *orig_reloc, *new_reloc;
        unsigned long offset;
 
        offset = idx * entry->size;
@@ -118,30 +118,30 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
                }
        }
 
-       orig_rela = find_rela_by_dest(elf, sec, offset + entry->orig);
-       if (!orig_rela) {
-               WARN_FUNC("can't find orig rela", sec, offset + entry->orig);
+       orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
+       if (!orig_reloc) {
+               WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
                return -1;
        }
-       if (orig_rela->sym->type != STT_SECTION) {
-               WARN_FUNC("don't know how to handle non-section rela symbol %s",
-                          sec, offset + entry->orig, orig_rela->sym->name);
+       if (orig_reloc->sym->type != STT_SECTION) {
+               WARN_FUNC("don't know how to handle non-section reloc symbol %s",
+                          sec, offset + entry->orig, orig_reloc->sym->name);
                return -1;
        }
 
-       alt->orig_sec = orig_rela->sym->sec;
-       alt->orig_off = orig_rela->addend;
+       alt->orig_sec = orig_reloc->sym->sec;
+       alt->orig_off = orig_reloc->addend;
 
        if (!entry->group || alt->new_len) {
-               new_rela = find_rela_by_dest(elf, sec, offset + entry->new);
-               if (!new_rela) {
-                       WARN_FUNC("can't find new rela",
+               new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
+               if (!new_reloc) {
+                       WARN_FUNC("can't find new reloc",
                                  sec, offset + entry->new);
                        return -1;
                }
 
-               alt->new_sec = new_rela->sym->sec;
-               alt->new_off = (unsigned int)new_rela->addend;
+               alt->new_sec = new_reloc->sym->sec;
+               alt->new_off = (unsigned int)new_reloc->addend;
 
                /* _ASM_EXTABLE_EX hack */
                if (alt->new_off >= 0x7ffffff0)
index 0a6e75b..28a5d0c 100644 (file)
@@ -56,7 +56,7 @@ struct auxtrace_record
        struct perf_pmu *cs_etm_pmu;
        struct evsel *evsel;
        bool found_etm = false;
-       bool found_spe = false;
+       struct perf_pmu *found_spe = NULL;
        static struct perf_pmu **arm_spe_pmus = NULL;
        static int nr_spes = 0;
        int i = 0;
@@ -74,12 +74,12 @@ struct auxtrace_record
                    evsel->core.attr.type == cs_etm_pmu->type)
                        found_etm = true;
 
-               if (!nr_spes)
+               if (!nr_spes || found_spe)
                        continue;
 
                for (i = 0; i < nr_spes; i++) {
                        if (evsel->core.attr.type == arm_spe_pmus[i]->type) {
-                               found_spe = true;
+                               found_spe = arm_spe_pmus[i];
                                break;
                        }
                }
@@ -96,7 +96,7 @@ struct auxtrace_record
 
 #if defined(__aarch64__)
        if (found_spe)
-               return arm_spe_recording_init(err, arm_spe_pmus[i]);
+               return arm_spe_recording_init(err, found_spe);
 #endif
 
        /*
index 63a91ec..045723b 100755 (executable)
@@ -12,7 +12,8 @@ skip_if_no_z_record() {
 
 collect_z_record() {
        echo "Collecting compressed record file:"
-       $perf_tool record -o $trace_file -g -z -F 5000 -- \
+       [[ "$(uname -m)" != s390x ]] && gflag='-g'
+       $perf_tool record -o $trace_file $gflag -z -F 5000 -- \
                dd count=500 if=/dev/urandom of=/dev/null
 }
 
index f7ee8fa..6ccecbd 100644 (file)
@@ -5,10 +5,60 @@
 
 #include "test_btf_map_in_map.skel.h"
 
+static int duration;
+
+static __u32 bpf_map_id(struct bpf_map *map)
+{
+       struct bpf_map_info info;
+       __u32 info_len = sizeof(info);
+       int err;
+
+       memset(&info, 0, info_len);
+       err = bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, &info_len);
+       if (err)
+               return 0;
+       return info.id;
+}
+
+/*
+ * Trigger synchronize_rcu() in kernel.
+ *
+ * ARRAY_OF_MAPS/HASH_OF_MAPS lookup/update operations trigger synchronize_rcu()
+ * if looking up an existing non-NULL element or updating the map with a valid
+ * inner map FD. Use this fact to trigger synchronize_rcu(): create map-in-map,
+ * create a trivial ARRAY map, update map-in-map with ARRAY inner map. Then
+ * cleanup. At the end, at least one synchronize_rcu() would be called.
+ */
+static int kern_sync_rcu(void)
+{
+       int inner_map_fd, outer_map_fd, err, zero = 0;
+
+       inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0);
+       if (CHECK(inner_map_fd < 0, "inner_map_create", "failed %d\n", -errno))
+               return -1;
+
+       outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
+                                            sizeof(int), inner_map_fd, 1, 0);
+       if (CHECK(outer_map_fd < 0, "outer_map_create", "failed %d\n", -errno)) {
+               close(inner_map_fd);
+               return -1;
+       }
+
+       err = bpf_map_update_elem(outer_map_fd, &zero, &inner_map_fd, 0);
+       if (err)
+               err = -errno;
+       CHECK(err, "outer_map_update", "failed %d\n", err);
+       close(inner_map_fd);
+       close(outer_map_fd);
+       return err;
+}
+
 void test_btf_map_in_map(void)
 {
-       int duration = 0, err, key = 0, val;
-       struct test_btf_map_in_map* skel;
+       int err, key = 0, val, i;
+       struct test_btf_map_in_map *skel;
+       int outer_arr_fd, outer_hash_fd;
+       int fd, map1_fd, map2_fd, map1_id, map2_id;
 
        skel = test_btf_map_in_map__open_and_load();
        if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
@@ -18,32 +68,78 @@ void test_btf_map_in_map(void)
        if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
                goto cleanup;
 
+       map1_fd = bpf_map__fd(skel->maps.inner_map1);
+       map2_fd = bpf_map__fd(skel->maps.inner_map2);
+       outer_arr_fd = bpf_map__fd(skel->maps.outer_arr);
+       outer_hash_fd = bpf_map__fd(skel->maps.outer_hash);
+
        /* inner1 = input, inner2 = input + 1 */
-       val = bpf_map__fd(skel->maps.inner_map1);
-       bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
-       val = bpf_map__fd(skel->maps.inner_map2);
-       bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+       map1_fd = bpf_map__fd(skel->maps.inner_map1);
+       bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0);
+       map2_fd = bpf_map__fd(skel->maps.inner_map2);
+       bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0);
        skel->bss->input = 1;
        usleep(1);
 
-       bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+       bpf_map_lookup_elem(map1_fd, &key, &val);
        CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
-       bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+       bpf_map_lookup_elem(map2_fd, &key, &val);
        CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
 
        /* inner1 = input + 1, inner2 = input */
-       val = bpf_map__fd(skel->maps.inner_map2);
-       bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
-       val = bpf_map__fd(skel->maps.inner_map1);
-       bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+       bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0);
+       bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0);
        skel->bss->input = 3;
        usleep(1);
 
-       bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+       bpf_map_lookup_elem(map1_fd, &key, &val);
        CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
-       bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+       bpf_map_lookup_elem(map2_fd, &key, &val);
        CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
 
+       for (i = 0; i < 5; i++) {
+               val = i % 2 ? map1_fd : map2_fd; /* alternate between the two inner maps */
+               err = bpf_map_update_elem(outer_hash_fd, &key, &val, 0);
+               if (CHECK_FAIL(err)) {
+                       printf("failed to update hash_of_maps on iter #%d\n", i);
+                       goto cleanup;
+               }
+               err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0);
+               if (CHECK_FAIL(err)) {
+                       printf("failed to update array_of_maps on iter #%d\n", i); /* outer_arr, not outer_hash */
+                       goto cleanup;
+               }
+       }
+
+       map1_id = bpf_map_id(skel->maps.inner_map1);
+       map2_id = bpf_map_id(skel->maps.inner_map2);
+       CHECK(map1_id == 0, "map1_id", "failed to get ID 1\n");
+       CHECK(map2_id == 0, "map2_id", "failed to get ID 2\n");
+
+       test_btf_map_in_map__destroy(skel);
+       skel = NULL;
+
+       /* we need to either wait for or force synchronize_rcu(), before
+        * checking for "still exists" condition, otherwise map could still be
+        * resolvable by ID, causing false positives.
+        *
+        * Older kernels (5.8 and earlier) freed map only after two
+        * synchronize_rcu()s, so trigger two, to be entirely sure.
+        */
+       CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
+       CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
+
+       fd = bpf_map_get_fd_by_id(map1_id);
+       if (CHECK(fd >= 0, "map1_leak", "inner_map1 leaked!\n")) {
+               close(fd);
+               goto cleanup;
+       }
+       fd = bpf_map_get_fd_by_id(map2_id);
+       if (CHECK(fd >= 0, "map2_leak", "inner_map2 leaked!\n")) {
+               close(fd);
+               goto cleanup;
+       }
+
 cleanup:
        test_btf_map_in_map__destroy(skel);
 }
index 8294ae3..43c9cda 100755 (executable)
@@ -318,6 +318,9 @@ class DebugfsDir:
                 continue
 
             if os.path.isfile(p):
+                # trap_flow_action_cookie must be initialized before it is read
+                if f == "trap_flow_action_cookie":
+                    cmd('echo deadbeef > %s/%s' % (path, f))
                 _, out = cmd('cat %s/%s' % (path, f))
                 dfs[f] = out.strip()
             elif os.path.isdir(p):
index 99f8f58..c5e8059 100644 (file)
        "perfevent for cgroup sockopt",
        .insns =  { __PERF_EVENT_INSNS__ },
        .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+       .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
        .fixup_map_event_output = { 4 },
        .result = ACCEPT,
        .retval = 1,
index 54cdefd..d59f3eb 100644 (file)
@@ -76,10 +76,8 @@ void set_default_state(struct kvm_nested_state *state)
 void set_default_vmx_state(struct kvm_nested_state *state, int size)
 {
        memset(state, 0, size);
-       state->flags = KVM_STATE_NESTED_GUEST_MODE  |
-                       KVM_STATE_NESTED_RUN_PENDING;
        if (have_evmcs)
-               state->flags |= KVM_STATE_NESTED_EVMCS;
+               state->flags = KVM_STATE_NESTED_EVMCS;
        state->format = 0;
        state->size = size;
        state->hdr.vmx.vmxon_pa = 0x1000;
@@ -148,6 +146,11 @@ void test_vmx_nested_state(struct kvm_vm *vm)
        state->hdr.vmx.smm.flags = 1;
        test_nested_state_expect_einval(vm, state);
 
+       /* Invalid flags are rejected. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.flags = ~0;
+       test_nested_state_expect_einval(vm, state);
+
        /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
        set_default_vmx_state(state, state_sz);
        state->hdr.vmx.vmxon_pa = -1ull;
@@ -185,20 +188,41 @@ void test_vmx_nested_state(struct kvm_vm *vm)
        state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
        test_nested_state_expect_einval(vm, state);
 
-       /* Size must be large enough to fit kvm_nested_state and vmcs12. */
+       /*
+        * Size must be large enough to fit kvm_nested_state and vmcs12
+        * if VMCS12 physical address is set
+        */
        set_default_vmx_state(state, state_sz);
        state->size = sizeof(*state);
+       state->flags = 0;
+       test_nested_state_expect_einval(vm, state);
+
+       set_default_vmx_state(state, state_sz);
+       state->size = sizeof(*state);
+       state->flags = 0;
+       state->hdr.vmx.vmcs12_pa = -1;
        test_nested_state(vm, state);
 
-       /* vmxon_pa cannot be the same address as vmcs_pa. */
+       /*
+        * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+        * contents but L2 not running.
+        */
        set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = 0;
-       state->hdr.vmx.vmcs12_pa = 0;
+       state->flags = 0;
+       test_nested_state(vm, state);
+
+       /* Invalid flags are rejected, even if no VMCS loaded. */
+       set_default_vmx_state(state, state_sz);
+       state->size = sizeof(*state);
+       state->flags = 0;
+       state->hdr.vmx.vmcs12_pa = -1;
+       state->hdr.vmx.flags = ~0;
        test_nested_state_expect_einval(vm, state);
 
-       /* The revision id for vmcs12 must be VMCS12_REVISION. */
+       /* vmxon_pa cannot be the same address as vmcs_pa. */
        set_default_vmx_state(state, state_sz);
-       set_revision_id_for_vmcs12(state, 0);
+       state->hdr.vmx.vmxon_pa = 0;
+       state->hdr.vmx.vmcs12_pa = 0;
        test_nested_state_expect_einval(vm, state);
 
        /*
index eb8e2a2..43a948f 100755 (executable)
@@ -252,8 +252,6 @@ check_highest_speed_is_chosen()
        fi
 
        local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
-       # Remove the first speed, h1 does not advertise this speed.
-       unset speeds_arr[0]
 
        max_speed=${speeds_arr[0]}
        for current in ${speeds_arr[@]}; do
index 8c8c7d7..2c522f7 100644 (file)
@@ -350,7 +350,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
        int fds[2], fds_udp[2][2], ret;
 
        fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
-               typeflags, PORT_BASE, PORT_BASE + port_off);
+               typeflags, (uint16_t)PORT_BASE,
+               (uint16_t)(PORT_BASE + port_off));
 
        fds[0] = sock_fanout_open(typeflags, 0);
        fds[1] = sock_fanout_open(typeflags, 0);
index 422e776..bcb79ba 100644 (file)
@@ -329,8 +329,7 @@ int main(int argc, char **argv)
        bool all_tests = true;
        int arg_index = 0;
        int failures = 0;
-       int s, t;
-       char opt;
+       int s, t, opt;
 
        while ((opt = getopt_long(argc, argv, "", long_options,
                                  &arg_index)) != -1) {
index ceaad78..3155fbb 100644 (file)
@@ -121,7 +121,7 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
        if (rbuf[0] != ts->data)
                error(1, 0, "payload mismatch. expected %c", ts->data);
 
-       if (labs(tstop - texpect) > cfg_variance_us)
+       if (llabs(tstop - texpect) > cfg_variance_us)
                error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
 
        return false;
index 4555f88..a61b7b3 100644 (file)
@@ -344,7 +344,7 @@ int main(int argc, char *argv[])
 {
        struct sockaddr_storage listenaddr, addr;
        unsigned int max_pacing_rate = 0;
-       size_t total = 0;
+       uint64_t total = 0;
        char *host = NULL;
        int fd, c, on = 1;
        char *buffer;
@@ -473,12 +473,12 @@ int main(int argc, char *argv[])
                zflg = 0;
        }
        while (total < FILE_SZ) {
-               ssize_t wr = FILE_SZ - total;
+               int64_t wr = FILE_SZ - total;
 
                if (wr > chunk_size)
                        wr = chunk_size;
                /* Note : we just want to fill the pipe with 0 bytes */
-               wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+               wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0);
                if (wr <= 0)
                        break;
                total += wr;
index 93e80a4..d6e5ce0 100755 (executable)
@@ -32,11 +32,11 @@ if test -z "$TORTURE_TRUST_MAKE"
 then
        make clean > $resdir/Make.clean 2>&1
 fi
-make $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
+make $TORTURE_KMAKE_ARG $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
 mv .config .config.sav
 sh $T/upd.sh < .config.sav > .config
 cp .config .config.new
-yes '' | make oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
+yes '' | make $TORTURE_KMAKE_ARG oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
 
 # verify new config matches specification.
 configcheck.sh .config $c
diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
new file mode 100755 (executable)
index 0000000..0e4c0b2
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Scan standard input for error messages, dumping any found to standard
+# output.
+#
+# Usage: console-badness.sh
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+grep -v 'ODEBUG: ' |
+grep -v 'This means that this is a DEBUG kernel and it is' |
+grep -v 'Warning: unable to open an initial console'
index 1281022..51f3464 100644 (file)
@@ -215,9 +215,6 @@ identify_qemu_args () {
                then
                        echo -device spapr-vlan,netdev=net0,mac=$TORTURE_QEMU_MAC
                        echo -netdev bridge,br=br0,id=net0
-               elif test -n "$TORTURE_QEMU_INTERACTIVE"
-               then
-                       echo -net nic -net user
                fi
                ;;
        esac
@@ -234,7 +231,7 @@ identify_qemu_args () {
 # Returns the number of virtual CPUs available to the aggregate of the
 # guest OSes.
 identify_qemu_vcpus () {
-       lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://'
+       lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://' -e 's/[  ]*//g'
 }
 
 # print_bug
@@ -275,3 +272,21 @@ specify_qemu_cpus () {
                esac
        fi
 }
+
+# specify_qemu_net qemu-args
+#
+# Echoes qemu-args with a network option appended: "-net nic -net user"
+# if the TORTURE_QEMU_INTERACTIVE environment variable is set, otherwise
+# "-net none".  If the incoming qemu-args already contains "-net", it is
+# echoed unchanged.
+specify_qemu_net () {
+       if echo $1 | grep -q -e -net
+       then
+               echo $1
+       elif test -n "$TORTURE_QEMU_INTERACTIVE"
+       then
+               echo $1 -net nic -net user
+       else
+               echo $1 -net none
+       fi
+}
index 30cb5b2..188b864 100755 (executable)
@@ -46,6 +46,12 @@ do
                exit 0;
        fi
 
+       # Check for stop request.
+       if test -f "$TORTURE_STOPFILE"
+       then
+               exit 1;
+       fi
+
        # Set affinity to randomly selected online CPU
        if cpus=`grep 1 /sys/devices/system/cpu/*/online 2>&1 |
                 sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
index 18d6518..115e182 100755 (executable)
@@ -9,6 +9,12 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 
+if test -f "$TORTURE_STOPFILE"
+then
+       echo "kvm-build.sh early exit due to run STOP request"
+       exit 1
+fi
+
 config_template=${1}
 if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template"
 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
new file mode 100755 (executable)
index 0000000..6e65c13
--- /dev/null
@@ -0,0 +1,108 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Run a group of kvm.sh tests on the specified commits.  This currently
+# unconditionally does three-minute runs on each scenario in CFLIST,
+# taking advantage of all available CPUs and trusting the "make" utility.
+# In the short term, adjustments can be made by editing this script and
+# CFLIST.  If some adjustments appear to have ongoing value, this script
+# might grow some command-line arguments.
+#
+# Usage: kvm-check-branches.sh commit1 commit2..commit3 commit4 ...
+#
+# This script considers its arguments one at a time.  If more elaborate
+# specification of commits is needed, please use "git rev-list" to
+# produce something that this simple script can understand.  The reason
+# for retaining the simplicity is that it allows the user to more easily
+# see which commit came from which branch.
+#
+# This script creates a yyyy.mm.dd-hh.mm.ss-group entry in the "res"
+# directory.  The calls to kvm.sh create the usual entries, but this script
+# moves them under the yyyy.mm.dd-hh.mm.ss-group entry, each in its own
+# directory numbered in run order, that is, "0001", "0002", and so on.
+# For successful runs, the large build artifacts are removed.  Doing this
+# reduces the disk space required by about two orders of magnitude for
+# successful runs.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+if ! git status > /dev/null 2>&1
+then
+       echo '!!!' This script needs to run in a git archive. 1>&2
+       echo '!!!' Giving up. 1>&2
+       exit 1
+fi
+
+# Remember where we started so that we can get back at the end.
+curcommit="`git status | head -1 | awk '{ print $NF }'`"
+
+nfail=0
+ntry=0
+resdir="tools/testing/selftests/rcutorture/res"
+ds="`date +%Y.%m.%d-%H.%M.%S`-group"
+if ! test -e $resdir
+then
+       mkdir $resdir || :
+fi
+mkdir $resdir/$ds
+echo Results directory: $resdir/$ds
+
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+. functions.sh
+cpus="`identify_qemu_vcpus`"
+echo Using up to $cpus CPUs.
+
+# Each pass through this loop does one command-line argument.
+for gitbr in $@
+do
+       echo ' --- git branch ' $gitbr
+
+       # Each pass through this loop tests one commit.
+       for i in `git rev-list "$gitbr"`
+       do
+               ntry=`expr $ntry + 1`
+               idir=`awk -v ntry="$ntry" 'END { printf "%04d", ntry; }' < /dev/null`
+               echo ' --- commit ' $i from branch $gitbr
+               date
+               mkdir $resdir/$ds/$idir
+               echo $gitbr > $resdir/$ds/$idir/gitbr
+               echo $i >> $resdir/$ds/$idir/gitbr
+
+               # Test the specified commit.
+               git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1
+               echo git checkout return code: $? "(Commit $ntry: $i)"
+               kvm.sh --cpus $cpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1
+               ret=$?
+               echo kvm.sh return code $ret for commit $i from branch $gitbr
+
+               # Move the build products to their resting place.
+               runresdir="`grep -m 1 '^Results directory:' < $resdir/$ds/$idir/kvm.sh.out | sed -e 's/^Results directory://'`"
+               mv $runresdir $resdir/$ds/$idir
+               rrd="`echo $runresdir | sed -e 's,^.*/,,'`"
+               echo Run results: $resdir/$ds/$idir/$rrd
+               if test "$ret" -ne 0
+               then
+                       # Failure, so leave all evidence intact.
+                       nfail=`expr $nfail + 1`
+               else
+                       # Success, so remove large files to save about 1GB.
+                       ( cd $resdir/$ds/$idir/$rrd; rm -f */vmlinux */bzImage */System.map */Module.symvers )
+               fi
+       done
+done
+date
+
+# Go back to the original commit.
+git checkout "$curcommit"
+
+if test $nfail -ne 0
+then
+       echo '!!! ' $nfail failures in $ntry 'runs!!!'
+       exit 1
+else
+       echo No failures in $ntry runs.
+       exit 0
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh
new file mode 100755 (executable)
index 0000000..35a463d
--- /dev/null
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Analyze a given results directory for refscale performance measurements.
+#
+# Usage: kvm-recheck-refscale.sh resdir
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+       :
+else
+       echo Unreadable results directory: $i
+       exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log | tr -d '\015' |
+awk -v configfile="$configfile" '
+/^[    ]*Runs  Time\(ns\) *$/ {
+       if (dataphase + 0 == 0) {
+               dataphase = 1;
+               # print configfile, $0;
+       }
+       next;
+}
+
+/[^    ]*[0-9][0-9]*   [0-9][0-9]*\.[0-9][0-9]*$/ {
+       if (dataphase == 1) {
+               # print $0;
+               readertimes[++n] = $2;
+               sum += $2;
+       }
+       next;
+}
+
+{
+       if (dataphase == 1)
+               dataphase = 2;  # first non-data line after the table ends data collection
+       next;
+}
+
+END {
+       print configfile " results:";
+       newNR = asort(readertimes);  # sorts in place; result is indexed 1..newNR
+       if (newNR <= 0) {
+               print "No refscale records found???"
+               exit;
+       }
+       medianidx = int(newNR / 2);  # lower-middle index when newNR is even
+       if (newNR == medianidx * 2)
+               medianvalue = (readertimes[medianidx] + readertimes[medianidx + 1]) / 2;
+       else
+               medianvalue = readertimes[medianidx + 1];  # exact middle when newNR is odd
+       points = "Points:";
+       for (i = 1; i <= newNR; i++)
+               points = points " " readertimes[i];
+       print points;
+       print "Average reader duration: " sum / newNR " nanoseconds";
+       print "Minimum reader duration: " readertimes[1];
+       print "Median reader duration: " medianvalue;
+       print "Maximum reader duration: " readertimes[newNR];
+       print "Computed from refscale printk output.";
+}'
index 736f047..840a467 100755 (executable)
@@ -31,6 +31,7 @@ do
                        head -1 $resdir/log
                fi
                TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+               configfile=`echo $i | sed -e 's,^.*/,,'`
                rm -f $i/console.log.*.diags
                kvm-recheck-${TORTURE_SUITE}.sh $i
                if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
@@ -43,7 +44,8 @@ do
                        then
                                echo QEMU killed
                        fi
-                       configcheck.sh $i/.config $i/ConfigFragment
+                       configcheck.sh $i/.config $i/ConfigFragment > $T 2>&1
+                       cat $T
                        if test -r $i/Make.oldconfig.err
                        then
                                cat $i/Make.oldconfig.err
@@ -55,15 +57,15 @@ do
                                cat $i/Warnings
                        fi
                else
-                       if test -f "$i/qemu-cmd"
-                       then
-                               print_bug qemu failed
-                               echo "   $i"
-                       elif test -f "$i/buildonly"
+                       if test -f "$i/buildonly"
                        then
                                echo Build-only run, no boot/test
                                configcheck.sh $i/.config $i/ConfigFragment
                                parse-build.sh $i/Make.out $configfile
+                       elif test -f "$i/qemu-cmd"
+                       then
+                               print_bug qemu failed
+                               echo "   $i"
                        else
                                print_bug Build failed
                                echo "   $i"
@@ -72,7 +74,11 @@ do
        done
        if test -f "$rd/kcsan.sum"
        then
-               if test -s "$rd/kcsan.sum"
+               if grep -q CONFIG_KCSAN=y $T
+               then
+                       echo "Compiler or architecture does not support KCSAN!"
+                       echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
+               elif test -s "$rd/kcsan.sum"
                then
                        echo KCSAN summary in $rd/kcsan.sum
                else
index 6ff611c..e07779a 100755 (executable)
@@ -124,7 +124,6 @@ seconds=$4
 qemu_args=$5
 boot_args=$6
 
-cd $KVM
 kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
 if test -z "$TORTURE_BUILDONLY"
 then
@@ -141,6 +140,7 @@ then
        cpu_count=$TORTURE_ALLOTED_CPUS
 fi
 qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
+qemu_args="`specify_qemu_net "$qemu_args"`"
 
 # Generate architecture-specific and interaction-specific qemu arguments
 qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
@@ -152,6 +152,7 @@ qemu_append="`identify_qemu_append "$QEMU"`"
 boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
 # Generate kernel-version-specific boot parameters
 boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
 
 if test -n "$TORTURE_BUILDONLY"
 then
@@ -159,9 +160,16 @@ then
        touch $resdir/buildonly
        exit 0
 fi
+
+# Decorate qemu-cmd with redirection, backgrounding, and PID capture
+sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
+echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
+
+# In case qemu refuses to run...
 echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args" > $resdir/qemu-output 2>&1 & echo $! > $resdir/qemu_pid; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+
+# Attempt to run qemu
+( . $T/qemu-cmd; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
 commandcompleted=0
 sleep 10 # Give qemu's pid a chance to reach the file
 if test -s "$resdir/qemu_pid"
@@ -181,7 +189,7 @@ do
        kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
        if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
        then
-               if test $kruntime -ge $seconds
+               if test $kruntime -ge $seconds -o -f "$TORTURE_STOPFILE"
                then
                        break;
                fi
@@ -210,10 +218,19 @@ then
 fi
 if test $commandcompleted -eq 0 -a -n "$qemu_pid"
 then
-       echo Grace period for qemu job at pid $qemu_pid
+       if ! test -f "$TORTURE_STOPFILE"
+       then
+               echo Grace period for qemu job at pid $qemu_pid
+       fi
        oldline="`tail $resdir/console.log`"
        while :
        do
+               if test -f "$TORTURE_STOPFILE"
+               then
+                       echo "PID $qemu_pid killed due to run STOP request" >> $resdir/Warnings 2>&1
+                       kill -KILL $qemu_pid
+                       break
+               fi
                kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
                if kill -0 $qemu_pid > /dev/null 2>&1
                then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
new file mode 100755 (executable)
index 0000000..c45a953
--- /dev/null
@@ -0,0 +1,51 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Transform a qemu-cmd file to allow reuse.
+#
+# Usage: kvm-transform.sh bzImage console.log < qemu-cmd-in > qemu-cmd-out
+#
+#      bzImage: Kernel and initrd from the same prior kvm.sh run.
+#      console.log: File into which to place console output.
+#
+# The original qemu-cmd file is provided on standard input.
+# The transformed qemu-cmd file is on standard output.
+# The transformation assumes that the qemu command is confined to a
+# single line.  It also assumes no whitespace in filenames.
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+image="$1"
+if test -z "$image"
+then
+       echo Need kernel image file. 1>&2 # stdout carries the transformed qemu-cmd
+       exit 1
+fi
+consolelog="$2"
+if test -z "$consolelog"
+then
+       echo "Need console log file name." 1>&2
+       exit 1
+fi
+
+awk -v image="$image" -v consolelog="$consolelog" '
+{
+       # Rebuild the line field by field, substituting -serial and -kernel arguments.
+       line = "";
+       for (i = 1; i <= NF; i++) {
+               if (line == "")
+                       line = $i;
+               else
+                       line = line " " $i;
+               if ($i == "-serial") {
+                       i++; # Skip the old console destination.
+                       line = line " file:" consolelog;
+               } else if ($i == "-kernel") {
+                       i++; # Skip the old kernel image path.
+                       line = line " " image;
+               }
+       }
+       print line;
+}'
index c279cf9..e655983 100755 (executable)
@@ -73,6 +73,10 @@ usage () {
 while test $# -gt 0
 do
        case "$1" in
+       --allcpus)
+               cpus=$TORTURE_ALLOTED_CPUS
+               max_cpus=$TORTURE_ALLOTED_CPUS
+               ;;
        --bootargs|--bootarg)
                checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
                TORTURE_BOOTARGS="$2"
@@ -180,13 +184,14 @@ do
                shift
                ;;
        --torture)
-               checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
+               checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\)$' '^--'
                TORTURE_SUITE=$2
                shift
-               if test "$TORTURE_SUITE" = rcuperf
+               if test "$TORTURE_SUITE" = rcuperf || test "$TORTURE_SUITE" = refscale
                then
-                       # If you really want jitter for rcuperf, specify
-                       # it after specifying rcuperf.  (But why?)
+                       # If you really want jitter for refscale or
+                       # rcuperf, specify it after specifying rcuperf
+                       # or refscale.  (But why jitter in these cases?)
                        jitter=0
                fi
                ;;
@@ -333,6 +338,8 @@ then
        mkdir -p "$resdir" || :
 fi
 mkdir $resdir/$ds
+TORTURE_RESDIR="$resdir/$ds"; export TORTURE_RESDIR
+TORTURE_STOPFILE="$resdir/$ds/STOP"; export TORTURE_STOPFILE
 echo Results directory: $resdir/$ds
 echo $scriptname $args
 touch $resdir/$ds/log
@@ -497,3 +504,7 @@ fi
 # Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
 # Function-graph tracing: ftrace=function_graph ftrace_graph_filter=sched_setaffinity,migration_cpu_stop
 # Also --kconfig "CONFIG_FUNCTION_TRACER=y CONFIG_FUNCTION_GRAPH_TRACER=y"
+# Control buffer size: --bootargs trace_buf_size=3k
+# Get trace-buffer dumps on all oopses: --bootargs ftrace_dump_on_oops
+# Ditto, but dump only the oopsing CPU: --bootargs ftrace_dump_on_oops=orig_cpu
+# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn
index 4bf62d7..71a9f43 100755 (executable)
@@ -33,8 +33,8 @@ then
 fi
 cat /dev/null > $file.diags
 
-# Check for proper termination, except that rcuperf runs don't indicate this.
-if test "$TORTURE_SUITE" != rcuperf
+# Check for proper termination, except for rcuperf and refscale.
+if test "$TORTURE_SUITE" != rcuperf && test "$TORTURE_SUITE" != refscale
 then
        # check for abject failure
 
@@ -44,11 +44,23 @@ then
                tail -1 |
                awk '
                {
-                       for (i=NF-8;i<=NF;i++)
+                       normalexit = 1;
+                       for (i=NF-8;i<=NF;i++) {
+                               if (i <= 0 || i !~ /^[0-9]*$/) {
+                                       bangstring = $0;
+                                       gsub(/^\[[^]]*] /, "", bangstring);
+                                       print bangstring;
+                                       normalexit = 0;
+                                       exit 0;
+                               }
                                sum+=$i;
+                       }
                }
-               END { print sum }'`
-               print_bug $title FAILURE, $nerrs instances
+               END {
+                       if (normalexit)
+                               print sum " instances"
+               }'`
+               print_bug $title FAILURE, $nerrs
                exit
        fi
 
@@ -104,10 +116,7 @@ then
        fi
 fi | tee -a $file.diags
 
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
-grep -v 'ODEBUG: ' |
-grep -v 'This means that this is a DEBUG kernel and it is' |
-grep -v 'Warning: unable to open an initial console' > $T.diags
+console-badness.sh < $file > $T.diags
 if test -s $T.diags
 then
        print_warning "Assertion failure in $file $title"
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFLIST b/tools/testing/selftests/rcutorture/configs/refscale/CFLIST
new file mode 100644 (file)
index 0000000..4d62eb4
--- /dev/null
@@ -0,0 +1,2 @@
+NOPREEMPT
+PREEMPT
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
new file mode 100644 (file)
index 0000000..a98b58b
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_RCU_REF_SCALE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
new file mode 100644 (file)
index 0000000..1cd25b7
--- /dev/null
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_PREEMPT_RCU=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
new file mode 100644 (file)
index 0000000..d10bc69
--- /dev/null
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
new file mode 100644 (file)
index 0000000..321e826
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Echoes bootparam-string with refscale.shutdown=1 and refscale.verbose=1 appended.
+per_version_boot_params () {
+       echo $1 refscale.shutdown=1 \
+               refscale.verbose=1
+}
index 96afb03..2599bc2 100644 (file)
@@ -100,6 +100,15 @@ config RD_LZ4
          Support loading of a LZ4 encoded initial ramdisk or cpio buffer
          If unsure, say N.
 
+config RD_ZSTD
+       bool "Support initial ramdisk/ramfs compressed using ZSTD"
+       default y
+       depends on BLK_DEV_INITRD
+       select DECOMPRESS_ZSTD
+       help
+         Support loading of a ZSTD encoded initial ramdisk or cpio buffer.
+         If unsure, say N.
+
 choice
        prompt "Built-in initramfs compression mode"
        depends on INITRAMFS_SOURCE != ""
@@ -196,6 +205,17 @@ config INITRAMFS_COMPRESSION_LZ4
          If you choose this, keep in mind that most distros don't provide lz4
          by default which could cause a build failure.
 
+config INITRAMFS_COMPRESSION_ZSTD
+       bool "ZSTD"
+       depends on RD_ZSTD
+       help
+         ZSTD is a compression algorithm targeting intermediate compression
+         with fast decompression speed. It will compress better than GZIP and
+         decompress around the same speed as LZO, but slower than LZ4.
+
+         If you choose this, keep in mind that you may need to install the zstd
+         tool to be able to compress the initramfs.
+
 config INITRAMFS_COMPRESSION_NONE
        bool "None"
        help
index c12e6b1..b1a81a4 100644 (file)
@@ -15,6 +15,7 @@ compress-$(CONFIG_INITRAMFS_COMPRESSION_LZMA) := lzma
 compress-$(CONFIG_INITRAMFS_COMPRESSION_XZ)    := xzmisc
 compress-$(CONFIG_INITRAMFS_COMPRESSION_LZO)   := lzo
 compress-$(CONFIG_INITRAMFS_COMPRESSION_LZ4)   := lz4
+compress-$(CONFIG_INITRAMFS_COMPRESSION_ZSTD)  := zstd
 
 obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o