Merge tag 'devicetree-fixes-for-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 12 Jun 2020 18:56:43 +0000 (11:56 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 12 Jun 2020 18:56:43 +0000 (11:56 -0700)
Pull Devicetree fixes from Rob Herring:

 - Another round of whack-a-mole removing 'allOf', redundant cases of
   'maxItems' and incorrect 'reg' sizes

 - Fix support for yaml.h in non-standard paths

* tag 'devicetree-fixes-for-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux:
  dt-bindings: Remove redundant 'maxItems'
  dt-bindings: Fix more incorrect 'reg' property sizes in examples
  dt-bindings: phy: qcom: Fix missing 'ranges' and example addresses
  dt-bindings: Remove more cases of 'allOf' containing a '$ref'
  scripts/dtc: use pkg-config to include <yaml.h> in non-standard path

296 files changed:
Documentation/dev-tools/index.rst
Documentation/dev-tools/kcsan.rst [new file with mode: 0644]
Documentation/lzo.txt
MAINTAINERS
Makefile
arch/alpha/kernel/setup.c
arch/arm64/Kconfig
arch/arm64/include/asm/acpi.h
arch/arm64/include/asm/atomic.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/kernel/debug-monitors.c
arch/arm64/kernel/ftrace.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/vdso32/Makefile
arch/arm64/kvm/aarch32.c
arch/arm64/kvm/arm.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/debug-sr.c
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/hyp/sysreg-sr.c
arch/arm64/kvm/pmu.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/sys_regs_generic_v8.c
arch/m68k/coldfire/pci.c
arch/m68k/configs/stmark2_defconfig
arch/m68k/include/asm/uaccess_no.h
arch/mips/Kconfig
arch/mips/include/asm/cpu-features.h
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/uapi/asm/inst.h
arch/mips/kernel/cpu-probe.c
arch/mips/kvm/Kconfig
arch/mips/kvm/Makefile
arch/mips/kvm/emulate.c
arch/mips/kvm/entry.c
arch/mips/kvm/interrupt.c
arch/mips/kvm/interrupt.h
arch/mips/kvm/loongson_ipi.c [new file with mode: 0644]
arch/mips/kvm/mips.c
arch/mips/kvm/tlb.c
arch/mips/kvm/trap_emul.c
arch/mips/kvm/vz.c
arch/nios2/kernel/signal.c
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s.h
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/kvm/book3s_64_vio.c
arch/powerpc/kvm/book3s_emulate.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_hv_uvmem.c
arch/powerpc/kvm/book3s_paired_singles.c
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/booke.c
arch/powerpc/kvm/booke.h
arch/powerpc/kvm/booke_emulate.c
arch/powerpc/kvm/e500_emulate.c
arch/powerpc/kvm/emulate.c
arch/powerpc/kvm/emulate_loadstore.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/kvm/trace_hv.h
arch/powerpc/platforms/powernv/vas-fault.c
arch/riscv/Kconfig
arch/riscv/include/asm/clocksource.h [new file with mode: 0644]
arch/riscv/include/asm/irq.h
arch/riscv/include/asm/processor.h
arch/riscv/include/asm/smp.h
arch/riscv/include/asm/vdso.h
arch/riscv/include/asm/vdso/clocksource.h [new file with mode: 0644]
arch/riscv/include/asm/vdso/gettimeofday.h [new file with mode: 0644]
arch/riscv/include/asm/vdso/processor.h [new file with mode: 0644]
arch/riscv/include/asm/vdso/vsyscall.h [new file with mode: 0644]
arch/riscv/kernel/cpu.c
arch/riscv/kernel/entry.S
arch/riscv/kernel/irq.c
arch/riscv/kernel/patch.c
arch/riscv/kernel/smp.c
arch/riscv/kernel/time.c
arch/riscv/kernel/traps.c
arch/riscv/kernel/vdso.c
arch/riscv/kernel/vdso/Makefile
arch/riscv/kernel/vdso/clock_getres.S [deleted file]
arch/riscv/kernel/vdso/clock_gettime.S [deleted file]
arch/riscv/kernel/vdso/gettimeofday.S [deleted file]
arch/riscv/kernel/vdso/vdso.lds.S
arch/riscv/kernel/vdso/vgettimeofday.c [new file with mode: 0644]
arch/riscv/mm/init.c
arch/s390/include/asm/kvm_host.h
arch/s390/kvm/kvm-s390.c
arch/x86/Kconfig
arch/x86/boot/Makefile
arch/x86/boot/compressed/Makefile
arch/x86/entry/vdso/Makefile
arch/x86/include/asm/atomic.h
arch/x86/include/asm/atomic64_32.h
arch/x86/include/asm/atomic64_64.h
arch/x86/include/asm/bitops.h
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/vdso/gettimeofday.h
arch/x86/kernel/Makefile
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/Makefile
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/e820.c
arch/x86/kernel/kvm.c
arch/x86/kernel/process.c
arch/x86/kernel/reboot.c
arch/x86/kernel/time.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/cpuid.c
arch/x86/kvm/debugfs.c
arch/x86/kvm/emulate.c
arch/x86/kvm/i8254.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/lib/Makefile
arch/x86/mm/Makefile
arch/x86/purgatory/.gitignore [new file with mode: 0644]
arch/x86/purgatory/Makefile
arch/x86/realmode/Makefile
arch/x86/realmode/rm/Makefile
block/bio-integrity.c
block/bio.c
block/blk-mq-tag.c
block/blk-mq-tag.h
block/blk-mq.c
block/blk-mq.h
block/blk.h
drivers/block/loop.c
drivers/block/pktcdvd.c
drivers/block/umem.c
drivers/clocksource/timer-riscv.c
drivers/firmware/efi/libstub/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/irqchip/Kconfig
drivers/irqchip/Makefile
drivers/irqchip/irq-riscv-intc.c [new file with mode: 0644]
drivers/irqchip/irq-sifive-plic.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/tcp.c
drivers/nvme/target/core.c
drivers/nvme/target/tcp.c
drivers/pci/xen-pcifront.c
drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/legacy/inode.c
drivers/vfio/vfio_iommu_type1.c
drivers/vhost/vhost.c
drivers/xen/Kconfig
drivers/xen/cpu_hotplug.c
drivers/xen/platform-pci.c
drivers/xen/pvcalls-back.c
drivers/xen/xen-pciback/conf_space.c
drivers/xen/xen-pciback/conf_space_header.c
drivers/xen/xen-pciback/conf_space_quirks.c
drivers/xen/xen-pciback/pci_stub.c
drivers/xen/xen-pciback/pciback.h
drivers/xen/xen-pciback/pciback_ops.c
drivers/xen/xen-pciback/vpci.c
drivers/xen/xenbus/xenbus_probe.c
fs/afs/write.c
fs/aio.c
fs/io-wq.c
fs/io-wq.h
fs/io_uring.c
fs/nilfs2/segment.c
fs/ocfs2/Kconfig
fs/ocfs2/mmap.c
include/asm-generic/atomic-instrumented.h
include/asm-generic/atomic-long.h
include/asm-generic/bitops/instrumented-atomic.h
include/asm-generic/bitops/instrumented-lock.h
include/asm-generic/bitops/instrumented-non-atomic.h
include/linux/atomic-arch-fallback.h [new file with mode: 0644]
include/linux/atomic-fallback.h
include/linux/atomic.h
include/linux/compiler-clang.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/compiler_types.h
include/linux/cpuhotplug.h
include/linux/instrumented.h [new file with mode: 0644]
include/linux/kcsan-checks.h [new file with mode: 0644]
include/linux/kcsan.h [new file with mode: 0644]
include/linux/kthread.h
include/linux/kvm_host.h
include/linux/mmu_context.h
include/linux/mmzone.h
include/linux/sched.h
include/linux/seqlock.h
include/linux/stacktrace.h
include/linux/uaccess.h
include/trace/events/block.h
init/init_task.c
init/main.c
kernel/Makefile
kernel/kcov.c
kernel/kcsan/Makefile [new file with mode: 0644]
kernel/kcsan/atomic.h [new file with mode: 0644]
kernel/kcsan/core.c [new file with mode: 0644]
kernel/kcsan/debugfs.c [new file with mode: 0644]
kernel/kcsan/encoding.h [new file with mode: 0644]
kernel/kcsan/kcsan.h [new file with mode: 0644]
kernel/kcsan/report.c [new file with mode: 0644]
kernel/kcsan/test.c [new file with mode: 0644]
kernel/kthread.c
kernel/locking/Makefile
kernel/sched/Makefile
kernel/scs.c
kernel/time/clocksource.c
kernel/trace/Makefile
kernel/trace/blktrace.c
lib/Kconfig.debug
lib/Kconfig.kcsan [new file with mode: 0644]
lib/Kconfig.ubsan
lib/Makefile
lib/bitmap.c
lib/iov_iter.c
lib/lz4/lz4_decompress.c
lib/lzo/lzo1x_compress.c
lib/test_bitops.c
lib/usercopy.c
lib/vdso/gettimeofday.c
mm/Makefile
mm/debug_vm_pgtable.c
mm/memory-failure.c
mm/mmu_context.c [deleted file]
mm/oom_kill.c
mm/vmacache.c
scripts/Makefile.kcsan [new file with mode: 0644]
scripts/Makefile.lib
scripts/atomic/fallbacks/acquire
scripts/atomic/fallbacks/add_negative
scripts/atomic/fallbacks/add_unless
scripts/atomic/fallbacks/andnot
scripts/atomic/fallbacks/dec
scripts/atomic/fallbacks/dec_and_test
scripts/atomic/fallbacks/dec_if_positive
scripts/atomic/fallbacks/dec_unless_positive
scripts/atomic/fallbacks/fence
scripts/atomic/fallbacks/fetch_add_unless
scripts/atomic/fallbacks/inc
scripts/atomic/fallbacks/inc_and_test
scripts/atomic/fallbacks/inc_not_zero
scripts/atomic/fallbacks/inc_unless_negative
scripts/atomic/fallbacks/read_acquire
scripts/atomic/fallbacks/release
scripts/atomic/fallbacks/set_release
scripts/atomic/fallbacks/sub_and_test
scripts/atomic/fallbacks/try_cmpxchg
scripts/atomic/gen-atomic-fallback.sh
scripts/atomic/gen-atomic-instrumented.sh
scripts/atomic/gen-atomic-long.sh
scripts/atomic/gen-atomics.sh
scripts/checkpatch.pl
scripts/spelling.txt
tools/objtool/check.c
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/include/x86_64/svm_util.h
tools/testing/selftests/kvm/include/x86_64/vmx.h
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/x86_64/svm.c
tools/testing/selftests/kvm/lib/x86_64/vmx.c
tools/testing/selftests/kvm/x86_64/evmcs_test.c
tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
tools/testing/selftests/kvm/x86_64/smm_test.c
tools/testing/selftests/kvm/x86_64/state_test.c
tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
tools/testing/selftests/vm/khugepaged.c
virt/kvm/async_pf.c
virt/kvm/kvm_main.c

index 09dee10..f7809c7 100644 (file)
@@ -21,6 +21,7 @@ whole; patches welcome!
    kasan
    ubsan
    kmemleak
+   kcsan
    gdb-kernel-debugging
    kgdb
    kselftest
diff --git a/Documentation/dev-tools/kcsan.rst b/Documentation/dev-tools/kcsan.rst
new file mode 100644 (file)
index 0000000..ce4bbd9
--- /dev/null
@@ -0,0 +1,321 @@
+The Kernel Concurrency Sanitizer (KCSAN)
+========================================
+
+The Kernel Concurrency Sanitizer (KCSAN) is a dynamic race detector, which
+relies on compile-time instrumentation, and uses a watchpoint-based sampling
+approach to detect races. KCSAN's primary purpose is to detect `data races`_.
+
+Usage
+-----
+
+KCSAN requires Clang version 11 or later.
+
+To enable KCSAN, configure the kernel with::
+
+    CONFIG_KCSAN=y
+
+KCSAN provides several other configuration options to customize behaviour (see
+the respective help text in ``lib/Kconfig.kcsan`` for more info).
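+
+For illustration only, a debugging-oriented configuration might combine KCSAN
+with the reporting options discussed below (an example fragment; which options
+exist and their defaults depend on the kernel version)::
+
+    CONFIG_KCSAN=y
+    CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=y
+    CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=y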
+
+Error reports
+~~~~~~~~~~~~~
+
+A typical data race report looks like this::
+
+    ==================================================================
+    BUG: KCSAN: data-race in generic_permission / kernfs_refresh_inode
+
+    write to 0xffff8fee4c40700c of 4 bytes by task 175 on cpu 4:
+     kernfs_refresh_inode+0x70/0x170
+     kernfs_iop_permission+0x4f/0x90
+     inode_permission+0x190/0x200
+     link_path_walk.part.0+0x503/0x8e0
+     path_lookupat.isra.0+0x69/0x4d0
+     filename_lookup+0x136/0x280
+     user_path_at_empty+0x47/0x60
+     vfs_statx+0x9b/0x130
+     __do_sys_newlstat+0x50/0xb0
+     __x64_sys_newlstat+0x37/0x50
+     do_syscall_64+0x85/0x260
+     entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+    read to 0xffff8fee4c40700c of 4 bytes by task 166 on cpu 6:
+     generic_permission+0x5b/0x2a0
+     kernfs_iop_permission+0x66/0x90
+     inode_permission+0x190/0x200
+     link_path_walk.part.0+0x503/0x8e0
+     path_lookupat.isra.0+0x69/0x4d0
+     filename_lookup+0x136/0x280
+     user_path_at_empty+0x47/0x60
+     do_faccessat+0x11a/0x390
+     __x64_sys_access+0x3c/0x50
+     do_syscall_64+0x85/0x260
+     entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+    Reported by Kernel Concurrency Sanitizer on:
+    CPU: 6 PID: 166 Comm: systemd-journal Not tainted 5.3.0-rc7+ #1
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
+    ==================================================================
+
+The header of the report provides a short summary of the functions involved in
+the race. It is followed by the access types and stack traces of the two
+threads involved in the data race.
+
+The other less common type of data race report looks like this::
+
+    ==================================================================
+    BUG: KCSAN: data-race in e1000_clean_rx_irq+0x551/0xb10
+
+    race at unknown origin, with read to 0xffff933db8a2ae6c of 1 bytes by interrupt on cpu 0:
+     e1000_clean_rx_irq+0x551/0xb10
+     e1000_clean+0x533/0xda0
+     net_rx_action+0x329/0x900
+     __do_softirq+0xdb/0x2db
+     irq_exit+0x9b/0xa0
+     do_IRQ+0x9c/0xf0
+     ret_from_intr+0x0/0x18
+     default_idle+0x3f/0x220
+     arch_cpu_idle+0x21/0x30
+     do_idle+0x1df/0x230
+     cpu_startup_entry+0x14/0x20
+     rest_init+0xc5/0xcb
+     arch_call_rest_init+0x13/0x2b
+     start_kernel+0x6db/0x700
+
+    Reported by Kernel Concurrency Sanitizer on:
+    CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-rc7+ #2
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
+    ==================================================================
+
+This report is generated when it was not possible to determine the other
+racing thread, but a race was inferred because the data value of the watched
+memory location changed. Such reports can occur due to missing instrumentation
+or, for example, DMA accesses. They are only generated if
+``CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=y`` (selected by default).
+
+Selective analysis
+~~~~~~~~~~~~~~~~~~
+
+It may be desirable to disable data race detection for specific accesses,
+functions, compilation units, or entire subsystems. For static blacklisting,
+the following options are available:
+
+* KCSAN understands the ``data_race(expr)`` annotation, which tells KCSAN that
+  any data races due to accesses in ``expr`` should be ignored, and that the
+  resulting behaviour when encountering a data race is deemed safe (see the
+  combined sketch after this list).
+
+* Disabling data race detection for entire functions can be accomplished by
+  using the function attribute ``__no_kcsan``::
+
+    __no_kcsan
+    void foo(void) {
+        ...
+
+  To dynamically limit the functions for which reports are generated, see the
+  `DebugFS interface`_ blacklist/whitelist feature.
+
+  For ``__always_inline`` functions, replace ``__always_inline`` with
+  ``__no_kcsan_or_inline`` (which implies ``__always_inline``)::
+
+    static __no_kcsan_or_inline void foo(void) {
+        ...
+
+* To disable data race detection for a particular compilation unit, add to the
+  ``Makefile``::
+
+    KCSAN_SANITIZE_file.o := n
+
+* To disable data race detection for all compilation units listed in a
+  ``Makefile``, add to the respective ``Makefile``::
+
+    KCSAN_SANITIZE := n
+
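+For illustration only, the ``data_race()`` and ``__no_kcsan`` annotations
+above might be combined as in the following sketch; ``stats_count``,
+``update_stats()`` and ``read_stats()`` are made-up names, not part of any
+KCSAN API::
+
+    static long stats_count;
+
+    __no_kcsan
+    static void update_stats(void)
+    {
+            /* Accesses in this function are not instrumented by KCSAN. */
+            stats_count++;
+    }
+
+    static long read_stats(void)
+    {
+            /* The racy read is intentional; KCSAN ignores races on it. */
+            return data_race(stats_count);
+    }
+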
+Furthermore, it is possible to tell KCSAN to show or hide entire classes of
+data races, depending on preferences. These can be changed via the following
+Kconfig options:
+
+* ``CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY``: If enabled and a conflicting write
+  is observed via a watchpoint, but the data value of the memory location was
+  observed to remain unchanged, do not report the data race.
+
+* ``CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC``: Assume that plain aligned writes
+  up to word size are atomic by default. Assumes that such writes are not
+  subject to unsafe compiler optimizations resulting in data races. The option
+  causes KCSAN to not report data races due to conflicts where the only plain
+  accesses are aligned writes up to word size.
+
+DebugFS interface
+~~~~~~~~~~~~~~~~~
+
+The file ``/sys/kernel/debug/kcsan`` provides the following interface:
+
+* Reading ``/sys/kernel/debug/kcsan`` returns various runtime statistics.
+
+* Writing ``on`` or ``off`` to ``/sys/kernel/debug/kcsan`` allows turning KCSAN
+  on or off, respectively.
+
+* Writing ``!some_func_name`` to ``/sys/kernel/debug/kcsan`` adds
+  ``some_func_name`` to the report filter list, which (by default) blacklists
+  reporting data races where either one of the top stack frames is a function
+  in the list.
+
+* Writing either ``blacklist`` or ``whitelist`` to ``/sys/kernel/debug/kcsan``
+  changes the report filtering behaviour. For example, the blacklist feature
+  can be used to silence frequently occurring data races; the whitelist feature
+  can help with reproduction and testing of fixes.
+
+Tuning performance
+~~~~~~~~~~~~~~~~~~
+
+Core parameters that affect KCSAN's overall performance and bug detection
+ability are exposed as kernel command-line arguments whose defaults can also be
+changed via the corresponding Kconfig options.
+
+* ``kcsan.skip_watch`` (``CONFIG_KCSAN_SKIP_WATCH``): Number of per-CPU memory
+  operations to skip before another watchpoint is set up. Setting up
+  watchpoints more frequently increases the likelihood that races will be
+  observed. This parameter has the most significant impact on overall system
+  performance and race detection ability.
+
+* ``kcsan.udelay_task`` (``CONFIG_KCSAN_UDELAY_TASK``): For tasks, the
+  microsecond delay to stall execution after a watchpoint has been set up.
+  Larger values increase the window in which a race may be observed.
+
+* ``kcsan.udelay_interrupt`` (``CONFIG_KCSAN_UDELAY_INTERRUPT``): For
+  interrupts, the microsecond delay to stall execution after a watchpoint has
+  been set up. Interrupts have tighter latency requirements, and their delay
+  should generally be smaller than the one chosen for tasks.
+
+They may be tweaked at runtime via ``/sys/module/kcsan/parameters/``.
+
+Data Races
+----------
+
+In an execution, two memory accesses form a *data race* if they *conflict*,
+they happen concurrently in different threads, and at least one of them is a
+*plain access*; they *conflict* if both access the same memory location, and at
+least one is a write. For a more thorough discussion and definition, see `"Plain
+Accesses and Data Races" in the LKMM`_.
+
+.. _"Plain Accesses and Data Races" in the LKMM: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/memory-model/Documentation/explanation.txt#n1922
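+
+As a minimal illustration of the definition above (``flag`` and
+``do_something()`` are made-up names), consider two threads that run
+concurrently::
+
+    int flag;                   /* accessed concurrently by two threads */
+
+    /* Thread 1 */
+    flag = 1;                   /* plain write */
+
+    /* Thread 2 */
+    if (flag)                   /* plain read, conflicts with the write */
+            do_something();     /* -> the two accesses form a data race */
+
+    /*
+     * Marking both accesses, e.g. WRITE_ONCE(flag, 1) and READ_ONCE(flag),
+     * means neither is a plain access, so there is no longer a data race,
+     * even though the code may still contain ordering bugs.
+     */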
+
+Relationship with the Linux-Kernel Memory Consistency Model (LKMM)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The LKMM defines the propagation and ordering rules of various memory
+operations, which gives developers the ability to reason about concurrent code.
+Ultimately this allows one to determine the possible executions of concurrent
+code, and whether that code is free from data races.
+
+KCSAN is aware of *marked atomic operations* (``READ_ONCE``, ``WRITE_ONCE``,
+``atomic_*``, etc.), but is oblivious of any ordering guarantees and simply
+assumes that memory barriers are placed correctly. In other words, KCSAN
+assumes that as long as a plain access is not observed to race with another
+conflicting access, memory operations are correctly ordered.
+
+This means that KCSAN will not report *potential* data races due to missing
+memory ordering. Developers should therefore carefully consider the memory
+ordering requirements that remain unchecked. If, however, missing
+memory ordering (that is observable with a particular compiler and
+architecture) leads to an observable data race (e.g. entering a critical
+section erroneously), KCSAN would report the resulting data race.
+
+Race Detection Beyond Data Races
+--------------------------------
+
+For code with complex concurrency design, race-condition bugs may not always
+manifest as data races. Race conditions occur if concurrently executing
+operations result in unexpected system behaviour. On the other hand, data races
+are defined at the C-language level. The following macros can be used to check
+properties of concurrent code where bugs would not manifest as data races.
+
+.. kernel-doc:: include/linux/kcsan-checks.h
+    :functions: ASSERT_EXCLUSIVE_WRITER ASSERT_EXCLUSIVE_WRITER_SCOPED
+                ASSERT_EXCLUSIVE_ACCESS ASSERT_EXCLUSIVE_ACCESS_SCOPED
+                ASSERT_EXCLUSIVE_BITS
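+
+As a rough usage sketch (``struct my_device``, its ``flags`` field and
+``update_flags()`` are hypothetical, and ``<linux/kcsan-checks.h>`` is assumed
+to be included), ``ASSERT_EXCLUSIVE_WRITER()`` lets KCSAN verify that no other
+thread writes ``dev->flags`` while the lock is held, even though concurrent
+lockless readers are expected::
+
+    static void update_flags(struct my_device *dev, long set)
+    {
+            spin_lock(&dev->lock);
+            /*
+             * Readers may read dev->flags locklessly, but all writers must
+             * hold dev->lock; KCSAN reports any concurrent writer.
+             */
+            ASSERT_EXCLUSIVE_WRITER(dev->flags);
+            WRITE_ONCE(dev->flags, dev->flags | set);
+            spin_unlock(&dev->lock);
+    }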
+
+Implementation Details
+----------------------
+
+KCSAN relies on observing that two accesses happen concurrently. Crucially, we
+want to (a) increase the chances of observing races (especially for races that
+manifest rarely), and (b) be able to actually observe them. We can accomplish
+(a) by injecting various delays, and (b) by using address watchpoints (or
+breakpoints).
+
+If we deliberately stall a memory access while we have a watchpoint set up for
+its address, and then observe the watchpoint fire, two accesses to the same
+address have just raced. Using hardware watchpoints, this is the approach taken
+in `DataCollider
+<http://usenix.org/legacy/events/osdi10/tech/full_papers/Erickson.pdf>`_.
+Unlike DataCollider, KCSAN does not use hardware watchpoints, but instead
+relies on compiler instrumentation and "soft watchpoints".
+
+In KCSAN, watchpoints are implemented using an efficient encoding that stores
+access type, size, and address in a long; the benefits of using "soft
+watchpoints" are portability and greater flexibility. KCSAN then relies on the
+compiler instrumenting plain accesses. For each instrumented plain access:
+
+1. Check if a matching watchpoint exists; if yes, and at least one access is a
+   write, then we encountered a racing access.
+
+2. Periodically, if no matching watchpoint exists, set up a watchpoint and
+   stall for a small randomized delay.
+
+3. Also check the data value before the delay, and re-check the data value
+   after delay; if the values mismatch, we infer a race of unknown origin.
+
+To detect data races between plain and marked accesses, KCSAN also annotates
+marked accesses, but only to check whether a watchpoint exists; i.e. KCSAN
+never sets up a watchpoint on marked accesses. Because watchpoints are never
+set up for marked operations, if all accesses to a concurrently accessed
+variable are properly marked, KCSAN will never trigger a watchpoint and
+therefore never report the accesses.
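+
+The value-change check in steps 2 and 3 can be demonstrated outside the kernel
+with an ordinary user-space program; the sketch below is *not* KCSAN code (see
+``kernel/kcsan/core.c`` for the real implementation), it merely snapshots a
+location, stalls, and re-reads it::
+
+    #include <pthread.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    static volatile int shared;         /* racy on purpose */
+
+    static void *writer(void *arg)
+    {
+            for (int i = 0; i < 1000; i++) {
+                    shared = i;         /* unsynchronized write */
+                    usleep(100);
+            }
+            return NULL;
+    }
+
+    int main(void)
+    {
+            pthread_t t;
+
+            pthread_create(&t, NULL, writer, NULL);
+            for (int i = 0; i < 100; i++) {
+                    int before = shared;        /* "watch" the location */
+                    usleep(1000);               /* stall, as KCSAN does */
+                    if (shared != before)       /* value changed?       */
+                            printf("race of unknown origin inferred\n");
+            }
+            pthread_join(t, NULL);
+            return 0;
+    }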
+
+Key Properties
+~~~~~~~~~~~~~~
+
+1. **Memory Overhead:**  The overall memory overhead is only a few MiB
+   depending on configuration. The current implementation uses a small array of
+   longs to encode watchpoint information, which is negligible.
+
+2. **Performance Overhead:** KCSAN's runtime aims to be minimal, using an
+   efficient watchpoint encoding that does not require acquiring any shared
+   locks in the fast-path. For kernel boot on a system with 8 CPUs:
+
+   - 5.0x slow-down with the default KCSAN config;
+   - 2.8x slow-down from runtime fast-path overhead only (set very large
+     ``KCSAN_SKIP_WATCH`` and unset ``KCSAN_SKIP_WATCH_RANDOMIZE``).
+
+3. **Annotation Overheads:** Minimal annotations are required outside the KCSAN
+   runtime. As a result, maintenance overheads are minimal as the kernel
+   evolves.
+
+4. **Detects Racy Writes from Devices:** Due to checking data values upon
+   setting up watchpoints, racy writes from devices can also be detected.
+
+5. **Memory Ordering:** KCSAN is *not* explicitly aware of the LKMM's ordering
+   rules; this may result in missed data races (false negatives).
+
+6. **Analysis Accuracy:** For observed executions, due to using a sampling
+   strategy, the analysis is *unsound* (false negatives possible), but aims to
+   be complete (no false positives).
+
+Alternatives Considered
+-----------------------
+
+An alternative data race detection approach for the kernel can be found in the
+`Kernel Thread Sanitizer (KTSAN) <https://github.com/google/ktsan/wiki>`_.
+KTSAN is a happens-before data race detector, which explicitly establishes the
+happens-before order between memory operations, which can then be used to
+determine data races as defined in `Data Races`_.
+
+To build a correct happens-before relation, KTSAN must be aware of all ordering
+rules of the LKMM and synchronization primitives. Unfortunately, any omission
+leads to large numbers of false positives, which is especially detrimental in
+the context of the kernel, which includes numerous custom synchronization
+mechanisms. To track the happens-before relation, KTSAN's implementation
+requires metadata for each memory location (shadow memory), which for each page
+corresponds to 4 pages of shadow memory, and can translate into overhead of
+tens of GiB on a large system.
index ca98332..f65b515 100644 (file)
@@ -159,11 +159,15 @@ Byte sequences
            distance = 16384 + (H << 14) + D
            state = S (copy S literals after this block)
            End of stream is reached if distance == 16384
+           In version 1 only, to prevent ambiguity with the RLE case when
+           ((distance & 0x803f) == 0x803f) && (261 <= length <= 264), the
+           compressor must not emit block copies where distance and length
+           meet these conditions.
 
         In version 1 only, this instruction is also used to encode a run of
-        zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
+           zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
            In this case, it is followed by a fourth byte, X.
-           run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4.
+           run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4
 
       0 0 1 L L L L L  (32..63)
            Copy of small block within 16kB distance (preferably less than 34B)
index 573cf64..58bc99a 100644 (file)
@@ -9305,6 +9305,17 @@ F:       Documentation/kbuild/kconfig*
 F:     scripts/Kconfig.include
 F:     scripts/kconfig/
 
+KCSAN
+M:     Marco Elver <elver@google.com>
+R:     Dmitry Vyukov <dvyukov@google.com>
+L:     kasan-dev@googlegroups.com
+S:     Maintained
+F:     Documentation/dev-tools/kcsan.rst
+F:     include/linux/kcsan*.h
+F:     kernel/kcsan/
+F:     lib/Kconfig.kcsan
+F:     scripts/Makefile.kcsan
+
 KDUMP
 M:     Dave Young <dyoung@redhat.com>
 M:     Baoquan He <bhe@redhat.com>
@@ -12900,7 +12911,7 @@ F:      include/uapi/linux/ppdev.h
 
 PARAVIRT_OPS INTERFACE
 M:     Juergen Gross <jgross@suse.com>
-M:     Thomas Hellstrom <thellstrom@vmware.com>
+M:     Deep Shah <sdeep@vmware.com>
 M:     "VMware, Inc." <pv-drivers@vmware.com>
 L:     virtualization@lists.linux-foundation.org
 S:     Supported
@@ -18276,7 +18287,7 @@ S:      Maintained
 F:     drivers/misc/vmw_balloon.c
 
 VMWARE HYPERVISOR INTERFACE
-M:     Thomas Hellstrom <thellstrom@vmware.com>
+M:     Deep Shah <sdeep@vmware.com>
 M:     "VMware, Inc." <pv-drivers@vmware.com>
 L:     virtualization@lists.linux-foundation.org
 S:     Supported
index 839f9fe..f0c1a3a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -531,7 +531,7 @@ export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
 
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS
 export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
-export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN
+export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN CFLAGS_KCSAN
 export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
 export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
@@ -965,6 +965,7 @@ endif
 include scripts/Makefile.kasan
 include scripts/Makefile.extrawarn
 include scripts/Makefile.ubsan
+include scripts/Makefile.kcsan
 
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 KBUILD_CPPFLAGS += $(KCPPFLAGS)
index f5c42a8..53520f8 100644 (file)
@@ -430,8 +430,13 @@ register_cpus(void)
 arch_initcall(register_cpus);
 
 #ifdef CONFIG_MAGIC_SYSRQ
+static void sysrq_reboot_handler(int unused)
+{
+       machine_halt();
+}
+
 static const struct sysrq_key_op srm_sysrq_reboot_op = {
-       .handler        = machine_halt,
+       .handler        = sysrq_reboot_handler,
        .help_msg       = "reboot(b)",
        .action_msg     = "Resetting",
        .enable_mask    = SYSRQ_ENABLE_BOOT,
index 7f9d384..8a46ed3 100644 (file)
@@ -1299,6 +1299,14 @@ config COMPAT_VDSO
          You must have a 32-bit build of glibc 2.22 or later for programs
          to seamlessly take advantage of this.
 
+config THUMB2_COMPAT_VDSO
+       bool "Compile the 32-bit vDSO for Thumb-2 mode" if EXPERT
+       depends on COMPAT_VDSO
+       default y
+       help
+         Compile the compat vDSO with '-mthumb -fomit-frame-pointer' if y,
+         otherwise with '-marm'.
+
 menuconfig ARMV8_DEPRECATED
        bool "Emulate deprecated/obsolete ARMv8 instructions"
        depends on SYSCTL
@@ -1740,8 +1748,9 @@ config ARM64_DEBUG_PRIORITY_MASKING
 endif
 
 config RELOCATABLE
-       bool
+       bool "Build a relocatable kernel image" if EXPERT
        select ARCH_HAS_RELR
+       default y
        help
          This builds the kernel as a Position Independent Executable (PIE),
          which retains all relocation metadata required to relocate the
index b263e23..a45366c 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/efi.h>
 #include <linux/memblock.h>
 #include <linux/psci.h>
+#include <linux/stddef.h>
 
 #include <asm/cputype.h>
 #include <asm/io.h>
  * is therefore used to delimit the MADT GICC structure minimum length
  * appropriately.
  */
-#define ACPI_MADT_GICC_MIN_LENGTH   ACPI_OFFSET(  \
+#define ACPI_MADT_GICC_MIN_LENGTH   offsetof(  \
        struct acpi_madt_generic_interrupt, efficiency_class)
 
 #define BAD_MADT_GICC_ENTRY(entry, end)                                        \
        (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
        (unsigned long)(entry) + (entry)->header.length > (end))
 
-#define ACPI_MADT_GICC_SPE  (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \
+#define ACPI_MADT_GICC_SPE  (offsetof(struct acpi_madt_generic_interrupt, \
        spe_interrupt) + sizeof(u16))
 
 /* Basic configuration for ACPI */
index 9543b5e..a08890d 100644 (file)
@@ -101,8 +101,8 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
 
 #define ATOMIC_INIT(i) { (i) }
 
-#define arch_atomic_read(v)                    READ_ONCE((v)->counter)
-#define arch_atomic_set(v, i)                  WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic_read(v)                    __READ_ONCE((v)->counter)
+#define arch_atomic_set(v, i)                  __WRITE_ONCE(((v)->counter), (i))
 
 #define arch_atomic_add_return_relaxed         arch_atomic_add_return_relaxed
 #define arch_atomic_add_return_acquire         arch_atomic_add_return_acquire
@@ -225,6 +225,6 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
 
 #define arch_atomic64_dec_if_positive          arch_atomic64_dec_if_positive
 
-#include <asm-generic/atomic-instrumented.h>
+#define ARCH_ATOMIC
 
 #endif /* __ASM_ATOMIC_H */
index 0c9b5fc..352aaeb 100644 (file)
@@ -81,12 +81,39 @@ extern u32 __kvm_get_mdcr_el2(void);
 
 extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
 
-/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
+/*
+ * Obtain the PC-relative address of a kernel symbol
+ * s: symbol
+ *
+ * The goal of this macro is to return a symbol's address based on a
+ * PC-relative computation, as opposed to a loading the VA from a
+ * constant pool or something similar. This works well for HYP, as an
+ * absolute VA is guaranteed to be wrong. Only use this if trying to
+ * obtain the address of a symbol (i.e. not something you obtained by
+ * following a pointer).
+ */
+#define hyp_symbol_addr(s)                                             \
+       ({                                                              \
+               typeof(s) *addr;                                        \
+               asm("adrp       %0, %1\n"                               \
+                   "add        %0, %0, :lo12:%1\n"                     \
+                   : "=r" (addr) : "S" (&s));                          \
+               addr;                                                   \
+       })
+
+/*
+ * Home-grown __this_cpu_{ptr,read} variants that always work at HYP,
+ * provided that sym is really a *symbol* and not a pointer obtained from
+ * a data structure. As for SHIFT_PERCPU_PTR(), the creative casting keeps
+ * sparse quiet.
+ */
 #define __hyp_this_cpu_ptr(sym)                                                \
        ({                                                              \
-               void *__ptr = hyp_symbol_addr(sym);                     \
+               void *__ptr;                                            \
+               __verify_pcpu_ptr(&sym);                                \
+               __ptr = hyp_symbol_addr(sym);                           \
                __ptr += read_sysreg(tpidr_el2);                        \
-               (typeof(&sym))__ptr;                                    \
+               (typeof(sym) __kernel __force *)__ptr;                  \
         })
 
 #define __hyp_this_cpu_read(sym)                                       \
index 6ea53e6..4d0f8ea 100644 (file)
@@ -112,12 +112,6 @@ static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
        vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
 }
 
-static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu)
-{
-       if (vcpu_has_ptrauth(vcpu))
-               vcpu_ptrauth_disable(vcpu);
-}
-
 static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.vsesr_el2;
index abbdf97..c3e6fcc 100644 (file)
@@ -284,9 +284,6 @@ struct kvm_vcpu_arch {
        struct kvm_guest_debug_arch vcpu_debug_state;
        struct kvm_guest_debug_arch external_debug_state;
 
-       /* Pointer to host CPU context */
-       struct kvm_cpu_context *host_cpu_context;
-
        struct thread_info *host_thread_info;   /* hyp VA */
        struct user_fpsimd_state *host_fpsimd_state;    /* hyp VA */
 
@@ -404,8 +401,10 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
  * CP14 and CP15 live in the same array, as they are backed by the
  * same system registers.
  */
-#define vcpu_cp14(v,r)         ((v)->arch.ctxt.copro[(r)])
-#define vcpu_cp15(v,r)         ((v)->arch.ctxt.copro[(r)])
+#define CPx_BIAS               IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+
+#define vcpu_cp14(v,r)         ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS])
+#define vcpu_cp15(v,r)         ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS])
 
 struct kvm_vm_stat {
        ulong remote_tlb_flush;
index 094260a..b12bfc1 100644 (file)
@@ -107,26 +107,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
 
 #define kern_hyp_va(v)         ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
 
-/*
- * Obtain the PC-relative address of a kernel symbol
- * s: symbol
- *
- * The goal of this macro is to return a symbol's address based on a
- * PC-relative computation, as opposed to a loading the VA from a
- * constant pool or something similar. This works well for HYP, as an
- * absolute VA is guaranteed to be wrong. Only use this if trying to
- * obtain the address of a symbol (i.e. not something you obtained by
- * following a pointer).
- */
-#define hyp_symbol_addr(s)                                             \
-       ({                                                              \
-               typeof(s) *addr;                                        \
-               asm("adrp       %0, %1\n"                               \
-                   "add        %0, %0, :lo12:%1\n"                     \
-                   : "=r" (addr) : "S" (&s));                          \
-               addr;                                                   \
-       })
-
 /*
  * We currently support using a VM-specified IPA size. For backward
  * compatibility, the default IPA size is fixed to 40bits.
index 15e80c8..5df4936 100644 (file)
@@ -130,7 +130,7 @@ static int clear_os_lock(unsigned int cpu)
        return 0;
 }
 
-static int debug_monitors_init(void)
+static int __init debug_monitors_init(void)
 {
        return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
                                 "arm64/debug_monitors:starting",
index 8618faa..86a5cf9 100644 (file)
@@ -69,7 +69,8 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
 
        if (addr == FTRACE_ADDR)
                return &plt[FTRACE_PLT_IDX];
-       if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS))
+       if (addr == FTRACE_REGS_ADDR &&
+           IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
                return &plt[FTRACE_REGS_PLT_IDX];
 #endif
        return NULL;
index 3fd2c11..93b3844 100644 (file)
@@ -319,6 +319,10 @@ void __init setup_arch(char **cmdline_p)
 
        xen_early_init();
        efi_init();
+
+       if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0)
+            pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+
        arm64_memblock_init();
 
        paging_init();
index 3964738..7ea1e82 100644 (file)
@@ -105,6 +105,14 @@ VDSO_CFLAGS += -D__uint128_t='void*'
 VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
 VDSO_CFLAGS += -Wno-int-to-pointer-cast
 
+# Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is
+# unreliable.
+ifeq ($(CONFIG_THUMB2_COMPAT_VDSO), y)
+VDSO_CFLAGS += -mthumb -fomit-frame-pointer
+else
+VDSO_CFLAGS += -marm
+endif
+
 VDSO_AFLAGS := $(VDSO_CAFLAGS)
 VDSO_AFLAGS += -D__ASSEMBLY__
 
index 0a356aa..40a62a9 100644 (file)
@@ -33,6 +33,26 @@ static const u8 return_offsets[8][2] = {
        [7] = { 4, 4 },         /* FIQ, unused */
 };
 
+static bool pre_fault_synchronize(struct kvm_vcpu *vcpu)
+{
+       preempt_disable();
+       if (vcpu->arch.sysregs_loaded_on_cpu) {
+               kvm_arch_vcpu_put(vcpu);
+               return true;
+       }
+
+       preempt_enable();
+       return false;
+}
+
+static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded)
+{
+       if (loaded) {
+               kvm_arch_vcpu_load(vcpu, smp_processor_id());
+               preempt_enable();
+       }
+}
+
 /*
  * When an exception is taken, most CPSR fields are left unchanged in the
  * handler. However, some are explicitly overridden (e.g. M[4:0]).
@@ -155,7 +175,10 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 
 void kvm_inject_undef32(struct kvm_vcpu *vcpu)
 {
+       bool loaded = pre_fault_synchronize(vcpu);
+
        prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4);
+       post_fault_synchronize(vcpu, loaded);
 }
 
 /*
@@ -168,6 +191,9 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
        u32 vect_offset;
        u32 *far, *fsr;
        bool is_lpae;
+       bool loaded;
+
+       loaded = pre_fault_synchronize(vcpu);
 
        if (is_pabt) {
                vect_offset = 12;
@@ -191,6 +217,8 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
                /* no need to shuffle FS[4] into DFSR[10] as its 0 */
                *fsr = DFSR_FSC_EXTABT_nLPAE;
        }
+
+       post_fault_synchronize(vcpu, loaded);
 }
 
 void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr)
index 7a57381..90cb905 100644 (file)
@@ -144,11 +144,6 @@ out_fail_alloc:
        return ret;
 }
 
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 {
        return VM_FAULT_SIGBUS;
@@ -340,10 +335,8 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        int *last_ran;
-       kvm_host_data_t *cpu_data;
 
        last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
-       cpu_data = this_cpu_ptr(&kvm_host_data);
 
        /*
         * We might get preempted before the vCPU actually runs, but
@@ -355,7 +348,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        }
 
        vcpu->cpu = cpu;
-       vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;
 
        kvm_vgic_load(vcpu);
        kvm_timer_vcpu_load(vcpu);
@@ -370,7 +362,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        else
                vcpu_set_wfx_traps(vcpu);
 
-       vcpu_ptrauth_setup_lazy(vcpu);
+       if (vcpu_has_ptrauth(vcpu))
+               vcpu_ptrauth_disable(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -990,11 +983,17 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
         * Ensure a rebooted VM will fault in RAM pages and detect if the
         * guest MMU is turned off and flush the caches as needed.
         *
-        * S2FWB enforces all memory accesses to RAM being cacheable, we
-        * ensure that the cache is always coherent.
+        * S2FWB enforces all memory accesses to RAM being cacheable,
+        * ensuring that the data side is always coherent. We still
+        * need to invalidate the I-cache though, as FWB does *not*
+        * imply CTR_EL0.DIC.
         */
-       if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
-               stage2_unmap_vm(vcpu->kvm);
+       if (vcpu->arch.has_run_once) {
+               if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+                       stage2_unmap_vm(vcpu->kvm);
+               else
+                       __flush_icache_all();
+       }
 
        vcpu_reset_hcr(vcpu);
 
index eb19469..5a02d4c 100644 (file)
@@ -162,40 +162,14 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
        return 1;
 }
 
-#define __ptrauth_save_key(regs, key)                                          \
-({                                                                             \
-       regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);       \
-       regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);       \
-})
-
-/*
- * Handle the guest trying to use a ptrauth instruction, or trying to access a
- * ptrauth register.
- */
-void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpu_context *ctxt;
-
-       if (vcpu_has_ptrauth(vcpu)) {
-               vcpu_ptrauth_enable(vcpu);
-               ctxt = vcpu->arch.host_cpu_context;
-               __ptrauth_save_key(ctxt->sys_regs, APIA);
-               __ptrauth_save_key(ctxt->sys_regs, APIB);
-               __ptrauth_save_key(ctxt->sys_regs, APDA);
-               __ptrauth_save_key(ctxt->sys_regs, APDB);
-               __ptrauth_save_key(ctxt->sys_regs, APGA);
-       } else {
-               kvm_inject_undefined(vcpu);
-       }
-}
-
 /*
  * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into
- * a NOP).
+ * a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all
+ * that we can do is give the guest an UNDEF.
  */
 static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-       kvm_arm_vcpu_ptrauth_trap(vcpu);
+       kvm_inject_undefined(vcpu);
        return 1;
 }
 
index 0fc9872..e95af20 100644 (file)
@@ -185,7 +185,7 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
        if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
                return;
 
-       host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+       host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
        guest_ctxt = &vcpu->arch.ctxt;
        host_dbg = &vcpu->arch.host_debug_state.regs;
        guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
@@ -207,7 +207,7 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
        if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
                return;
 
-       host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+       host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
        guest_ctxt = &vcpu->arch.ctxt;
        host_dbg = &vcpu->arch.host_debug_state.regs;
        guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
index 676b658..db1c448 100644 (file)
@@ -490,6 +490,64 @@ static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
        return true;
 }
 
+static bool __hyp_text esr_is_ptrauth_trap(u32 esr)
+{
+       u32 ec = ESR_ELx_EC(esr);
+
+       if (ec == ESR_ELx_EC_PAC)
+               return true;
+
+       if (ec != ESR_ELx_EC_SYS64)
+               return false;
+
+       switch (esr_sys64_to_sysreg(esr)) {
+       case SYS_APIAKEYLO_EL1:
+       case SYS_APIAKEYHI_EL1:
+       case SYS_APIBKEYLO_EL1:
+       case SYS_APIBKEYHI_EL1:
+       case SYS_APDAKEYLO_EL1:
+       case SYS_APDAKEYHI_EL1:
+       case SYS_APDBKEYLO_EL1:
+       case SYS_APDBKEYHI_EL1:
+       case SYS_APGAKEYLO_EL1:
+       case SYS_APGAKEYHI_EL1:
+               return true;
+       }
+
+       return false;
+}
+
+#define __ptrauth_save_key(regs, key)                                          \
+({                                                                             \
+       regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);       \
+       regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);       \
+})
+
+static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpu_context *ctxt;
+       u64 val;
+
+       if (!vcpu_has_ptrauth(vcpu) ||
+           !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu)))
+               return false;
+
+       ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+       __ptrauth_save_key(ctxt->sys_regs, APIA);
+       __ptrauth_save_key(ctxt->sys_regs, APIB);
+       __ptrauth_save_key(ctxt->sys_regs, APDA);
+       __ptrauth_save_key(ctxt->sys_regs, APDB);
+       __ptrauth_save_key(ctxt->sys_regs, APGA);
+
+       vcpu_ptrauth_enable(vcpu);
+
+       val = read_sysreg(hcr_el2);
+       val |= (HCR_API | HCR_APK);
+       write_sysreg(val, hcr_el2);
+
+       return true;
+}
+
 /*
  * Return true when we were able to fixup the guest exit and should return to
  * the guest, false when we should restore the host state and return to the
@@ -524,6 +582,9 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
        if (__hyp_handle_fpsimd(vcpu))
                return true;
 
+       if (__hyp_handle_ptrauth(vcpu))
+               return true;
+
        if (!__populate_fault_info(vcpu))
                return true;
 
@@ -642,7 +703,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
        struct kvm_cpu_context *guest_ctxt;
        u64 exit_code;
 
-       host_ctxt = vcpu->arch.host_cpu_context;
+       host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
        host_ctxt->__hyp_running_vcpu = vcpu;
        guest_ctxt = &vcpu->arch.ctxt;
 
@@ -747,7 +808,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 
        vcpu = kern_hyp_va(vcpu);
 
-       host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+       host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
        host_ctxt->__hyp_running_vcpu = vcpu;
        guest_ctxt = &vcpu->arch.ctxt;
 
index ea5d22f..cc7e957 100644 (file)
@@ -39,7 +39,6 @@ static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
        ctxt->sys_regs[CSSELR_EL1]      = read_sysreg(csselr_el1);
        ctxt->sys_regs[SCTLR_EL1]       = read_sysreg_el1(SYS_SCTLR);
-       ctxt->sys_regs[ACTLR_EL1]       = read_sysreg(actlr_el1);
        ctxt->sys_regs[CPACR_EL1]       = read_sysreg_el1(SYS_CPACR);
        ctxt->sys_regs[TTBR0_EL1]       = read_sysreg_el1(SYS_TTBR0);
        ctxt->sys_regs[TTBR1_EL1]       = read_sysreg_el1(SYS_TTBR1);
@@ -123,7 +122,6 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
                isb();
        }
 
-       write_sysreg(ctxt->sys_regs[ACTLR_EL1],         actlr_el1);
        write_sysreg_el1(ctxt->sys_regs[CPACR_EL1],     SYS_CPACR);
        write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1],     SYS_TTBR0);
        write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1],     SYS_TTBR1);
@@ -267,12 +265,13 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
  */
 void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
 {
-       struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
        struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+       struct kvm_cpu_context *host_ctxt;
 
        if (!has_vhe())
                return;
 
+       host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
        __sysreg_save_user_state(host_ctxt);
 
        /*
@@ -303,12 +302,13 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
  */
 void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
 {
-       struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
        struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+       struct kvm_cpu_context *host_ctxt;
 
        if (!has_vhe())
                return;
 
+       host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
        deactivate_traps_vhe_put();
 
        __sysreg_save_el1_state(guest_ctxt);
index e71d00b..b5ae3a5 100644 (file)
@@ -163,15 +163,13 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
  */
 void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
 {
-       struct kvm_cpu_context *host_ctxt;
        struct kvm_host_data *host;
        u32 events_guest, events_host;
 
        if (!has_vhe())
                return;
 
-       host_ctxt = vcpu->arch.host_cpu_context;
-       host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+       host = this_cpu_ptr(&kvm_host_data);
        events_guest = host->pmu_events.events_guest;
        events_host = host->pmu_events.events_host;
 
@@ -184,15 +182,13 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
  */
 void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
 {
-       struct kvm_cpu_context *host_ctxt;
        struct kvm_host_data *host;
        u32 events_guest, events_host;
 
        if (!has_vhe())
                return;
 
-       host_ctxt = vcpu->arch.host_cpu_context;
-       host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+       host = this_cpu_ptr(&kvm_host_data);
        events_guest = host->pmu_events.events_guest;
        events_host = host->pmu_events.events_host;
 
index 8098543..baf5ce9 100644 (file)
@@ -78,7 +78,6 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
        switch (reg) {
        case CSSELR_EL1:        *val = read_sysreg_s(SYS_CSSELR_EL1);   break;
        case SCTLR_EL1:         *val = read_sysreg_s(SYS_SCTLR_EL12);   break;
-       case ACTLR_EL1:         *val = read_sysreg_s(SYS_ACTLR_EL1);    break;
        case CPACR_EL1:         *val = read_sysreg_s(SYS_CPACR_EL12);   break;
        case TTBR0_EL1:         *val = read_sysreg_s(SYS_TTBR0_EL12);   break;
        case TTBR1_EL1:         *val = read_sysreg_s(SYS_TTBR1_EL12);   break;
@@ -118,7 +117,6 @@ static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
        switch (reg) {
        case CSSELR_EL1:        write_sysreg_s(val, SYS_CSSELR_EL1);    break;
        case SCTLR_EL1:         write_sysreg_s(val, SYS_SCTLR_EL12);    break;
-       case ACTLR_EL1:         write_sysreg_s(val, SYS_ACTLR_EL1);     break;
        case CPACR_EL1:         write_sysreg_s(val, SYS_CPACR_EL12);    break;
        case TTBR0_EL1:         write_sysreg_s(val, SYS_TTBR0_EL12);    break;
        case TTBR1_EL1:         write_sysreg_s(val, SYS_TTBR1_EL12);    break;
@@ -1034,16 +1032,13 @@ static bool trap_ptrauth(struct kvm_vcpu *vcpu,
                         struct sys_reg_params *p,
                         const struct sys_reg_desc *rd)
 {
-       kvm_arm_vcpu_ptrauth_trap(vcpu);
-
        /*
-        * Return false for both cases as we never skip the trapped
-        * instruction:
-        *
-        * - Either we re-execute the same key register access instruction
-        *   after enabling ptrauth.
-        * - Or an UNDEF is injected as ptrauth is not supported/enabled.
+        * If we land here, that is because we didn't fixup the access on exit
+        * by allowing the PtrAuth sysregs. The only way this happens is when
+        * the guest does not have PtrAuth support enabled.
         */
+       kvm_inject_undefined(vcpu);
+
        return false;
 }
 
@@ -1319,10 +1314,16 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
                          const struct sys_reg_desc *r)
 {
+       int reg = r->reg;
+
+       /* See the 32bit mapping in kvm_host.h */
+       if (p->is_aarch32)
+               reg = r->reg / 2;
+
        if (p->is_write)
-               vcpu_write_sys_reg(vcpu, p->regval, r->reg);
+               vcpu_write_sys_reg(vcpu, p->regval, reg);
        else
-               p->regval = vcpu_read_sys_reg(vcpu, r->reg);
+               p->regval = vcpu_read_sys_reg(vcpu, reg);
        return true;
 }
 
index 9cb6b4c..aa9d356 100644 (file)
@@ -27,6 +27,14 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
                return ignore_write(vcpu, p);
 
        p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
+
+       if (p->is_aarch32) {
+               if (r->Op2 & 2)
+                       p->regval = upper_32_bits(p->regval);
+               else
+                       p->regval = lower_32_bits(p->regval);
+       }
+
        return true;
 }
 
@@ -47,6 +55,8 @@ static const struct sys_reg_desc genericv8_cp15_regs[] = {
        /* ACTLR */
        { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
          access_actlr },
+       { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b011),
+         access_actlr },
 };
 
 static struct kvm_sys_reg_target_table genericv8_target_table = {
index 62b0eb6..84eab0f 100644 (file)
@@ -216,8 +216,10 @@ static int __init mcf_pci_init(void)
 
        /* Keep a virtual mapping to IO/config space active */
        iospace = (unsigned long) ioremap(PCI_IO_PA, PCI_IO_SIZE);
-       if (iospace == 0)
+       if (iospace == 0) {
+               pci_free_host_bridge(bridge);
                return -ENODEV;
+       }
        pr_info("Coldfire: PCI IO/config window mapped to 0x%x\n",
                (u32) iospace);
 
index 27fa946..2b746f5 100644 (file)
@@ -48,7 +48,6 @@ CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_ROM=y
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_PLATRAM=y
-CONFIG_MTD_M25P80=y
 CONFIG_MTD_SPI_NOR=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
index a24cfe4..dcfb693 100644 (file)
@@ -42,7 +42,7 @@ static inline int _access_ok(unsigned long addr, unsigned long size)
        __put_user_asm(__pu_err, __pu_val, ptr, l);     \
        break;                                          \
     case 8:                                            \
-       memcpy(ptr, &__pu_val, sizeof (*(ptr))); \
+       memcpy((void __force *)ptr, &__pu_val, sizeof(*(ptr))); \
        break;                                          \
     default:                                           \
        __pu_err = __put_user_bad();                    \
@@ -60,7 +60,7 @@ extern int __put_user_bad(void);
  * aliasing issues.
  */
 
-#define __ptr(x) ((unsigned long *)(x))
+#define __ptr(x) ((unsigned long __user *)(x))
 
 #define __put_user_asm(err,x,ptr,bwl)                          \
        __asm__ ("move" #bwl " %0,%1"                           \
@@ -85,7 +85,7 @@ extern int __put_user_bad(void);
            u64 l;                                              \
            __typeof__(*(ptr)) t;                               \
        } __gu_val;                                             \
-       memcpy(&__gu_val.l, ptr, sizeof(__gu_val.l));           \
+       memcpy(&__gu_val.l, (const void __force *)ptr, sizeof(__gu_val.l)); \
        (x) = __gu_val.t;                                       \
        break;                                                  \
     }                                                          \
index eb1e86c..26c63e8 100644 (file)
@@ -1403,6 +1403,7 @@ config CPU_LOONGSON64
        select MIPS_L1_CACHE_SHIFT_6
        select GPIOLIB
        select SWIOTLB
+       select HAVE_KVM
        help
                The Loongson GSx64(GS264/GS464/GS464E/GS464V) series of processor
                cores implements the MIPS64R2 instruction set with many extensions,
index caecbae..724dfdd 100644 (file)
 #ifndef cpu_guest_has_htw
 #define cpu_guest_has_htw      (cpu_data[0].guest.options & MIPS_CPU_HTW)
 #endif
+#ifndef cpu_guest_has_ldpte
+#define cpu_guest_has_ldpte    (cpu_data[0].guest.options & MIPS_CPU_LDPTE)
+#endif
 #ifndef cpu_guest_has_mvh
 #define cpu_guest_has_mvh      (cpu_data[0].guest.options & MIPS_CPU_MVH)
 #endif
index e28b5a9..363e7a8 100644 (file)
@@ -23,6 +23,8 @@
 #include <asm/inst.h>
 #include <asm/mipsregs.h>
 
+#include <kvm/iodev.h>
+
 /* MIPS KVM register ids */
 #define MIPS_CP0_32(_R, _S)                                    \
        (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))
 #define KVM_REG_MIPS_CP0_CONFIG3       MIPS_CP0_32(16, 3)
 #define KVM_REG_MIPS_CP0_CONFIG4       MIPS_CP0_32(16, 4)
 #define KVM_REG_MIPS_CP0_CONFIG5       MIPS_CP0_32(16, 5)
+#define KVM_REG_MIPS_CP0_CONFIG6       MIPS_CP0_32(16, 6)
 #define KVM_REG_MIPS_CP0_CONFIG7       MIPS_CP0_32(16, 7)
 #define KVM_REG_MIPS_CP0_MAARI         MIPS_CP0_64(17, 2)
 #define KVM_REG_MIPS_CP0_XCONTEXT      MIPS_CP0_64(20, 0)
+#define KVM_REG_MIPS_CP0_DIAG          MIPS_CP0_32(22, 0)
 #define KVM_REG_MIPS_CP0_ERROREPC      MIPS_CP0_64(30, 0)
 #define KVM_REG_MIPS_CP0_KSCRATCH1     MIPS_CP0_64(31, 2)
 #define KVM_REG_MIPS_CP0_KSCRATCH2     MIPS_CP0_64(31, 3)
@@ -78,8 +82,8 @@
 #define KVM_REG_MIPS_CP0_KSCRATCH6     MIPS_CP0_64(31, 7)
 
 
-#define KVM_MAX_VCPUS          8
-#define KVM_USER_MEM_SLOTS     8
+#define KVM_MAX_VCPUS          16
+#define KVM_USER_MEM_SLOTS     16
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS  0
 
@@ -171,6 +175,9 @@ struct kvm_vcpu_stat {
        u64 vz_ghfc_exits;
        u64 vz_gpa_exits;
        u64 vz_resvd_exits;
+#ifdef CONFIG_CPU_LOONGSON64
+       u64 vz_cpucfg_exits;
+#endif
 #endif
        u64 halt_successful_poll;
        u64 halt_attempted_poll;
@@ -183,11 +190,39 @@ struct kvm_vcpu_stat {
 struct kvm_arch_memory_slot {
 };
 
+#ifdef CONFIG_CPU_LOONGSON64
+struct ipi_state {
+       uint32_t status;
+       uint32_t en;
+       uint32_t set;
+       uint32_t clear;
+       uint64_t buf[4];
+};
+
+struct loongson_kvm_ipi;
+
+struct ipi_io_device {
+       int node_id;
+       struct loongson_kvm_ipi *ipi;
+       struct kvm_io_device device;
+};
+
+struct loongson_kvm_ipi {
+       spinlock_t lock;
+       struct kvm *kvm;
+       struct ipi_state ipistate[16];
+       struct ipi_io_device dev_ipi[4];
+};
+#endif
+
 struct kvm_arch {
        /* Guest physical mm */
        struct mm_struct gpa_mm;
        /* Mask of CPUs needing GPA ASID flush */
        cpumask_t asid_flush_mask;
+#ifdef CONFIG_CPU_LOONGSON64
+       struct loongson_kvm_ipi ipi;
+#endif
 };
 
 #define N_MIPS_COPROC_REGS     32
@@ -225,6 +260,7 @@ struct mips_coproc {
 #define MIPS_CP0_WATCH_LO      18
 #define MIPS_CP0_WATCH_HI      19
 #define MIPS_CP0_TLB_XCONTEXT  20
+#define MIPS_CP0_DIAG          22
 #define MIPS_CP0_ECC           26
 #define MIPS_CP0_CACHE_ERR     27
 #define MIPS_CP0_TAG_LO                28
@@ -276,8 +312,12 @@ enum emulation_result {
 #define MIPS3_PG_SHIFT         6
 #define MIPS3_PG_FRAME         0x3fffffc0
 
+#if defined(CONFIG_64BIT)
+#define VPN2_MASK              GENMASK(cpu_vmbits - 1, 13)
+#else
 #define VPN2_MASK              0xffffe000
-#define KVM_ENTRYHI_ASID       MIPS_ENTRYHI_ASID
+#endif
+#define KVM_ENTRYHI_ASID       cpu_asid_mask(&boot_cpu_data)
 #define TLB_IS_GLOBAL(x)       ((x).tlb_lo[0] & (x).tlb_lo[1] & ENTRYLO_G)
 #define TLB_VPN2(x)            ((x).tlb_hi & VPN2_MASK)
 #define TLB_ASID(x)            ((x).tlb_hi & KVM_ENTRYHI_ASID)
@@ -892,6 +932,10 @@ void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index,
                          unsigned int count);
 void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
                          unsigned int count);
+#ifdef CONFIG_CPU_LOONGSON64
+void kvm_loongson_clear_guest_vtlb(void);
+void kvm_loongson_clear_guest_ftlb(void);
+#endif
 #endif
 
 void kvm_mips_suspend_mm(int cpu);
@@ -1131,6 +1175,8 @@ extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc,
 /* Misc */
 extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
+extern int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
+                            struct kvm_mips_interrupt *irq);
 
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
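On 64-bit, VPN2_MASK now depends on the probed number of virtual address bits instead of a fixed 32-bit constant. A standalone sketch of the mask arithmetic (GENMASK_ULL below mirrors the kernel macro; cpu_vmbits = 48 is only an assumed example value):

#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) \
	((~0ull << (l)) & (~0ull >> (63 - (h))))

int main(void)
{
	int cpu_vmbits = 48;	/* assumed: 48 implemented VA bits */
	uint64_t vpn2_mask = GENMASK_ULL(cpu_vmbits - 1, 13);

	/* Bits 13..47 set: the VPN2 field of EntryHi for this CPU. */
	printf("VPN2_MASK = 0x%016llx\n", (unsigned long long)vpn2_mask);
	return 0;
}

KVM_ENTRYHI_ASID is likewise taken from the boot CPU's ASID mask rather than the architectural MIPS_ENTRYHI_ASID constant.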
index 796dbb8..20d6d40 100644 (file)
 /* Disable Branch Return Cache */
 #define R10K_DIAG_D_BRC                (_ULCAST_(1) << 22)
 
+/* Flush BTB */
+#define LOONGSON_DIAG_BTB      (_ULCAST_(1) << 1)
 /* Flush ITLB */
 #define LOONGSON_DIAG_ITLB     (_ULCAST_(1) << 2)
 /* Flush DTLB */
@@ -2874,7 +2876,9 @@ __BUILD_SET_C0(status)
 __BUILD_SET_C0(cause)
 __BUILD_SET_C0(config)
 __BUILD_SET_C0(config5)
+__BUILD_SET_C0(config6)
 __BUILD_SET_C0(config7)
+__BUILD_SET_C0(diag)
 __BUILD_SET_C0(intcontrol)
 __BUILD_SET_C0(intctl)
 __BUILD_SET_C0(srsmap)
index 98f97c8..43d1faa 100644 (file)
@@ -1012,6 +1012,16 @@ struct loongson3_lsdc2_format {  /* Loongson-3 overridden ldc2/sdc2 Load/Store fo
        ;))))))
 };
 
+struct loongson3_lscsr_format {        /* Loongson-3 CPUCFG&CSR read/write format */
+       __BITFIELD_FIELD(unsigned int opcode : 6,
+       __BITFIELD_FIELD(unsigned int rs : 5,
+       __BITFIELD_FIELD(unsigned int fr : 5,
+       __BITFIELD_FIELD(unsigned int rd : 5,
+       __BITFIELD_FIELD(unsigned int fd : 5,
+       __BITFIELD_FIELD(unsigned int func : 6,
+       ;))))))
+};
+
 /*
  * MIPS16e instruction formats (16-bit length)
  */
@@ -1114,6 +1124,7 @@ union mips_instruction {
        struct mm16_r5_format mm16_r5_format;
        struct loongson3_lswc2_format loongson3_lswc2_format;
        struct loongson3_lsdc2_format loongson3_lsdc2_format;
+       struct loongson3_lscsr_format loongson3_lscsr_format;
 };
 
 union mips16e_instruction {
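The new loongson3_lscsr_format packs six fixed-width fields into one 32-bit word (6+5+5+5+5+6 bits, most significant first). As a standalone sketch, the same decode written with plain shifts and masks on a made-up instruction word:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t word = 0xc8a43118u;	/* hypothetical encoding, for illustration only */

	unsigned int opcode = (word >> 26) & 0x3f;
	unsigned int rs     = (word >> 21) & 0x1f;
	unsigned int fr     = (word >> 16) & 0x1f;
	unsigned int rd     = (word >> 11) & 0x1f;
	unsigned int fd     = (word >>  6) & 0x1f;
	unsigned int func   =  word        & 0x3f;

	printf("opcode=%#x rs=%u fr=%u rd=%u fd=%u func=%#x\n",
	       opcode, rs, fr, rd, fd, func);
	return 0;
}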
index 6b93162..def1659 100644 (file)
@@ -2017,8 +2017,10 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c)
        if (cfg2 & LOONGSON_CFG2_LEXT2)
                c->ases |= MIPS_ASE_LOONGSON_EXT2;
 
-       if (cfg2 & LOONGSON_CFG2_LSPW)
+       if (cfg2 & LOONGSON_CFG2_LSPW) {
                c->options |= MIPS_CPU_LDPTE;
+               c->guest.options |= MIPS_CPU_LDPTE;
+       }
 
        if (cfg3 & LOONGSON_CFG3_LCAMP)
                c->ases |= MIPS_ASE_LOONGSON_CAM;
@@ -2074,6 +2076,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
                c->writecombine = _CACHE_UNCACHED_ACCELERATED;
                c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM |
                        MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2);
+               c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */
                break;
        case PRID_IMP_LOONGSON_64G:
                c->cputype = CPU_LOONGSON64;
index b91d145..d697752 100644 (file)
@@ -22,6 +22,7 @@ config KVM
        select EXPORT_UASM
        select PREEMPT_NOTIFIERS
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+       select HAVE_KVM_EVENTFD
        select HAVE_KVM_VCPU_ASYNC_IOCTL
        select KVM_MMIO
        select MMU_NOTIFIER
index 01affc1..506c4ac 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o)
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
 
@@ -13,6 +13,9 @@ kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
            fpu.o
 kvm-objs += hypcall.o
 kvm-objs += mmu.o
+ifdef CONFIG_CPU_LOONGSON64
+kvm-objs += loongson_ipi.o
+endif
 
 ifdef CONFIG_KVM_MIPS_VZ
 kvm-objs               += vz.o
index 7ccf9b0..5ae82d9 100644 (file)
@@ -1600,9 +1600,11 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
                                             struct kvm_run *run,
                                             struct kvm_vcpu *vcpu)
 {
+       int r;
        enum emulation_result er;
        u32 rt;
        void *data = run->mmio.data;
+       unsigned int imme;
        unsigned long curr_pc;
 
        /*
@@ -1660,15 +1662,229 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
                          vcpu->arch.gprs[rt], *(u8 *)data);
                break;
 
+       case swl_op:
+               run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+                                       vcpu->arch.host_cp0_badvaddr) & (~0x3);
+               run->mmio.len = 4;
+               imme = vcpu->arch.host_cp0_badvaddr & 0x3;
+               switch (imme) {
+               case 0:
+                       *(u32 *)data = ((*(u32 *)data) & 0xffffff00) |
+                                       (vcpu->arch.gprs[rt] >> 24);
+                       break;
+               case 1:
+                       *(u32 *)data = ((*(u32 *)data) & 0xffff0000) |
+                                       (vcpu->arch.gprs[rt] >> 16);
+                       break;
+               case 2:
+                       *(u32 *)data = ((*(u32 *)data) & 0xff000000) |
+                                       (vcpu->arch.gprs[rt] >> 8);
+                       break;
+               case 3:
+                       *(u32 *)data = vcpu->arch.gprs[rt];
+                       break;
+               default:
+                       break;
+               }
+
+               kvm_debug("[%#lx] OP_SWL: eaddr: %#lx, gpr: %#lx, data: %#x\n",
+                         vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+                         vcpu->arch.gprs[rt], *(u32 *)data);
+               break;
+
+       case swr_op:
+               run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+                                       vcpu->arch.host_cp0_badvaddr) & (~0x3);
+               run->mmio.len = 4;
+               imme = vcpu->arch.host_cp0_badvaddr & 0x3;
+               switch (imme) {
+               case 0:
+                       *(u32 *)data = vcpu->arch.gprs[rt];
+                       break;
+               case 1:
+                       *(u32 *)data = ((*(u32 *)data) & 0xff) |
+                                       (vcpu->arch.gprs[rt] << 8);
+                       break;
+               case 2:
+                       *(u32 *)data = ((*(u32 *)data) & 0xffff) |
+                                       (vcpu->arch.gprs[rt] << 16);
+                       break;
+               case 3:
+                       *(u32 *)data = ((*(u32 *)data) & 0xffffff) |
+                                       (vcpu->arch.gprs[rt] << 24);
+                       break;
+               default:
+                       break;
+               }
+
+               kvm_debug("[%#lx] OP_SWR: eaddr: %#lx, gpr: %#lx, data: %#x\n",
+                         vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+                         vcpu->arch.gprs[rt], *(u32 *)data);
+               break;
+
+       case sdl_op:
+               run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+                                       vcpu->arch.host_cp0_badvaddr) & (~0x7);
+
+               run->mmio.len = 8;
+               imme = vcpu->arch.host_cp0_badvaddr & 0x7;
+               switch (imme) {
+               case 0:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffffffffffff00) |
+                                       ((vcpu->arch.gprs[rt] >> 56) & 0xff);
+                       break;
+               case 1:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffffffffff0000) |
+                                       ((vcpu->arch.gprs[rt] >> 48) & 0xffff);
+                       break;
+               case 2:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffffffff000000) |
+                                       ((vcpu->arch.gprs[rt] >> 40) & 0xffffff);
+                       break;
+               case 3:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffffff00000000) |
+                                       ((vcpu->arch.gprs[rt] >> 32) & 0xffffffff);
+                       break;
+               case 4:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffff0000000000) |
+                                       ((vcpu->arch.gprs[rt] >> 24) & 0xffffffffff);
+                       break;
+               case 5:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffff000000000000) |
+                                       ((vcpu->arch.gprs[rt] >> 16) & 0xffffffffffff);
+                       break;
+               case 6:
+                       *(u64 *)data = ((*(u64 *)data) & 0xff00000000000000) |
+                                       ((vcpu->arch.gprs[rt] >> 8) & 0xffffffffffffff);
+                       break;
+               case 7:
+                       *(u64 *)data = vcpu->arch.gprs[rt];
+                       break;
+               default:
+                       break;
+               }
+
+               kvm_debug("[%#lx] OP_SDL: eaddr: %#lx, gpr: %#lx, data: %llx\n",
+                         vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+                         vcpu->arch.gprs[rt], *(u64 *)data);
+               break;
+
+       case sdr_op:
+               run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+                                       vcpu->arch.host_cp0_badvaddr) & (~0x7);
+
+               run->mmio.len = 8;
+               imme = vcpu->arch.host_cp0_badvaddr & 0x7;
+               switch (imme) {
+               case 0:
+                       *(u64 *)data = vcpu->arch.gprs[rt];
+                       break;
+               case 1:
+                       *(u64 *)data = ((*(u64 *)data) & 0xff) |
+                                       (vcpu->arch.gprs[rt] << 8);
+                       break;
+               case 2:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffff) |
+                                       (vcpu->arch.gprs[rt] << 16);
+                       break;
+               case 3:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffff) |
+                                       (vcpu->arch.gprs[rt] << 24);
+                       break;
+               case 4:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffffff) |
+                                       (vcpu->arch.gprs[rt] << 32);
+                       break;
+               case 5:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffffffff) |
+                                       (vcpu->arch.gprs[rt] << 40);
+                       break;
+               case 6:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffffffffff) |
+                                       (vcpu->arch.gprs[rt] << 48);
+                       break;
+               case 7:
+                       *(u64 *)data = ((*(u64 *)data) & 0xffffffffffffff) |
+                                       (vcpu->arch.gprs[rt] << 56);
+                       break;
+               default:
+                       break;
+               }
+
+               kvm_debug("[%#lx] OP_SDR: eaddr: %#lx, gpr: %#lx, data: %llx\n",
+                         vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+                         vcpu->arch.gprs[rt], *(u64 *)data);
+               break;
+
+#ifdef CONFIG_CPU_LOONGSON64
+       case sdc2_op:
+               rt = inst.loongson3_lsdc2_format.rt;
+               switch (inst.loongson3_lsdc2_format.opcode1) {
+               /*
+                * Loongson-3 overridden sdc2 instructions.
+                * opcode1              instruction
+                *   0x0          gssbx: store 1 byte from GPR
+                *   0x1          gsshx: store 2 bytes from GPR
+                *   0x2          gsswx: store 4 bytes from GPR
+                *   0x3          gssdx: store 8 bytes from GPR
+                */
+               case 0x0:
+                       run->mmio.len = 1;
+                       *(u8 *)data = vcpu->arch.gprs[rt];
+
+                       kvm_debug("[%#lx] OP_GSSBX: eaddr: %#lx, gpr: %#lx, data: %#x\n",
+                                 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+                                 vcpu->arch.gprs[rt], *(u8 *)data);
+                       break;
+               case 0x1:
+                       run->mmio.len = 2;
+                       *(u16 *)data = vcpu->arch.gprs[rt];
+
+                       kvm_debug("[%#lx] OP_GSSHX: eaddr: %#lx, gpr: %#lx, data: %#x\n",
+                                 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+                                 vcpu->arch.gprs[rt], *(u16 *)data);
+                       break;
+               case 0x2:
+                       run->mmio.len = 4;
+                       *(u32 *)data = vcpu->arch.gprs[rt];
+
+                       kvm_debug("[%#lx] OP_GSSWX: eaddr: %#lx, gpr: %#lx, data: %#x\n",
+                                 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+                                 vcpu->arch.gprs[rt], *(u32 *)data);
+                       break;
+               case 0x3:
+                       run->mmio.len = 8;
+                       *(u64 *)data = vcpu->arch.gprs[rt];
+
+                       kvm_debug("[%#lx] OP_GSSDX: eaddr: %#lx, gpr: %#lx, data: %#llx\n",
+                                 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+                                 vcpu->arch.gprs[rt], *(u64 *)data);
+                       break;
+               default:
+                       kvm_err("Godson Extended GS-Store not yet supported (inst=0x%08x)\n",
+                               inst.word);
+                       break;
+               }
+               break;
+#endif
        default:
                kvm_err("Store not yet supported (inst=0x%08x)\n",
                        inst.word);
                goto out_fail;
        }
 
-       run->mmio.is_write = 1;
        vcpu->mmio_needed = 1;
+       run->mmio.is_write = 1;
        vcpu->mmio_is_write = 1;
+
+       r = kvm_io_bus_write(vcpu, KVM_MMIO_BUS,
+                       run->mmio.phys_addr, run->mmio.len, data);
+
+       if (!r) {
+               vcpu->mmio_needed = 0;
+               return EMULATE_DONE;
+       }
+
        return EMULATE_DO_MMIO;
 
 out_fail:
@@ -1681,9 +1897,11 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
                                            u32 cause, struct kvm_run *run,
                                            struct kvm_vcpu *vcpu)
 {
+       int r;
        enum emulation_result er;
        unsigned long curr_pc;
        u32 op, rt;
+       unsigned int imme;
 
        rt = inst.i_format.rt;
        op = inst.i_format.opcode;
@@ -1736,6 +1954,162 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
                run->mmio.len = 1;
                break;
 
+       case lwl_op:
+               run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+                                       vcpu->arch.host_cp0_badvaddr) & (~0x3);
+
+               run->mmio.len = 4;
+               imme = vcpu->arch.host_cp0_badvaddr & 0x3;
+               switch (imme) {
+               case 0:
+                       vcpu->mmio_needed = 3;  /* 1 byte */
+                       break;
+               case 1:
+                       vcpu->mmio_needed = 4;  /* 2 bytes */
+                       break;
+               case 2:
+                       vcpu->mmio_needed = 5;  /* 3 bytes */
+                       break;
+               case 3:
+                       vcpu->mmio_needed = 6;  /* 4 bytes */
+                       break;
+               default:
+                       break;
+               }
+               break;
+
+       case lwr_op:
+               run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+                                       vcpu->arch.host_cp0_badvaddr) & (~0x3);
+
+               run->mmio.len = 4;
+               imme = vcpu->arch.host_cp0_badvaddr & 0x3;
+               switch (imme) {
+               case 0:
+                       vcpu->mmio_needed = 7;  /* 4 bytes */
+                       break;
+               case 1:
+                       vcpu->mmio_needed = 8;  /* 3 bytes */
+                       break;
+               case 2:
+                       vcpu->mmio_needed = 9;  /* 2 bytes */
+                       break;
+               case 3:
+                       vcpu->mmio_needed = 10; /* 1 byte */
+                       break;
+               default:
+                       break;
+               }
+               break;
+
+       case ldl_op:
+               run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+                                       vcpu->arch.host_cp0_badvaddr) & (~0x7);
+
+               run->mmio.len = 8;
+               imme = vcpu->arch.host_cp0_badvaddr & 0x7;
+               switch (imme) {
+               case 0:
+                       vcpu->mmio_needed = 11; /* 1 byte */
+                       break;
+               case 1:
+                       vcpu->mmio_needed = 12; /* 2 bytes */
+                       break;
+               case 2:
+                       vcpu->mmio_needed = 13; /* 3 bytes */
+                       break;
+               case 3:
+                       vcpu->mmio_needed = 14; /* 4 bytes */
+                       break;
+               case 4:
+                       vcpu->mmio_needed = 15; /* 5 bytes */
+                       break;
+               case 5:
+                       vcpu->mmio_needed = 16; /* 6 bytes */
+                       break;
+               case 6:
+                       vcpu->mmio_needed = 17; /* 7 bytes */
+                       break;
+               case 7:
+                       vcpu->mmio_needed = 18; /* 8 bytes */
+                       break;
+               default:
+                       break;
+               }
+               break;
+
+       case ldr_op:
+               run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+                                       vcpu->arch.host_cp0_badvaddr) & (~0x7);
+
+               run->mmio.len = 8;
+               imme = vcpu->arch.host_cp0_badvaddr & 0x7;
+               switch (imme) {
+               case 0:
+                       vcpu->mmio_needed = 19; /* 8 bytes */
+                       break;
+               case 1:
+                       vcpu->mmio_needed = 20; /* 7 bytes */
+                       break;
+               case 2:
+                       vcpu->mmio_needed = 21; /* 6 bytes */
+                       break;
+               case 3:
+                       vcpu->mmio_needed = 22; /* 5 bytes */
+                       break;
+               case 4:
+                       vcpu->mmio_needed = 23; /* 4 bytes */
+                       break;
+               case 5:
+                       vcpu->mmio_needed = 24; /* 3 bytes */
+                       break;
+               case 6:
+                       vcpu->mmio_needed = 25; /* 2 bytes */
+                       break;
+               case 7:
+                       vcpu->mmio_needed = 26; /* 1 byte */
+                       break;
+               default:
+                       break;
+               }
+               break;
+
+#ifdef CONFIG_CPU_LOONGSON64
+       case ldc2_op:
+               rt = inst.loongson3_lsdc2_format.rt;
+               switch (inst.loongson3_lsdc2_format.opcode1) {
+               /*
+                * Loongson-3 overridden ldc2 instructions.
+                * opcode1              instruction
+                *   0x0          gslbx: load 1 byte to GPR
+                *   0x1          gslhx: load 2 bytes to GPR
+                *   0x2          gslwx: load 4 bytes to GPR
+                *   0x3          gsldx: load 8 bytes to GPR
+                */
+               case 0x0:
+                       run->mmio.len = 1;
+                       vcpu->mmio_needed = 27; /* signed */
+                       break;
+               case 0x1:
+                       run->mmio.len = 2;
+                       vcpu->mmio_needed = 28; /* signed */
+                       break;
+               case 0x2:
+                       run->mmio.len = 4;
+                       vcpu->mmio_needed = 29; /* signed */
+                       break;
+               case 0x3:
+                       run->mmio.len = 8;
+                       vcpu->mmio_needed = 30; /* signed */
+                       break;
+               default:
+                       kvm_err("Godson Extended GS-Load for float not yet supported (inst=0x%08x)\n",
+                               inst.word);
+                       break;
+               }
+               break;
+#endif
+
        default:
                kvm_err("Load not yet supported (inst=0x%08x)\n",
                        inst.word);
@@ -1745,6 +2119,16 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
 
        run->mmio.is_write = 0;
        vcpu->mmio_is_write = 0;
+
+       r = kvm_io_bus_read(vcpu, KVM_MMIO_BUS,
+                       run->mmio.phys_addr, run->mmio.len, run->mmio.data);
+
+       if (!r) {
+               kvm_mips_complete_mmio_load(vcpu, run);
+               vcpu->mmio_needed = 0;
+               return EMULATE_DONE;
+       }
+
        return EMULATE_DO_MMIO;
 }
 
@@ -2591,28 +2975,125 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 
        switch (run->mmio.len) {
        case 8:
-               *gpr = *(s64 *)run->mmio.data;
+               switch (vcpu->mmio_needed) {
+               case 11:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffffff) |
+                               (((*(s64 *)run->mmio.data) & 0xff) << 56);
+                       break;
+               case 12:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffff) |
+                               (((*(s64 *)run->mmio.data) & 0xffff) << 48);
+                       break;
+               case 13:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffff) |
+                               (((*(s64 *)run->mmio.data) & 0xffffff) << 40);
+                       break;
+               case 14:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffff) |
+                               (((*(s64 *)run->mmio.data) & 0xffffffff) << 32);
+                       break;
+               case 15:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff) |
+                               (((*(s64 *)run->mmio.data) & 0xffffffffff) << 24);
+                       break;
+               case 16:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff) |
+                               (((*(s64 *)run->mmio.data) & 0xffffffffffff) << 16);
+                       break;
+               case 17:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff) |
+                               (((*(s64 *)run->mmio.data) & 0xffffffffffffff) << 8);
+                       break;
+               case 18:
+               case 19:
+                       *gpr = *(s64 *)run->mmio.data;
+                       break;
+               case 20:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff00000000000000) |
+                               ((((*(s64 *)run->mmio.data)) >> 8) & 0xffffffffffffff);
+                       break;
+               case 21:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff000000000000) |
+                               ((((*(s64 *)run->mmio.data)) >> 16) & 0xffffffffffff);
+                       break;
+               case 22:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff0000000000) |
+                               ((((*(s64 *)run->mmio.data)) >> 24) & 0xffffffffff);
+                       break;
+               case 23:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffff00000000) |
+                               ((((*(s64 *)run->mmio.data)) >> 32) & 0xffffffff);
+                       break;
+               case 24:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffff000000) |
+                               ((((*(s64 *)run->mmio.data)) >> 40) & 0xffffff);
+                       break;
+               case 25:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffff0000) |
+                               ((((*(s64 *)run->mmio.data)) >> 48) & 0xffff);
+                       break;
+               case 26:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffffffffffff00) |
+                               ((((*(s64 *)run->mmio.data)) >> 56) & 0xff);
+                       break;
+               default:
+                       *gpr = *(s64 *)run->mmio.data;
+               }
                break;
 
        case 4:
-               if (vcpu->mmio_needed == 2)
-                       *gpr = *(s32 *)run->mmio.data;
-               else
+               switch (vcpu->mmio_needed) {
+               case 1:
                        *gpr = *(u32 *)run->mmio.data;
+                       break;
+               case 2:
+                       *gpr = *(s32 *)run->mmio.data;
+                       break;
+               case 3:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff) |
+                               (((*(s32 *)run->mmio.data) & 0xff) << 24);
+                       break;
+               case 4:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff) |
+                               (((*(s32 *)run->mmio.data) & 0xffff) << 16);
+                       break;
+               case 5:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff) |
+                               (((*(s32 *)run->mmio.data) & 0xffffff) << 8);
+                       break;
+               case 6:
+               case 7:
+                       *gpr = *(s32 *)run->mmio.data;
+                       break;
+               case 8:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xff000000) |
+                               ((((*(s32 *)run->mmio.data)) >> 8) & 0xffffff);
+                       break;
+               case 9:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffff0000) |
+                               ((((*(s32 *)run->mmio.data)) >> 16) & 0xffff);
+                       break;
+               case 10:
+                       *gpr = (vcpu->arch.gprs[vcpu->arch.io_gpr] & 0xffffff00) |
+                               ((((*(s32 *)run->mmio.data)) >> 24) & 0xff);
+                       break;
+               default:
+                       *gpr = *(s32 *)run->mmio.data;
+               }
                break;
 
        case 2:
-               if (vcpu->mmio_needed == 2)
-                       *gpr = *(s16 *) run->mmio.data;
-               else
+               if (vcpu->mmio_needed == 1)
                        *gpr = *(u16 *)run->mmio.data;
+               else
+                       *gpr = *(s16 *)run->mmio.data;
 
                break;
        case 1:
-               if (vcpu->mmio_needed == 2)
-                       *gpr = *(s8 *) run->mmio.data;
+               if (vcpu->mmio_needed == 1)
+                       *gpr = *(u8 *)run->mmio.data;
                else
-                       *gpr = *(u8 *) run->mmio.data;
+                       *gpr = *(s8 *)run->mmio.data;
                break;
        }
 
index 16e1c93..fd71694 100644 (file)
@@ -56,6 +56,7 @@
 #define C0_BADVADDR    8, 0
 #define C0_BADINSTR    8, 1
 #define C0_BADINSTRP   8, 2
+#define C0_PGD         9, 7
 #define C0_ENTRYHI     10, 0
 #define C0_GUESTCTL1   10, 4
 #define C0_STATUS      12, 0
@@ -307,7 +308,10 @@ static void *kvm_mips_build_enter_guest(void *addr)
 
 #ifdef CONFIG_KVM_MIPS_VZ
        /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */
-       UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg);
+       if (cpu_has_ldpte)
+               UASM_i_MFC0(&p, K0, C0_PWBASE);
+       else
+               UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg);
        UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1);
 
        /*
@@ -469,8 +473,10 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler)
        u32 *p = addr;
        struct uasm_label labels[2];
        struct uasm_reloc relocs[2];
+#ifndef CONFIG_CPU_LOONGSON64
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
+#endif
 
        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
@@ -490,6 +496,16 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler)
         */
        preempt_disable();
 
+#ifdef CONFIG_CPU_LOONGSON64
+       UASM_i_MFC0(&p, K1, C0_PGD);
+       uasm_i_lddir(&p, K0, K1, 3);  /* global page dir */
+#ifndef __PAGETABLE_PMD_FOLDED
+       uasm_i_lddir(&p, K1, K0, 1);  /* middle page dir */
+#endif
+       uasm_i_ldpte(&p, K1, 0);      /* even */
+       uasm_i_ldpte(&p, K1, 1);      /* odd */
+       uasm_i_tlbwr(&p);
+#else
        /*
         * Now for the actual refill bit. A lot of this can be common with the
         * Linux TLB refill handler, however we don't need to handle so many
@@ -512,6 +528,7 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler)
        build_get_ptep(&p, K0, K1);
        build_update_entries(&p, K0, K1);
        build_tlb_write_entry(&p, &l, &r, tlb_random);
+#endif
 
        preempt_enable();
 
index 7257e8b..d28c2c9 100644 (file)
@@ -61,27 +61,8 @@ void kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu,
         * the EXC code will be set when we are actually
         * delivering the interrupt:
         */
-       switch (intr) {
-       case 2:
-               kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ0));
-               /* Queue up an INT exception for the core */
-               kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IO);
-               break;
-
-       case 3:
-               kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ1));
-               kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IPI_1);
-               break;
-
-       case 4:
-               kvm_set_c0_guest_cause(vcpu->arch.cop0, (C_IRQ2));
-               kvm_mips_queue_irq(vcpu, MIPS_EXC_INT_IPI_2);
-               break;
-
-       default:
-               break;
-       }
-
+       kvm_set_c0_guest_cause(vcpu->arch.cop0, 1 << (intr + 8));
+       kvm_mips_queue_irq(vcpu, kvm_irq_to_priority(intr));
 }
 
 void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
@@ -89,26 +70,8 @@ void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
 {
        int intr = (int)irq->irq;
 
-       switch (intr) {
-       case -2:
-               kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ0));
-               kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IO);
-               break;
-
-       case -3:
-               kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ1));
-               kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1);
-               break;
-
-       case -4:
-               kvm_clear_c0_guest_cause(vcpu->arch.cop0, (C_IRQ2));
-               kvm_mips_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2);
-               break;
-
-       default:
-               break;
-       }
-
+       kvm_clear_c0_guest_cause(vcpu->arch.cop0, 1 << (-intr + 8));
+       kvm_mips_dequeue_irq(vcpu, kvm_irq_to_priority(-intr));
 }
 
 /* Deliver the interrupt of the corresponding priority, if possible. */
@@ -116,50 +79,20 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                            u32 cause)
 {
        int allowed = 0;
-       u32 exccode;
+       u32 exccode, ie;
 
        struct kvm_vcpu_arch *arch = &vcpu->arch;
        struct mips_coproc *cop0 = vcpu->arch.cop0;
 
-       switch (priority) {
-       case MIPS_EXC_INT_TIMER:
-               if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
-                   && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
-                   && (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) {
-                       allowed = 1;
-                       exccode = EXCCODE_INT;
-               }
-               break;
-
-       case MIPS_EXC_INT_IO:
-               if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
-                   && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
-                   && (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) {
-                       allowed = 1;
-                       exccode = EXCCODE_INT;
-               }
-               break;
-
-       case MIPS_EXC_INT_IPI_1:
-               if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
-                   && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
-                   && (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) {
-                       allowed = 1;
-                       exccode = EXCCODE_INT;
-               }
-               break;
-
-       case MIPS_EXC_INT_IPI_2:
-               if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
-                   && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
-                   && (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) {
-                       allowed = 1;
-                       exccode = EXCCODE_INT;
-               }
-               break;
+       if (priority == MIPS_EXC_MAX)
+               return 0;
 
-       default:
-               break;
+       ie = 1 << (kvm_priority_to_irq[priority] + 8);
+       if ((kvm_read_c0_guest_status(cop0) & ST0_IE)
+           && (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
+           && (kvm_read_c0_guest_status(cop0) & ie)) {
+               allowed = 1;
+               exccode = EXCCODE_INT;
        }
 
        /* Are we allowed to deliver the interrupt ??? */
index 3bf0a49..c3e878c 100644 (file)
 #define MIPS_EXC_NMI                5
 #define MIPS_EXC_MCHK               6
 #define MIPS_EXC_INT_TIMER          7
-#define MIPS_EXC_INT_IO             8
-#define MIPS_EXC_EXECUTE            9
-#define MIPS_EXC_INT_IPI_1          10
-#define MIPS_EXC_INT_IPI_2          11
-#define MIPS_EXC_MAX                12
+#define MIPS_EXC_INT_IO_1           8
+#define MIPS_EXC_INT_IO_2           9
+#define MIPS_EXC_EXECUTE            10
+#define MIPS_EXC_INT_IPI_1          11
+#define MIPS_EXC_INT_IPI_2          12
+#define MIPS_EXC_MAX                13
 /* XXXSL More to follow */
 
 #define C_TI        (_ULCAST_(1) << 30)
@@ -38,6 +39,9 @@
 #define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE   (0)
 #endif
 
+extern u32 *kvm_priority_to_irq;
+u32 kvm_irq_to_priority(u32 irq);
+
 void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
 void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
 int kvm_mips_pending_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/mips/kvm/loongson_ipi.c b/arch/mips/kvm/loongson_ipi.c
new file mode 100644 (file)
index 0000000..3681fc8
--- /dev/null
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Loongson-3 Virtual IPI interrupt support.
+ *
+ * Copyright (C) 2019  Loongson Technologies, Inc.  All rights reserved.
+ *
+ * Authors: Chen Zhu <zhuchen@loongson.cn>
+ * Authors: Huacai Chen <chenhc@lemote.com>
+ */
+
+#include <linux/kvm_host.h>
+
+#define IPI_BASE            0x3ff01000ULL
+
+#define CORE0_STATUS_OFF       0x000
+#define CORE0_EN_OFF           0x004
+#define CORE0_SET_OFF          0x008
+#define CORE0_CLEAR_OFF        0x00c
+#define CORE0_BUF_20           0x020
+#define CORE0_BUF_28           0x028
+#define CORE0_BUF_30           0x030
+#define CORE0_BUF_38           0x038
+
+#define CORE1_STATUS_OFF       0x100
+#define CORE1_EN_OFF           0x104
+#define CORE1_SET_OFF          0x108
+#define CORE1_CLEAR_OFF        0x10c
+#define CORE1_BUF_20           0x120
+#define CORE1_BUF_28           0x128
+#define CORE1_BUF_30           0x130
+#define CORE1_BUF_38           0x138
+
+#define CORE2_STATUS_OFF       0x200
+#define CORE2_EN_OFF           0x204
+#define CORE2_SET_OFF          0x208
+#define CORE2_CLEAR_OFF        0x20c
+#define CORE2_BUF_20           0x220
+#define CORE2_BUF_28           0x228
+#define CORE2_BUF_30           0x230
+#define CORE2_BUF_38           0x238
+
+#define CORE3_STATUS_OFF       0x300
+#define CORE3_EN_OFF           0x304
+#define CORE3_SET_OFF          0x308
+#define CORE3_CLEAR_OFF        0x30c
+#define CORE3_BUF_20           0x320
+#define CORE3_BUF_28           0x328
+#define CORE3_BUF_30           0x330
+#define CORE3_BUF_38           0x338
+
+static int loongson_vipi_read(struct loongson_kvm_ipi *ipi,
+                               gpa_t addr, int len, void *val)
+{
+       uint32_t core = (addr >> 8) & 3;
+       uint32_t node = (addr >> 44) & 3;
+       uint32_t id = core + node * 4;
+       uint64_t offset = addr & 0xff;
+       void *pbuf;
+       struct ipi_state *s = &(ipi->ipistate[id]);
+
+       BUG_ON(offset & (len - 1));
+
+       switch (offset) {
+       case CORE0_STATUS_OFF:
+               *(uint64_t *)val = s->status;
+               break;
+
+       case CORE0_EN_OFF:
+               *(uint64_t *)val = s->en;
+               break;
+
+       case CORE0_SET_OFF:
+               *(uint64_t *)val = 0;
+               break;
+
+       case CORE0_CLEAR_OFF:
+               *(uint64_t *)val = 0;
+               break;
+
+       case CORE0_BUF_20 ... CORE0_BUF_38:
+               pbuf = (void *)s->buf + (offset - 0x20);
+               if (len == 8)
+                       *(uint64_t *)val = *(uint64_t *)pbuf;
+               else /* Assume len == 4 */
+                       *(uint32_t *)val = *(uint32_t *)pbuf;
+               break;
+
+       default:
+               pr_notice("%s with unknown addr %llx\n", __func__, addr);
+               break;
+       }
+
+       return 0;
+}
+
+static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
+                               gpa_t addr, int len, const void *val)
+{
+       uint32_t core = (addr >> 8) & 3;
+       uint32_t node = (addr >> 44) & 3;
+       uint32_t id = core + node * 4;
+       uint64_t data, offset = addr & 0xff;
+       void *pbuf;
+       struct kvm *kvm = ipi->kvm;
+       struct kvm_mips_interrupt irq;
+       struct ipi_state *s = &(ipi->ipistate[id]);
+
+       data = *(uint64_t *)val;
+       BUG_ON(offset & (len - 1));
+
+       switch (offset) {
+       case CORE0_STATUS_OFF:
+               break;
+
+       case CORE0_EN_OFF:
+               s->en = data;
+               break;
+
+       case CORE0_SET_OFF:
+               s->status |= data;
+               irq.cpu = id;
+               irq.irq = 6;
+               kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
+               break;
+
+       case CORE0_CLEAR_OFF:
+               s->status &= ~data;
+               if (!s->status) {
+                       irq.cpu = id;
+                       irq.irq = -6;
+                       kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
+               }
+               break;
+
+       case CORE0_BUF_20 ... CORE0_BUF_38:
+               pbuf = (void *)s->buf + (offset - 0x20);
+               if (len == 8)
+                       *(uint64_t *)pbuf = (uint64_t)data;
+               else /* Assume len == 4 */
+                       *(uint32_t *)pbuf = (uint32_t)data;
+               break;
+
+       default:
+               pr_notice("%s with unknown addr %llx\n", __func__, addr);
+               break;
+       }
+
+       return 0;
+}
+
+static int kvm_ipi_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+                       gpa_t addr, int len, void *val)
+{
+       unsigned long flags;
+       struct loongson_kvm_ipi *ipi;
+       struct ipi_io_device *ipi_device;
+
+       ipi_device = container_of(dev, struct ipi_io_device, device);
+       ipi = ipi_device->ipi;
+
+       spin_lock_irqsave(&ipi->lock, flags);
+       loongson_vipi_read(ipi, addr, len, val);
+       spin_unlock_irqrestore(&ipi->lock, flags);
+
+       return 0;
+}
+
+static int kvm_ipi_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+                       gpa_t addr, int len, const void *val)
+{
+       unsigned long flags;
+       struct loongson_kvm_ipi *ipi;
+       struct ipi_io_device *ipi_device;
+
+       ipi_device = container_of(dev, struct ipi_io_device, device);
+       ipi = ipi_device->ipi;
+
+       spin_lock_irqsave(&ipi->lock, flags);
+       loongson_vipi_write(ipi, addr, len, val);
+       spin_unlock_irqrestore(&ipi->lock, flags);
+
+       return 0;
+}
+
+static const struct kvm_io_device_ops kvm_ipi_ops = {
+       .read     = kvm_ipi_read,
+       .write    = kvm_ipi_write,
+};
+
+void kvm_init_loongson_ipi(struct kvm *kvm)
+{
+       int i;
+       unsigned long addr;
+       struct loongson_kvm_ipi *s;
+       struct kvm_io_device *device;
+
+       s = &kvm->arch.ipi;
+       s->kvm = kvm;
+       spin_lock_init(&s->lock);
+
+       /*
+        * Initialize IPI device
+        */
+       for (i = 0; i < 4; i++) {
+               device = &s->dev_ipi[i].device;
+               kvm_iodevice_init(device, &kvm_ipi_ops);
+               addr = (((unsigned long)i) << 44) + IPI_BASE;
+               mutex_lock(&kvm->slots_lock);
+               kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr, 0x400, device);
+               mutex_unlock(&kvm->slots_lock);
+               s->dev_ipi[i].ipi = s;
+               s->dev_ipi[i].node_id = i;
+       }
+}
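Each node's IPI block is registered as a 0x400-byte window at (node << 44) + IPI_BASE, and the read/write handlers recover the target vcpu and register from the faulting address. A standalone sketch of that decode with an example address (values made up, not kernel code):

#include <stdint.h>
#include <stdio.h>

#define IPI_BASE 0x3ff01000ull

int main(void)
{
	uint64_t node = 1, core = 2;	/* example target: node 1, core 2 */
	uint64_t addr = (node << 44) + IPI_BASE + (core << 8) + 0x08 /* SET */;

	unsigned int dec_core = (addr >> 8) & 3;
	unsigned int dec_node = (addr >> 44) & 3;
	unsigned int vcpu_id  = dec_core + dec_node * 4;	/* 4 cores per node */
	uint64_t offset       = addr & 0xff;			/* per-core register */

	printf("addr %#llx -> vcpu %u, per-core offset %#llx\n",
	       (unsigned long long)addr, vcpu_id, (unsigned long long)offset);
	return 0;
}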
index 2416fa4..521bd58 100644 (file)
@@ -67,6 +67,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT("vz_ghfc", vz_ghfc_exits),
        VCPU_STAT("vz_gpa", vz_gpa_exits),
        VCPU_STAT("vz_resvd", vz_resvd_exits),
+       VCPU_STAT("vz_cpucfg", vz_cpucfg_exits),
 #endif
        VCPU_STAT("halt_successful_poll", halt_successful_poll),
        VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
@@ -129,6 +130,8 @@ int kvm_arch_check_processor_compat(void *opaque)
        return 0;
 }
 
+extern void kvm_init_loongson_ipi(struct kvm *kvm);
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
        switch (type) {
@@ -148,6 +151,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        if (!kvm->arch.gpa_mm.pgd)
                return -ENOMEM;
 
+#ifdef CONFIG_CPU_LOONGSON64
+       kvm_init_loongson_ipi(kvm);
+#endif
+
        return 0;
 }
 
@@ -490,7 +497,10 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
        int intr = (int)irq->irq;
        struct kvm_vcpu *dvcpu = NULL;
 
-       if (intr == 3 || intr == -3 || intr == 4 || intr == -4)
+       if (intr == kvm_priority_to_irq[MIPS_EXC_INT_IPI_1] ||
+           intr == kvm_priority_to_irq[MIPS_EXC_INT_IPI_2] ||
+           intr == (-kvm_priority_to_irq[MIPS_EXC_INT_IPI_1]) ||
+           intr == (-kvm_priority_to_irq[MIPS_EXC_INT_IPI_2]))
                kvm_debug("%s: CPU: %d, INTR: %d\n", __func__, irq->cpu,
                          (int)intr);
 
@@ -499,10 +509,10 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
        else
                dvcpu = vcpu->kvm->vcpus[irq->cpu];
 
-       if (intr == 2 || intr == 3 || intr == 4) {
+       if (intr == 2 || intr == 3 || intr == 4 || intr == 6) {
                kvm_mips_callbacks->queue_io_int(dvcpu, irq);
 
-       } else if (intr == -2 || intr == -3 || intr == -4) {
+       } else if (intr == -2 || intr == -3 || intr == -4 || intr == -6) {
                kvm_mips_callbacks->dequeue_io_int(dvcpu, irq);
        } else {
                kvm_err("%s: invalid interrupt ioctl (%d:%d)\n", __func__,
@@ -1620,6 +1630,34 @@ static struct notifier_block kvm_mips_csr_die_notifier = {
        .notifier_call = kvm_mips_csr_die_notify,
 };
 
+static u32 kvm_default_priority_to_irq[MIPS_EXC_MAX] = {
+       [MIPS_EXC_INT_TIMER] = C_IRQ5,
+       [MIPS_EXC_INT_IO_1]  = C_IRQ0,
+       [MIPS_EXC_INT_IPI_1] = C_IRQ1,
+       [MIPS_EXC_INT_IPI_2] = C_IRQ2,
+};
+
+static u32 kvm_loongson3_priority_to_irq[MIPS_EXC_MAX] = {
+       [MIPS_EXC_INT_TIMER] = C_IRQ5,
+       [MIPS_EXC_INT_IO_1]  = C_IRQ0,
+       [MIPS_EXC_INT_IO_2]  = C_IRQ1,
+       [MIPS_EXC_INT_IPI_1] = C_IRQ4,
+};
+
+u32 *kvm_priority_to_irq = kvm_default_priority_to_irq;
+
+u32 kvm_irq_to_priority(u32 irq)
+{
+       int i;
+
+       for (i = MIPS_EXC_INT_TIMER; i < MIPS_EXC_MAX; i++) {
+               if (kvm_priority_to_irq[i] == (1 << (irq + 8)))
+                       return i;
+       }
+
+       return MIPS_EXC_MAX;
+}
+
 static int __init kvm_mips_init(void)
 {
        int ret;
@@ -1638,6 +1676,9 @@ static int __init kvm_mips_init(void)
        if (ret)
                return ret;
 
+       if (boot_cpu_type() == CPU_LOONGSON64)
+               kvm_priority_to_irq = kvm_loongson3_priority_to_irq;
+
        register_die_notifier(&kvm_mips_csr_die_notifier);
 
        return 0;
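The two tables and kvm_irq_to_priority() above replace the old hard-coded switch statements: a delivery priority indexes into a per-platform table of Cause.IP bits, and the reverse lookup turns the raw irq number from userspace (or from the virtual IPI device, which raises irq 6) back into a priority. A standalone sketch with simplified stand-ins for the C_IRQ*/MIPS_EXC_* constants (not the kernel's definitions):

#include <stdio.h>

/* Stand-ins for the MIPS_EXC_* priorities used by the tables above. */
enum {
	EXC_INT_TIMER = 7, EXC_INT_IO_1, EXC_INT_IO_2,
	EXC_EXECUTE, EXC_INT_IPI_1, EXC_INT_IPI_2, EXC_MAX
};

/* Cause.IP bit for interrupt number n; C_IRQ0..C_IRQ5 correspond to IP2..IP7. */
#define IP_BIT(n)	(1u << ((n) + 8))

/* Loongson-3 layout from the patch: the IPI arrives on interrupt 6 (IP6). */
static unsigned int prio_to_irq[EXC_MAX] = {
	[EXC_INT_TIMER] = IP_BIT(7),
	[EXC_INT_IO_1]  = IP_BIT(2),
	[EXC_INT_IO_2]  = IP_BIT(3),
	[EXC_INT_IPI_1] = IP_BIT(6),
};

static unsigned int irq_to_prio(unsigned int irq)
{
	unsigned int i;

	for (i = EXC_INT_TIMER; i < EXC_MAX; i++)
		if (prio_to_irq[i] == IP_BIT(irq))
			return i;
	return EXC_MAX;			/* unmapped irq */
}

int main(void)
{
	printf("irq 6 -> priority %u (IPI_1)\n", irq_to_prio(6));
	printf("irq 5 -> priority %u (EXC_MAX, unmapped)\n", irq_to_prio(5));
	return 0;
}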
index fc8aee0..1c1fbce 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <asm/cpu.h>
 #include <asm/bootinfo.h>
+#include <asm/mipsregs.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
@@ -621,6 +622,46 @@ void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
 }
 EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb);
 
+#ifdef CONFIG_CPU_LOONGSON64
+void kvm_loongson_clear_guest_vtlb(void)
+{
+       int idx = read_gc0_index();
+
+       /* Set root GuestID for root probe and write of guest TLB entry */
+       set_root_gid_to_guest_gid();
+
+       write_gc0_index(0);
+       guest_tlbinvf();
+       write_gc0_index(idx);
+
+       clear_root_gid();
+       set_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB);
+}
+EXPORT_SYMBOL_GPL(kvm_loongson_clear_guest_vtlb);
+
+void kvm_loongson_clear_guest_ftlb(void)
+{
+       int i;
+       int idx = read_gc0_index();
+
+       /* Set root GuestID for root probe and write of guest TLB entry */
+       set_root_gid_to_guest_gid();
+
+       for (i = current_cpu_data.tlbsizevtlb;
+            i < (current_cpu_data.tlbsizevtlb +
+                    current_cpu_data.tlbsizeftlbsets);
+            i++) {
+               write_gc0_index(i);
+               guest_tlbinvf();
+       }
+       write_gc0_index(idx);
+
+       clear_root_gid();
+       set_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB);
+}
+EXPORT_SYMBOL_GPL(kvm_loongson_clear_guest_ftlb);
+#endif
+
 #endif
 
 /**
index 60763ef..34ad0b4 100644 (file)
@@ -529,6 +529,9 @@ static int kvm_trap_emul_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_MIPS_TE:
                r = 1;
                break;
+       case KVM_CAP_IOEVENTFD:
+               r = 1;
+               break;
        default:
                r = 0;
                break;
index 51f5100..d9c462c 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/kvm_host.h>
 
 #include "interrupt.h"
+#include "loongson_regs.h"
 
 #include "trace.h"
 
@@ -126,6 +127,11 @@ static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu)
        return mask;
 }
 
+static inline unsigned int kvm_vz_config6_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+       return MIPS_CONF6_LOONGSON_INTIMER | MIPS_CONF6_LOONGSON_EXTIMER;
+}
+
 /*
  * VZ optionally allows these additional Config bits to be written by root:
  * Config:     M, [MT]
@@ -180,6 +186,12 @@ static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu)
        return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP;
 }
 
+static inline unsigned int kvm_vz_config6_user_wrmask(struct kvm_vcpu *vcpu)
+{
+       return kvm_vz_config6_guest_wrmask(vcpu) |
+               MIPS_CONF6_LOONGSON_SFBEN | MIPS_CONF6_LOONGSON_FTLBDIS;
+}
+
 static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva)
 {
        /* VZ guest has already converted gva to gpa */
@@ -225,23 +237,7 @@ static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu,
         * interrupts are asynchronous to vcpu execution therefore defer guest
         * cp0 accesses
         */
-       switch (intr) {
-       case 2:
-               kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IO);
-               break;
-
-       case 3:
-               kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_1);
-               break;
-
-       case 4:
-               kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_2);
-               break;
-
-       default:
-               break;
-       }
-
+       kvm_vz_queue_irq(vcpu, kvm_irq_to_priority(intr));
 }
 
 static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
@@ -253,44 +249,22 @@ static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
         * interrupts are asynchronous to vcpu execution therefore defer guest
         * cp0 accesses
         */
-       switch (intr) {
-       case -2:
-               kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IO);
-               break;
-
-       case -3:
-               kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1);
-               break;
-
-       case -4:
-               kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2);
-               break;
-
-       default:
-               break;
-       }
-
+       kvm_vz_dequeue_irq(vcpu, kvm_irq_to_priority(-intr));
 }
 
-static u32 kvm_vz_priority_to_irq[MIPS_EXC_MAX] = {
-       [MIPS_EXC_INT_TIMER] = C_IRQ5,
-       [MIPS_EXC_INT_IO]    = C_IRQ0,
-       [MIPS_EXC_INT_IPI_1] = C_IRQ1,
-       [MIPS_EXC_INT_IPI_2] = C_IRQ2,
-};
-
 static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                                 u32 cause)
 {
        u32 irq = (priority < MIPS_EXC_MAX) ?
-               kvm_vz_priority_to_irq[priority] : 0;
+               kvm_priority_to_irq[priority] : 0;
 
        switch (priority) {
        case MIPS_EXC_INT_TIMER:
                set_gc0_cause(C_TI);
                break;
 
-       case MIPS_EXC_INT_IO:
+       case MIPS_EXC_INT_IO_1:
+       case MIPS_EXC_INT_IO_2:
        case MIPS_EXC_INT_IPI_1:
        case MIPS_EXC_INT_IPI_2:
                if (cpu_has_guestctl2)
@@ -311,7 +285,7 @@ static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                               u32 cause)
 {
        u32 irq = (priority < MIPS_EXC_MAX) ?
-               kvm_vz_priority_to_irq[priority] : 0;
+               kvm_priority_to_irq[priority] : 0;
 
        switch (priority) {
        case MIPS_EXC_INT_TIMER:
@@ -329,7 +303,8 @@ static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority,
                }
                break;
 
-       case MIPS_EXC_INT_IO:
+       case MIPS_EXC_INT_IO_1:
+       case MIPS_EXC_INT_IO_2:
        case MIPS_EXC_INT_IPI_1:
        case MIPS_EXC_INT_IPI_2:
                /* Clear GuestCtl2.VIP irq if not using Hardware Clear */
@@ -966,7 +941,8 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
                                    (sel == 2 ||        /* SRSCtl */
                                     sel == 3)) ||      /* SRSMap */
                                   (rd == MIPS_CP0_CONFIG &&
-                                   (sel == 7)) ||      /* Config7 */
+                                   (sel == 6 ||        /* Config6 */
+                                    sel == 7)) ||      /* Config7 */
                                   (rd == MIPS_CP0_LLADDR &&
                                    (sel == 2) &&       /* MAARI */
                                    cpu_guest_has_maar &&
@@ -974,6 +950,11 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
                                   (rd == MIPS_CP0_ERRCTL &&
                                    (sel == 0))) {      /* ErrCtl */
                                val = cop0->reg[rd][sel];
+#ifdef CONFIG_CPU_LOONGSON64
+                       } else if (rd == MIPS_CP0_DIAG &&
+                                  (sel == 0)) {        /* Diag */
+                               val = cop0->reg[rd][sel];
+#endif
                        } else {
                                val = 0;
                                er = EMULATE_FAIL;
@@ -1036,9 +1017,40 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
                                   cpu_guest_has_maar &&
                                   !cpu_guest_has_dyn_maar) {
                                kvm_write_maari(vcpu, val);
+                       } else if (rd == MIPS_CP0_CONFIG &&
+                                  (sel == 6)) {
+                               cop0->reg[rd][sel] = (int)val;
                        } else if (rd == MIPS_CP0_ERRCTL &&
                                   (sel == 0)) {        /* ErrCtl */
                                /* ignore the written value */
+#ifdef CONFIG_CPU_LOONGSON64
+                       } else if (rd == MIPS_CP0_DIAG &&
+                                  (sel == 0)) {        /* Diag */
+                               unsigned long flags;
+
+                               local_irq_save(flags);
+                               if (val & LOONGSON_DIAG_BTB) {
+                                       /* Flush BTB */
+                                       set_c0_diag(LOONGSON_DIAG_BTB);
+                               }
+                               if (val & LOONGSON_DIAG_ITLB) {
+                                       /* Flush ITLB */
+                                       set_c0_diag(LOONGSON_DIAG_ITLB);
+                               }
+                               if (val & LOONGSON_DIAG_DTLB) {
+                                       /* Flush DTLB */
+                                       set_c0_diag(LOONGSON_DIAG_DTLB);
+                               }
+                               if (val & LOONGSON_DIAG_VTLB) {
+                                       /* Flush VTLB */
+                                       kvm_loongson_clear_guest_vtlb();
+                               }
+                               if (val & LOONGSON_DIAG_FTLB) {
+                                       /* Flush FTLB */
+                                       kvm_loongson_clear_guest_ftlb();
+                               }
+                               local_irq_restore(flags);
+#endif
                        } else {
                                er = EMULATE_FAIL;
                        }
@@ -1129,6 +1141,77 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst,
        return EMULATE_FAIL;
 }
 
+#ifdef CONFIG_CPU_LOONGSON64
+static enum emulation_result kvm_vz_gpsi_lwc2(union mips_instruction inst,
+                                             u32 *opc, u32 cause,
+                                             struct kvm_run *run,
+                                             struct kvm_vcpu *vcpu)
+{
+       unsigned int rs, rd;
+       unsigned int hostcfg;
+       unsigned long curr_pc;
+       enum emulation_result er = EMULATE_DONE;
+
+       /*
+        * Update PC and hold onto current PC in case there is
+        * an error and we want to rollback the PC
+        */
+       curr_pc = vcpu->arch.pc;
+       er = update_pc(vcpu, cause);
+       if (er == EMULATE_FAIL)
+               return er;
+
+       rs = inst.loongson3_lscsr_format.rs;
+       rd = inst.loongson3_lscsr_format.rd;
+       switch (inst.loongson3_lscsr_format.fr) {
+       case 0x8:  /* Read CPUCFG */
+               ++vcpu->stat.vz_cpucfg_exits;
+               hostcfg = read_cpucfg(vcpu->arch.gprs[rs]);
+
+               switch (vcpu->arch.gprs[rs]) {
+               case LOONGSON_CFG0:
+                       vcpu->arch.gprs[rd] = 0x14c000;
+                       break;
+               case LOONGSON_CFG1:
+                       hostcfg &= (LOONGSON_CFG1_FP | LOONGSON_CFG1_MMI |
+                                   LOONGSON_CFG1_MSA1 | LOONGSON_CFG1_MSA2 |
+                                   LOONGSON_CFG1_SFBP);
+                       vcpu->arch.gprs[rd] = hostcfg;
+                       break;
+               case LOONGSON_CFG2:
+                       hostcfg &= (LOONGSON_CFG2_LEXT1 | LOONGSON_CFG2_LEXT2 |
+                                   LOONGSON_CFG2_LEXT3 | LOONGSON_CFG2_LSPW);
+                       vcpu->arch.gprs[rd] = hostcfg;
+                       break;
+               case LOONGSON_CFG3:
+                       vcpu->arch.gprs[rd] = hostcfg;
+                       break;
+               default:
+                       /* Don't export any other advanced features to guest */
+                       vcpu->arch.gprs[rd] = 0;
+                       break;
+               }
+               break;
+
+       default:
+               kvm_err("lwc2 emulate not impl %d rs %lx @%lx\n",
+                       inst.loongson3_lscsr_format.fr, vcpu->arch.gprs[rs], curr_pc);
+               er = EMULATE_FAIL;
+               break;
+       }
+
+       /* Rollback PC only if emulation was unsuccessful */
+       if (er == EMULATE_FAIL) {
+               kvm_err("[%#lx]%s: unsupported lwc2 instruction 0x%08x 0x%08x\n",
+                       curr_pc, __func__, inst.word, inst.loongson3_lscsr_format.fr);
+
+               vcpu->arch.pc = curr_pc;
+       }
+
+       return er;
+}
+#endif
+
 static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
                                                     struct kvm_vcpu *vcpu)
 {
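The new lwc2 handler above exposes only a vetted subset of the host CPUCFG state to the guest. A rough standalone restatement of that masking policy follows (kernel context; the helper name is made up, the LOONGSON_CFG* mask macros are the ones already used in the hunk):

/*
 * Sketch only: restates the CPUCFG filtering done by kvm_vz_gpsi_lwc2() above.
 */
static u32 guest_cpucfg_view(u32 sel, u32 hostcfg)
{
        switch (sel) {
        case LOONGSON_CFG0:
                return 0x14c000;        /* fixed value handed to the guest, as above */
        case LOONGSON_CFG1:
                return hostcfg & (LOONGSON_CFG1_FP | LOONGSON_CFG1_MMI |
                                  LOONGSON_CFG1_MSA1 | LOONGSON_CFG1_MSA2 |
                                  LOONGSON_CFG1_SFBP);
        case LOONGSON_CFG2:
                return hostcfg & (LOONGSON_CFG2_LEXT1 | LOONGSON_CFG2_LEXT2 |
                                  LOONGSON_CFG2_LEXT3 | LOONGSON_CFG2_LSPW);
        case LOONGSON_CFG3:
                return hostcfg;         /* passed through unmasked */
        default:
                return 0;               /* anything else is hidden from the guest */
        }
}

Anything not explicitly whitelisted reads back as zero, so newer host features never leak into the guest's view of CPUCFG.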
@@ -1157,6 +1240,11 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
                trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
                er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
                break;
+#endif
+#ifdef CONFIG_CPU_LOONGSON64
+       case lwc2_op:
+               er = kvm_vz_gpsi_lwc2(inst, opc, cause, run, vcpu);
+               break;
 #endif
        case spec3_op:
                switch (inst.spec3_format.func) {
@@ -1652,6 +1740,7 @@ static u64 kvm_vz_get_one_regs[] = {
        KVM_REG_MIPS_CP0_CONFIG3,
        KVM_REG_MIPS_CP0_CONFIG4,
        KVM_REG_MIPS_CP0_CONFIG5,
+       KVM_REG_MIPS_CP0_CONFIG6,
 #ifdef CONFIG_64BIT
        KVM_REG_MIPS_CP0_XCONTEXT,
 #endif
@@ -1706,7 +1795,7 @@ static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu)
                ret += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig);
        if (cpu_guest_has_segments)
                ret += ARRAY_SIZE(kvm_vz_get_one_regs_segments);
-       if (cpu_guest_has_htw)
+       if (cpu_guest_has_htw || cpu_guest_has_ldpte)
                ret += ARRAY_SIZE(kvm_vz_get_one_regs_htw);
        if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar)
                ret += 1 + ARRAY_SIZE(vcpu->arch.maar);
@@ -1755,7 +1844,7 @@ static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices)
                        return -EFAULT;
                indices += ARRAY_SIZE(kvm_vz_get_one_regs_segments);
        }
-       if (cpu_guest_has_htw) {
+       if (cpu_guest_has_htw || cpu_guest_has_ldpte) {
                if (copy_to_user(indices, kvm_vz_get_one_regs_htw,
                                 sizeof(kvm_vz_get_one_regs_htw)))
                        return -EFAULT;
@@ -1878,17 +1967,17 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
                *v = read_gc0_segctl2();
                break;
        case KVM_REG_MIPS_CP0_PWBASE:
-               if (!cpu_guest_has_htw)
+               if (!cpu_guest_has_htw && !cpu_guest_has_ldpte)
                        return -EINVAL;
                *v = read_gc0_pwbase();
                break;
        case KVM_REG_MIPS_CP0_PWFIELD:
-               if (!cpu_guest_has_htw)
+               if (!cpu_guest_has_htw && !cpu_guest_has_ldpte)
                        return -EINVAL;
                *v = read_gc0_pwfield();
                break;
        case KVM_REG_MIPS_CP0_PWSIZE:
-               if (!cpu_guest_has_htw)
+               if (!cpu_guest_has_htw && !cpu_guest_has_ldpte)
                        return -EINVAL;
                *v = read_gc0_pwsize();
                break;
@@ -1896,7 +1985,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
                *v = (long)read_gc0_wired();
                break;
        case KVM_REG_MIPS_CP0_PWCTL:
-               if (!cpu_guest_has_htw)
+               if (!cpu_guest_has_htw && !cpu_guest_has_ldpte)
                        return -EINVAL;
                *v = read_gc0_pwctl();
                break;
@@ -1979,6 +2068,9 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
                        return -EINVAL;
                *v = read_gc0_config5();
                break;
+       case KVM_REG_MIPS_CP0_CONFIG6:
+               *v = kvm_read_sw_gc0_config6(cop0);
+               break;
        case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f):
                if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
                        return -EINVAL;
@@ -2101,17 +2193,17 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu,
                write_gc0_segctl2(v);
                break;
        case KVM_REG_MIPS_CP0_PWBASE:
-               if (!cpu_guest_has_htw)
+               if (!cpu_guest_has_htw && !cpu_guest_has_ldpte)
                        return -EINVAL;
                write_gc0_pwbase(v);
                break;
        case KVM_REG_MIPS_CP0_PWFIELD:
-               if (!cpu_guest_has_htw)
+               if (!cpu_guest_has_htw && !cpu_guest_has_ldpte)
                        return -EINVAL;
                write_gc0_pwfield(v);
                break;
        case KVM_REG_MIPS_CP0_PWSIZE:
-               if (!cpu_guest_has_htw)
+               if (!cpu_guest_has_htw && !cpu_guest_has_ldpte)
                        return -EINVAL;
                write_gc0_pwsize(v);
                break;
@@ -2119,7 +2211,7 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu,
                change_gc0_wired(MIPSR6_WIRED_WIRED, v);
                break;
        case KVM_REG_MIPS_CP0_PWCTL:
-               if (!cpu_guest_has_htw)
+               if (!cpu_guest_has_htw && !cpu_guest_has_ldpte)
                        return -EINVAL;
                write_gc0_pwctl(v);
                break;
@@ -2248,6 +2340,14 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu,
                        write_gc0_config5(v);
                }
                break;
+       case KVM_REG_MIPS_CP0_CONFIG6:
+               cur = kvm_read_sw_gc0_config6(cop0);
+               change = (cur ^ v) & kvm_vz_config6_user_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_sw_gc0_config6(cop0, (int)v);
+               }
+               break;
        case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f):
                if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
                        return -EINVAL;
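With CONFIG6 added to kvm_vz_get_one_regs and to the get/set paths above, userspace can migrate it like any other CP0 register through the generic ONE_REG interface. A hypothetical userspace sketch follows; the register id argument is a placeholder for the real KVM_REG_MIPS_CP0_CONFIG6 encoding from the MIPS KVM ABI, and error handling is minimal:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helpers; config6_id stands in for the Config6 register id. */
static int get_config6(int vcpu_fd, uint64_t config6_id, uint64_t *val)
{
        struct kvm_one_reg reg = {
                .id   = config6_id,
                .addr = (uintptr_t)val,
        };

        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}

static int set_config6(int vcpu_fd, uint64_t config6_id, uint64_t val)
{
        struct kvm_one_reg reg = {
                .id   = config6_id,
                .addr = (uintptr_t)&val,
        };

        return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}

Note that the set path above filters the written value through kvm_vz_config6_user_wrmask(), so bits outside the user-writable mask are silently kept at their current values.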
@@ -2580,7 +2680,7 @@ static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        }
 
        /* restore HTW registers */
-       if (cpu_guest_has_htw) {
+       if (cpu_guest_has_htw || cpu_guest_has_ldpte) {
                kvm_restore_gc0_pwbase(cop0);
                kvm_restore_gc0_pwfield(cop0);
                kvm_restore_gc0_pwsize(cop0);
@@ -2597,7 +2697,7 @@ static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
         * prevents a SC on the next VCPU from succeeding by matching a LL on
         * the previous VCPU.
         */
-       if (cpu_guest_has_rw_llb)
+       if (vcpu->kvm->created_vcpus > 1)
                write_gc0_lladdr(0);
 
        return 0;
@@ -2685,8 +2785,8 @@ static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
        }
 
        /* save HTW registers if enabled in guest */
-       if (cpu_guest_has_htw &&
-           kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) {
+       if (cpu_guest_has_ldpte || (cpu_guest_has_htw &&
+           kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW)) {
                kvm_save_gc0_pwbase(cop0);
                kvm_save_gc0_pwfield(cop0);
                kvm_save_gc0_pwsize(cop0);
@@ -2853,8 +2953,12 @@ static int kvm_vz_hardware_enable(void)
        write_c0_guestctl0(MIPS_GCTL0_CP0 |
                           (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) |
                           MIPS_GCTL0_CG | MIPS_GCTL0_CF);
-       if (cpu_has_guestctl0ext)
-               set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI);
+       if (cpu_has_guestctl0ext) {
+               if (current_cpu_type() != CPU_LOONGSON64)
+                       set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI);
+               else
+                       clear_c0_guestctl0ext(MIPS_GCTL0EXT_CGI);
+       }
 
        if (cpu_has_guestid) {
                write_c0_guestctl1(0);
@@ -2871,6 +2975,12 @@ static int kvm_vz_hardware_enable(void)
        if (cpu_has_guestctl2)
                clear_c0_guestctl2(0x3f << 10);
 
+#ifdef CONFIG_CPU_LOONGSON64
+       /* Control guest CCA attribute */
+       if (cpu_has_csr())
+               csr_writel(csr_readl(0xffffffec) | 0x1, 0xffffffec);
+#endif
+
        return 0;
 }
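The csr_readl()/csr_writel() pair above is a plain read-modify-write that sets one bit in a Loongson CSR; the "guest CCA" meaning comes only from the comment, so the names in this small sketch are assumptions rather than real kernel symbols:

#define LS_CSR_GUEST_CCA        0xffffffec      /* assumed name for the CSR offset used above */
#define LS_CSR_GUEST_CCA_EN     0x1             /* assumed name for the control bit */

/* The same read-modify-write as above, spelled out with the assumed names. */
static void loongson_enable_guest_cca(void)
{
        csr_writel(csr_readl(LS_CSR_GUEST_CCA) | LS_CSR_GUEST_CCA_EN,
                   LS_CSR_GUEST_CCA);
}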
 
@@ -2927,6 +3037,9 @@ static int kvm_vz_check_extension(struct kvm *kvm, long ext)
                r = 2;
                break;
 #endif
+       case KVM_CAP_IOEVENTFD:
+               r = 1;
+               break;
        default:
                r = 0;
                break;
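Advertising KVM_CAP_IOEVENTFD lets a VMM bind an eventfd to a guest MMIO address, so frequently written registers (for example virtio doorbells) no longer need a full exit to userspace. A hypothetical userspace sketch, with an illustrative address and length:

#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical: bind an eventfd to 4-byte guest writes at gpa. */
static int wire_ioeventfd(int vm_fd, uint64_t gpa)
{
        struct kvm_ioeventfd ioev = { 0 };
        int efd;

        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IOEVENTFD) <= 0)
                return -1;                      /* capability not offered */

        efd = eventfd(0, EFD_NONBLOCK);
        if (efd < 0)
                return -1;

        ioev.addr = gpa;                        /* guest physical MMIO address (illustrative) */
        ioev.len  = 4;                          /* trigger on 32-bit writes */
        ioev.fd   = efd;
        /* flags left at 0: no datamatch, MMIO rather than PIO, assign rather than deassign */

        if (ioctl(vm_fd, KVM_IOEVENTFD, &ioev) < 0)
                return -1;
        return efd;                             /* poll/read this fd to observe doorbell writes */
}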
index a42dd09..d8a087c 100644
@@ -252,6 +252,7 @@ static int do_signal(struct pt_regs *regs)
                switch (retval) {
                case ERESTART_RESTARTBLOCK:
                        restart = -2;
+                       fallthrough;
                case ERESTARTNOHAND:
                case ERESTARTSYS:
                case ERESTARTNOINTR:
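The added `fallthrough;` is the kernel's pseudo-keyword (a wrapper around the compiler's fallthrough attribute) marking an intentional switch fall-through so that -Wimplicit-fallthrough stays quiet when ERESTART_RESTARTBLOCK drops into the cases below. A minimal standalone illustration of the same idiom, not kernel code; the macro guard is simplified and the case values merely stand in for the ERESTART* codes:

#if defined(__has_attribute)
# if __has_attribute(__fallthrough__)
#  define fallthrough   __attribute__((__fallthrough__))
# endif
#endif
#ifndef fallthrough
# define fallthrough    do {} while (0) /* old compilers: plain no-op statement */
#endif

static int classify(int retval)
{
        int restart = 0;

        switch (retval) {
        case -3:                        /* stand-in for ERESTART_RESTARTBLOCK */
                restart = -2;
                fallthrough;            /* intentional: continue into the shared cases */
        case -2:                        /* stand-in for ERESTARTNOHAND */
        case -1:                        /* stand-in for ERESTARTSYS / ERESTARTNOINTR */
                restart++;
                break;
        }
        return restart;
}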
index 8dd24c7..d32ec9a 100644
@@ -155,12 +155,11 @@ extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
 extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
-extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
-                       struct kvm_vcpu *vcpu, unsigned long addr,
-                       unsigned long status);
+extern int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
+                       unsigned long addr, unsigned long status);
 extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
                        unsigned long slb_v, unsigned long valid);
-extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
                        unsigned long gpa, gva_t ea, int is_store);
 
 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
@@ -174,8 +173,7 @@ extern void kvmppc_mmu_hpte_sysexit(void);
 extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
 
-extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
-                       struct kvm_vcpu *vcpu,
+extern int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
                        unsigned long ea, unsigned long dsisr);
 extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
                                        gva_t eaddr, void *to, void *from,
@@ -234,7 +232,7 @@ extern void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
                           bool upper, u32 val);
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
-extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu);
 extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
                        bool writing, bool *writable);
 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
@@ -300,12 +298,12 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
 long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
-int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
+int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
                          u64 time_limit, unsigned long lpcr);
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
 void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
                                   struct hv_guest_state *hr);
-long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu);
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
 
 void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 
index 337047b..7e2d061 100644
@@ -795,7 +795,6 @@ struct kvm_vcpu_arch {
        struct mmio_hpte_cache_entry *pgfault_cache;
 
        struct task_struct *run_task;
-       struct kvm_run *kvm_run;
 
        spinlock_t vpa_update_lock;
        struct kvmppc_vpa vpa;
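This hunk drops the kvm_run pointer cached in kvm_vcpu_arch, and the signature changes throughout the powerpc files that follow are the same mechanical refactor: struct kvm_run * is no longer threaded through as a parameter because it is always reachable as vcpu->run. A before/after sketch of the pattern with made-up function names (kernel context):

#include <linux/kvm_host.h>

/* Before (illustrative name): the run structure was passed alongside the vcpu. */
static int handle_exit_old(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
        run->exit_reason = KVM_EXIT_INTR;
        return -EINTR;
}

/* After (illustrative name): only the vcpu is passed; the run area is fetched from it. */
static int handle_exit_new(struct kvm_vcpu *vcpu)
{
        struct kvm_run *run = vcpu->run;

        run->exit_reason = KVM_EXIT_INTR;
        return -EINTR;
}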
index 94f5a32..ccf66b3 100644
@@ -58,28 +58,28 @@ enum xlate_readwrite {
        XLATE_WRITE             /* check for write permissions */
 };
 
-extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
+extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_handler_highmem(void);
 
 extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
-extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_load(struct kvm_vcpu *vcpu,
                               unsigned int rt, unsigned int bytes,
                              int is_default_endian);
-extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_loads(struct kvm_vcpu *vcpu,
                                unsigned int rt, unsigned int bytes,
                               int is_default_endian);
-extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu,
                                unsigned int rt, unsigned int bytes,
                        int is_default_endian, int mmio_sign_extend);
-extern int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu,
                unsigned int rt, unsigned int bytes, int is_default_endian);
-extern int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu,
                unsigned int rs, unsigned int bytes, int is_default_endian);
-extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_store(struct kvm_vcpu *vcpu,
                               u64 val, unsigned int bytes,
                               int is_default_endian);
-extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu,
                                int rs, unsigned int bytes,
                                int is_default_endian);
 
@@ -90,10 +90,9 @@ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
                     bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
                     bool data);
-extern int kvmppc_emulate_instruction(struct kvm_run *run,
-                                      struct kvm_vcpu *vcpu);
+extern int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
-extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
@@ -267,7 +266,7 @@ struct kvmppc_ops {
        void (*vcpu_put)(struct kvm_vcpu *vcpu);
        void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
        void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
-       int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+       int (*vcpu_run)(struct kvm_vcpu *vcpu);
        int (*vcpu_create)(struct kvm_vcpu *vcpu);
        void (*vcpu_free)(struct kvm_vcpu *vcpu);
        int (*check_requests)(struct kvm_vcpu *vcpu);
@@ -291,7 +290,7 @@ struct kvmppc_ops {
        int (*init_vm)(struct kvm *kvm);
        void (*destroy_vm)(struct kvm *kvm);
        int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
-       int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu,
+       int (*emulate_op)(struct kvm_vcpu *vcpu,
                          unsigned int inst, int *advance);
        int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
        int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
index 37508a3..41fedec 100644
@@ -755,9 +755,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 }
 EXPORT_SYMBOL_GPL(kvmppc_set_msr);
 
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
 {
-       return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
+       return vcpu->kvm->arch.kvm_ops->vcpu_run(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
index eae259e..9b6323e 100644
@@ -18,7 +18,7 @@ extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
                                     unsigned int inst, int *advance);
 extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
                                        int sprn, ulong spr_val);
index ddfc4c9..7c5a181 100644
@@ -412,7 +412,7 @@ static int instruction_is_store(unsigned int instr)
        return (instr & mask) != 0;
 }
 
-int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
                           unsigned long gpa, gva_t ea, int is_store)
 {
        u32 last_inst;
@@ -472,10 +472,10 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        vcpu->arch.paddr_accessed = gpa;
        vcpu->arch.vaddr_accessed = ea;
-       return kvmppc_emulate_mmio(run, vcpu);
+       return kvmppc_emulate_mmio(vcpu);
 }
 
-int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
                                unsigned long ea, unsigned long dsisr)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -498,7 +498,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        pte_t pte, *ptep;
 
        if (kvm_is_radix(kvm))
-               return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
+               return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr);
 
        /*
         * Real-mode code has already searched the HPT and found the
@@ -518,7 +518,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        gpa_base = r & HPTE_R_RPN & ~(psize - 1);
                        gfn_base = gpa_base >> PAGE_SHIFT;
                        gpa = gpa_base | (ea & (psize - 1));
-                       return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+                       return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
                                                dsisr & DSISR_ISSTORE);
                }
        }
@@ -554,7 +554,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        /* No memslot means it's an emulated MMIO region */
        if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
-               return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+               return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
                                              dsisr & DSISR_ISSTORE);
 
        /*
index 2a2fad9..3cb0c98 100644
@@ -353,7 +353,13 @@ static struct kmem_cache *kvm_pmd_cache;
 
 static pte_t *kvmppc_pte_alloc(void)
 {
-       return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
+       pte_t *pte;
+
+       pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
+       /* pmd_populate() will only reference _pa(pte). */
+       kmemleak_ignore(pte);
+
+       return pte;
 }
 
 static void kvmppc_pte_free(pte_t *ptep)
@@ -363,7 +369,13 @@ static void kvmppc_pte_free(pte_t *ptep)
 
 static pmd_t *kvmppc_pmd_alloc(void)
 {
-       return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
+       pmd_t *pmd;
+
+       pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
+       /* pud_populate() will only reference _pa(pmd). */
+       kmemleak_ignore(pmd);
+
+       return pmd;
 }
 
 static void kvmppc_pmd_free(pmd_t *pmdp)
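kmemleak_ignore() is used here because the only live reference to these page-table pages is the physical address that pmd_populate()/pud_populate() install, which the leak scanner cannot follow, so they would otherwise show up as false positives. A minimal sketch of the same idiom for any allocation that is referenced only by physical address (hypothetical helper name):

#include <linux/slab.h>
#include <linux/kmemleak.h>

/*
 * Hypothetical helper: the object will only ever be referenced by its physical
 * address (e.g. installed into a page table), so tell kmemleak not to treat
 * the missing virtual pointer as a leak.
 */
static void *alloc_phys_referenced(size_t size)
{
        void *obj = kmalloc(size, GFP_KERNEL);

        if (obj)
                kmemleak_ignore(obj);
        return obj;
}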
@@ -417,9 +429,13 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
  * Callers are responsible for flushing the PWC.
  *
  * When page tables are being unmapped/freed as part of page fault path
- * (full == false), ptes are not expected. There is code to unmap them
- * and emit a warning if encountered, but there may already be data
- * corruption due to the unexpected mappings.
+ * (full == false), valid ptes are generally not expected; however, there
+ * is one situation where they arise, which is when dirty page logging is
+ * turned off for a memslot while the VM is running.  The new memslot
+ * becomes visible to page faults before the memslot commit function
+ * gets to flush the memslot, which can lead to a 2MB page mapping being
+ * installed for a guest physical address where there are already 64kB
+ * (or 4kB) mappings (of sub-pages of the same 2MB page).
  */
 static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
                                  unsigned int lpid)
@@ -433,7 +449,6 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
                for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
                        if (pte_val(*p) == 0)
                                continue;
-                       WARN_ON_ONCE(1);
                        kvmppc_unmap_pte(kvm, p,
                                         pte_pfn(*p) << PAGE_SHIFT,
                                         PAGE_SHIFT, NULL, lpid);
@@ -891,7 +906,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
        return ret;
 }
 
-int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
                                   unsigned long ea, unsigned long dsisr)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -937,7 +952,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
                        return RESUME_GUEST;
                }
-               return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
+               return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
        }
 
        if (memslot->flags & KVM_MEM_READONLY) {
@@ -1142,6 +1157,11 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
                                         kvm->arch.lpid);
                gpa += PAGE_SIZE;
        }
+       /*
+        * Increase the mmu notifier sequence number to prevent any page
+        * fault that read the memslot earlier from writing a PTE.
+        */
+       kvm->mmu_notifier_seq++;
        spin_unlock(&kvm->mmu_lock);
 }
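Bumping kvm->mmu_notifier_seq makes any in-flight page fault that sampled the memslot before this flush fail its sequence check and retry, rather than re-installing a stale mapping. A rough sketch of the generic fault-side pattern this pairs with (hypothetical helper; the real handlers in this file do the same dance around kvm->mmu_lock):

#include <linux/kvm_host.h>

static bool install_mapping_checked(struct kvm *kvm)
{
        unsigned long mmu_seq = kvm->mmu_notifier_seq;
        bool ok = false;

        smp_rmb();
        /* ... gfn-to-pfn translation, which may sleep, happens here ... */

        spin_lock(&kvm->mmu_lock);
        if (!mmu_notifier_retry(kvm, mmu_seq))
                ok = true;      /* no invalidation raced with us: safe to install the PTE */
        spin_unlock(&kvm->mmu_lock);

        return ok;
}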
 
index 50555ad..1a529df 100644
@@ -73,6 +73,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
        struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
        struct iommu_table_group *table_group = NULL;
 
+       rcu_read_lock();
        list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
 
                table_group = iommu_group_get_iommudata(grp);
@@ -87,7 +88,9 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
                                kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
                        }
                }
+               cond_resched_rcu();
        }
+       rcu_read_unlock();
 }
 
 extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
@@ -105,12 +108,14 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
        if (!f.file)
                return -EBADF;
 
+       rcu_read_lock();
        list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
                if (stt == f.file->private_data) {
                        found = true;
                        break;
                }
        }
+       rcu_read_unlock();
 
        fdput(f);
 
@@ -143,6 +148,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
        if (!tbl)
                return -EINVAL;
 
+       rcu_read_lock();
        list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
                if (tbl != stit->tbl)
                        continue;
@@ -150,14 +156,17 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
                if (!kref_get_unless_zero(&stit->kref)) {
                        /* stit is being destroyed */
                        iommu_tce_table_put(tbl);
+                       rcu_read_unlock();
                        return -ENOTTY;
                }
                /*
                 * The table is already known to this KVM, we just increased
                 * its KVM reference counter and can return.
                 */
+               rcu_read_unlock();
                return 0;
        }
+       rcu_read_unlock();
 
        stit = kzalloc(sizeof(*stit), GFP_KERNEL);
        if (!stit) {
@@ -365,18 +374,19 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
        if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
                return H_TOO_HARD;
 
+       rcu_read_lock();
        list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
                unsigned long hpa = 0;
                struct mm_iommu_table_group_mem_t *mem;
                long shift = stit->tbl->it_page_shift;
 
                mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
-               if (!mem)
-                       return H_TOO_HARD;
-
-               if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
+               if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) {
+                       rcu_read_unlock();
                        return H_TOO_HARD;
+               }
        }
+       rcu_read_unlock();
 
        return H_SUCCESS;
 }
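These hunks add the rcu_read_lock()/rcu_read_unlock() sections that list_for_each_entry_rcu() requires when the caller is not already inside a read-side critical section or holding the protecting lock. The general shape, as a generic sketch with made-up types:

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct tagged_item {
        struct list_head next;
        int tag;
};

/* Generic sketch: an RCU-protected list walk needs an explicit read-side section. */
static bool list_contains_tag(struct list_head *head, int tag)
{
        struct tagged_item *it;
        bool found = false;

        rcu_read_lock();
        list_for_each_entry_rcu(it, head, next) {
                if (it->tag == tag) {
                        found = true;
                        break;
                }
        }
        rcu_read_unlock();

        return found;
}

The cond_resched_rcu() added in the first of these hunks is the matching escape hatch for long walks: it briefly exits the read-side section, reschedules if needed, and re-enters it.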
index dad71d2..0effd48 100644
@@ -235,7 +235,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
 
 #endif
 
-int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
                              unsigned int inst, int *advance)
 {
        int emulated = EMULATE_DONE;
@@ -371,13 +371,13 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE)
                                break;
 
-                       run->papr_hcall.nr = cmd;
+                       vcpu->run->papr_hcall.nr = cmd;
                        for (i = 0; i < 9; ++i) {
                                ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
-                               run->papr_hcall.args[i] = gpr;
+                               vcpu->run->papr_hcall.args[i] = gpr;
                        }
 
-                       run->exit_reason = KVM_EXIT_PAPR_HCALL;
+                       vcpu->run->exit_reason = KVM_EXIT_PAPR_HCALL;
                        vcpu->arch.hcall_needed = 1;
                        emulated = EMULATE_EXIT_USER;
                        break;
@@ -629,7 +629,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
        }
 
        if (emulated == EMULATE_FAIL)
-               emulated = kvmppc_emulate_paired_single(run, vcpu);
+               emulated = kvmppc_emulate_paired_single(vcpu);
 
        return emulated;
 }
index 7f5d586..6bf6664 100644
@@ -1094,9 +1094,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                        ret = kvmppc_h_svm_init_done(vcpu->kvm);
                break;
        case H_SVM_INIT_ABORT:
-               ret = H_UNSUPPORTED;
-               if (kvmppc_get_srr1(vcpu) & MSR_S)
-                       ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+               /*
+                * Even if that call is made by the Ultravisor, the SRR1 value
+                * is the guest context one, with the secure bit clear as it has
+                * not yet been secured. So we can't check it here.
+                * Instead the kvm->arch.secure_guest flag is checked inside
+                * kvmppc_h_svm_init_abort().
+                */
+               ret = kvmppc_h_svm_init_abort(vcpu->kvm);
                break;
 
        default:
@@ -1151,8 +1156,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
        return kvmppc_hcall_impl_hv_realmode(cmd);
 }
 
-static int kvmppc_emulate_debug_inst(struct kvm_run *run,
-                                       struct kvm_vcpu *vcpu)
+static int kvmppc_emulate_debug_inst(struct kvm_vcpu *vcpu)
 {
        u32 last_inst;
 
@@ -1166,8 +1170,8 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run,
        }
 
        if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
-               run->exit_reason = KVM_EXIT_DEBUG;
-               run->debug.arch.address = kvmppc_get_pc(vcpu);
+               vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+               vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
                return RESUME_HOST;
        } else {
                kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
@@ -1268,9 +1272,10 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
        return RESUME_GUEST;
 }
 
-static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
                                 struct task_struct *tsk)
 {
+       struct kvm_run *run = vcpu->run;
        int r = RESUME_HOST;
 
        vcpu->stat.sum_exits++;
@@ -1405,7 +1410,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                swab32(vcpu->arch.emul_inst) :
                                vcpu->arch.emul_inst;
                if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
-                       r = kvmppc_emulate_debug_inst(run, vcpu);
+                       r = kvmppc_emulate_debug_inst(vcpu);
                } else {
                        kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
                        r = RESUME_GUEST;
@@ -1457,7 +1462,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return r;
 }
 
-static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 {
        int r;
        int srcu_idx;
@@ -1515,7 +1520,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
         */
        case BOOK3S_INTERRUPT_H_DATA_STORAGE:
                srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-               r = kvmhv_nested_page_fault(run, vcpu);
+               r = kvmhv_nested_page_fault(vcpu);
                srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
                break;
        case BOOK3S_INTERRUPT_H_INST_STORAGE:
@@ -1525,7 +1530,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
                        vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
                srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-               r = kvmhv_nested_page_fault(run, vcpu);
+               r = kvmhv_nested_page_fault(vcpu);
                srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
                break;
 
@@ -2929,7 +2934,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 
                ret = RESUME_GUEST;
                if (vcpu->arch.trap)
-                       ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+                       ret = kvmppc_handle_exit_hv(vcpu,
                                                    vcpu->arch.run_task);
 
                vcpu->arch.ret = ret;
@@ -3894,15 +3899,16 @@ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
        return r;
 }
 
-static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *run = vcpu->run;
        int n_ceded, i, r;
        struct kvmppc_vcore *vc;
        struct kvm_vcpu *v;
 
        trace_kvmppc_run_vcpu_enter(vcpu);
 
-       kvm_run->exit_reason = 0;
+       run->exit_reason = 0;
        vcpu->arch.ret = RESUME_GUEST;
        vcpu->arch.trap = 0;
        kvmppc_update_vpas(vcpu);
@@ -3914,7 +3920,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        spin_lock(&vc->lock);
        vcpu->arch.ceded = 0;
        vcpu->arch.run_task = current;
-       vcpu->arch.kvm_run = kvm_run;
        vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
        vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
        vcpu->arch.busy_preempt = TB_NIL;
@@ -3947,8 +3952,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                        r = kvmhv_setup_mmu(vcpu);
                        spin_lock(&vc->lock);
                        if (r) {
-                               kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-                               kvm_run->fail_entry.
+                               run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+                               run->fail_entry.
                                        hardware_entry_failure_reason = 0;
                                vcpu->arch.ret = r;
                                break;
@@ -3967,7 +3972,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                        if (signal_pending(v->arch.run_task)) {
                                kvmppc_remove_runnable(vc, v);
                                v->stat.signal_exits++;
-                               v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+                               v->run->exit_reason = KVM_EXIT_INTR;
                                v->arch.ret = -EINTR;
                                wake_up(&v->arch.cpu_run);
                        }
@@ -4008,7 +4013,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
                kvmppc_remove_runnable(vc, vcpu);
                vcpu->stat.signal_exits++;
-               kvm_run->exit_reason = KVM_EXIT_INTR;
+               run->exit_reason = KVM_EXIT_INTR;
                vcpu->arch.ret = -EINTR;
        }
 
@@ -4019,15 +4024,15 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                wake_up(&v->arch.cpu_run);
        }
 
-       trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
+       trace_kvmppc_run_vcpu_exit(vcpu);
        spin_unlock(&vc->lock);
        return vcpu->arch.ret;
 }
 
-int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
-                         struct kvm_vcpu *vcpu, u64 time_limit,
+int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
                          unsigned long lpcr)
 {
+       struct kvm_run *run = vcpu->run;
        int trap, r, pcpu;
        int srcu_idx, lpid;
        struct kvmppc_vcore *vc;
@@ -4036,14 +4041,13 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
 
        trace_kvmppc_run_vcpu_enter(vcpu);
 
-       kvm_run->exit_reason = 0;
+       run->exit_reason = 0;
        vcpu->arch.ret = RESUME_GUEST;
        vcpu->arch.trap = 0;
 
        vc = vcpu->arch.vcore;
        vcpu->arch.ceded = 0;
        vcpu->arch.run_task = current;
-       vcpu->arch.kvm_run = kvm_run;
        vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
        vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
        vcpu->arch.busy_preempt = TB_NIL;
@@ -4161,9 +4165,9 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
        r = RESUME_GUEST;
        if (trap) {
                if (!nested)
-                       r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
+                       r = kvmppc_handle_exit_hv(vcpu, current);
                else
-                       r = kvmppc_handle_nested_exit(kvm_run, vcpu);
+                       r = kvmppc_handle_nested_exit(vcpu);
        }
        vcpu->arch.ret = r;
 
@@ -4173,7 +4177,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
                while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
                        if (signal_pending(current)) {
                                vcpu->stat.signal_exits++;
-                               kvm_run->exit_reason = KVM_EXIT_INTR;
+                               run->exit_reason = KVM_EXIT_INTR;
                                vcpu->arch.ret = -EINTR;
                                break;
                        }
@@ -4189,13 +4193,13 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
 
  done:
        kvmppc_remove_runnable(vc, vcpu);
-       trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
+       trace_kvmppc_run_vcpu_exit(vcpu);
 
        return vcpu->arch.ret;
 
  sigpend:
        vcpu->stat.signal_exits++;
-       kvm_run->exit_reason = KVM_EXIT_INTR;
+       run->exit_reason = KVM_EXIT_INTR;
        vcpu->arch.ret = -EINTR;
  out:
        local_irq_enable();
@@ -4203,8 +4207,9 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
        goto done;
 }
 
-static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *run = vcpu->run;
        int r;
        int srcu_idx;
        unsigned long ebb_regs[3] = {}; /* shut up GCC */
@@ -4288,10 +4293,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
                 */
                if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
                    !no_mixing_hpt_and_radix)
-                       r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0,
+                       r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
                                                  vcpu->arch.vcore->lpcr);
                else
-                       r = kvmppc_run_vcpu(run, vcpu);
+                       r = kvmppc_run_vcpu(vcpu);
 
                if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
@@ -4301,7 +4306,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        kvmppc_core_prepare_to_enter(vcpu);
                } else if (r == RESUME_PAGE_FAULT) {
                        srcu_idx = srcu_read_lock(&kvm->srcu);
-                       r = kvmppc_book3s_hv_page_fault(run, vcpu,
+                       r = kvmppc_book3s_hv_page_fault(vcpu,
                                vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
                        srcu_read_unlock(&kvm->srcu, srcu_idx);
                } else if (r == RESUME_PASSTHROUGH) {
@@ -4975,7 +4980,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 }
 
 /* We don't need to emulate any privileged instructions or dcbz */
-static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_core_emulate_op_hv(struct kvm_vcpu *vcpu,
                                     unsigned int inst, int *advance)
 {
        return EMULATE_FAIL;
index a221cae..0989751 100644
@@ -290,8 +290,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
                        r = RESUME_HOST;
                        break;
                }
-               r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
-                                         lpcr);
+               r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
        } while (is_kvmppc_resume_guest(r));
 
        /* save L2 state for return */
@@ -1270,8 +1269,7 @@ static inline int kvmppc_radix_shift_to_level(int shift)
 }
 
 /* called with gp->tlb_lock held */
-static long int __kvmhv_nested_page_fault(struct kvm_run *run,
-                                         struct kvm_vcpu *vcpu,
+static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
                                          struct kvm_nested_guest *gp)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -1354,7 +1352,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run,
                }
 
                /* passthrough of emulated MMIO case */
-               return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
+               return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
        }
        if (memslot->flags & KVM_MEM_READONLY) {
                if (writing) {
@@ -1429,8 +1427,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run,
        rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
        ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
                                mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
-       if (n_rmap)
-               kfree(n_rmap);
+       kfree(n_rmap);
        if (ret == -EAGAIN)
                ret = RESUME_GUEST;     /* Let the guest try again */
 
@@ -1441,13 +1438,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run,
        return RESUME_GUEST;
 }
 
-long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu)
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
 {
        struct kvm_nested_guest *gp = vcpu->arch.nested;
        long int ret;
 
        mutex_lock(&gp->tlb_lock);
-       ret = __kvmhv_nested_page_fault(run, vcpu, gp);
+       ret = __kvmhv_nested_page_fault(vcpu, gp);
        mutex_unlock(&gp->tlb_lock);
        return ret;
 }
index f91224e..09d8119 100644
@@ -749,6 +749,20 @@ static u64 kvmppc_get_secmem_size(void)
        const __be32 *prop;
        u64 size = 0;
 
+       /*
+        * First try the new ibm,secure-memory nodes which supersede the
+        * secure-memory-ranges property.
+        * If we found some, no need to read the deprecated ones.
+        */
+       for_each_compatible_node(np, NULL, "ibm,secure-memory") {
+               prop = of_get_property(np, "reg", &len);
+               if (!prop)
+                       continue;
+               size += of_read_number(prop + 2, 2);
+       }
+       if (size)
+               return size;
+
        np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
        if (!np)
                goto out;
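The new loop sums the size encoded in each ibm,secure-memory node's "reg" property; of_read_number(prop + 2, 2) skips the two address cells and reads the two size cells, i.e. it assumes #address-cells = <2> and #size-cells = <2>. A standalone restatement with an explicit length check (helper name made up):

#include <linux/of.h>

static u64 total_secure_memory(void)
{
        struct device_node *np;
        const __be32 *prop;
        u64 size = 0;
        int len;

        for_each_compatible_node(np, NULL, "ibm,secure-memory") {
                prop = of_get_property(np, "reg", &len);
                if (!prop || len < 4 * sizeof(__be32))
                        continue;
                size += of_read_number(prop + 2, 2);    /* skip the two address cells */
        }

        return size;
}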
index bf02827..a114367 100644
@@ -169,7 +169,7 @@ static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
 }
 
-static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_fpr_load(struct kvm_vcpu *vcpu,
                                   int rs, ulong addr, int ls_type)
 {
        int emulated = EMULATE_FAIL;
@@ -188,7 +188,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
                kvmppc_inject_pf(vcpu, addr, false);
                goto done_load;
        } else if (r == EMULATE_DO_MMIO) {
-               emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+               emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs,
                                              len, 1);
                goto done_load;
        }
@@ -213,7 +213,7 @@ done_load:
        return emulated;
 }
 
-static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_fpr_store(struct kvm_vcpu *vcpu,
                                    int rs, ulong addr, int ls_type)
 {
        int emulated = EMULATE_FAIL;
@@ -248,7 +248,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
        if (r < 0) {
                kvmppc_inject_pf(vcpu, addr, true);
        } else if (r == EMULATE_DO_MMIO) {
-               emulated = kvmppc_handle_store(run, vcpu, val, len, 1);
+               emulated = kvmppc_handle_store(vcpu, val, len, 1);
        } else {
                emulated = EMULATE_DONE;
        }
@@ -259,7 +259,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return emulated;
 }
 
-static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_psq_load(struct kvm_vcpu *vcpu,
                                   int rs, ulong addr, bool w, int i)
 {
        int emulated = EMULATE_FAIL;
@@ -279,12 +279,12 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
                kvmppc_inject_pf(vcpu, addr, false);
                goto done_load;
        } else if ((r == EMULATE_DO_MMIO) && w) {
-               emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+               emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs,
                                              4, 1);
                vcpu->arch.qpr[rs] = tmp[1];
                goto done_load;
        } else if (r == EMULATE_DO_MMIO) {
-               emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs,
+               emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FQPR | rs,
                                              8, 1);
                goto done_load;
        }
@@ -302,7 +302,7 @@ done_load:
        return emulated;
 }
 
-static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_psq_store(struct kvm_vcpu *vcpu,
                                    int rs, ulong addr, bool w, int i)
 {
        int emulated = EMULATE_FAIL;
@@ -318,10 +318,10 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
        if (r < 0) {
                kvmppc_inject_pf(vcpu, addr, true);
        } else if ((r == EMULATE_DO_MMIO) && w) {
-               emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1);
+               emulated = kvmppc_handle_store(vcpu, tmp[0], 4, 1);
        } else if (r == EMULATE_DO_MMIO) {
                u64 val = ((u64)tmp[0] << 32) | tmp[1];
-               emulated = kvmppc_handle_store(run, vcpu, val, 8, 1);
+               emulated = kvmppc_handle_store(vcpu, val, 8, 1);
        } else {
                emulated = EMULATE_DONE;
        }
@@ -618,7 +618,7 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
        return EMULATE_DONE;
 }
 
-int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu)
 {
        u32 inst;
        enum emulation_result emulated = EMULATE_DONE;
@@ -680,7 +680,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                int i = inst_get_field(inst, 17, 19);
 
                addr += get_d_signext(inst);
-               emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+               emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
                break;
        }
        case OP_PSQ_LU:
@@ -690,7 +690,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                int i = inst_get_field(inst, 17, 19);
 
                addr += get_d_signext(inst);
-               emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+               emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
 
                if (emulated == EMULATE_DONE)
                        kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -703,7 +703,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                int i = inst_get_field(inst, 17, 19);
 
                addr += get_d_signext(inst);
-               emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+               emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
                break;
        }
        case OP_PSQ_STU:
@@ -713,7 +713,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                int i = inst_get_field(inst, 17, 19);
 
                addr += get_d_signext(inst);
-               emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+               emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
 
                if (emulated == EMULATE_DONE)
                        kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -733,7 +733,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        int i = inst_get_field(inst, 22, 24);
 
                        addr += kvmppc_get_gpr(vcpu, ax_rb);
-                       emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+                       emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
                        break;
                }
                case OP_4X_PS_CMPO0:
@@ -747,7 +747,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        int i = inst_get_field(inst, 22, 24);
 
                        addr += kvmppc_get_gpr(vcpu, ax_rb);
-                       emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+                       emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
 
                        if (emulated == EMULATE_DONE)
                                kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -824,7 +824,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        int i = inst_get_field(inst, 22, 24);
 
                        addr += kvmppc_get_gpr(vcpu, ax_rb);
-                       emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+                       emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
                        break;
                }
                case OP_4XW_PSQ_STUX:
@@ -834,7 +834,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        int i = inst_get_field(inst, 22, 24);
 
                        addr += kvmppc_get_gpr(vcpu, ax_rb);
-                       emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+                       emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
 
                        if (emulated == EMULATE_DONE)
                                kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -922,7 +922,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
 
-               emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+               emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
                                                   FPU_LS_SINGLE);
                break;
        }
@@ -930,7 +930,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
 
-               emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+               emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
                                                   FPU_LS_SINGLE);
 
                if (emulated == EMULATE_DONE)
@@ -941,7 +941,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
 
-               emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+               emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
                                                   FPU_LS_DOUBLE);
                break;
        }
@@ -949,7 +949,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
 
-               emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+               emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
                                                   FPU_LS_DOUBLE);
 
                if (emulated == EMULATE_DONE)
@@ -960,7 +960,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
 
-               emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+               emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
                                                    FPU_LS_SINGLE);
                break;
        }
@@ -968,7 +968,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
 
-               emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+               emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
                                                    FPU_LS_SINGLE);
 
                if (emulated == EMULATE_DONE)
@@ -979,7 +979,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
 
-               emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+               emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
                                                    FPU_LS_DOUBLE);
                break;
        }
@@ -987,7 +987,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
 
-               emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+               emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
                                                    FPU_LS_DOUBLE);
 
                if (emulated == EMULATE_DONE)
@@ -1001,7 +1001,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
 
                        addr += kvmppc_get_gpr(vcpu, ax_rb);
-                       emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
                                                           addr, FPU_LS_SINGLE);
                        break;
                }
@@ -1010,7 +1010,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
                                     kvmppc_get_gpr(vcpu, ax_rb);
 
-                       emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
                                                           addr, FPU_LS_SINGLE);
 
                        if (emulated == EMULATE_DONE)
@@ -1022,7 +1022,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
                                     kvmppc_get_gpr(vcpu, ax_rb);
 
-                       emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
                                                           addr, FPU_LS_DOUBLE);
                        break;
                }
@@ -1031,7 +1031,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
                                     kvmppc_get_gpr(vcpu, ax_rb);
 
-                       emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
                                                           addr, FPU_LS_DOUBLE);
 
                        if (emulated == EMULATE_DONE)
@@ -1043,7 +1043,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
                                     kvmppc_get_gpr(vcpu, ax_rb);
 
-                       emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
                                                            addr, FPU_LS_SINGLE);
                        break;
                }
@@ -1052,7 +1052,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
                                     kvmppc_get_gpr(vcpu, ax_rb);
 
-                       emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
                                                            addr, FPU_LS_SINGLE);
 
                        if (emulated == EMULATE_DONE)
@@ -1064,7 +1064,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
                                     kvmppc_get_gpr(vcpu, ax_rb);
 
-                       emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
                                                            addr, FPU_LS_DOUBLE);
                        break;
                }
@@ -1073,7 +1073,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
                                     kvmppc_get_gpr(vcpu, ax_rb);
 
-                       emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
                                                            addr, FPU_LS_DOUBLE);
 
                        if (emulated == EMULATE_DONE)
@@ -1085,7 +1085,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
                                     kvmppc_get_gpr(vcpu, ax_rb);
 
-                       emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+                       emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
                                                            addr,
                                                            FPU_LS_SINGLE_LOW);
                        break;
index a0f6813..ef54f91 100644 (file)
@@ -700,7 +700,7 @@ static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
        return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);
 }
 
-int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_handle_pagefault(struct kvm_vcpu *vcpu,
                            ulong eaddr, int vec)
 {
        bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
@@ -795,7 +795,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                /* The guest's PTE is not mapped yet. Map on the host */
                if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
                        /* Exit KVM if mapping failed */
-                       run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+                       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                        return RESUME_HOST;
                }
                if (data)
@@ -808,7 +808,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                vcpu->stat.mmio_exits++;
                vcpu->arch.paddr_accessed = pte.raddr;
                vcpu->arch.vaddr_accessed = pte.eaddr;
-               r = kvmppc_emulate_mmio(run, vcpu);
+               r = kvmppc_emulate_mmio(vcpu);
                if ( r == RESUME_HOST_NV )
                        r = RESUME_HOST;
        }
@@ -992,7 +992,7 @@ static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
        enum emulation_result er = EMULATE_FAIL;
 
        if (!(kvmppc_get_msr(vcpu) & MSR_PR))
-               er = kvmppc_emulate_instruction(vcpu->run, vcpu);
+               er = kvmppc_emulate_instruction(vcpu);
 
        if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
                /* Couldn't emulate, trigger interrupt in guest */
@@ -1089,8 +1089,7 @@ static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
        }
 }
 
-static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                                 unsigned int exit_nr)
+static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr)
 {
        enum emulation_result er;
        ulong flags;
@@ -1124,7 +1123,7 @@ static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu,
        }
 
        vcpu->stat.emulated_inst_exits++;
-       er = kvmppc_emulate_instruction(run, vcpu);
+       er = kvmppc_emulate_instruction(vcpu);
        switch (er) {
        case EMULATE_DONE:
                r = RESUME_GUEST_NV;
@@ -1139,7 +1138,7 @@ static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu,
                r = RESUME_GUEST;
                break;
        case EMULATE_DO_MMIO:
-               run->exit_reason = KVM_EXIT_MMIO;
+               vcpu->run->exit_reason = KVM_EXIT_MMIO;
                r = RESUME_HOST_NV;
                break;
        case EMULATE_EXIT_USER:
@@ -1198,7 +1197,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
                /* only care about PTEG not found errors, but leave NX alone */
                if (shadow_srr1 & 0x40000000) {
                        int idx = srcu_read_lock(&vcpu->kvm->srcu);
-                       r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+                       r = kvmppc_handle_pagefault(vcpu, kvmppc_get_pc(vcpu), exit_nr);
                        srcu_read_unlock(&vcpu->kvm->srcu, idx);
                        vcpu->stat.sp_instruc++;
                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -1248,7 +1247,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
                 */
                if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
                        int idx = srcu_read_lock(&vcpu->kvm->srcu);
-                       r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+                       r = kvmppc_handle_pagefault(vcpu, dar, exit_nr);
                        srcu_read_unlock(&vcpu->kvm->srcu, idx);
                } else {
                        kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr);
@@ -1292,7 +1291,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
                break;
        case BOOK3S_INTERRUPT_PROGRAM:
        case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
-               r = kvmppc_exit_pr_progint(run, vcpu, exit_nr);
+               r = kvmppc_exit_pr_progint(vcpu, exit_nr);
                break;
        case BOOK3S_INTERRUPT_SYSCALL:
        {
@@ -1370,7 +1369,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
                                                    &last_inst);
                        if (emul == EMULATE_DONE)
-                               r = kvmppc_exit_pr_progint(run, vcpu, exit_nr);
+                               r = kvmppc_exit_pr_progint(vcpu, exit_nr);
                        else
                                r = RESUME_GUEST;
 
@@ -1825,8 +1824,9 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
        vfree(vcpu_book3s);
 }
 
-static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *run = vcpu->run;
        int ret;
 #ifdef CONFIG_ALTIVEC
        unsigned long uninitialized_var(vrsave);
@@ -1834,7 +1834,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        /* Check if we can run the vcpu at all */
        if (!vcpu->arch.sane) {
-               kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                ret = -EINVAL;
                goto out;
        }
@@ -1861,7 +1861,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        kvmppc_fix_ee_before_entry();
 
-       ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+       ret = __kvmppc_vcpu_run(run, vcpu);
 
        kvmppc_clear_debug(vcpu);
 
index 888afe8..c0d62a9 100644 (file)
@@ -729,13 +729,14 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
        return r;
 }
 
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *run = vcpu->run;
        int ret, s;
        struct debug_reg debug;
 
        if (!vcpu->arch.sane) {
-               kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                return -EINVAL;
        }
 
@@ -777,7 +778,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
        kvmppc_fix_ee_before_entry();
 
-       ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+       ret = __kvmppc_vcpu_run(run, vcpu);
 
        /* No need for guest_exit. It's done in handle_exit.
           We also get here with interrupts enabled. */
@@ -799,11 +800,11 @@ out:
        return ret;
 }
 
-static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int emulation_exit(struct kvm_vcpu *vcpu)
 {
        enum emulation_result er;
 
-       er = kvmppc_emulate_instruction(run, vcpu);
+       er = kvmppc_emulate_instruction(vcpu);
        switch (er) {
        case EMULATE_DONE:
                /* don't overwrite subtypes, just account kvm_stats */
@@ -820,8 +821,8 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                       __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst);
                /* For debugging, encode the failing instruction and
                 * report it to userspace. */
-               run->hw.hardware_exit_reason = ~0ULL << 32;
-               run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+               vcpu->run->hw.hardware_exit_reason = ~0ULL << 32;
+               vcpu->run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
                kvmppc_core_queue_program(vcpu, ESR_PIL);
                return RESUME_HOST;
 
@@ -833,8 +834,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
        }
 }
 
-static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_handle_debug(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *run = vcpu->run;
        struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg);
        u32 dbsr = vcpu->arch.dbsr;
 
@@ -953,7 +955,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
        }
 }
 
-static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu,
                                  enum emulation_result emulated, u32 last_inst)
 {
        switch (emulated) {
@@ -965,8 +967,8 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
                       __func__, vcpu->arch.regs.nip);
                /* For debugging, encode the failing instruction and
                 * report it to userspace. */
-               run->hw.hardware_exit_reason = ~0ULL << 32;
-               run->hw.hardware_exit_reason |= last_inst;
+               vcpu->run->hw.hardware_exit_reason = ~0ULL << 32;
+               vcpu->run->hw.hardware_exit_reason |= last_inst;
                kvmppc_core_queue_program(vcpu, ESR_PIL);
                return RESUME_HOST;
 
@@ -1023,7 +1025,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        run->ready_for_interrupt_injection = 1;
 
        if (emulated != EMULATE_DONE) {
-               r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst);
+               r = kvmppc_resume_inst_load(vcpu, emulated, last_inst);
                goto out;
        }
 
@@ -1083,7 +1085,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                break;
 
        case BOOKE_INTERRUPT_HV_PRIV:
-               r = emulation_exit(run, vcpu);
+               r = emulation_exit(vcpu);
                break;
 
        case BOOKE_INTERRUPT_PROGRAM:
@@ -1093,7 +1095,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         * We are here because of an SW breakpoint instr,
                         * so lets return to host to handle.
                         */
-                       r = kvmppc_handle_debug(run, vcpu);
+                       r = kvmppc_handle_debug(vcpu);
                        run->exit_reason = KVM_EXIT_DEBUG;
                        kvmppc_account_exit(vcpu, DEBUG_EXITS);
                        break;
@@ -1114,7 +1116,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        break;
                }
 
-               r = emulation_exit(run, vcpu);
+               r = emulation_exit(vcpu);
                break;
 
        case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -1281,7 +1283,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         * actually RAM. */
                        vcpu->arch.paddr_accessed = gpaddr;
                        vcpu->arch.vaddr_accessed = eaddr;
-                       r = kvmppc_emulate_mmio(run, vcpu);
+                       r = kvmppc_emulate_mmio(vcpu);
                        kvmppc_account_exit(vcpu, MMIO_EXITS);
                }
 
@@ -1332,7 +1334,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        }
 
        case BOOKE_INTERRUPT_DEBUG: {
-               r = kvmppc_handle_debug(run, vcpu);
+               r = kvmppc_handle_debug(vcpu);
                if (r == RESUME_HOST)
                        run->exit_reason = KVM_EXIT_DEBUG;
                kvmppc_account_exit(vcpu, DEBUG_EXITS);
index 65b4d33..be9da96 100644 (file)
@@ -70,7 +70,7 @@ void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
 void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
 void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
 
-int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu,
                             unsigned int inst, int *advance);
 int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
 int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
@@ -94,16 +94,12 @@ enum int_class {
 
 void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
 
-extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
-                                      struct kvm_vcpu *vcpu,
+extern int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu,
                                       unsigned int inst, int *advance);
 extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
                                          ulong spr_val);
 extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
                                          ulong *spr_val);
-extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
-                                      struct kvm_vcpu *vcpu,
-                                      unsigned int inst, int *advance);
 extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
                                          ulong spr_val);
 extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
index 689ff5f..d8d38ac 100644 (file)
@@ -39,7 +39,7 @@ static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
        kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
 }
 
-int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu,
                             unsigned int inst, int *advance)
 {
        int emulated = EMULATE_DONE;
index 3d0d3ec..64eb833 100644 (file)
@@ -83,16 +83,16 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
 }
 #endif
 
-static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_e500_emul_ehpriv(struct kvm_vcpu *vcpu,
                                   unsigned int inst, int *advance)
 {
        int emulated = EMULATE_DONE;
 
        switch (get_oc(inst)) {
        case EHPRIV_OC_DEBUG:
-               run->exit_reason = KVM_EXIT_DEBUG;
-               run->debug.arch.address = vcpu->arch.regs.nip;
-               run->debug.arch.status = 0;
+               vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+               vcpu->run->debug.arch.address = vcpu->arch.regs.nip;
+               vcpu->run->debug.arch.status = 0;
                kvmppc_account_exit(vcpu, DEBUG_EXITS);
                emulated = EMULATE_EXIT_USER;
                *advance = 0;
@@ -125,7 +125,7 @@ static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst,
        return EMULATE_FAIL;
 }
 
-int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu,
                                unsigned int inst, int *advance)
 {
        int emulated = EMULATE_DONE;
@@ -182,8 +182,7 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        break;
 
                case XOP_EHPRIV:
-                       emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
-                                                          advance);
+                       emulated = kvmppc_e500_emul_ehpriv(vcpu, inst, advance);
                        break;
 
                default:
@@ -197,7 +196,7 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
        }
 
        if (emulated == EMULATE_FAIL)
-               emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+               emulated = kvmppc_booke_emulate_op(vcpu, inst, advance);
 
        return emulated;
 }
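
The e500 decoder above also shows the fallback shape used throughout this series: the core-specific emulator runs first and, on EMULATE_FAIL, the generic booke emulator gets the instruction. A standalone sketch of that two-level decode (hypothetical handlers and a made-up opcode value, for illustration only):

    #include <stdio.h>

    enum result { EMULATE_DONE, EMULATE_FAIL };

    /* Hypothetical core-specific decoder: recognises only one opcode. */
    static enum result core_emulate(unsigned int inst)
    {
        return (inst == 0x100) ? EMULATE_DONE : EMULATE_FAIL;  /* made-up value */
    }

    /* Hypothetical generic fallback decoder. */
    static enum result generic_emulate(unsigned int inst)
    {
        (void)inst;
        return EMULATE_DONE;
    }

    static enum result emulate(unsigned int inst)
    {
        enum result r = core_emulate(inst);

        if (r == EMULATE_FAIL)      /* same fallback as kvmppc_booke_emulate_op above */
            r = generic_emulate(inst);
        return r;
    }

    int main(void)
    {
        printf("%s\n", emulate(0x200) == EMULATE_DONE ? "done" : "fail");
        return 0;
    }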
index 6fca38c..ee1147c 100644 (file)
@@ -191,7 +191,7 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 
 /* XXX Should probably auto-generate instruction decoding for a particular core
  * from opcode tables in the future. */
-int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu)
 {
        u32 inst;
        int rs, rt, sprn;
@@ -270,9 +270,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                 * these are illegal instructions.
                 */
                if (inst == KVMPPC_INST_SW_BREAKPOINT) {
-                       run->exit_reason = KVM_EXIT_DEBUG;
-                       run->debug.arch.status = 0;
-                       run->debug.arch.address = kvmppc_get_pc(vcpu);
+                       vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+                       vcpu->run->debug.arch.status = 0;
+                       vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
                        emulated = EMULATE_EXIT_USER;
                        advance = 0;
                } else
@@ -285,7 +285,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
        }
 
        if (emulated == EMULATE_FAIL) {
-               emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
+               emulated = vcpu->kvm->arch.kvm_ops->emulate_op(vcpu, inst,
                                                               &advance);
                if (emulated == EMULATE_AGAIN) {
                        advance = 0;
index 135d0e6..48272a9 100644 (file)
@@ -71,7 +71,6 @@ static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu)
  */
 int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 {
-       struct kvm_run *run = vcpu->run;
        u32 inst;
        enum emulation_result emulated = EMULATE_FAIL;
        int advance = 1;
@@ -104,10 +103,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                        int instr_byte_swap = op.type & BYTEREV;
 
                        if (op.type & SIGNEXT)
-                               emulated = kvmppc_handle_loads(run, vcpu,
+                               emulated = kvmppc_handle_loads(vcpu,
                                                op.reg, size, !instr_byte_swap);
                        else
-                               emulated = kvmppc_handle_load(run, vcpu,
+                               emulated = kvmppc_handle_load(vcpu,
                                                op.reg, size, !instr_byte_swap);
 
                        if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
@@ -124,10 +123,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                                vcpu->arch.mmio_sp64_extend = 1;
 
                        if (op.type & SIGNEXT)
-                               emulated = kvmppc_handle_loads(run, vcpu,
+                               emulated = kvmppc_handle_loads(vcpu,
                                             KVM_MMIO_REG_FPR|op.reg, size, 1);
                        else
-                               emulated = kvmppc_handle_load(run, vcpu,
+                               emulated = kvmppc_handle_load(vcpu,
                                             KVM_MMIO_REG_FPR|op.reg, size, 1);
 
                        if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
@@ -164,12 +163,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
                        if (size == 16) {
                                vcpu->arch.mmio_vmx_copy_nums = 2;
-                               emulated = kvmppc_handle_vmx_load(run,
-                                               vcpu, KVM_MMIO_REG_VMX|op.reg,
+                               emulated = kvmppc_handle_vmx_load(vcpu,
+                                               KVM_MMIO_REG_VMX|op.reg,
                                                8, 1);
                        } else {
                                vcpu->arch.mmio_vmx_copy_nums = 1;
-                               emulated = kvmppc_handle_vmx_load(run, vcpu,
+                               emulated = kvmppc_handle_vmx_load(vcpu,
                                                KVM_MMIO_REG_VMX|op.reg,
                                                size, 1);
                        }
@@ -217,7 +216,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                                io_size_each = op.element_size;
                        }
 
-                       emulated = kvmppc_handle_vsx_load(run, vcpu,
+                       emulated = kvmppc_handle_vsx_load(vcpu,
                                        KVM_MMIO_REG_VSX|op.reg, io_size_each,
                                        1, op.type & SIGNEXT);
                        break;
@@ -227,8 +226,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                        /* if need byte reverse, op.val has been reversed by
                         * analyse_instr().
                         */
-                       emulated = kvmppc_handle_store(run, vcpu, op.val,
-                                       size, 1);
+                       emulated = kvmppc_handle_store(vcpu, op.val, size, 1);
 
                        if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
                                kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
@@ -250,7 +248,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                        if (op.type & FPCONV)
                                vcpu->arch.mmio_sp64_extend = 1;
 
-                       emulated = kvmppc_handle_store(run, vcpu,
+                       emulated = kvmppc_handle_store(vcpu,
                                        VCPU_FPR(vcpu, op.reg), size, 1);
 
                        if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
@@ -290,12 +288,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
                        if (size == 16) {
                                vcpu->arch.mmio_vmx_copy_nums = 2;
-                               emulated = kvmppc_handle_vmx_store(run,
-                                               vcpu, op.reg, 8, 1);
+                               emulated = kvmppc_handle_vmx_store(vcpu,
+                                               op.reg, 8, 1);
                        } else {
                                vcpu->arch.mmio_vmx_copy_nums = 1;
-                               emulated = kvmppc_handle_vmx_store(run,
-                                               vcpu, op.reg, size, 1);
+                               emulated = kvmppc_handle_vmx_store(vcpu,
+                                               op.reg, size, 1);
                        }
 
                        break;
@@ -338,7 +336,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                                io_size_each = op.element_size;
                        }
 
-                       emulated = kvmppc_handle_vsx_store(run, vcpu,
+                       emulated = kvmppc_handle_vsx_store(vcpu,
                                        op.reg, io_size_each, 1);
                        break;
                }
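
In the Altivec branches above, a 16-byte access is emulated as two 8-byte MMIO transactions (mmio_vmx_copy_nums = 2), with the guest physical address advanced by the MMIO length after each completed chunk. A small standalone sketch of that chunking arithmetic, with hypothetical names:

    #include <stdio.h>

    int main(void)
    {
        unsigned long paddr = 0x1000;   /* assumed starting guest physical address */
        unsigned int size = 16;         /* one 16-byte vector load/store */
        unsigned int chunk = 8;         /* emulated 8 bytes at a time */
        unsigned int copies = size / chunk;

        while (copies--) {
            printf("mmio %u bytes at 0x%lx\n", chunk, paddr);
            paddr += chunk;             /* paddr_accessed += run->mmio.len */
        }
        return 0;
    }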
index 27ccff6..dd7d141 100644 (file)
@@ -279,7 +279,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
 
-int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu)
 {
        enum emulation_result er;
        int r;
@@ -295,7 +295,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
                r = RESUME_GUEST;
                break;
        case EMULATE_DO_MMIO:
-               run->exit_reason = KVM_EXIT_MMIO;
+               vcpu->run->exit_reason = KVM_EXIT_MMIO;
                /* We must reload nonvolatiles because "update" load/store
                 * instructions modify register state. */
                /* Future optimization: only reload non-volatiles if they were
@@ -1107,9 +1107,9 @@ static inline u32 dp_to_sp(u64 fprd)
 #define dp_to_sp(x)    (x)
 #endif /* CONFIG_PPC_FPU */
 
-static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
-                                      struct kvm_run *run)
+static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *run = vcpu->run;
        u64 uninitialized_var(gpr);
 
        if (run->mmio.len > sizeof(gpr)) {
@@ -1219,10 +1219,11 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
        }
 }
 
-static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int __kvmppc_handle_load(struct kvm_vcpu *vcpu,
                                unsigned int rt, unsigned int bytes,
                                int is_default_endian, int sign_extend)
 {
+       struct kvm_run *run = vcpu->run;
        int idx, ret;
        bool host_swabbed;
 
@@ -1256,7 +1257,7 @@ static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
        if (!ret) {
-               kvmppc_complete_mmio_load(vcpu, run);
+               kvmppc_complete_mmio_load(vcpu);
                vcpu->mmio_needed = 0;
                return EMULATE_DONE;
        }
@@ -1264,24 +1265,24 @@ static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return EMULATE_DO_MMIO;
 }
 
-int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_load(struct kvm_vcpu *vcpu,
                       unsigned int rt, unsigned int bytes,
                       int is_default_endian)
 {
-       return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 0);
+       return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 0);
 }
 EXPORT_SYMBOL_GPL(kvmppc_handle_load);
 
 /* Same as above, but sign extends */
-int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_loads(struct kvm_vcpu *vcpu,
                        unsigned int rt, unsigned int bytes,
                        int is_default_endian)
 {
-       return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1);
+       return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 1);
 }
 
 #ifdef CONFIG_VSX
-int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu,
                        unsigned int rt, unsigned int bytes,
                        int is_default_endian, int mmio_sign_extend)
 {
@@ -1292,13 +1293,13 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
                return EMULATE_FAIL;
 
        while (vcpu->arch.mmio_vsx_copy_nums) {
-               emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
+               emulated = __kvmppc_handle_load(vcpu, rt, bytes,
                        is_default_endian, mmio_sign_extend);
 
                if (emulated != EMULATE_DONE)
                        break;
 
-               vcpu->arch.paddr_accessed += run->mmio.len;
+               vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
 
                vcpu->arch.mmio_vsx_copy_nums--;
                vcpu->arch.mmio_vsx_offset++;
@@ -1307,9 +1308,10 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 }
 #endif /* CONFIG_VSX */
 
-int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_store(struct kvm_vcpu *vcpu,
                        u64 val, unsigned int bytes, int is_default_endian)
 {
+       struct kvm_run *run = vcpu->run;
        void *data = run->mmio.data;
        int idx, ret;
        bool host_swabbed;
@@ -1423,7 +1425,7 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
        return result;
 }
 
-int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu,
                        int rs, unsigned int bytes, int is_default_endian)
 {
        u64 val;
@@ -1439,13 +1441,13 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
                if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
                        return EMULATE_FAIL;
 
-               emulated = kvmppc_handle_store(run, vcpu,
+               emulated = kvmppc_handle_store(vcpu,
                         val, bytes, is_default_endian);
 
                if (emulated != EMULATE_DONE)
                        break;
 
-               vcpu->arch.paddr_accessed += run->mmio.len;
+               vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
 
                vcpu->arch.mmio_vsx_copy_nums--;
                vcpu->arch.mmio_vsx_offset++;
@@ -1454,19 +1456,19 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return emulated;
 }
 
-static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
-                       struct kvm_run *run)
+static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *run = vcpu->run;
        enum emulation_result emulated = EMULATE_FAIL;
        int r;
 
        vcpu->arch.paddr_accessed += run->mmio.len;
 
        if (!vcpu->mmio_is_write) {
-               emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr,
+               emulated = kvmppc_handle_vsx_load(vcpu, vcpu->arch.io_gpr,
                         run->mmio.len, 1, vcpu->arch.mmio_sign_extend);
        } else {
-               emulated = kvmppc_handle_vsx_store(run, vcpu,
+               emulated = kvmppc_handle_vsx_store(vcpu,
                         vcpu->arch.io_gpr, run->mmio.len, 1);
        }
 
@@ -1490,7 +1492,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
 #endif /* CONFIG_VSX */
 
 #ifdef CONFIG_ALTIVEC
-int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu,
                unsigned int rt, unsigned int bytes, int is_default_endian)
 {
        enum emulation_result emulated = EMULATE_DONE;
@@ -1499,13 +1501,13 @@ int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
                return EMULATE_FAIL;
 
        while (vcpu->arch.mmio_vmx_copy_nums) {
-               emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
+               emulated = __kvmppc_handle_load(vcpu, rt, bytes,
                                is_default_endian, 0);
 
                if (emulated != EMULATE_DONE)
                        break;
 
-               vcpu->arch.paddr_accessed += run->mmio.len;
+               vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
                vcpu->arch.mmio_vmx_copy_nums--;
                vcpu->arch.mmio_vmx_offset++;
        }
@@ -1585,7 +1587,7 @@ int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val)
        return result;
 }
 
-int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu,
                unsigned int rs, unsigned int bytes, int is_default_endian)
 {
        u64 val = 0;
@@ -1620,12 +1622,12 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        return EMULATE_FAIL;
                }
 
-               emulated = kvmppc_handle_store(run, vcpu, val, bytes,
+               emulated = kvmppc_handle_store(vcpu, val, bytes,
                                is_default_endian);
                if (emulated != EMULATE_DONE)
                        break;
 
-               vcpu->arch.paddr_accessed += run->mmio.len;
+               vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
                vcpu->arch.mmio_vmx_copy_nums--;
                vcpu->arch.mmio_vmx_offset++;
        }
@@ -1633,19 +1635,19 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return emulated;
 }
 
-static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu,
-               struct kvm_run *run)
+static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *run = vcpu->run;
        enum emulation_result emulated = EMULATE_FAIL;
        int r;
 
        vcpu->arch.paddr_accessed += run->mmio.len;
 
        if (!vcpu->mmio_is_write) {
-               emulated = kvmppc_handle_vmx_load(run, vcpu,
+               emulated = kvmppc_handle_vmx_load(vcpu,
                                vcpu->arch.io_gpr, run->mmio.len, 1);
        } else {
-               emulated = kvmppc_handle_vmx_store(run, vcpu,
+               emulated = kvmppc_handle_vmx_store(vcpu,
                                vcpu->arch.io_gpr, run->mmio.len, 1);
        }
 
@@ -1775,7 +1777,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        if (vcpu->mmio_needed) {
                vcpu->mmio_needed = 0;
                if (!vcpu->mmio_is_write)
-                       kvmppc_complete_mmio_load(vcpu, run);
+                       kvmppc_complete_mmio_load(vcpu);
 #ifdef CONFIG_VSX
                if (vcpu->arch.mmio_vsx_copy_nums > 0) {
                        vcpu->arch.mmio_vsx_copy_nums--;
@@ -1783,7 +1785,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                }
 
                if (vcpu->arch.mmio_vsx_copy_nums > 0) {
-                       r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
+                       r = kvmppc_emulate_mmio_vsx_loadstore(vcpu);
                        if (r == RESUME_HOST) {
                                vcpu->mmio_needed = 1;
                                goto out;
@@ -1797,7 +1799,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                }
 
                if (vcpu->arch.mmio_vmx_copy_nums > 0) {
-                       r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run);
+                       r = kvmppc_emulate_mmio_vmx_loadstore(vcpu);
                        if (r == RESUME_HOST) {
                                vcpu->mmio_needed = 1;
                                goto out;
@@ -1830,7 +1832,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        if (run->immediate_exit)
                r = -EINTR;
        else
-               r = kvmppc_vcpu_run(run, vcpu);
+               r = kvmppc_vcpu_run(vcpu);
 
        kvm_sigset_deactivate(vcpu);
 
index 8a1e3b0..4a61a97 100644 (file)
@@ -472,9 +472,9 @@ TRACE_EVENT(kvmppc_run_vcpu_enter,
 );
 
 TRACE_EVENT(kvmppc_run_vcpu_exit,
-       TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
+       TP_PROTO(struct kvm_vcpu *vcpu),
 
-       TP_ARGS(vcpu, run),
+       TP_ARGS(vcpu),
 
        TP_STRUCT__entry(
                __field(int,            vcpu_id)
@@ -484,7 +484,7 @@ TRACE_EVENT(kvmppc_run_vcpu_exit,
 
        TP_fast_assign(
                __entry->vcpu_id  = vcpu->vcpu_id;
-               __entry->exit     = run->exit_reason;
+               __entry->exit     = vcpu->run->exit_reason;
                __entry->ret      = vcpu->arch.ret;
        ),
 
index 25db70b..266a6ca 100644 (file)
@@ -127,7 +127,7 @@ static void update_csb(struct vas_window *window,
                return;
        }
 
-       use_mm(window->mm);
+       kthread_use_mm(window->mm);
        rc = copy_to_user(csb_addr, &csb, sizeof(csb));
        /*
         * User space polls on csb.flags (first byte). So add barrier
@@ -139,7 +139,7 @@ static void update_csb(struct vas_window *window,
                smp_mb();
                rc = copy_to_user(csb_addr, &csb, sizeof(u8));
        }
-       unuse_mm(window->mm);
+       kthread_unuse_mm(window->mm);
        put_task_struct(tsk);
 
        /* Success */
index c733007..128192e 100644 (file)
@@ -12,64 +12,70 @@ config 32BIT
 
 config RISCV
        def_bool y
-       select OF
-       select OF_EARLY_FLATTREE
-       select OF_IRQ
+       select ARCH_CLOCKSOURCE_INIT
        select ARCH_HAS_BINFMT_FLAT
+       select ARCH_HAS_DEBUG_VIRTUAL if MMU
        select ARCH_HAS_DEBUG_WX
+       select ARCH_HAS_GCOV_PROFILE_ALL
+       select ARCH_HAS_GIGANTIC_PAGE
+       select ARCH_HAS_MMIOWB
+       select ARCH_HAS_PTE_SPECIAL
+       select ARCH_HAS_SET_DIRECT_MAP
+       select ARCH_HAS_SET_MEMORY
+       select ARCH_HAS_STRICT_KERNEL_RWX if MMU
+       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select ARCH_WANT_FRAME_POINTERS
+       select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
        select CLONE_BACKWARDS
        select COMMON_CLK
+       select EDAC_SUPPORT
+       select GENERIC_ARCH_TOPOLOGY if SMP
+       select GENERIC_ATOMIC64 if !64BIT
        select GENERIC_CLOCKEVENTS
+       select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
+       select GENERIC_IOREMAP
+       select GENERIC_IRQ_MULTI_HANDLER
        select GENERIC_IRQ_SHOW
        select GENERIC_PCI_IOMAP
+       select GENERIC_PTDUMP if MMU
        select GENERIC_SCHED_CLOCK
+       select GENERIC_SMP_IDLE_THREAD
        select GENERIC_STRNCPY_FROM_USER if MMU
        select GENERIC_STRNLEN_USER if MMU
-       select GENERIC_SMP_IDLE_THREAD
-       select GENERIC_ATOMIC64 if !64BIT
-       select GENERIC_IOREMAP
-       select GENERIC_PTDUMP if MMU
+       select GENERIC_TIME_VSYSCALL if MMU && 64BIT
+       select HANDLE_DOMAIN_IRQ
        select HAVE_ARCH_AUDITSYSCALL
+       select HAVE_ARCH_KASAN if MMU && 64BIT
+       select HAVE_ARCH_KGDB
+       select HAVE_ARCH_KGDB_QXFER_PKT
+       select HAVE_ARCH_MMAP_RND_BITS if MMU
        select HAVE_ARCH_SECCOMP_FILTER
+       select HAVE_ARCH_TRACEHOOK
        select HAVE_ASM_MODVERSIONS
+       select HAVE_COPY_THREAD_TLS
        select HAVE_DMA_CONTIGUOUS if MMU
+       select HAVE_EBPF_JIT if MMU
        select HAVE_FUTEX_CMPXCHG if FUTEX
+       select HAVE_GENERIC_VDSO if MMU && 64BIT
+       select HAVE_PCI
        select HAVE_PERF_EVENTS
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
        select HAVE_SYSCALL_TRACEPOINTS
        select IRQ_DOMAIN
-       select SPARSE_IRQ
-       select SYSCTL_EXCEPTION_TRACE
-       select HAVE_ARCH_TRACEHOOK
-       select HAVE_PCI
        select MODULES_USE_ELF_RELA if MODULES
        select MODULE_SECTIONS if MODULES
-       select THREAD_INFO_IN_TASK
+       select OF
+       select OF_EARLY_FLATTREE
+       select OF_IRQ
        select PCI_DOMAINS_GENERIC if PCI
        select PCI_MSI if PCI
+       select RISCV_INTC
        select RISCV_TIMER
-       select GENERIC_IRQ_MULTI_HANDLER
-       select GENERIC_ARCH_TOPOLOGY if SMP
-       select ARCH_HAS_PTE_SPECIAL
-       select ARCH_HAS_MMIOWB
-       select ARCH_HAS_DEBUG_VIRTUAL if MMU
-       select HAVE_EBPF_JIT if MMU
-       select EDAC_SUPPORT
-       select ARCH_HAS_GIGANTIC_PAGE
-       select ARCH_HAS_SET_DIRECT_MAP
-       select ARCH_HAS_SET_MEMORY
-       select ARCH_HAS_STRICT_KERNEL_RWX if MMU
-       select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
        select SPARSEMEM_STATIC if 32BIT
-       select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
-       select HAVE_ARCH_MMAP_RND_BITS if MMU
-       select ARCH_HAS_GCOV_PROFILE_ALL
-       select HAVE_COPY_THREAD_TLS
-       select HAVE_ARCH_KASAN if MMU && 64BIT
-       select HAVE_ARCH_KGDB
-       select HAVE_ARCH_KGDB_QXFER_PKT
+       select SPARSE_IRQ
+       select SYSCTL_EXCEPTION_TRACE
+       select THREAD_INFO_IN_TASK
 
 config ARCH_MMAP_RND_BITS_MIN
        default 18 if 64BIT
@@ -196,11 +202,11 @@ config ARCH_RV64I
        bool "RV64I"
        select 64BIT
        select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000
-       select HAVE_FUNCTION_TRACER
-       select HAVE_FUNCTION_GRAPH_TRACER
-       select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_DYNAMIC_FTRACE if MMU
        select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+       select HAVE_FTRACE_MCOUNT_RECORD
+       select HAVE_FUNCTION_GRAPH_TRACER
+       select HAVE_FUNCTION_TRACER
        select SWIOTLB if MMU
 
 endchoice
diff --git a/arch/riscv/include/asm/clocksource.h b/arch/riscv/include/asm/clocksource.h
new file mode 100644 (file)
index 0000000..4821855
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_CLOCKSOURCE_H
+#define _ASM_CLOCKSOURCE_H
+
+#include <asm/vdso/clocksource.h>
+
+#endif
index 6e1b0e0..9807ad1 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/linkage.h>
 
-#define NR_IRQS         0
-
-void riscv_timer_interrupt(void);
-void riscv_software_interrupt(void);
-
 #include <asm-generic/irq.h>
 
 #endif /* _ASM_RISCV_IRQ_H */
index 3ddb798..bdddcd5 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/const.h>
 
+#include <vdso/processor.h>
+
 #include <asm/ptrace.h>
 
 /*
@@ -58,16 +60,6 @@ static inline void release_thread(struct task_struct *dead_task)
 extern unsigned long get_wchan(struct task_struct *p);
 
 
-static inline void cpu_relax(void)
-{
-#ifdef __riscv_muldiv
-       int dummy;
-       /* In lieu of a halt instruction, induce a long-latency stall. */
-       __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
-#endif
-       barrier();
-}
-
 static inline void wait_for_interrupt(void)
 {
        __asm__ __volatile__ ("wfi");
@@ -75,6 +67,7 @@ static inline void wait_for_interrupt(void)
 
 struct device_node;
 int riscv_of_processor_hartid(struct device_node *node);
+int riscv_of_parent_hartid(struct device_node *node);
 
 extern void riscv_fill_hwcap(void);
 
index f4c7cfd..40bb1c1 100644 (file)
@@ -28,6 +28,9 @@ void show_ipi_stats(struct seq_file *p, int prec);
 /* SMP initialization hook for setup_arch */
 void __init setup_smp(void);
 
+/* Called from C code, this handles an IPI. */
+void handle_IPI(struct pt_regs *regs);
+
 /* Hook for the generic smp_call_function_many() routine. */
 void arch_send_call_function_ipi_mask(struct cpumask *mask);
 
index 7a7fce6..8454f74 100644 (file)
 
 #include <linux/types.h>
 
+#ifndef GENERIC_TIME_VSYSCALL
 struct vdso_data {
 };
+#endif
 
 /*
  * The VDSO symbols are mapped into Linux so we can just use regular symbol
diff --git a/arch/riscv/include/asm/vdso/clocksource.h b/arch/riscv/include/asm/vdso/clocksource.h
new file mode 100644 (file)
index 0000000..df6ea65
--- /dev/null
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSOCLOCKSOURCE_H
+#define __ASM_VDSOCLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES   \
+       VDSO_CLOCKMODE_ARCHTIMER
+
+#endif
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
new file mode 100644 (file)
index 0000000..c8e8186
--- /dev/null
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <asm/csr.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES  1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+                         struct timezone *_tz)
+{
+       register struct __kernel_old_timeval *tv asm("a0") = _tv;
+       register struct timezone *tz asm("a1") = _tz;
+       register long ret asm("a0");
+       register long nr asm("a7") = __NR_gettimeofday;
+
+       asm volatile ("ecall\n"
+                     : "=r" (ret)
+                     : "r"(tv), "r"(tz), "r"(nr)
+                     : "memory");
+
+       return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+       register clockid_t clkid asm("a0") = _clkid;
+       register struct __kernel_timespec *ts asm("a1") = _ts;
+       register long ret asm("a0");
+       register long nr asm("a7") = __NR_clock_gettime;
+
+       asm volatile ("ecall\n"
+                     : "=r" (ret)
+                     : "r"(clkid), "r"(ts), "r"(nr)
+                     : "memory");
+
+       return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+       register clockid_t clkid asm("a0") = _clkid;
+       register struct __kernel_timespec *ts asm("a1") = _ts;
+       register long ret asm("a0");
+       register long nr asm("a7") = __NR_clock_getres;
+
+       asm volatile ("ecall\n"
+                     : "=r" (ret)
+                     : "r"(clkid), "r"(ts), "r"(nr)
+                     : "memory");
+
+       return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+       /*
+        * The purpose of csr_read(CSR_TIME) is to trap the system into
+        * M-mode to obtain the value of CSR_TIME. Hence, unlike other
+        * architecture, no fence instructions surround the csr_read()
+        */
+       return csr_read(CSR_TIME);
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+       return _vdso_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
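
With the generic vDSO wired up for rv64 (GENERIC_TIME_VSYSCALL and HAVE_GENERIC_VDSO in the Kconfig hunk above), clock_gettime() and gettimeofday() can usually be answered entirely in userspace from the vDSO data page, with the ecall fallbacks above used only when the fast path cannot serve the request. A minimal userspace check (an ordinary libc call; whether it actually avoids the syscall depends on the kernel and libc in use):

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        struct timespec ts;

        /* On a vDSO-enabled kernel this is typically serviced without
         * entering the kernel; otherwise it falls back to the syscall. */
        if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
            perror("clock_gettime");
            return 1;
        }
        printf("monotonic: %lld.%09ld\n", (long long)ts.tv_sec, (long)ts.tv_nsec);
        return 0;
    }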
diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h
new file mode 100644 (file)
index 0000000..82a5693
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+static inline void cpu_relax(void)
+{
+#ifdef __riscv_muldiv
+       int dummy;
+       /* In lieu of a halt instruction, induce a long-latency stall. */
+       __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
+#endif
+       barrier();
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/riscv/include/asm/vdso/vsyscall.h b/arch/riscv/include/asm/vdso/vsyscall.h
new file mode 100644 (file)
index 0000000..82fd5d8
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+
+extern struct vdso_data *vdso_data;
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline struct vdso_data *__riscv_get_k_vdso_data(void)
+{
+       return vdso_data;
+}
+
+#define __arch_get_k_vdso_data __riscv_get_k_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
index 40a3c44..6d59e69 100644 (file)
@@ -44,6 +44,22 @@ int riscv_of_processor_hartid(struct device_node *node)
        return hart;
 }
 
+/*
+ * Find hart ID of the CPU DT node under which given DT node falls.
+ *
+ * To achieve this, we walk up the DT tree until we find an active
+ * RISC-V core (HART) node and extract the cpuid from it.
+ */
+int riscv_of_parent_hartid(struct device_node *node)
+{
+       for (; node; node = node->parent) {
+               if (of_device_is_compatible(node, "riscv"))
+                       return riscv_of_processor_hartid(node);
+       }
+
+       return -1;
+}
+
 #ifdef CONFIG_PROC_FS
 
 static void print_isa(struct seq_file *f, const char *isa)
index 56d071b..cae7e6d 100644 (file)
@@ -106,7 +106,9 @@ _save_context:
 
        /* Handle interrupts */
        move a0, sp /* pt_regs */
-       tail do_IRQ
+       la a1, handle_arch_irq
+       REG_L a1, (a1)
+       jr a1
 1:
        /*
         * Exceptions run with interrupts enabled or disabled depending on the
index 345c4f2..7207fa0 100644 (file)
@@ -7,7 +7,6 @@
 
 #include <linux/interrupt.h>
 #include <linux/irqchip.h>
-#include <linux/irqdomain.h>
 #include <linux/seq_file.h>
 #include <asm/smp.h>
 
@@ -17,37 +16,9 @@ int arch_show_interrupts(struct seq_file *p, int prec)
        return 0;
 }
 
-asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
-{
-       struct pt_regs *old_regs = set_irq_regs(regs);
-
-       irq_enter();
-       switch (regs->cause & ~CAUSE_IRQ_FLAG) {
-       case RV_IRQ_TIMER:
-               riscv_timer_interrupt();
-               break;
-#ifdef CONFIG_SMP
-       case RV_IRQ_SOFT:
-               /*
-                * We only use software interrupts to pass IPIs, so if a non-SMP
-                * system gets one, then we don't know what to do.
-                */
-               riscv_software_interrupt();
-               break;
-#endif
-       case RV_IRQ_EXT:
-               handle_arch_irq(regs);
-               break;
-       default:
-               pr_alert("unexpected interrupt cause 0x%lx", regs->cause);
-               BUG();
-       }
-       irq_exit();
-
-       set_irq_regs(old_regs);
-}
-
 void __init init_IRQ(void)
 {
        irqchip_init();
+       if (!handle_arch_irq)
+               panic("No interrupt controller found.");
 }
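
The per-cause switch in do_IRQ() is gone: the trap entry above now jumps through handle_arch_irq, a single pointer that the root interrupt controller driver registers during irqchip_init() (GENERIC_IRQ_MULTI_HANDLER), and init_IRQ() panics if nothing registered one. A standalone sketch of that registration-plus-dispatch shape, with hypothetical names rather than the kernel's:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for handle_arch_irq: one writable slot,
     * filled in by whichever root "driver" probes first. */
    static void (*root_irq_handler)(int cause);

    static void register_root_handler(void (*fn)(int cause))
    {
        root_irq_handler = fn;
    }

    static void intc_handle(int cause)
    {
        printf("root handler: cause %d\n", cause);
    }

    /* What the trap entry does now: no switch on the cause, just an
     * indirect call through the registered handler (or give up if none). */
    static void trap_entry(int cause)
    {
        if (!root_irq_handler) {
            fprintf(stderr, "No interrupt controller found.\n");
            exit(1);
        }
        root_irq_handler(cause);
    }

    int main(void)
    {
        register_root_handler(intc_handle);  /* irqchip_init() analogue */
        trap_entry(9);                       /* e.g. an external interrupt */
        return 0;
    }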
index 5805791..d4a64df 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/kprobes.h>
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
+#include <asm/patch.h>
 
 struct patch_insn {
        void *addr;
index a65a8fa..b1d4f45 100644 (file)
@@ -123,11 +123,14 @@ static inline void clear_ipi(void)
                clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
 }
 
-void riscv_software_interrupt(void)
+void handle_IPI(struct pt_regs *regs)
 {
+       struct pt_regs *old_regs = set_irq_regs(regs);
        unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
        unsigned long *stats = ipi_data[smp_processor_id()].stats;
 
+       irq_enter();
+
        clear_ipi();
 
        while (true) {
@@ -138,7 +141,7 @@ void riscv_software_interrupt(void)
 
                ops = xchg(pending_ipis, 0);
                if (ops == 0)
-                       return;
+                       goto done;
 
                if (ops & (1 << IPI_RESCHEDULE)) {
                        stats[IPI_RESCHEDULE]++;
@@ -160,6 +163,10 @@ void riscv_software_interrupt(void)
                /* Order data access and bit testing. */
                mb();
        }
+
+done:
+       irq_exit();
+       set_irq_regs(old_regs);
 }
 
 static const char * const ipi_names[] = {
index 6a53c02..4d3a104 100644 (file)
@@ -26,3 +26,12 @@ void __init time_init(void)
        lpj_fine = riscv_timebase / HZ;
        timer_probe();
 }
+
+void clocksource_arch_init(struct clocksource *cs)
+{
+#ifdef CONFIG_GENERIC_GETTIMEOFDAY
+       cs->vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER;
+#else
+       cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
+#endif
+}
index 5080fdf..ecec177 100644 (file)
@@ -183,6 +183,4 @@ void trap_init(void)
        csr_write(CSR_SCRATCH, 0);
        /* Set the exception vector address */
        csr_write(CSR_TVEC, &handle_exception);
-       /* Enable interrupts */
-       csr_write(CSR_IE, IE_SIE);
 }
index e827fae..6782042 100644 (file)
 #include <linux/slab.h>
 #include <linux/binfmts.h>
 #include <linux/err.h>
-
+#include <asm/page.h>
+#ifdef GENERIC_TIME_VSYSCALL
+#include <vdso/datapage.h>
+#else
 #include <asm/vdso.h>
+#endif
 
 extern char vdso_start[], vdso_end[];
 
@@ -26,7 +30,7 @@ static union {
        struct vdso_data        data;
        u8                      page[PAGE_SIZE];
 } vdso_data_store __page_aligned_data;
-static struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_data *vdso_data = &vdso_data_store.data;
 
 static int __init vdso_init(void)
 {
@@ -75,13 +79,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
         */
        mm->context.vdso = (void *)vdso_base;
 
-       ret = install_special_mapping(mm, vdso_base, vdso_len,
+       ret =
+          install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
                (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
                vdso_pagelist);
 
-       if (unlikely(ret))
+       if (unlikely(ret)) {
                mm->context.vdso = NULL;
+               goto end;
+       }
 
+       vdso_base += (vdso_pages << PAGE_SHIFT);
+       ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+               (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
+
+       if (unlikely(ret))
+               mm->context.vdso = NULL;
 end:
        mmap_write_unlock(mm);
        return ret;
@@ -91,5 +104,8 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 {
        if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
                return "[vdso]";
+       if (vma->vm_mm && (vma->vm_start ==
+                          (long)vma->vm_mm->context.vdso + PAGE_SIZE))
+               return "[vdso_data]";
        return NULL;
 }
index 4c8b2a4..38ba55b 100644 (file)
@@ -1,12 +1,14 @@
 # SPDX-License-Identifier: GPL-2.0-only
 # Copied from arch/tile/kernel/vdso/Makefile
 
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of the generic Makefile.
+ARCH_REL_TYPE_ABS := R_RISCV_32|R_RISCV_64|R_RISCV_JUMP_SLOT
+include $(srctree)/lib/vdso/Makefile
 # Symbols present in the vdso
 vdso-syms  = rt_sigreturn
 ifdef CONFIG_64BIT
-vdso-syms += gettimeofday
-vdso-syms += clock_gettime
-vdso-syms += clock_getres
+vdso-syms += vgettimeofday
 endif
 vdso-syms += getcpu
 vdso-syms += flush_icache
@@ -14,6 +16,10 @@ vdso-syms += flush_icache
 # Files to link into the vdso
 obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
 
+ifneq ($(c-gettimeofday-y),)
+  CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
+endif
+
 # Build rules
 targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
 obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S
deleted file mode 100644 (file)
index 91378a5..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2017 SiFive
- */
-
-#include <linux/linkage.h>
-#include <asm/unistd.h>
-
-       .text
-/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */
-ENTRY(__vdso_clock_getres)
-       .cfi_startproc
-       /* For now, just do the syscall. */
-       li a7, __NR_clock_getres
-       ecall
-       ret
-       .cfi_endproc
-ENDPROC(__vdso_clock_getres)
diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S
deleted file mode 100644 (file)
index 5371fd9..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2017 SiFive
- */
-
-#include <linux/linkage.h>
-#include <asm/unistd.h>
-
-       .text
-/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */
-ENTRY(__vdso_clock_gettime)
-       .cfi_startproc
-       /* For now, just do the syscall. */
-       li a7, __NR_clock_gettime
-       ecall
-       ret
-       .cfi_endproc
-ENDPROC(__vdso_clock_gettime)
diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S
deleted file mode 100644 (file)
index e6fb8af..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2017 SiFive
- */
-
-#include <linux/linkage.h>
-#include <asm/unistd.h>
-
-       .text
-/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */
-ENTRY(__vdso_gettimeofday)
-       .cfi_startproc
-       /* For now, just do the syscall. */
-       li a7, __NR_gettimeofday
-       ecall
-       ret
-       .cfi_endproc
-ENDPROC(__vdso_gettimeofday)
index f66a091..e6f558b 100644 (file)
@@ -2,11 +2,13 @@
 /*
  * Copyright (C) 2012 Regents of the University of California
  */
+#include <asm/page.h>
 
 OUTPUT_ARCH(riscv)
 
 SECTIONS
 {
+       PROVIDE(_vdso_data = . + PAGE_SIZE);
        . = SIZEOF_HEADERS;
 
        .hash           : { *(.hash) }                  :text
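
The _vdso_data symbol PROVIDE()d above is how the generic gettimeofday code locates the shared data page that arch_setup_additional_pages() maps next to the vDSO text. A sketch of the corresponding arch hook; the extern declaration actually comes from <vdso/datapage.h> and is repeated here only for context:

extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));

static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
{
        return _vdso_data;
}
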
diff --git a/arch/riscv/kernel/vdso/vgettimeofday.c b/arch/riscv/kernel/vdso/vgettimeofday.c
new file mode 100644 (file)
index 0000000..d264943
--- /dev/null
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from arch/arm64/kernel/vdso/vgettimeofday.c
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ * Copyright (C) 2020 SiFive
+ */
+
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+       return __cvdso_clock_gettime(clock, ts);
+}
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
+{
+       return __cvdso_gettimeofday(tv, tz);
+}
+
+int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res)
+{
+       return __cvdso_clock_getres(clock_id, res);
+}
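
With the syscall-stub assembly replaced by the common C implementation, an ordinary time query on rv64 should no longer trap into the kernel; running the program below under strace should show no clock_gettime syscall, assuming a libc that resolves the vDSO symbol:

#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);    /* resolved via __vdso_clock_gettime */
        printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
        return 0;
}
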
index 9996f49..f4adb36 100644 (file)
@@ -480,17 +480,6 @@ static void __init setup_vm_final(void)
        csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
        local_flush_tlb_all();
 }
-
-void free_initmem(void)
-{
-       unsigned long init_begin = (unsigned long)__init_begin;
-       unsigned long init_end = (unsigned long)__init_end;
-
-       /* Make the region as non-execuatble. */
-       set_memory_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
-       free_initmem_default(POISON_FREE_INITMEM);
-}
-
 #else
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 {
index 3d55488..cee3cb6 100644 (file)
@@ -978,7 +978,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work);
 
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                                     struct kvm_async_pf *work);
 
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
index d0ff26d..d47c197 100644 (file)
@@ -3923,11 +3923,13 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
        }
 }
 
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                                     struct kvm_async_pf *work)
 {
        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+
+       return true;
 }
 
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
index 10dae8b..d41812a 100644 (file)
@@ -233,6 +233,7 @@ config X86
        select THREAD_INFO_IN_TASK
        select USER_STACKTRACE_SUPPORT
        select VIRT_TO_BUS
+       select HAVE_ARCH_KCSAN                  if X86_64
        select X86_FEATURE_NAMES                if PROC_FS
        select PROC_PID_ARCH_STATUS             if PROC_FS
        imply IMA_SECURE_AND_OR_TRUSTED_BOOT    if EFI
@@ -822,14 +823,6 @@ config PVH
          This option enables the PVH entry point for guest virtual machines
          as specified in the x86/HVM direct boot ABI.
 
-config KVM_DEBUG_FS
-       bool "Enable debug information for KVM Guests in debugfs"
-       depends on KVM_GUEST && DEBUG_FS
-       ---help---
-         This option enables collection of various statistics for KVM guest.
-         Statistics are displayed in debugfs filesystem. Enabling this option
-         may incur significant overhead.
-
 config PARAVIRT_TIME_ACCOUNTING
        bool "Paravirtual steal time accounting"
        depends on PARAVIRT
index 4c53556..fe60520 100644 (file)
@@ -9,7 +9,9 @@
 # Changed by many, many contributors over the years.
 #
 
+# Sanitizer runtimes are unavailable and cannot be linked for early boot code.
 KASAN_SANITIZE                 := n
+KCSAN_SANITIZE                 := n
 OBJECT_FILES_NON_STANDARD      := y
 
 # Kernel does not boot with kcov instrumentation here.
index 5f7c262..7619742 100644 (file)
@@ -17,7 +17,9 @@
 #      (see scripts/Makefile.lib size_append)
 #      compressed vmlinux.bin.all + u32 size of vmlinux.bin.all
 
+# Sanitizer runtimes are unavailable and cannot be linked for early boot code.
 KASAN_SANITIZE                 := n
+KCSAN_SANITIZE                 := n
 OBJECT_FILES_NON_STANDARD      := y
 
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
index 54e03ab..04e65f0 100644 (file)
@@ -10,8 +10,11 @@ ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
 include $(srctree)/lib/vdso/Makefile
 
 KBUILD_CFLAGS += $(DISABLE_LTO)
+
+# Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE                 := n
 UBSAN_SANITIZE                 := n
+KCSAN_SANITIZE                 := n
 OBJECT_FILES_NON_STANDARD      := y
 
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
@@ -29,6 +32,9 @@ vobjs32-y += vdso32/vclock_gettime.o
 
 # files to link into kernel
 obj-y                          += vma.o
+KASAN_SANITIZE_vma.o           := y
+UBSAN_SANITIZE_vma.o           := y
+KCSAN_SANITIZE_vma.o           := y
 OBJECT_FILES_NON_STANDARD_vma.o        := n
 
 # vDSO images to build
index 115127c..a9ae588 100644 (file)
@@ -28,7 +28,7 @@ static __always_inline int arch_atomic_read(const atomic_t *v)
         * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here,
         * it's non-inlined function that increases binary size and stack usage.
         */
-       return READ_ONCE((v)->counter);
+       return __READ_ONCE((v)->counter);
 }
 
 /**
@@ -40,7 +40,7 @@ static __always_inline int arch_atomic_read(const atomic_t *v)
  */
 static __always_inline void arch_atomic_set(atomic_t *v, int i)
 {
-       WRITE_ONCE(v->counter, i);
+       __WRITE_ONCE(v->counter, i);
 }
 
 /**
@@ -166,6 +166,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
 {
        return i + xadd(&v->counter, i);
 }
+#define arch_atomic_add_return arch_atomic_add_return
 
 /**
  * arch_atomic_sub_return - subtract integer and return
@@ -178,32 +179,37 @@ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
 {
        return arch_atomic_add_return(-i, v);
 }
+#define arch_atomic_sub_return arch_atomic_sub_return
 
 static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
 {
        return xadd(&v->counter, i);
 }
+#define arch_atomic_fetch_add arch_atomic_fetch_add
 
 static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
 {
        return xadd(&v->counter, -i);
 }
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
 
 static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
 {
        return arch_cmpxchg(&v->counter, old, new);
 }
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
 
-#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
 static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
        return try_cmpxchg(&v->counter, old, new);
 }
+#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
 
 static inline int arch_atomic_xchg(atomic_t *v, int new)
 {
        return arch_xchg(&v->counter, new);
 }
+#define arch_atomic_xchg arch_atomic_xchg
 
 static inline void arch_atomic_and(int i, atomic_t *v)
 {
@@ -221,6 +227,7 @@ static inline int arch_atomic_fetch_and(int i, atomic_t *v)
 
        return val;
 }
+#define arch_atomic_fetch_and arch_atomic_fetch_and
 
 static inline void arch_atomic_or(int i, atomic_t *v)
 {
@@ -238,6 +245,7 @@ static inline int arch_atomic_fetch_or(int i, atomic_t *v)
 
        return val;
 }
+#define arch_atomic_fetch_or arch_atomic_fetch_or
 
 static inline void arch_atomic_xor(int i, atomic_t *v)
 {
@@ -255,6 +263,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 
        return val;
 }
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
 
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
@@ -262,6 +271,6 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 # include <asm/atomic64_64.h>
 #endif
 
-#include <asm-generic/atomic-instrumented.h>
+#define ARCH_ATOMIC
 
 #endif /* _ASM_X86_ATOMIC_H */
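
Defining ARCH_ATOMIC and dropping the direct include of <asm-generic/atomic-instrumented.h> hands the instrumentation wrapping over to the generated generic headers: the arch_* primitives now use the non-instrumented __READ_ONCE()/__WRITE_ONCE(), and the generic layer adds the KASAN/KCSAN hooks on top. For the read case the generated wrapper amounts to roughly this sketch:

static __always_inline int atomic_read(const atomic_t *v)
{
        instrument_atomic_read(v, sizeof(*v));  /* KASAN/KCSAN see the access */
        return arch_atomic_read(v);             /* plain __READ_ONCE() underneath */
}
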
index 52cfaec..5efd01b 100644 (file)
@@ -75,6 +75,7 @@ static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 {
        return arch_cmpxchg64(&v->counter, o, n);
 }
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
 
 /**
  * arch_atomic64_xchg - xchg atomic64 variable
@@ -94,6 +95,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
                             : "memory");
        return o;
 }
+#define arch_atomic64_xchg arch_atomic64_xchg
 
 /**
  * arch_atomic64_set - set atomic64 variable
@@ -138,6 +140,7 @@ static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
                             ASM_NO_INPUT_CLOBBER("memory"));
        return i;
 }
+#define arch_atomic64_add_return arch_atomic64_add_return
 
 /*
  * Other variants with different arithmetic operators:
@@ -149,6 +152,7 @@ static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
                             ASM_NO_INPUT_CLOBBER("memory"));
        return i;
 }
+#define arch_atomic64_sub_return arch_atomic64_sub_return
 
 static inline s64 arch_atomic64_inc_return(atomic64_t *v)
 {
@@ -242,6 +246,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
                             "S" (v) : "memory");
        return (int)a;
 }
+#define arch_atomic64_add_unless arch_atomic64_add_unless
 
 static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
 {
@@ -281,6 +286,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
 
        return old;
 }
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
 
 static inline void arch_atomic64_or(s64 i, atomic64_t *v)
 {
@@ -299,6 +305,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
 
        return old;
 }
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
 
 static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
 {
@@ -317,6 +324,7 @@ static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
 
        return old;
 }
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
 
 static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
 {
@@ -327,6 +335,7 @@ static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
 
        return old;
 }
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
 
 #define arch_atomic64_fetch_sub(i, v)  arch_atomic64_fetch_add(-(i), (v))
 
index 95c6cea..809bd01 100644 (file)
@@ -19,7 +19,7 @@
  */
 static inline s64 arch_atomic64_read(const atomic64_t *v)
 {
-       return READ_ONCE((v)->counter);
+       return __READ_ONCE((v)->counter);
 }
 
 /**
@@ -31,7 +31,7 @@ static inline s64 arch_atomic64_read(const atomic64_t *v)
  */
 static inline void arch_atomic64_set(atomic64_t *v, s64 i)
 {
-       WRITE_ONCE(v->counter, i);
+       __WRITE_ONCE(v->counter, i);
 }
 
 /**
@@ -159,37 +159,43 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
 {
        return i + xadd(&v->counter, i);
 }
+#define arch_atomic64_add_return arch_atomic64_add_return
 
 static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
 {
        return arch_atomic64_add_return(-i, v);
 }
+#define arch_atomic64_sub_return arch_atomic64_sub_return
 
 static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
 {
        return xadd(&v->counter, i);
 }
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
 
 static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
        return xadd(&v->counter, -i);
 }
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
 
 static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
        return arch_cmpxchg(&v->counter, old, new);
 }
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
 
-#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
 static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
        return try_cmpxchg(&v->counter, old, new);
 }
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
 
 static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
 {
        return arch_xchg(&v->counter, new);
 }
+#define arch_atomic64_xchg arch_atomic64_xchg
 
 static inline void arch_atomic64_and(s64 i, atomic64_t *v)
 {
@@ -207,6 +213,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
        } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
        return val;
 }
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
 
 static inline void arch_atomic64_or(s64 i, atomic64_t *v)
 {
@@ -224,6 +231,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
        } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
        return val;
 }
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
 
 static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
 {
@@ -241,5 +249,6 @@ static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
        } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
        return val;
 }
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
 
 #endif /* _ASM_X86_ATOMIC64_64_H */
index 0367efd..35460fe 100644 (file)
@@ -201,8 +201,12 @@ arch_test_and_change_bit(long nr, volatile unsigned long *addr)
        return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
 }
 
-static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
+static __no_kcsan_or_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
 {
+       /*
+        * Because this is a plain access, we need to disable KCSAN here to
+        * avoid double instrumentation via instrumented bitops.
+        */
        return ((1UL << (nr & (BITS_PER_LONG-1))) &
                (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
 }
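
An alternative to marking the whole function __no_kcsan_or_inline would be to annotate only the racy load; a sketch using the data_race() macro, shown for comparison rather than as what this series does:

static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
{
        return ((1UL << (nr & (BITS_PER_LONG - 1))) &
                data_race(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
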
index 8f1e94f..a338a6d 100644 (file)
@@ -89,6 +89,8 @@
 #define INTEL_FAM6_COMETLAKE           0xA5
 #define INTEL_FAM6_COMETLAKE_L         0xA6
 
+#define INTEL_FAM6_SAPPHIRERAPIDS_X    0x8F
+
 /* "Small Core" Processors (Atom) */
 
 #define INTEL_FAM6_ATOM_BONNELL                0x1C /* Diamondville, Pineview */
index 1da5858..f8998e9 100644 (file)
@@ -1306,7 +1306,6 @@ struct kvm_arch_async_pf {
 extern u64 __read_mostly host_efer;
 
 extern struct kvm_x86_ops kvm_x86_ops;
-extern struct kmem_cache *x86_fpu_cache;
 
 #define __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
@@ -1671,7 +1670,7 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm);
 void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
                                       unsigned long *vcpu_bitmap);
 
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                                     struct kvm_async_pf *work);
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                                 struct kvm_async_pf *work);
index 9a6dc9b..fb81fea 100644 (file)
@@ -271,6 +271,24 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
        return __vdso_data;
 }
 
+static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
+{
+       return true;
+}
+#define vdso_clocksource_ok arch_vdso_clocksource_ok
+
+/*
+ * Clocksource read value validation to handle PV and HyperV clocksources
+ * which can be invalidated asynchronously and indicate invalidation by
+ * returning U64_MAX, which can be effectively tested by checking for a
+ * negative value after casting it to s64.
+ */
+static inline bool arch_vdso_cycles_ok(u64 cycles)
+{
+       return (s64)cycles >= 0;
+}
+#define vdso_cycles_ok arch_vdso_cycles_ok
+
 /*
  * x86 specific delta calculation.
  *
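
The arch_vdso_cycles_ok() check above relies on U64_MAX turning negative once cast to s64. A standalone demonstration of the property:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t cycles = UINT64_MAX;           /* "invalidated" PV/Hyper-V read */
        printf("%d\n", (int64_t)cycles >= 0);   /* prints 0: the read is rejected */
        return 0;
}
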
index 8ef4369..e77261d 100644 (file)
@@ -28,6 +28,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o                   := n
 KASAN_SANITIZE_stacktrace.o                            := n
 KASAN_SANITIZE_paravirt.o                              := n
 
+# With some compiler versions the generated code results in boot hangs, caused
+# by several compilation units. To be safe, disable all instrumentation.
+KCSAN_SANITIZE := n
+
 OBJECT_FILES_NON_STANDARD_test_nx.o                    := y
 OBJECT_FILES_NON_STANDARD_paravirt_patch.o             := y
 
index 4b1d31b..bf4acb0 100644 (file)
@@ -2060,7 +2060,7 @@ void __init init_apic_mappings(void)
        unsigned int new_apicid;
 
        if (apic_validate_deadline_timer())
-               pr_debug("TSC deadline timer available\n");
+               pr_info("TSC deadline timer available\n");
 
        if (x2apic_mode) {
                boot_cpu_physical_apicid = read_apic_id();
index 7dc4ad6..dba6a83 100644 (file)
@@ -13,6 +13,9 @@ endif
 KCOV_INSTRUMENT_common.o := n
 KCOV_INSTRUMENT_perf_event.o := n
 
+# As above, instrumenting secondary CPU boot code causes boot hangs.
+KCSAN_SANITIZE_common.o := n
+
 # Make sure load_percpu_segment has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_common.o                := $(nostackp)
index b6f887b..0b71970 100644 (file)
@@ -588,7 +588,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline);
 static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
        SPECTRE_V2_NONE;
 
-static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
+static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+       SPECTRE_V2_USER_NONE;
+static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
        SPECTRE_V2_USER_NONE;
 
 #ifdef CONFIG_RETPOLINE
@@ -734,15 +736,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
                break;
        }
 
-       /*
-        * At this point, an STIBP mode other than "off" has been set.
-        * If STIBP support is not being forced, check if STIBP always-on
-        * is preferred.
-        */
-       if (mode != SPECTRE_V2_USER_STRICT &&
-           boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
-               mode = SPECTRE_V2_USER_STRICT_PREFERRED;
-
        /* Initialize Indirect Branch Prediction Barrier */
        if (boot_cpu_has(X86_FEATURE_IBPB)) {
                setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
@@ -765,23 +758,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
                pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
                        static_key_enabled(&switch_mm_always_ibpb) ?
                        "always-on" : "conditional");
+
+               spectre_v2_user_ibpb = mode;
        }
 
-       /* If enhanced IBRS is enabled no STIBP required */
-       if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+       /*
+        * If enhanced IBRS is enabled or SMT impossible, STIBP is not
+        * required.
+        */
+       if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
                return;
 
        /*
-        * If SMT is not possible or STIBP is not available clear the STIBP
-        * mode.
+        * At this point, an STIBP mode other than "off" has been set.
+        * If STIBP support is not being forced, check if STIBP always-on
+        * is preferred.
         */
-       if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+       if (mode != SPECTRE_V2_USER_STRICT &&
+           boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+               mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
+       /*
+        * If STIBP is not available, clear the STIBP mode.
+        */
+       if (!boot_cpu_has(X86_FEATURE_STIBP))
                mode = SPECTRE_V2_USER_NONE;
+
+       spectre_v2_user_stibp = mode;
+
 set_mode:
-       spectre_v2_user = mode;
-       /* Only print the STIBP mode when SMT possible */
-       if (smt_possible)
-               pr_info("%s\n", spectre_v2_user_strings[mode]);
+       pr_info("%s\n", spectre_v2_user_strings[mode]);
 }
 
 static const char * const spectre_v2_strings[] = {
@@ -1014,7 +1020,7 @@ void cpu_bugs_smt_update(void)
 {
        mutex_lock(&spec_ctrl_mutex);
 
-       switch (spectre_v2_user) {
+       switch (spectre_v2_user_stibp) {
        case SPECTRE_V2_USER_NONE:
                break;
        case SPECTRE_V2_USER_STRICT:
@@ -1257,14 +1263,19 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
 {
        switch (ctrl) {
        case PR_SPEC_ENABLE:
-               if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+               if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
                        return 0;
                /*
                 * Indirect branch speculation is always disabled in strict
-                * mode.
+                * mode. It also cannot be enabled if it was force-disabled
+                * by a previous prctl() call.
                 */
-               if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
-                   spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+               if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+                   task_spec_ib_force_disable(task))
                        return -EPERM;
                task_clear_spec_ib_disable(task);
                task_update_spec_tif(task);
@@ -1275,10 +1286,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
                 * Indirect branch speculation is always allowed when
                 * mitigation is force disabled.
                 */
-               if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+               if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
                        return -EPERM;
-               if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
-                   spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+               if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
                        return 0;
                task_set_spec_ib_disable(task);
                if (ctrl == PR_SPEC_FORCE_DISABLE)
@@ -1309,7 +1322,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
 {
        if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
                ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
-       if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+       if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP)
                ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
 }
 #endif
@@ -1340,22 +1354,24 @@ static int ib_prctl_get(struct task_struct *task)
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
                return PR_SPEC_NOT_AFFECTED;
 
-       switch (spectre_v2_user) {
-       case SPECTRE_V2_USER_NONE:
+       if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+           spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
                return PR_SPEC_ENABLE;
-       case SPECTRE_V2_USER_PRCTL:
-       case SPECTRE_V2_USER_SECCOMP:
+       else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+               return PR_SPEC_DISABLE;
+       else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+           spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) {
                if (task_spec_ib_force_disable(task))
                        return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
                if (task_spec_ib_disable(task))
                        return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
                return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
-       case SPECTRE_V2_USER_STRICT:
-       case SPECTRE_V2_USER_STRICT_PREFERRED:
-               return PR_SPEC_DISABLE;
-       default:
+       } else
                return PR_SPEC_NOT_AFFECTED;
-       }
 }
 
 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
@@ -1594,7 +1610,7 @@ static char *stibp_state(void)
        if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
                return "";
 
-       switch (spectre_v2_user) {
+       switch (spectre_v2_user_stibp) {
        case SPECTRE_V2_USER_NONE:
                return ", STIBP: disabled";
        case SPECTRE_V2_USER_STRICT:
index 63926c9..c25a67a 100644 (file)
@@ -1142,9 +1142,12 @@ void switch_to_sld(unsigned long tifn)
 static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           0),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           0),
+       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           0),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,        1),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      1),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,      1),
+       X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         1),
+       X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           1),
        {}
 };
 
index 4d13c57..983cd53 100644 (file)
@@ -991,7 +991,15 @@ void __init e820__reserve_setup_data(void)
        while (pa_data) {
                data = early_memremap(pa_data, sizeof(*data));
                e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-               e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+
+               /*
+                * SETUP_EFI is supplied by kexec and does not need to be
+                * reserved.
+                */
+               if (data->type != SETUP_EFI)
+                       e820__range_update_kexec(pa_data,
+                                                sizeof(*data) + data->len,
+                                                E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
 
                if (data->type == SETUP_INDIRECT &&
                    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
index d6f22a3..7e6403a 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/kprobes.h>
-#include <linux/debugfs.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
 #include <asm/timer.h>
index 8e3d034..f362ce0 100644 (file)
@@ -545,28 +545,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
 
        lockdep_assert_irqs_disabled();
 
-       /*
-        * If TIF_SSBD is different, select the proper mitigation
-        * method. Note that if SSBD mitigation is disabled or permanentely
-        * enabled this branch can't be taken because nothing can set
-        * TIF_SSBD.
-        */
-       if (tif_diff & _TIF_SSBD) {
-               if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+       /* Handle change of TIF_SSBD depending on the mitigation method. */
+       if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+               if (tif_diff & _TIF_SSBD)
                        amd_set_ssb_virt_state(tifn);
-               } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+       } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+               if (tif_diff & _TIF_SSBD)
                        amd_set_core_ssb_state(tifn);
-               } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
-                          static_cpu_has(X86_FEATURE_AMD_SSBD)) {
-                       msr |= ssbd_tif_to_spec_ctrl(tifn);
-                       updmsr  = true;
-               }
+       } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+                  static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+               updmsr |= !!(tif_diff & _TIF_SSBD);
+               msr |= ssbd_tif_to_spec_ctrl(tifn);
        }
 
-       /*
-        * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
-        * otherwise avoid the MSR write.
-        */
+       /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */
        if (IS_ENABLED(CONFIG_SMP) &&
            static_branch_unlikely(&switch_to_cond_stibp)) {
                updmsr |= !!(tif_diff & _TIF_SPEC_IB);
index e040ba6..0ec7ced 100644 (file)
@@ -197,6 +197,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
                },
        },
+       {       /* Handle problems with rebooting on Apple MacBook6,1 */
+               .callback = set_pci_reboot,
+               .ident = "Apple MacBook6,1",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"),
+               },
+       },
        {       /* Handle problems with rebooting on Apple MacBookPro5 */
                .callback = set_pci_reboot,
                .ident = "Apple MacBookPro5",
index 371a6b3..e42faa7 100644 (file)
 #include <asm/hpet.h>
 #include <asm/time.h>
 
-#ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
-#endif
-
 unsigned long profile_pc(struct pt_regs *regs)
 {
        unsigned long pc = instruction_pointer(regs);
index 1bf7e31..7c35556 100644 (file)
@@ -40,13 +40,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)
 #ifdef CONFIG_X86_32
 OUTPUT_ARCH(i386)
 ENTRY(phys_startup_32)
-jiffies = jiffies_64;
 #else
 OUTPUT_ARCH(i386:x86-64)
 ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
 #endif
 
+jiffies = jiffies_64;
+
 #if defined(CONFIG_X86_64)
 /*
  * On 64-bit, align RODATA to 2MB so we retain large page mappings for
index 253b8e8..8a294f9 100644 (file)
@@ -181,17 +181,14 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
        r = -E2BIG;
        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
                goto out;
-       r = -ENOMEM;
        if (cpuid->nent) {
-               cpuid_entries =
-                       vmalloc(array_size(sizeof(struct kvm_cpuid_entry),
-                                          cpuid->nent));
-               if (!cpuid_entries)
-                       goto out;
-               r = -EFAULT;
-               if (copy_from_user(cpuid_entries, entries,
-                                  cpuid->nent * sizeof(struct kvm_cpuid_entry)))
+               cpuid_entries = vmemdup_user(entries,
+                                            array_size(sizeof(struct kvm_cpuid_entry),
+                                                       cpuid->nent));
+               if (IS_ERR(cpuid_entries)) {
+                       r = PTR_ERR(cpuid_entries);
                        goto out;
+               }
        }
        for (i = 0; i < cpuid->nent; i++) {
                vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
@@ -211,8 +208,8 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
        kvm_x86_ops.cpuid_update(vcpu);
        r = kvm_update_cpuid(vcpu);
 
+       kvfree(cpuid_entries);
 out:
-       vfree(cpuid_entries);
        return r;
 }
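
The vmemdup_user() conversion above is the stock replacement for open-coded vmalloc() plus copy_from_user(); the resulting pattern, sketched with placeholder names (user_ptr and nent are hypothetical), pairs with kvfree():

struct kvm_cpuid_entry *copy;

copy = vmemdup_user(user_ptr, array_size(nent, sizeof(*copy)));
if (IS_ERR(copy))
        return PTR_ERR(copy);
/* ... use copy ... */
kvfree(copy);
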
 
@@ -325,7 +322,7 @@ void kvm_set_cpu_caps(void)
        );
 
        kvm_cpu_cap_mask(CPUID_7_ECX,
-               F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
+               F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
                F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
                F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
                F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/
@@ -334,6 +331,13 @@ void kvm_set_cpu_caps(void)
        if (cpuid_ecx(7) & F(LA57))
                kvm_cpu_cap_set(X86_FEATURE_LA57);
 
+       /*
+        * PKU is not yet implemented for shadow paging and requires OSPKE
+        * to be set on the host. Clear it if that is not the case.
+        */
+       if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+               kvm_cpu_cap_clear(X86_FEATURE_PKU);
+
        kvm_cpu_cap_mask(CPUID_7_EDX,
                F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
                F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
@@ -426,7 +430,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
 
 struct kvm_cpuid_array {
        struct kvm_cpuid_entry2 *entries;
-       const int maxnent;
+       int maxnent;
        int nent;
 };
 
@@ -870,7 +874,6 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
 
        struct kvm_cpuid_array array = {
                .nent = 0,
-               .maxnent = cpuid->nent,
        };
        int r, i;
 
@@ -887,6 +890,8 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
        if (!array.entries)
                return -ENOMEM;
 
+       array.maxnent = cpuid->nent;
+
        for (i = 0; i < ARRAY_SIZE(funcs); i++) {
                r = get_cpuid_func(&array, funcs[i], type);
                if (r)
index 018aebc..7e818d6 100644 (file)
@@ -43,22 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");
 
-void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
 {
-       debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu,
+       debugfs_create_file("tsc-offset", 0444, debugfs_dentry, vcpu,
                            &vcpu_tsc_offset_fops);
 
        if (lapic_in_kernel(vcpu))
                debugfs_create_file("lapic_timer_advance_ns", 0444,
-                                   vcpu->debugfs_dentry, vcpu,
+                                   debugfs_dentry, vcpu,
                                    &vcpu_timer_advance_ns_fops);
 
        if (kvm_has_tsc_control) {
                debugfs_create_file("tsc-scaling-ratio", 0444,
-                                   vcpu->debugfs_dentry, vcpu,
+                                   debugfs_dentry, vcpu,
                                    &vcpu_tsc_scaling_fops);
                debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
-                                   vcpu->debugfs_dentry, vcpu,
+                                   debugfs_dentry, vcpu,
                                    &vcpu_tsc_scaling_frac_fops);
        }
 }
index de5476f..d0e2825 100644 (file)
@@ -4800,8 +4800,12 @@ static const struct opcode twobyte_table[256] = {
        GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
        GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
        N, N, N, N, N, N,
-       D(ImplicitOps | ModRM | SrcMem | NoAccess),
-       N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
+       D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
+       D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
+       D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
+       D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
+       D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
+       D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
        /* 0x20 - 0x2F */
        DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
        DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
index febca33..a6e218c 100644 (file)
@@ -462,7 +462,6 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
                if (channel == 3) {
                        /* Read-Back Command. */
                        for (channel = 0; channel < 3; channel++) {
-                               s = &pit_state->channels[channel];
                                if (val & (2 << channel)) {
                                        if (!(val & 0x20))
                                                pit_latch_count(pit, channel);
index 8a6db11..6bceafb 100644 (file)
@@ -258,7 +258,7 @@ void sync_nested_vmcb_control(struct vcpu_svm *svm)
        /* Only a few fields of int_ctl are written by the processor.  */
        mask = V_IRQ_MASK | V_TPR_MASK;
        if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
-           is_intercept(svm, SVM_EXIT_VINTR)) {
+           is_intercept(svm, INTERCEPT_VINTR)) {
                /*
                 * In order to request an interrupt window, L0 is usurping
                 * svm->vmcb->control.int_ctl and possibly setting V_IRQ
index 9e333b9..c8f5e87 100644 (file)
@@ -1378,6 +1378,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
        /* Drop int_ctl fields related to VINTR injection.  */
        svm->vmcb->control.int_ctl &= mask;
        if (is_guest_mode(&svm->vcpu)) {
+               svm->nested.hsave->control.int_ctl &= mask;
+
                WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
                        (svm->nested.ctl.int_ctl & V_TPR_MASK));
                svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
@@ -1999,7 +2001,7 @@ void svm_set_gif(struct vcpu_svm *svm, bool value)
                 */
                if (vgif_enabled(svm))
                        clr_intercept(svm, INTERCEPT_STGI);
-               if (is_intercept(svm, SVM_EXIT_VINTR))
+               if (is_intercept(svm, INTERCEPT_VINTR))
                        svm_clear_vintr(svm);
 
                enable_gif(svm);
index 9c74a73..adb11b5 100644 (file)
@@ -4624,19 +4624,24 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
        }
 }
 
-static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
+static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
+                               int *ret)
 {
        gva_t gva;
        struct x86_exception e;
+       int r;
 
        if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
                                vmcs_read32(VMX_INSTRUCTION_INFO), false,
-                               sizeof(*vmpointer), &gva))
-               return 1;
+                               sizeof(*vmpointer), &gva)) {
+               *ret = 1;
+               return -EINVAL;
+       }
 
-       if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
-               kvm_inject_emulated_page_fault(vcpu, &e);
-               return 1;
+       r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
+       if (r != X86EMUL_CONTINUE) {
+               *ret = vmx_handle_memory_failure(vcpu, r, &e);
+               return -EINVAL;
        }
 
        return 0;
@@ -4764,8 +4769,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       if (nested_vmx_get_vmptr(vcpu, &vmptr))
-               return 1;
+       if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
+               return ret;
 
        /*
         * SDM 3: 24.11.5
@@ -4838,12 +4843,13 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
        u32 zero = 0;
        gpa_t vmptr;
        u64 evmcs_gpa;
+       int r;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (nested_vmx_get_vmptr(vcpu, &vmptr))
-               return 1;
+       if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
+               return r;
 
        if (!page_address_valid(vcpu, vmptr))
                return nested_vmx_failValid(vcpu,
@@ -4902,7 +4908,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
        u64 value;
        gva_t gva = 0;
        short offset;
-       int len;
+       int len, r;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
@@ -4943,10 +4949,9 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
                                        instr_info, true, len, &gva))
                        return 1;
                /* _system ok, nested_vmx_check_permission has verified cpl=0 */
-               if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) {
-                       kvm_inject_emulated_page_fault(vcpu, &e);
-                       return 1;
-               }
+               r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
+               if (r != X86EMUL_CONTINUE)
+                       return vmx_handle_memory_failure(vcpu, r, &e);
        }
 
        return nested_vmx_succeed(vcpu);
@@ -4987,7 +4992,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
        unsigned long field;
        short offset;
        gva_t gva;
-       int len;
+       int len, r;
 
        /*
         * The value to write might be 32 or 64 bits, depending on L1's long
@@ -5017,10 +5022,9 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
                if (get_vmx_mem_address(vcpu, exit_qualification,
                                        instr_info, false, len, &gva))
                        return 1;
-               if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) {
-                       kvm_inject_emulated_page_fault(vcpu, &e);
-                       return 1;
-               }
+               r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
+               if (r != X86EMUL_CONTINUE)
+                       return vmx_handle_memory_failure(vcpu, r, &e);
        }
 
        field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf));
@@ -5103,12 +5107,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        gpa_t vmptr;
+       int r;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (nested_vmx_get_vmptr(vcpu, &vmptr))
-               return 1;
+       if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
+               return r;
 
        if (!page_address_valid(vcpu, vmptr))
                return nested_vmx_failValid(vcpu,
@@ -5170,6 +5175,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
        gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
        struct x86_exception e;
        gva_t gva;
+       int r;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
@@ -5181,11 +5187,11 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
                                true, sizeof(gpa_t), &gva))
                return 1;
        /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
-       if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
-                                       sizeof(gpa_t), &e)) {
-               kvm_inject_emulated_page_fault(vcpu, &e);
-               return 1;
-       }
+       r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
+                                       sizeof(gpa_t), &e);
+       if (r != X86EMUL_CONTINUE)
+               return vmx_handle_memory_failure(vcpu, r, &e);
+
        return nested_vmx_succeed(vcpu);
 }
 
@@ -5209,7 +5215,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        struct {
                u64 eptp, gpa;
        } operand;
-       int i;
+       int i, r;
 
        if (!(vmx->nested.msrs.secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_EPT) ||
@@ -5236,10 +5242,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
                        vmx_instruction_info, false, sizeof(operand), &gva))
                return 1;
-       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
-               kvm_inject_emulated_page_fault(vcpu, &e);
-               return 1;
-       }
+       r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
+       if (r != X86EMUL_CONTINUE)
+               return vmx_handle_memory_failure(vcpu, r, &e);
 
        /*
         * Nested EPT roots are always held through guest_mmu,
@@ -5291,6 +5296,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
                u64 gla;
        } operand;
        u16 vpid02;
+       int r;
 
        if (!(vmx->nested.msrs.secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_VPID) ||
@@ -5318,10 +5324,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
        if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
                        vmx_instruction_info, false, sizeof(operand), &gva))
                return 1;
-       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
-               kvm_inject_emulated_page_fault(vcpu, &e);
-               return 1;
-       }
+       r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
+       if (r != X86EMUL_CONTINUE)
+               return vmx_handle_memory_failure(vcpu, r, &e);
+
        if (operand.vpid >> 16)
                return nested_vmx_failValid(vcpu,
                        VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
@@ -5666,7 +5672,7 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
 {
        u32 intr_info;
 
-       switch (exit_reason) {
+       switch ((u16)exit_reason) {
        case EXIT_REASON_EXCEPTION_NMI:
                intr_info = vmx_get_intr_info(vcpu);
                if (is_nmi(intr_info))
@@ -5727,7 +5733,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        u32 intr_info;
 
-       switch (exit_reason) {
+       switch ((u16)exit_reason) {
        case EXIT_REASON_EXCEPTION_NMI:
                intr_info = vmx_get_intr_info(vcpu);
                if (is_nmi(intr_info))
index d33d890..bdcce65 100644 (file)
@@ -181,7 +181,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
                ret = pmu->version > 1;
                break;
        case MSR_IA32_PERF_CAPABILITIES:
-               ret = guest_cpuid_has(vcpu, X86_FEATURE_PDCM);
+               ret = 1;
                break;
        default:
                ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
index 170cc76..08e26a9 100644 (file)
@@ -1600,6 +1600,32 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+/*
+ * Handles kvm_read/write_guest_virt*() result and either injects #PF or returns
+ * KVM_EXIT_INTERNAL_ERROR for cases not currently handled by KVM. Return value
+ * indicates whether exit to userspace is needed.
+ */
+int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
+                             struct x86_exception *e)
+{
+       if (r == X86EMUL_PROPAGATE_FAULT) {
+               kvm_inject_emulated_page_fault(vcpu, e);
+               return 1;
+       }
+
+       /*
+        * If kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED while
+        * handling a VMX instruction, KVM could in principle complete the
+        * request by exiting to userspace and performing the I/O, but there
+        * doesn't seem to be a real use case behind such requests, so just
+        * return KVM_EXIT_INTERNAL_ERROR for now.
+        */
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+       vcpu->run->internal.ndata = 0;
+
+       return 0;
+}
 
 /*
  * Recognizes a pending MTF VM-exit and records the nested state for later
@@ -5486,6 +5512,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
                u64 pcid;
                u64 gla;
        } operand;
+       int r;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
                kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5508,10 +5535,9 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
                                sizeof(operand), &gva))
                return 1;
 
-       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
-               kvm_inject_emulated_page_fault(vcpu, &e);
-               return 1;
-       }
+       r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
+       if (r != X86EMUL_CONTINUE)
+               return vmx_handle_memory_failure(vcpu, r, &e);
 
        if (operand.pcid >> 12 != 0) {
                kvm_inject_gp(vcpu, 0);
@@ -7282,10 +7308,6 @@ static __init void vmx_set_cpu_caps(void)
        if (vmx_pt_mode_is_host_guest())
                kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
 
-       /* PKU is not yet implemented for shadow paging. */
-       if (enable_ept && boot_cpu_has(X86_FEATURE_OSPKE))
-               kvm_cpu_cap_check_and_set(X86_FEATURE_PKU);
-
        if (vmx_umip_emulated())
                kvm_cpu_cap_set(X86_FEATURE_UMIP);
 
index 672c28f..8a83b5e 100644 (file)
@@ -355,6 +355,8 @@ struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
 void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
 int vmx_find_msr_index(struct vmx_msrs *m, u32 msr);
+int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
+                             struct x86_exception *e);
 
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_SN  1
index 9e41b51..00c88c2 100644 (file)
@@ -239,8 +239,7 @@ u64 __read_mostly host_xcr0;
 u64 __read_mostly supported_xcr0;
 EXPORT_SYMBOL_GPL(supported_xcr0);
 
-struct kmem_cache *x86_fpu_cache;
-EXPORT_SYMBOL_GPL(x86_fpu_cache);
+static struct kmem_cache *x86_fpu_cache;
 
 static struct kmem_cache *x86_emulator_cache;
 
@@ -5647,13 +5646,6 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
        /* kvm_write_guest_virt_system can pull in tons of pages. */
        vcpu->arch.l1tf_flush_l1d = true;
 
-       /*
-        * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
-        * is returned, but our callers are not ready for that and they blindly
-        * call kvm_inject_page_fault.  Ensure that they at least do not leak
-        * uninitialized kernel stack memory into cr2 and error code.
-        */
-       memset(exception, 0, sizeof(*exception));
        return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
                                           PFERR_WRITE_MASK, exception);
 }
@@ -7018,7 +7010,7 @@ restart:
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
                        kvm_rip_write(vcpu, ctxt->eip);
-                       if (r && ctxt->tf)
+                       if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
                                r = kvm_vcpu_do_singlestep(vcpu);
                        if (kvm_x86_ops.update_emulated_instruction)
                                kvm_x86_ops.update_emulated_instruction(vcpu);
@@ -8277,9 +8269,8 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
        kvm_x86_ops.load_eoi_exitmap(vcpu, eoi_exit_bitmap);
 }
 
-int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-               unsigned long start, unsigned long end,
-               bool blockable)
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+                                           unsigned long start, unsigned long end)
 {
        unsigned long apic_address;
 
@@ -8290,8 +8281,6 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
        apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
        if (start <= apic_address && apic_address < end)
                kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
-
-       return 0;
 }
 
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
@@ -9962,13 +9951,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
                if (!slot || !slot->npages)
                        return 0;
 
-               /*
-                * Stuff a non-canonical value to catch use-after-delete.  This
-                * ends up being 0 on 32-bit KVM, but there's no better
-                * alternative.
-                */
-               hva = (unsigned long)(0xdeadull << 48);
                old_npages = slot->npages;
+               hva = 0;
        }
 
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
@@ -10140,43 +10124,65 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 }
 
 static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
-                                    struct kvm_memory_slot *new)
+                                    struct kvm_memory_slot *old,
+                                    struct kvm_memory_slot *new,
+                                    enum kvm_mr_change change)
 {
-       /* Still write protect RO slot */
-       if (new->flags & KVM_MEM_READONLY) {
-               kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
+       /*
+        * Nothing to do for RO slots or CREATE/MOVE/DELETE of a slot.
+        * See comments below.
+        */
+       if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
                return;
-       }
 
        /*
-        * Call kvm_x86_ops dirty logging hooks when they are valid.
-        *
-        * kvm_x86_ops.slot_disable_log_dirty is called when:
-        *
-        *  - KVM_MR_CREATE with dirty logging is disabled
-        *  - KVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag
-        *
-        * The reason is, in case of PML, we need to set D-bit for any slots
-        * with dirty logging disabled in order to eliminate unnecessary GPA
-        * logging in PML buffer (and potential PML buffer full VMEXIT). This
-        * guarantees leaving PML enabled during guest's lifetime won't have
-        * any additional overhead from PML when guest is running with dirty
-        * logging disabled for memory slots.
+        * Dirty logging tracks sptes in 4k granularity, meaning that large
+        * sptes have to be split.  If live migration is successful, the guest
+        * in the source machine will be destroyed and large sptes will be
+        * created in the destination. However, if the guest continues to run
+        * in the source machine (for example if live migration fails), small
+        * sptes will remain around and cause bad performance.
         *
-        * kvm_x86_ops.slot_enable_log_dirty is called when switching new slot
-        * to dirty logging mode.
+        * Scan sptes if dirty logging has been stopped, dropping those
+        * which can be collapsed into a single large-page spte.  Later
+        * page faults will create the large-page sptes.
         *
-        * If kvm_x86_ops dirty logging hooks are invalid, use write protect.
+        * There is no need to do this in any of the following cases:
+        * CREATE:      No dirty mappings will already exist.
+        * MOVE/DELETE: The old mappings will already have been cleaned up by
+        *              kvm_arch_flush_shadow_memslot()
+        */
+       if ((old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+           !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+               kvm_mmu_zap_collapsible_sptes(kvm, new);
+
+       /*
+        * Enable or disable dirty logging for the slot.
         *
-        * In case of write protect:
+        * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of the old
+        * slot have been zapped so no dirty logging updates are needed for
+        * the old slot.
+        * For KVM_MR_CREATE and KVM_MR_MOVE, once the new slot is visible
+        * any mappings that might be created in it will consume the
+        * properties of the new slot and do not need to be updated here.
         *
-        * Write protect all pages for dirty logging.
+        * When PML is enabled, the kvm_x86_ops dirty logging hooks are
+        * called to enable/disable dirty logging.
         *
-        * All the sptes including the large sptes which point to this
-        * slot are set to readonly. We can not create any new large
-        * spte on this slot until the end of the logging.
+        * When disabling dirty logging with PML enabled, the D-bit is set
+        * for sptes in the slot in order to prevent unnecessary GPA
+        * logging in the PML buffer (and potential PML buffer full VMEXIT).
+        * This guarantees leaving PML enabled for the guest's lifetime
+        * won't have any additional overhead from PML when the guest is
+        * running with dirty logging disabled.
         *
+        * When enabling dirty logging, large sptes are write-protected
+        * so they can be split on first write.  New large sptes cannot
+        * be created for this slot until the end of the logging.
         * See the comments in fast_page_fault().
+        * For small sptes, nothing is done if the dirty log is in the
+        * initial-all-set state.  Otherwise, depending on whether PML is
+        * enabled, either the D-bit or the W-bit will be cleared.
         */
        if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
                if (kvm_x86_ops.slot_enable_log_dirty) {
@@ -10213,39 +10219,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                kvm_mmu_calculate_default_mmu_pages(kvm));
 
        /*
-        * Dirty logging tracks sptes in 4k granularity, meaning that large
-        * sptes have to be split.  If live migration is successful, the guest
-        * in the source machine will be destroyed and large sptes will be
-        * created in the destination. However, if the guest continues to run
-        * in the source machine (for example if live migration fails), small
-        * sptes will remain around and cause bad performance.
-        *
-        * Scan sptes if dirty logging has been stopped, dropping those
-        * which can be collapsed into a single large-page spte.  Later
-        * page faults will create the large-page sptes.
-        *
-        * There is no need to do this in any of the following cases:
-        * CREATE:      No dirty mappings will already exist.
-        * MOVE/DELETE: The old mappings will already have been cleaned up by
-        *              kvm_arch_flush_shadow_memslot()
-        */
-       if (change == KVM_MR_FLAGS_ONLY &&
-               (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
-               !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
-               kvm_mmu_zap_collapsible_sptes(kvm, new);
-
-       /*
-        * Set up write protection and/or dirty logging for the new slot.
-        *
-        * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have
-        * been zapped so no dirty logging staff is needed for old slot. For
-        * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the
-        * new and it's also covered when dealing with the new slot.
-        *
         * FIXME: const-ify all uses of struct kvm_memory_slot.
         */
-       if (change != KVM_MR_DELETE)
-               kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
+       kvm_mmu_slot_apply_flags(kvm, old, (struct kvm_memory_slot *) new, change);
 
        /* Free the arrays associated with the old memslot. */
        if (change == KVM_MR_MOVE)
@@ -10530,7 +10506,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
        return kvm_arch_interrupt_allowed(vcpu);
 }
 
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                                     struct kvm_async_pf *work)
 {
        struct x86_exception fault;
@@ -10547,6 +10523,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                fault.address = work->arch.token;
                fault.async_page_fault = true;
                kvm_inject_page_fault(vcpu, &fault);
+               return true;
        } else {
                /*
                 * It is not possible to deliver a paravirtualized asynchronous
@@ -10557,6 +10534,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                 * fault is retried, hopefully the page will be ready in the host.
                 */
                kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+               return false;
        }
 }
 
@@ -10574,7 +10552,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
        trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
 
-       if (kvm_pv_async_pf_enabled(vcpu) &&
+       if ((work->wakeup_all || work->notpresent_injected) &&
+           kvm_pv_async_pf_enabled(vcpu) &&
            !apf_put_user_ready(vcpu, work->arch.token)) {
                vcpu->arch.apf.pageready_pending = true;
                kvm_apic_set_irq(vcpu, &irq, NULL);
index 5246db4..6110bce 100644 (file)
@@ -6,10 +6,19 @@
 # Produces uninteresting flaky coverage.
 KCOV_INSTRUMENT_delay.o        := n
 
+# KCSAN uses udelay for introducing watchpoint delay; avoid recursion.
+KCSAN_SANITIZE_delay.o := n
+ifdef CONFIG_KCSAN
+# In case KCSAN+lockdep+ftrace are enabled, disable ftrace for delay.o to avoid
+# lockdep -> [other libs] -> KCSAN -> udelay -> ftrace -> lockdep recursion.
+CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE)
+endif
+
 # Early boot use of cmdline; don't instrument it
 ifdef CONFIG_AMD_MEM_ENCRYPT
 KCOV_INSTRUMENT_cmdline.o := n
 KASAN_SANITIZE_cmdline.o  := n
+KCSAN_SANITIZE_cmdline.o  := n
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_cmdline.o = -pg
index 98f7c6f..f7fd0e8 100644 (file)
@@ -7,6 +7,10 @@ KCOV_INSTRUMENT_mem_encrypt_identity.o := n
 KASAN_SANITIZE_mem_encrypt.o           := n
 KASAN_SANITIZE_mem_encrypt_identity.o  := n
 
+# Disable KCSAN entirely, because otherwise we get warnings that some functions
+# reference __initdata sections.
+KCSAN_SANITIZE := n
+
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_mem_encrypt.o            = -pg
 CFLAGS_REMOVE_mem_encrypt_identity.o   = -pg
diff --git a/arch/x86/purgatory/.gitignore b/arch/x86/purgatory/.gitignore
new file mode 100644 (file)
index 0000000..d2be150
--- /dev/null
@@ -0,0 +1 @@
+purgatory.chk
index fb4ee54..b04e6e7 100644 (file)
@@ -14,10 +14,18 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 
 CFLAGS_sha256.o := -D__DISABLE_EXPORTS
 
-LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
-targets += purgatory.ro
-
+# When linking purgatory.ro with -r, unresolved symbols are not checked;
+# also link a purgatory.chk binary without -r to check for unresolved symbols.
+PURGATORY_LDFLAGS := -e purgatory_start -nostdlib -z nodefaultlib
+LDFLAGS_purgatory.ro := -r $(PURGATORY_LDFLAGS)
+LDFLAGS_purgatory.chk := $(PURGATORY_LDFLAGS)
+targets += purgatory.ro purgatory.chk
+
+# Sanitizer, etc. runtimes are unavailable and cannot be linked here.
+GCOV_PROFILE   := n
 KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCSAN_SANITIZE := n
 KCOV_INSTRUMENT := n
 
 # These are adjustments to the compiler flags used for objects that
@@ -25,7 +33,7 @@ KCOV_INSTRUMENT := n
 
 PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
 PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
-PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN)
+PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
 
 # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
@@ -58,12 +66,15 @@ CFLAGS_string.o                     += $(PURGATORY_CFLAGS)
 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
                $(call if_changed,ld)
 
+$(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE
+               $(call if_changed,ld)
+
 targets += kexec-purgatory.c
 
 quiet_cmd_bin2c = BIN2C   $@
       cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@
 
-$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
+$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE
        $(call if_changed,bin2c)
 
 obj-$(CONFIG_KEXEC_FILE)       += kexec-purgatory.o
index 682c895..6b1f3a4 100644 (file)
@@ -6,7 +6,10 @@
 # for more details.
 #
 #
+
+# Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE                 := n
+KCSAN_SANITIZE                 := n
 OBJECT_FILES_NON_STANDARD      := y
 
 subdir- := rm
index b11ec5d..83f1b6a 100644 (file)
@@ -6,7 +6,10 @@
 # for more details.
 #
 #
+
+# Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE                 := n
+KCSAN_SANITIZE                 := n
 OBJECT_FILES_NON_STANDARD      := y
 
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
index 3579ac0..23632a3 100644 (file)
@@ -281,7 +281,6 @@ bool bio_integrity_prep(struct bio *bio)
 
                if (ret == 0) {
                        printk(KERN_ERR "could not attach integrity payload\n");
-                       kfree(buf);
                        status = BLK_STS_RESOURCE;
                        goto err_end_io;
                }
index 5235da6..a7366c0 100644 (file)
@@ -1434,8 +1434,7 @@ again:
        }
 
        if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-               trace_block_bio_complete(bio->bi_disk->queue, bio,
-                                        blk_status_to_errno(bio->bi_status));
+               trace_block_bio_complete(bio->bi_disk->queue, bio);
                bio_clear_flag(bio, BIO_TRACE_COMPLETION);
        }
 
index 96a39d0..44f3d09 100644 (file)
@@ -191,6 +191,33 @@ found_tag:
        return tag + tag_offset;
 }
 
+bool __blk_mq_get_driver_tag(struct request *rq)
+{
+       struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
+       unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
+       bool shared = blk_mq_tag_busy(rq->mq_hctx);
+       int tag;
+
+       if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
+               bt = &rq->mq_hctx->tags->breserved_tags;
+               tag_offset = 0;
+       }
+
+       if (!hctx_may_queue(rq->mq_hctx, bt))
+               return false;
+       tag = __sbitmap_queue_get(bt);
+       if (tag == BLK_MQ_NO_TAG)
+               return false;
+
+       rq->tag = tag + tag_offset;
+       if (shared) {
+               rq->rq_flags |= RQF_MQ_INFLIGHT;
+               atomic_inc(&rq->mq_hctx->nr_active);
+       }
+       rq->mq_hctx->tags->rqs[rq->tag] = rq;
+       return true;
+}
+
 void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
                    unsigned int tag)
 {
@@ -269,6 +296,7 @@ struct bt_tags_iter_data {
 
 #define BT_TAG_ITER_RESERVED           (1 << 0)
 #define BT_TAG_ITER_STARTED            (1 << 1)
+#define BT_TAG_ITER_STATIC_RQS         (1 << 2)
 
 static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
@@ -282,9 +310,12 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 
        /*
         * We can hit rq == NULL here, because the tagging functions
-        * test and set the bit before assining ->rqs[].
+        * test and set the bit before assigning ->rqs[].
         */
-       rq = tags->rqs[bitnr];
+       if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
+               rq = tags->static_rqs[bitnr];
+       else
+               rq = tags->rqs[bitnr];
        if (!rq)
                return true;
        if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
@@ -339,11 +370,13 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
  *             indicates whether or not @rq is a reserved request. Return
  *             true to continue iterating tags, false to stop.
  * @priv:      Will be passed as second argument to @fn.
+ *
+ * Caller has to pass the tag map from which requests are allocated.
  */
 void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
                void *priv)
 {
-       return __blk_mq_all_tag_iter(tags, fn, priv, 0);
+       return __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
 }
 
 /**
index d38e48f..2e4ef51 100644 (file)
@@ -51,6 +51,14 @@ enum {
        BLK_MQ_TAG_MAX          = BLK_MQ_NO_TAG - 1,
 };
 
+bool __blk_mq_get_driver_tag(struct request *rq);
+static inline bool blk_mq_get_driver_tag(struct request *rq)
+{
+       if (rq->tag != BLK_MQ_NO_TAG)
+               return true;
+       return __blk_mq_get_driver_tag(rq);
+}
+
 extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
 extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
 
index 9a36ac1..4f57d27 100644 (file)
@@ -1052,35 +1052,6 @@ static inline unsigned int queued_to_index(unsigned int queued)
        return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
 }
 
-bool blk_mq_get_driver_tag(struct request *rq)
-{
-       struct blk_mq_alloc_data data = {
-               .q = rq->q,
-               .hctx = rq->mq_hctx,
-               .flags = BLK_MQ_REQ_NOWAIT,
-               .cmd_flags = rq->cmd_flags,
-       };
-       bool shared;
-
-       if (rq->tag != BLK_MQ_NO_TAG)
-               return true;
-
-       if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
-               data.flags |= BLK_MQ_REQ_RESERVED;
-
-       shared = blk_mq_tag_busy(data.hctx);
-       rq->tag = blk_mq_get_tag(&data);
-       if (rq->tag >= 0) {
-               if (shared) {
-                       rq->rq_flags |= RQF_MQ_INFLIGHT;
-                       atomic_inc(&data.hctx->nr_active);
-               }
-               data.hctx->tags->rqs[rq->tag] = rq;
-       }
-
-       return rq->tag != BLK_MQ_NO_TAG;
-}
-
 static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
                                int flags, void *key)
 {
index a139b06..b3ce0f3 100644 (file)
@@ -44,7 +44,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
                                bool kick_requeue_list);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
-bool blk_mq_get_driver_tag(struct request *rq);
 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
                                        struct blk_mq_ctx *start);
 
index aa16e52..b5d1f0f 100644 (file)
@@ -420,9 +420,11 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part)
 static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
 {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       preempt_disable();
        write_seqcount_begin(&part->nr_sects_seq);
        part->nr_sects = size;
        write_seqcount_end(&part->nr_sects_seq);
+       preempt_enable();
 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
        preempt_disable();
        part->nr_sects = size;
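For reference, a minimal sketch of the read side this writer pairs with (the in-tree part_nr_sects_read() named in the hunk header has the same shape). It is shown only to make the new preempt_disable()/preempt_enable() pair above concrete: if the writer were preempted between write_seqcount_begin() and write_seqcount_end(), the sequence count would stay odd and readers, including one that preempted the writer on the same CPU, would spin in this loop until the writer ran again.

#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
static inline sector_t example_nr_sects_read(struct hd_struct *part)
{
	sector_t nr_sects;
	unsigned int seq;

	do {
		/* Retry while a writer holds the (odd) sequence count. */
		seq = read_seqcount_begin(&part->nr_sects_seq);
		nr_sects = part->nr_sects;
	} while (read_seqcount_retry(&part->nr_sects_seq, seq));

	return nr_sects;
}
#endif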
index 2e96d8b..c33bbbf 100644 (file)
@@ -1390,7 +1390,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
                goto out_unfreeze;
 
        /* Mask out flags that can't be set using LOOP_SET_STATUS. */
-       lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+       lo->lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
        /* For those flags, use the previous values instead */
        lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS;
        /* For flags that can't be cleared, use previous values too */
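The one-character change above flips the mask: '&= ~LOOP_SET_STATUS_SETTABLE_FLAGS' kept everything except the user-settable bits, which is the opposite of what the comment asks for. A tiny standalone sketch with hypothetical mask values (not taken from the driver):

#include <stdio.h>

#define SETTABLE 0x0cU	/* hypothetical mask of user-settable flags */

int main(void)
{
	unsigned int requested = 0x0fU;	/* settable and unsettable bits requested */

	unsigned int wrong = requested & ~SETTABLE;	/* 0x03: settable bits dropped */
	unsigned int right = requested & SETTABLE;	/* 0x0c: only settable bits kept */

	printf("wrong=0x%02x right=0x%02x\n", wrong, right);
	return 0;
}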
index 0b944ac..27a33ad 100644 (file)
@@ -1613,7 +1613,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
        disc_information di;
        track_information ti;
        __u32 last_track;
-       int ret = -1;
+       int ret;
 
        ret = pkt_get_disc_info(pd, &di);
        if (ret)
index d84e8a8..1e2aa5a 100644 (file)
@@ -784,7 +784,7 @@ static const struct block_device_operations mm_fops = {
 
 static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
-       int ret = -ENODEV;
+       int ret;
        struct cardinfo *card = &cards[num_cards];
        unsigned char   mem_present;
        unsigned char   batt_status;
index c4f15c4..9de1dab 100644 (file)
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/sched_clock.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
 #include <asm/smp.h>
 #include <asm/sbi.h>
 
@@ -39,6 +42,7 @@ static int riscv_clock_next_event(unsigned long delta,
        return 0;
 }
 
+static unsigned int riscv_clock_event_irq;
 static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = {
        .name                   = "riscv_timer_clockevent",
        .features               = CLOCK_EVT_FEAT_ONESHOT,
@@ -74,30 +78,36 @@ static int riscv_timer_starting_cpu(unsigned int cpu)
        struct clock_event_device *ce = per_cpu_ptr(&riscv_clock_event, cpu);
 
        ce->cpumask = cpumask_of(cpu);
+       ce->irq = riscv_clock_event_irq;
        clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff);
 
-       csr_set(CSR_IE, IE_TIE);
+       enable_percpu_irq(riscv_clock_event_irq,
+                         irq_get_trigger_type(riscv_clock_event_irq));
        return 0;
 }
 
 static int riscv_timer_dying_cpu(unsigned int cpu)
 {
-       csr_clear(CSR_IE, IE_TIE);
+       disable_percpu_irq(riscv_clock_event_irq);
        return 0;
 }
 
 /* called directly from the low-level interrupt handler */
-void riscv_timer_interrupt(void)
+static irqreturn_t riscv_timer_interrupt(int irq, void *dev_id)
 {
        struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event);
 
        csr_clear(CSR_IE, IE_TIE);
        evdev->event_handler(evdev);
+
+       return IRQ_HANDLED;
 }
 
 static int __init riscv_timer_init_dt(struct device_node *n)
 {
        int cpuid, hartid, error;
+       struct device_node *child;
+       struct irq_domain *domain;
 
        hartid = riscv_of_processor_hartid(n);
        if (hartid < 0) {
@@ -115,6 +125,25 @@ static int __init riscv_timer_init_dt(struct device_node *n)
        if (cpuid != smp_processor_id())
                return 0;
 
+       domain = NULL;
+       child = of_get_compatible_child(n, "riscv,cpu-intc");
+       if (!child) {
+               pr_err("Failed to find INTC node [%pOF]\n", n);
+               return -ENODEV;
+       }
+       domain = irq_find_host(child);
+       of_node_put(child);
+       if (!domain) {
+               pr_err("Failed to find IRQ domain for node [%pOF]\n", n);
+               return -ENODEV;
+       }
+
+       riscv_clock_event_irq = irq_create_mapping(domain, RV_IRQ_TIMER);
+       if (!riscv_clock_event_irq) {
+               pr_err("Failed to map timer interrupt for node [%pOF]\n", n);
+               return -ENODEV;
+       }
+
        pr_info("%s: Registering clocksource cpuid [%d] hartid [%d]\n",
               __func__, cpuid, hartid);
        error = clocksource_register_hz(&riscv_clocksource, riscv_timebase);
@@ -126,6 +155,14 @@ static int __init riscv_timer_init_dt(struct device_node *n)
 
        sched_clock_register(riscv_sched_clock, 64, riscv_timebase);
 
+       error = request_percpu_irq(riscv_clock_event_irq,
+                                   riscv_timer_interrupt,
+                                   "riscv-timer", &riscv_clock_event);
+       if (error) {
+               pr_err("registering percpu irq failed [%d]\n", error);
+               return error;
+       }
+
        error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING,
                         "clockevents/riscv/timer:starting",
                         riscv_timer_starting_cpu, riscv_timer_dying_cpu);
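Taken together, the hunks above move the timer from hand-managed CSR bits onto the generic per-CPU IRQ machinery. A compressed sketch of that lifecycle — request once, then enable/disable per CPU from hotplug callbacks — with hypothetical names, trimmed error handling, and a dynamically allocated hotplug state (the driver itself uses CPUHP_AP_RISCV_TIMER_STARTING):

#include <linux/cpuhotplug.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/percpu.h>

static unsigned int example_irq;		/* assumed mapped via irq_create_mapping() */
static DEFINE_PER_CPU(int, example_dev_id);	/* per-CPU cookie passed to the handler */

static irqreturn_t example_handler(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int example_starting_cpu(unsigned int cpu)
{
	/* Per-CPU IRQs are enabled on each CPU individually. */
	enable_percpu_irq(example_irq, irq_get_trigger_type(example_irq));
	return 0;
}

static int example_dying_cpu(unsigned int cpu)
{
	disable_percpu_irq(example_irq);
	return 0;
}

static int __init example_init(void)
{
	int ret;

	/* Request once, globally; the core delivers the per-CPU cookie. */
	ret = request_percpu_irq(example_irq, example_handler,
				 "example-percpu", &example_dev_id);
	if (ret)
		return ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example/percpu:starting",
				example_starting_cpu, example_dying_cpu);
	return ret < 0 ? ret : 0;
}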
index cce4a74..75daaf2 100644 (file)
@@ -37,7 +37,9 @@ KBUILD_CFLAGS                 := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
 KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
 
 GCOV_PROFILE                   := n
+# Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE                 := n
+KCSAN_SANITIZE                 := n
 UBSAN_SANITIZE                 := n
 OBJECT_FILES_NON_STANDARD      := y
 
index 53b4126..ffe149a 100644 (file)
@@ -27,6 +27,7 @@
 
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <kgd_kfd_interface.h>
 #include <drm/ttm/ttm_execbuf_util.h>
@@ -195,10 +196,10 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
                        pagefault_disable();                            \
                        if ((mmptr) == current->mm) {                   \
                                valid = !get_user((dst), (wptr));       \
-                       } else if (current->mm == NULL) {               \
-                               use_mm(mmptr);                          \
+                       } else if (current->flags & PF_KTHREAD) {       \
+                               kthread_use_mm(mmptr);                  \
                                valid = !get_user((dst), (wptr));       \
-                               unuse_mm(mmptr);                        \
+                               kthread_unuse_mm(mmptr);                \
                        }                                               \
                        pagefault_enable();                             \
                }                                                       \
index 6529cac..35d4a5a 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
-#include <linux/mmu_context.h>
 #include <linux/firmware.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
index 691c897..bf927f4 100644 (file)
@@ -19,7 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#include <linux/mmu_context.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "gc/gc_10_1_0_offset.h"
index c694473..744366c 100644 (file)
@@ -20,8 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <linux/mmu_context.h>
-
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "cikd.h"
index 2f4bdc8..feab4cc 100644 (file)
@@ -20,8 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <linux/mmu_context.h>
-
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "gfx_v8_0.h"
index df841c2..c7fd0c4 100644 (file)
@@ -19,8 +19,6 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#include <linux/mmu_context.h>
-
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "gc/gc_9_0_offset.h"
index eee5304..ad8a9df 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/mm.h>
-#include <linux/mmu_context.h>
+#include <linux/kthread.h>
 #include <linux/sched/mm.h>
 #include <linux/types.h>
 #include <linux/list.h>
index 66b9a68..29fead2 100644 (file)
@@ -493,6 +493,19 @@ config TI_SCI_INTA_IRQCHIP
          If you wish to use interrupt aggregator irq resources managed by the
          TI System Controller, say Y here. Otherwise, say N.
 
+config RISCV_INTC
+       bool "RISC-V Local Interrupt Controller"
+       depends on RISCV
+       default y
+       help
+          This enables support for the per-HART local interrupt controller
+          found in standard RISC-V systems.  The per-HART local interrupt
+          controller handles timer interrupts, software interrupts, and
+          hardware interrupts. Without a per-HART local interrupt controller,
+          a RISC-V system will be unable to handle any interrupts.
+
+          If you don't know what to do here, say Y.
+
 config SIFIVE_PLIC
        bool "SiFive Platform-Level Interrupt Controller"
        depends on RISCV
index 3a4ce28..133f9c4 100644 (file)
@@ -98,6 +98,7 @@ obj-$(CONFIG_NDS32)                   += irq-ativic32.o
 obj-$(CONFIG_QCOM_PDC)                 += qcom-pdc.o
 obj-$(CONFIG_CSKY_MPINTC)              += irq-csky-mpintc.o
 obj-$(CONFIG_CSKY_APB_INTC)            += irq-csky-apb-intc.o
+obj-$(CONFIG_RISCV_INTC)               += irq-riscv-intc.o
 obj-$(CONFIG_SIFIVE_PLIC)              += irq-sifive-plic.o
 obj-$(CONFIG_IMX_IRQSTEER)             += irq-imx-irqsteer.o
 obj-$(CONFIG_IMX_INTMUX)               += irq-imx-intmux.o
diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
new file mode 100644 (file)
index 0000000..a6f97fa
--- /dev/null
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017-2018 SiFive
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#define pr_fmt(fmt) "riscv-intc: " fmt
+#include <linux/atomic.h>
+#include <linux/bits.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+static struct irq_domain *intc_domain;
+
+static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
+{
+       unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
+
+       if (unlikely(cause >= BITS_PER_LONG))
+               panic("unexpected interrupt cause");
+
+       switch (cause) {
+#ifdef CONFIG_SMP
+       case RV_IRQ_SOFT:
+               /*
+                * We only use software interrupts to pass IPIs, so if a
+                * non-SMP system gets one, then we don't know what to do.
+                */
+               handle_IPI(regs);
+               break;
+#endif
+       default:
+               handle_domain_irq(intc_domain, cause, regs);
+               break;
+       }
+}
+
+/*
+ * On RISC-V systems local interrupts are masked or unmasked by writing
+ * the SIE (Supervisor Interrupt Enable) CSR.  As CSRs can only be written
+ * on the local hart, these functions can only be called on the hart that
+ * corresponds to the IRQ chip.
+ */
+
+static void riscv_intc_irq_mask(struct irq_data *d)
+{
+       csr_clear(CSR_IE, BIT(d->hwirq));
+}
+
+static void riscv_intc_irq_unmask(struct irq_data *d)
+{
+       csr_set(CSR_IE, BIT(d->hwirq));
+}
+
+static int riscv_intc_cpu_starting(unsigned int cpu)
+{
+       csr_set(CSR_IE, BIT(RV_IRQ_SOFT));
+       return 0;
+}
+
+static int riscv_intc_cpu_dying(unsigned int cpu)
+{
+       csr_clear(CSR_IE, BIT(RV_IRQ_SOFT));
+       return 0;
+}
+
+static struct irq_chip riscv_intc_chip = {
+       .name = "RISC-V INTC",
+       .irq_mask = riscv_intc_irq_mask,
+       .irq_unmask = riscv_intc_irq_unmask,
+};
+
+static int riscv_intc_domain_map(struct irq_domain *d, unsigned int irq,
+                                irq_hw_number_t hwirq)
+{
+       irq_set_percpu_devid(irq);
+       irq_domain_set_info(d, irq, hwirq, &riscv_intc_chip, d->host_data,
+                           handle_percpu_devid_irq, NULL, NULL);
+
+       return 0;
+}
+
+static const struct irq_domain_ops riscv_intc_domain_ops = {
+       .map    = riscv_intc_domain_map,
+       .xlate  = irq_domain_xlate_onecell,
+};
+
+static int __init riscv_intc_init(struct device_node *node,
+                                 struct device_node *parent)
+{
+       int rc, hartid;
+
+       hartid = riscv_of_parent_hartid(node);
+       if (hartid < 0) {
+               pr_warn("unable to find hart id for %pOF\n", node);
+               return 0;
+       }
+
+       /*
+        * The DT will have one INTC DT node under each CPU (or HART)
+        * DT node, so riscv_intc_init() will be called once for each INTC
+        * DT node. We only need to do INTC initialization for the INTC DT
+        * node belonging to the boot CPU (or boot HART).
+        */
+       if (riscv_hartid_to_cpuid(hartid) != smp_processor_id())
+               return 0;
+
+       intc_domain = irq_domain_add_linear(node, BITS_PER_LONG,
+                                           &riscv_intc_domain_ops, NULL);
+       if (!intc_domain) {
+               pr_err("unable to add IRQ domain\n");
+               return -ENXIO;
+       }
+
+       rc = set_handle_irq(&riscv_intc_irq);
+       if (rc) {
+               pr_err("failed to set irq handler\n");
+               return rc;
+       }
+
+       cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_STARTING,
+                         "irqchip/riscv/intc:starting",
+                         riscv_intc_cpu_starting,
+                         riscv_intc_cpu_dying);
+
+       pr_info("%d local interrupts mapped\n", BITS_PER_LONG);
+
+       return 0;
+}
+
+IRQCHIP_DECLARE(riscv, "riscv,cpu-intc", riscv_intc_init);
index d9c53f8..eaa3e9f 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -76,6 +77,7 @@ struct plic_handler {
        void __iomem            *enable_base;
        struct plic_priv        *priv;
 };
+static int plic_parent_irq;
 static bool plic_cpuhp_setup_done;
 static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
 
@@ -219,15 +221,17 @@ static const struct irq_domain_ops plic_irqdomain_ops = {
  * that source ID back to the same claim register.  This automatically enables
  * and disables the interrupt, so there's nothing else to do.
  */
-static void plic_handle_irq(struct pt_regs *regs)
+static void plic_handle_irq(struct irq_desc *desc)
 {
        struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+       struct irq_chip *chip = irq_desc_get_chip(desc);
        void __iomem *claim = handler->hart_base + CONTEXT_CLAIM;
        irq_hw_number_t hwirq;
 
        WARN_ON_ONCE(!handler->present);
 
-       csr_clear(CSR_IE, IE_EIE);
+       chained_irq_enter(chip, desc);
+
        while ((hwirq = readl(claim))) {
                int irq = irq_find_mapping(handler->priv->irqdomain, hwirq);
 
@@ -237,21 +241,8 @@ static void plic_handle_irq(struct pt_regs *regs)
                else
                        generic_handle_irq(irq);
        }
-       csr_set(CSR_IE, IE_EIE);
-}
-
-/*
- * Walk up the DT tree until we find an active RISC-V core (HART) node and
- * extract the cpuid from it.
- */
-static int plic_find_hart_id(struct device_node *node)
-{
-       for (; node; node = node->parent) {
-               if (of_device_is_compatible(node, "riscv"))
-                       return riscv_of_processor_hartid(node);
-       }
 
-       return -1;
+       chained_irq_exit(chip, desc);
 }
 
 static void plic_set_threshold(struct plic_handler *handler, u32 threshold)
@@ -262,10 +253,8 @@ static void plic_set_threshold(struct plic_handler *handler, u32 threshold)
 
 static int plic_dying_cpu(unsigned int cpu)
 {
-       struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
-
-       csr_clear(CSR_IE, IE_EIE);
-       plic_set_threshold(handler, PLIC_DISABLE_THRESHOLD);
+       if (plic_parent_irq)
+               disable_percpu_irq(plic_parent_irq);
 
        return 0;
 }
@@ -274,7 +263,11 @@ static int plic_starting_cpu(unsigned int cpu)
 {
        struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
 
-       csr_set(CSR_IE, IE_EIE);
+       if (plic_parent_irq)
+               enable_percpu_irq(plic_parent_irq,
+                                 irq_get_trigger_type(plic_parent_irq));
+       else
+               pr_warn("cpu%d: parent irq not available\n", cpu);
        plic_set_threshold(handler, PLIC_ENABLE_THRESHOLD);
 
        return 0;
@@ -330,7 +323,7 @@ static int __init plic_init(struct device_node *node,
                if (parent.args[0] != RV_IRQ_EXT)
                        continue;
 
-               hartid = plic_find_hart_id(parent.np);
+               hartid = riscv_of_parent_hartid(parent.np);
                if (hartid < 0) {
                        pr_warn("failed to parse hart ID for context %d.\n", i);
                        continue;
@@ -342,6 +335,14 @@ static int __init plic_init(struct device_node *node,
                        continue;
                }
 
+               /* Find parent domain and register chained handler */
+               if (!plic_parent_irq && irq_find_host(parent.np)) {
+                       plic_parent_irq = irq_of_parse_and_map(node, i);
+                       if (plic_parent_irq)
+                               irq_set_chained_handler(plic_parent_irq,
+                                                       plic_handle_irq);
+               }
+
                /*
                 * When running in M-mode we need to ignore the S-mode handler.
                 * Here we assume it always comes later, but that might be a
@@ -382,7 +383,6 @@ done:
 
        pr_info("%pOFP: mapped %d interrupts with %d handlers for"
                " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts);
-       set_handle_irq(plic_handle_irq);
        return 0;
 
 out_iounmap:
index 0585efa..c2c5bc4 100644 (file)
@@ -3669,7 +3669,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        ns->disk = disk;
 
        if (__nvme_revalidate_disk(disk, id))
-               goto out_free_disk;
+               goto out_put_disk;
 
        if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
                ret = nvme_nvm_register(ns, disk_name, node);
@@ -3696,8 +3696,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        /* prevent double queue cleanup */
        ns->disk->queue = NULL;
        put_disk(ns->disk);
- out_free_disk:
-       del_gendisk(ns->disk);
  out_unlink_ns:
        mutex_lock(&ctrl->subsys->lock);
        list_del_rcu(&ns->siblings);
index cb00075..e999a8c 100644 (file)
@@ -2634,10 +2634,11 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
                opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
                __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 
-               if (!(op->flags & FCOP_FLAGS_AEN))
+               if (!(op->flags & FCOP_FLAGS_AEN)) {
                        nvme_fc_unmap_data(ctrl, op->rq, op);
+                       nvme_cleanup_cmd(op->rq);
+               }
 
-               nvme_cleanup_cmd(op->rq);
                nvme_fc_ctrl_put(ctrl);
 
                if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
index fa5c755..c0f4226 100644 (file)
@@ -599,8 +599,7 @@ static inline void nvme_trace_bio_complete(struct request *req,
        struct nvme_ns *ns = req->q->queuedata;
 
        if (req->cmd_flags & REQ_NVME_MPATH)
-               trace_block_bio_complete(ns->head->disk->queue,
-                                        req->bio, status);
+               trace_block_bio_complete(ns->head->disk->queue, req->bio);
 }
 
 extern struct device_attribute dev_attr_ana_grpid;
index d690d55..e2bacd3 100644 (file)
@@ -2950,9 +2950,15 @@ static int nvme_suspend(struct device *dev)
         * the PCI bus layer to put it into D3 in order to take the PCIe link
         * down, so as to allow the platform to achieve its minimum low-power
         * state (which may not be possible if the link is up).
+        *
+        * If a host memory buffer is enabled, shut down the device as the NVMe
+        * specification allows the device to access the host memory buffer in
+        * host DRAM from all power states, but those accesses to host DRAM
+        * will fail while the host is in S3.
         */
        if (pm_suspend_via_firmware() || !ctrl->npss ||
            !pcie_aspm_enabled(pdev) ||
+           ndev->nr_host_mem_descs ||
            (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
                return nvme_disable_prepare_reset(ndev, true);
 
index 1843110..3345ec7 100644 (file)
@@ -131,8 +131,8 @@ struct nvme_tcp_ctrl {
 static LIST_HEAD(nvme_tcp_ctrl_list);
 static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
 static struct workqueue_struct *nvme_tcp_wq;
-static struct blk_mq_ops nvme_tcp_mq_ops;
-static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static const struct blk_mq_ops nvme_tcp_mq_ops;
+static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
 static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
 
 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
@@ -2301,7 +2301,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
        return queue->nr_cqe;
 }
 
-static struct blk_mq_ops nvme_tcp_mq_ops = {
+static const struct blk_mq_ops nvme_tcp_mq_ops = {
        .queue_rq       = nvme_tcp_queue_rq,
        .complete       = nvme_complete_rq,
        .init_request   = nvme_tcp_init_request,
@@ -2312,7 +2312,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = {
        .poll           = nvme_tcp_poll,
 };
 
-static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
+static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
        .queue_rq       = nvme_tcp_queue_rq,
        .complete       = nvme_complete_rq,
        .init_request   = nvme_tcp_init_request,
index 6392bcd..6e2f623 100644 (file)
@@ -129,7 +129,22 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
        return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
 }
 
-static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
+static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
+{
+       u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
+       struct nvmet_req *req;
+
+       mutex_lock(&ctrl->lock);
+       while (ctrl->nr_async_event_cmds) {
+               req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
+               mutex_unlock(&ctrl->lock);
+               nvmet_req_complete(req, status);
+               mutex_lock(&ctrl->lock);
+       }
+       mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
 {
        struct nvmet_async_event *aen;
        struct nvmet_req *req;
@@ -139,15 +154,14 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
                aen = list_first_entry(&ctrl->async_events,
                                       struct nvmet_async_event, entry);
                req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
-               if (status == 0)
-                       nvmet_set_result(req, nvmet_async_event_result(aen));
+               nvmet_set_result(req, nvmet_async_event_result(aen));
 
                list_del(&aen->entry);
                kfree(aen);
 
                mutex_unlock(&ctrl->lock);
                trace_nvmet_async_event(ctrl, req->cqe->result.u32);
-               nvmet_req_complete(req, status);
+               nvmet_req_complete(req, 0);
                mutex_lock(&ctrl->lock);
        }
        mutex_unlock(&ctrl->lock);
@@ -170,7 +184,7 @@ static void nvmet_async_event_work(struct work_struct *work)
        struct nvmet_ctrl *ctrl =
                container_of(work, struct nvmet_ctrl, async_event_work);
 
-       nvmet_async_events_process(ctrl, 0);
+       nvmet_async_events_process(ctrl);
 }
 
 void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
@@ -779,7 +793,6 @@ static void nvmet_confirm_sq(struct percpu_ref *ref)
 
 void nvmet_sq_destroy(struct nvmet_sq *sq)
 {
-       u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
        struct nvmet_ctrl *ctrl = sq->ctrl;
 
        /*
@@ -787,7 +800,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
         * queue doesn't have outstanding requests on it.
         */
        if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
-               nvmet_async_events_process(ctrl, status);
+               nvmet_async_events_failall(ctrl);
        percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
        wait_for_completion(&sq->confirm_done);
        wait_for_completion(&sq->free_done);
index 1669177..de9217c 100644 (file)
@@ -153,7 +153,7 @@ static LIST_HEAD(nvmet_tcp_queue_list);
 static DEFINE_MUTEX(nvmet_tcp_queue_mutex);
 
 static struct workqueue_struct *nvmet_tcp_wq;
-static struct nvmet_fabrics_ops nvmet_tcp_ops;
+static const struct nvmet_fabrics_ops nvmet_tcp_ops;
 static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
 static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
 
@@ -1713,7 +1713,7 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
        }
 }
 
-static struct nvmet_fabrics_ops nvmet_tcp_ops = {
+static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
        .owner                  = THIS_MODULE,
        .type                   = NVMF_TRTYPE_TCP,
        .msdbd                  = 1,
index d1b16cf..fab267e 100644 (file)
@@ -77,9 +77,6 @@ static inline void pcifront_init_sd(struct pcifront_sd *sd,
 static DEFINE_SPINLOCK(pcifront_dev_lock);
 static struct pcifront_device *pcifront_dev;
 
-static int verbose_request;
-module_param(verbose_request, int, 0644);
-
 static int errno_to_pcibios_err(int errno)
 {
        switch (errno) {
@@ -190,18 +187,16 @@ static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
        struct pcifront_sd *sd = bus->sysdata;
        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 
-       if (verbose_request)
-               dev_info(&pdev->xdev->dev,
-                        "read dev=%04x:%02x:%02x.%d - offset %x size %d\n",
-                        pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
-                        PCI_FUNC(devfn), where, size);
+       dev_dbg(&pdev->xdev->dev,
+               "read dev=%04x:%02x:%02x.%d - offset %x size %d\n",
+               pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
+               PCI_FUNC(devfn), where, size);
 
        err = do_pci_op(pdev, &op);
 
        if (likely(!err)) {
-               if (verbose_request)
-                       dev_info(&pdev->xdev->dev, "read got back value %x\n",
-                                op.value);
+               dev_dbg(&pdev->xdev->dev, "read got back value %x\n",
+                       op.value);
 
                *val = op.value;
        } else if (err == -ENODEV) {
@@ -229,12 +224,10 @@ static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
        struct pcifront_sd *sd = bus->sysdata;
        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 
-       if (verbose_request)
-               dev_info(&pdev->xdev->dev,
-                        "write dev=%04x:%02x:%02x.%d - "
-                        "offset %x size %d val %x\n",
-                        pci_domain_nr(bus), bus->number,
-                        PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
+       dev_dbg(&pdev->xdev->dev,
+               "write dev=%04x:%02x:%02x.%d - offset %x size %d val %x\n",
+               pci_domain_nr(bus), bus->number,
+               PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
 
        return errno_to_pcibios_err(do_pci_op(pdev, &op));
 }
index 8dd1278..7719ae4 100644 (file)
@@ -35,7 +35,7 @@
 /* L3C has 8-counters */
 #define L3C_NR_COUNTERS                0x8
 
-#define L3C_PERF_CTRL_EN       0x20000
+#define L3C_PERF_CTRL_EN       0x10000
 #define L3C_EVTYPE_NONE                0xff
 
 /*
index 494f853..490d353 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/usb/functionfs.h>
 
 #include <linux/aio.h>
-#include <linux/mmu_context.h>
+#include <linux/kthread.h>
 #include <linux/poll.h>
 #include <linux/eventfd.h>
 
@@ -824,13 +824,9 @@ static void ffs_user_copy_worker(struct work_struct *work)
        bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD;
 
        if (io_data->read && ret > 0) {
-               mm_segment_t oldfs = get_fs();
-
-               set_fs(USER_DS);
-               use_mm(io_data->mm);
+               kthread_use_mm(io_data->mm);
                ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data);
-               unuse_mm(io_data->mm);
-               set_fs(oldfs);
+               kthread_unuse_mm(io_data->mm);
        }
 
        io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);
index 3afddd3..9ee0bfe 100644 (file)
@@ -21,7 +21,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/poll.h>
-#include <linux/mmu_context.h>
+#include <linux/kthread.h>
 #include <linux/aio.h>
 #include <linux/uio.h>
 #include <linux/refcount.h>
@@ -462,9 +462,9 @@ static void ep_user_copy_worker(struct work_struct *work)
        struct kiocb *iocb = priv->iocb;
        size_t ret;
 
-       use_mm(mm);
+       kthread_use_mm(mm);
        ret = copy_to_iter(priv->buf, priv->actual, &priv->to);
-       unuse_mm(mm);
+       kthread_unuse_mm(mm);
        if (!ret)
                ret = -EFAULT;
 
index 186acd8..5e556ac 100644 (file)
@@ -27,7 +27,7 @@
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/mmu_context.h>
+#include <linux/kthread.h>
 #include <linux/rbtree.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
@@ -2817,7 +2817,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
                return -EPERM;
 
        if (kthread)
-               use_mm(mm);
+               kthread_use_mm(mm);
        else if (current->mm != mm)
                goto out;
 
@@ -2844,7 +2844,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
                *copied = copy_from_user(data, (void __user *)vaddr,
                                           count) ? 0 : count;
        if (kthread)
-               unuse_mm(mm);
+               kthread_unuse_mm(mm);
 out:
        mmput(mm);
        return *copied ? 0 : -EFAULT;
index 062595e..d7b8df3 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/vhost.h>
 #include <linux/uio.h>
 #include <linux/mm.h>
-#include <linux/mmu_context.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
 #include <linux/poll.h>
@@ -335,10 +334,8 @@ static int vhost_worker(void *data)
        struct vhost_dev *dev = data;
        struct vhost_work *work, *work_next;
        struct llist_node *node;
-       mm_segment_t oldfs = get_fs();
 
-       set_fs(USER_DS);
-       use_mm(dev->mm);
+       kthread_use_mm(dev->mm);
 
        for (;;) {
                /* mb paired w/ kthread_stop */
@@ -366,8 +363,7 @@ static int vhost_worker(void *data)
                                schedule();
                }
        }
-       unuse_mm(dev->mm);
-       set_fs(oldfs);
+       kthread_unuse_mm(dev->mm);
        return 0;
 }
 
index 61212fc..727f11e 100644 (file)
@@ -13,12 +13,16 @@ config XEN_BALLOON
 config XEN_BALLOON_MEMORY_HOTPLUG
        bool "Memory hotplug support for Xen balloon driver"
        depends on XEN_BALLOON && MEMORY_HOTPLUG
+       default y
        help
          Memory hotplug support for Xen balloon driver allows expanding memory
          available for the system above limit declared at system startup.
          It is very useful on critical systems which require long
          run without rebooting.
 
+         It's also very useful for non-PV domains to obtain unpopulated physical
+         memory ranges to use when mapping foreign memory or grants.
+
          Memory could be hotplugged in following steps:
 
            1) target domain: ensure that memory auto online policy is in
index ec975de..b96b11e 100644 (file)
@@ -93,10 +93,8 @@ static int setup_cpu_watcher(struct notifier_block *notifier,
        (void)register_xenbus_watch(&cpu_watch);
 
        for_each_possible_cpu(cpu) {
-               if (vcpu_online(cpu) == 0) {
-                       device_offline(get_cpu_device(cpu));
-                       set_cpu_present(cpu, false);
-               }
+               if (vcpu_online(cpu) == 0)
+                       disable_hotplug_cpu(cpu);
        }
 
        return NOTIFY_DONE;
@@ -119,5 +117,5 @@ static int __init setup_vcpu_hotplug_event(void)
        return 0;
 }
 
-arch_initcall(setup_vcpu_hotplug_event);
+late_initcall(setup_vcpu_hotplug_event);
 
index 59e85e4..dd911e1 100644 (file)
@@ -168,7 +168,7 @@ static const struct pci_device_id platform_pci_tbl[] = {
        {0,}
 };
 
-static struct dev_pm_ops platform_pm_ops = {
+static const struct dev_pm_ops platform_pm_ops = {
        .resume_noirq =   platform_pci_resume,
 };
 
index cf4ce3e..9eae1fc 100644 (file)
@@ -24,7 +24,7 @@
 #define PVCALLS_VERSIONS "1"
 #define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
 
-struct pvcalls_back_global {
+static struct pvcalls_back_global {
        struct list_head frontends;
        struct semaphore frontends_lock;
 } pvcalls_back_global;
@@ -1088,7 +1088,8 @@ static void set_backend_state(struct xenbus_device *dev,
                case XenbusStateInitialised:
                        switch (state) {
                        case XenbusStateConnected:
-                               backend_connect(dev);
+                               if (backend_connect(dev))
+                                       return;
                                xenbus_switch_state(dev, XenbusStateConnected);
                                break;
                        case XenbusStateClosing:
index da51a5d..059de92 100644 (file)
@@ -10,6 +10,8 @@
  * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
  */
 
+#define dev_fmt(fmt) DRV_NAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
@@ -154,9 +156,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
         * (as if device didn't respond) */
        u32 value = 0, tmp_val;
 
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
-                      pci_name(dev), size, offset);
+       dev_dbg(&dev->dev, "read %d bytes at 0x%x\n", size, offset);
 
        if (!valid_request(offset, size)) {
                err = XEN_PCI_ERR_invalid_offset;
@@ -195,9 +195,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
        }
 
 out:
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
-                      pci_name(dev), size, offset, value);
+       dev_dbg(&dev->dev, "read %d bytes at 0x%x = %x\n", size, offset, value);
 
        *ret_val = value;
        return xen_pcibios_err_to_errno(err);
@@ -212,10 +210,8 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
        u32 tmp_val;
        int field_start, field_end;
 
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG
-                      DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
-                      pci_name(dev), size, offset, value);
+       dev_dbg(&dev->dev, "write request %d bytes at 0x%x = %x\n",
+               size, offset, value);
 
        if (!valid_request(offset, size))
                return XEN_PCI_ERR_invalid_offset;
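The dev_dbg()/dev_warn() conversions in this file keep the old DRV_NAME prefix through the dev_fmt() hook defined at the top of the file rather than by spelling it out in every format string. A minimal standalone illustration of that hook with a hypothetical driver name (not from the patch):

/* Must be defined before the includes so the dev_* helpers pick it up. */
#define dev_fmt(fmt) "mydrv: " fmt

#include <linux/device.h>

static void example_log(struct device *dev)
{
	/* With dynamic debug enabled this logs "mydrv: read 4 bytes at 0x10". */
	dev_dbg(dev, "read %d bytes at 0x%x\n", 4, 0x10);
}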
index fb4fccb..ac45cdc 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
@@ -67,53 +68,39 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
 
        dev_data = pci_get_drvdata(dev);
        if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
-               if (unlikely(verbose_request))
-                       printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
-                              pci_name(dev));
+               dev_dbg(&dev->dev, "enable\n");
                err = pci_enable_device(dev);
                if (err)
                        return err;
                if (dev_data)
                        dev_data->enable_intx = 1;
        } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
-               if (unlikely(verbose_request))
-                       printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
-                              pci_name(dev));
+               dev_dbg(&dev->dev, "disable\n");
                pci_disable_device(dev);
                if (dev_data)
                        dev_data->enable_intx = 0;
        }
 
        if (!dev->is_busmaster && is_master_cmd(value)) {
-               if (unlikely(verbose_request))
-                       printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
-                              pci_name(dev));
+               dev_dbg(&dev->dev, "set bus master\n");
                pci_set_master(dev);
        } else if (dev->is_busmaster && !is_master_cmd(value)) {
-               if (unlikely(verbose_request))
-                       printk(KERN_DEBUG DRV_NAME ": %s: clear bus master\n",
-                              pci_name(dev));
+               dev_dbg(&dev->dev, "clear bus master\n");
                pci_clear_master(dev);
        }
 
        if (!(cmd->val & PCI_COMMAND_INVALIDATE) &&
            (value & PCI_COMMAND_INVALIDATE)) {
-               if (unlikely(verbose_request))
-                       printk(KERN_DEBUG
-                              DRV_NAME ": %s: enable memory-write-invalidate\n",
-                              pci_name(dev));
+               dev_dbg(&dev->dev, "enable memory-write-invalidate\n");
                err = pci_set_mwi(dev);
                if (err) {
-                       pr_warn("%s: cannot enable memory-write-invalidate (%d)\n",
-                               pci_name(dev), err);
+                       dev_warn(&dev->dev, "cannot enable memory-write-invalidate (%d)\n",
+                               err);
                        value &= ~PCI_COMMAND_INVALIDATE;
                }
        } else if ((cmd->val & PCI_COMMAND_INVALIDATE) &&
                   !(value & PCI_COMMAND_INVALIDATE)) {
-               if (unlikely(verbose_request))
-                       printk(KERN_DEBUG
-                              DRV_NAME ": %s: disable memory-write-invalidate\n",
-                              pci_name(dev));
+               dev_dbg(&dev->dev, "disable memory-write-invalidate\n");
                pci_clear_mwi(dev);
        }
 
@@ -157,8 +144,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
        struct pci_bar_info *bar = data;
 
        if (unlikely(!bar)) {
-               pr_warn(DRV_NAME ": driver data not found for %s\n",
-                      pci_name(dev));
+               dev_warn(&dev->dev, "driver data not found\n");
                return XEN_PCI_ERR_op_failed;
        }
 
@@ -194,8 +180,7 @@ static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
        u32 mask;
 
        if (unlikely(!bar)) {
-               pr_warn(DRV_NAME ": driver data not found for %s\n",
-                      pci_name(dev));
+               dev_warn(&dev->dev, "driver data not found\n");
                return XEN_PCI_ERR_op_failed;
        }
 
@@ -228,8 +213,7 @@ static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
        struct pci_bar_info *bar = data;
 
        if (unlikely(!bar)) {
-               pr_warn(DRV_NAME ": driver data not found for %s\n",
-                      pci_name(dev));
+               dev_warn(&dev->dev, "driver data not found\n");
                return XEN_PCI_ERR_op_failed;
        }
 
@@ -433,8 +417,8 @@ int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
 
        default:
                err = -EINVAL;
-               pr_err("%s: Unsupported header type %d!\n",
-                      pci_name(dev), dev->hdr_type);
+               dev_err(&dev->dev, "Unsupported header type %d!\n",
+                       dev->hdr_type);
                break;
        }
 
index ed593d1..7dc2810 100644 (file)
@@ -6,6 +6,8 @@
  * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
  */
 
+#define dev_fmt(fmt) DRV_NAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include "pciback.h"
@@ -35,8 +37,8 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
                if (match_one_device(&tmp_quirk->devid, dev) != NULL)
                        goto out;
        tmp_quirk = NULL;
-       printk(KERN_DEBUG DRV_NAME
-              ": quirk didn't match any device known\n");
+       dev_printk(KERN_DEBUG, &dev->dev,
+                  "quirk didn't match any device known\n");
 out:
        return tmp_quirk;
 }
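The conversions above all follow one pattern: define dev_fmt() once at the top of the file and let the dev_*() helpers supply the device name, instead of open-coding DRV_NAME and pci_name() in every message. A minimal sketch of the idiom, with the DRV_NAME value spelled out here purely for illustration:

#define DRV_NAME "xen-pciback"                  /* illustrative stand-in */
#define dev_fmt(fmt) DRV_NAME ": " fmt          /* must come before the includes */

#include <linux/device.h>
#include <linux/pci.h>

static void log_enable(struct pci_dev *dev)
{
        /* the device name and the dev_fmt() prefix are both added automatically */
        dev_dbg(&dev->dev, "enable\n");
}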
index 7af93d6..e876c3d 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -626,11 +627,11 @@ static void pcistub_remove(struct pci_dev *dev)
                if (found_psdev->pdev) {
                        int domid = xen_find_device_domain_owner(dev);
 
-                       pr_warn("****** removing device %s while still in-use by domain %d! ******\n",
+                       dev_warn(&dev->dev, "****** removing device %s while still in-use by domain %d! ******\n",
                               pci_name(found_psdev->dev), domid);
-                       pr_warn("****** driver domain may still access this device's i/o resources!\n");
-                       pr_warn("****** shutdown driver domain before binding device\n");
-                       pr_warn("****** to other drivers or domains\n");
+                       dev_warn(&dev->dev, "****** driver domain may still access this device's i/o resources!\n");
+                       dev_warn(&dev->dev, "****** shutdown driver domain before binding device\n");
+                       dev_warn(&dev->dev, "****** to other drivers or domains\n");
 
                        /* N.B. This ends up calling pcistub_put_pci_dev which ends up
                         * doing the FLR. */
@@ -711,14 +712,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
        ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
                &aer_op->domain, &aer_op->bus, &aer_op->devfn);
        if (!ret) {
-               dev_err(&psdev->dev->dev,
-                       DRV_NAME ": failed to get pcifront device\n");
+               dev_err(&psdev->dev->dev, "failed to get pcifront device\n");
                return PCI_ERS_RESULT_NONE;
        }
        wmb();
 
-       dev_dbg(&psdev->dev->dev,
-                       DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n",
+       dev_dbg(&psdev->dev->dev, "aer_op %x dom %x bus %x devfn %x\n",
                        aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
        /*local flag to mark there's aer request, xen_pcibk callback will use
        * this flag to judge whether we need to check pci-front give aer
@@ -754,8 +753,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
 
        if (test_bit(_XEN_PCIF_active,
                (unsigned long *)&sh_info->flags)) {
-               dev_dbg(&psdev->dev->dev,
-                       "schedule pci_conf service in " DRV_NAME "\n");
+               dev_dbg(&psdev->dev->dev, "schedule pci_conf service\n");
                xen_pcibk_test_and_schedule_op(psdev->pdev);
        }
 
@@ -786,13 +784,12 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
                                PCI_FUNC(dev->devfn));
 
        if (!psdev || !psdev->pdev) {
-               dev_err(&dev->dev,
-                       DRV_NAME " device is not found/assigned\n");
+               dev_err(&dev->dev, "device is not found/assigned\n");
                goto end;
        }
 
        if (!psdev->pdev->sh_info) {
-               dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+               dev_err(&dev->dev, "device is not connected or owned"
                        " by HVM, kill it\n");
                kill_domain_by_device(psdev);
                goto end;
@@ -844,13 +841,12 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
                                PCI_FUNC(dev->devfn));
 
        if (!psdev || !psdev->pdev) {
-               dev_err(&dev->dev,
-                       DRV_NAME " device is not found/assigned\n");
+               dev_err(&dev->dev, "device is not found/assigned\n");
                goto end;
        }
 
        if (!psdev->pdev->sh_info) {
-               dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+               dev_err(&dev->dev, "device is not connected or owned"
                        " by HVM, kill it\n");
                kill_domain_by_device(psdev);
                goto end;
@@ -902,13 +898,12 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
                                PCI_FUNC(dev->devfn));
 
        if (!psdev || !psdev->pdev) {
-               dev_err(&dev->dev,
-                       DRV_NAME " device is not found/assigned\n");
+               dev_err(&dev->dev, "device is not found/assigned\n");
                goto end;
        }
 
        if (!psdev->pdev->sh_info) {
-               dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+               dev_err(&dev->dev, "device is not connected or owned"
                        " by HVM, kill it\n");
                kill_domain_by_device(psdev);
                goto end;
@@ -956,13 +951,12 @@ static void xen_pcibk_error_resume(struct pci_dev *dev)
                                PCI_FUNC(dev->devfn));
 
        if (!psdev || !psdev->pdev) {
-               dev_err(&dev->dev,
-                       DRV_NAME " device is not found/assigned\n");
+               dev_err(&dev->dev, "device is not found/assigned\n");
                goto end;
        }
 
        if (!psdev->pdev->sh_info) {
-               dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+               dev_err(&dev->dev, "device is not connected or owned"
                        " by HVM, kill it\n");
                kill_domain_by_device(psdev);
                goto end;
index 7c95516..f1ed2db 100644 (file)
@@ -186,8 +186,6 @@ void xen_pcibk_do_op(struct work_struct *data);
 int xen_pcibk_xenbus_register(void);
 void xen_pcibk_xenbus_unregister(void);
 
-extern int verbose_request;
-
 void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
 #endif
 
index 787966f..e11a743 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
 
 #include <linux/moduleparam.h>
 #include <linux/wait.h>
@@ -14,9 +15,6 @@
 #include <linux/sched.h>
 #include "pciback.h"
 
-int verbose_request;
-module_param(verbose_request, int, 0644);
-
 static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
 
 /* Ensure a device has the fake IRQ handler "turned on/off" and is
@@ -147,9 +145,6 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
        struct xen_pcibk_dev_data *dev_data;
        int status;
 
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
-
        if (dev->msi_enabled)
                status = -EALREADY;
        else if (dev->msix_enabled)
@@ -158,9 +153,8 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
                status = pci_enable_msi(dev);
 
        if (status) {
-               pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
-                                   pci_name(dev), pdev->xdev->otherend_id,
-                                   status);
+               dev_warn_ratelimited(&dev->dev, "error enabling MSI for guest %u: err %d\n",
+                                    pdev->xdev->otherend_id, status);
                op->value = 0;
                return XEN_PCI_ERR_op_failed;
        }
@@ -169,9 +163,8 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
         * the local domain's IRQ number. */
 
        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
-                       op->value);
+
+       dev_dbg(&dev->dev, "MSI: %d\n", op->value);
 
        dev_data = pci_get_drvdata(dev);
        if (dev_data)
@@ -184,10 +177,6 @@ static
 int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
                          struct pci_dev *dev, struct xen_pci_op *op)
 {
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
-                      pci_name(dev));
-
        if (dev->msi_enabled) {
                struct xen_pcibk_dev_data *dev_data;
 
@@ -198,9 +187,9 @@ int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
                        dev_data->ack_intr = 1;
        }
        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
-                       op->value);
+
+       dev_dbg(&dev->dev, "MSI: %d\n", op->value);
+
        return 0;
 }
 
@@ -213,9 +202,7 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
        struct msix_entry *entries;
        u16 cmd;
 
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
-                      pci_name(dev));
+       dev_dbg(&dev->dev, "enable MSI-X\n");
 
        if (op->value > SH_INFO_MAX_VEC)
                return -EINVAL;
@@ -248,17 +235,13 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
                        if (entries[i].vector) {
                                op->msix_entries[i].vector =
                                        xen_pirq_from_irq(entries[i].vector);
-                               if (unlikely(verbose_request))
-                                       printk(KERN_DEBUG DRV_NAME ": %s: " \
-                                               "MSI-X[%d]: %d\n",
-                                               pci_name(dev), i,
-                                               op->msix_entries[i].vector);
+                               dev_dbg(&dev->dev, "MSI-X[%d]: %d\n", i,
+                                       op->msix_entries[i].vector);
                        }
                }
        } else
-               pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n",
-                                   pci_name(dev), pdev->xdev->otherend_id,
-                                   result);
+               dev_warn_ratelimited(&dev->dev, "error enabling MSI-X for guest %u: err %d!\n",
+                                    pdev->xdev->otherend_id, result);
        kfree(entries);
 
        op->value = result;
@@ -273,10 +256,6 @@ static
 int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
                           struct pci_dev *dev, struct xen_pci_op *op)
 {
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
-                       pci_name(dev));
-
        if (dev->msix_enabled) {
                struct xen_pcibk_dev_data *dev_data;
 
@@ -291,9 +270,9 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
         * an undefined IRQ value of zero.
         */
        op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-       if (unlikely(verbose_request))
-               printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n",
-                      pci_name(dev), op->value);
+
+       dev_dbg(&dev->dev, "MSI-X: %d\n", op->value);
+
        return 0;
 }
 #endif
@@ -424,7 +403,7 @@ static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
                dev_data->handled++;
                if ((dev_data->handled % 1000) == 0) {
                        if (xen_test_irq_shared(irq)) {
-                               pr_info("%s IRQ line is not shared "
+                               dev_info(&dev->dev, "%s IRQ line is not shared "
                                        "with other domains. Turning ISR off\n",
                                         dev_data->irq_name);
                                dev_data->ack_intr = 0;
index f6ba181..5447b5a 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
 
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -105,9 +106,8 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
                                       struct pci_dev_entry, list);
 
                        if (match_slot(dev, t->dev)) {
-                               pr_info("vpci: %s: assign to virtual slot %d func %d\n",
-                                       pci_name(dev), slot,
-                                       PCI_FUNC(dev->devfn));
+                               dev_info(&dev->dev, "vpci: assign to virtual slot %d func %d\n",
+                                        slot, PCI_FUNC(dev->devfn));
                                list_add_tail(&dev_entry->list,
                                              &vpci_dev->dev_list[slot]);
                                func = PCI_FUNC(dev->devfn);
@@ -119,8 +119,8 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
        /* Assign to a new slot on the virtual PCI bus */
        for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
                if (list_empty(&vpci_dev->dev_list[slot])) {
-                       pr_info("vpci: %s: assign to virtual slot %d\n",
-                               pci_name(dev), slot);
+                       dev_info(&dev->dev, "vpci: assign to virtual slot %d\n",
+                                slot);
                        list_add_tail(&dev_entry->list,
                                      &vpci_dev->dev_list[slot]);
                        func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn);
index dc81e99..38725d9 100644 (file)
@@ -31,6 +31,7 @@
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
 
 #define DPRINTK(fmt, args...)                          \
        pr_debug("xenbus_probe (%s:%d) " fmt ".\n",     \
@@ -607,7 +608,7 @@ int xenbus_dev_suspend(struct device *dev)
        if (drv->suspend)
                err = drv->suspend(xdev);
        if (err)
-               pr_warn("suspend %s failed: %i\n", dev_name(dev), err);
+               dev_warn(dev, "suspend failed: %i\n", err);
        return 0;
 }
 EXPORT_SYMBOL_GPL(xenbus_dev_suspend);
@@ -626,8 +627,7 @@ int xenbus_dev_resume(struct device *dev)
        drv = to_xenbus_driver(dev->driver);
        err = talk_to_otherend(xdev);
        if (err) {
-               pr_warn("resume (talk_to_otherend) %s failed: %i\n",
-                       dev_name(dev), err);
+               dev_warn(dev, "resume (talk_to_otherend) failed: %i\n", err);
                return err;
        }
 
@@ -636,15 +636,14 @@ int xenbus_dev_resume(struct device *dev)
        if (drv->resume) {
                err = drv->resume(xdev);
                if (err) {
-                       pr_warn("resume %s failed: %i\n", dev_name(dev), err);
+                       dev_warn(dev, "resume failed: %i\n", err);
                        return err;
                }
        }
 
        err = watch_otherend(xdev);
        if (err) {
-               pr_warn("resume (watch_otherend) %s failed: %d.\n",
-                       dev_name(dev), err);
+               dev_warn(dev, "resume (watch_otherend) failed: %d\n", err);
                return err;
        }
 
index 97bccde..768497f 100644 (file)
@@ -447,6 +447,7 @@ static int afs_store_data(struct address_space *mapping,
        op->store.last = last;
        op->store.first_offset = offset;
        op->store.last_to = to;
+       op->mtime = vnode->vfs_inode.i_mtime;
        op->ops = &afs_store_data_operation;
 
 try_next_key:
index 7e07913..7ecddc2 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -27,7 +27,6 @@
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
-#include <linux/mmu_context.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
index 4023c98..0b65a91 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/errno.h>
 #include <linux/sched/signal.h>
 #include <linux/mm.h>
-#include <linux/mmu_context.h>
 #include <linux/sched/mm.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
@@ -112,6 +111,7 @@ struct io_wq {
        unsigned long state;
 
        free_work_fn *free_work;
+       io_wq_work_fn *do_work;
 
        struct task_struct *manager;
        struct user_struct *user;
@@ -170,8 +170,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
                        dropped_lock = true;
                }
                __set_current_state(TASK_RUNNING);
-               set_fs(KERNEL_DS);
-               unuse_mm(worker->mm);
+               kthread_unuse_mm(worker->mm);
                mmput(worker->mm);
                worker->mm = NULL;
        }
@@ -418,18 +417,15 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
 static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
 {
        if (worker->mm) {
-               unuse_mm(worker->mm);
+               kthread_unuse_mm(worker->mm);
                mmput(worker->mm);
                worker->mm = NULL;
        }
-       if (!work->mm) {
-               set_fs(KERNEL_DS);
+       if (!work->mm)
                return;
-       }
+
        if (mmget_not_zero(work->mm)) {
-               use_mm(work->mm);
-               if (!worker->mm)
-                       set_fs(USER_DS);
+               kthread_use_mm(work->mm);
                worker->mm = work->mm;
                /* hang on to this mm */
                work->mm = NULL;
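The mm handling above is the generic pattern for kernel threads that temporarily adopt a user address space: kthread_use_mm()/kthread_unuse_mm() replace use_mm()/unuse_mm(), and the open-coded set_fs() switching is dropped alongside. A hedged sketch of the shape (the helper and its caller are made up; the API calls are the ones the hunk switches to):

#include <linux/kthread.h>
#include <linux/sched/mm.h>

/* Borrow a user mm from a kernel worker for the duration of one request. */
static void worker_run_with_mm(struct mm_struct *mm)
{
        if (!mmget_not_zero(mm))        /* the owner may already be exiting */
                return;

        kthread_use_mm(mm);
        /* ... do the work on behalf of the submitting task ... */
        kthread_unuse_mm(mm);
        mmput(mm);
}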
@@ -528,7 +524,7 @@ get_next:
 
                        hash = io_get_work_hash(work);
                        linked = old_work = work;
-                       linked->func(&linked);
+                       wq->do_work(&linked);
                        linked = (old_work == linked) ? NULL : linked;
 
                        work = next_hashed;
@@ -785,7 +781,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
                struct io_wq_work *old_work = work;
 
                work->flags |= IO_WQ_WORK_CANCEL;
-               work->func(&work);
+               wq->do_work(&work);
                work = (work == old_work) ? NULL : work;
                wq->free_work(old_work);
        } while (work);
@@ -1023,7 +1019,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
        int ret = -ENOMEM, node;
        struct io_wq *wq;
 
-       if (WARN_ON_ONCE(!data->free_work))
+       if (WARN_ON_ONCE(!data->free_work || !data->do_work))
                return ERR_PTR(-EINVAL);
 
        wq = kzalloc(sizeof(*wq), GFP_KERNEL);
@@ -1037,6 +1033,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
        }
 
        wq->free_work = data->free_work;
+       wq->do_work = data->do_work;
 
        /* caller must already hold a reference to this */
        wq->user = data->user;
@@ -1093,7 +1090,7 @@ err:
 
 bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
 {
-       if (data->free_work != wq->free_work)
+       if (data->free_work != wq->free_work || data->do_work != wq->do_work)
                return false;
 
        return refcount_inc_not_zero(&wq->use_refs);
index 5ba12de..8e138fa 100644 (file)
@@ -85,7 +85,6 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
        struct io_wq_work_node list;
-       void (*func)(struct io_wq_work **);
        struct files_struct *files;
        struct mm_struct *mm;
        const struct cred *creds;
@@ -94,11 +93,6 @@ struct io_wq_work {
        pid_t task_pid;
 };
 
-#define INIT_IO_WORK(work, _func)                              \
-       do {                                                    \
-               *(work) = (struct io_wq_work){ .func = _func }; \
-       } while (0)                                             \
-
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 {
        if (!work->list.next)
@@ -108,10 +102,12 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 }
 
 typedef void (free_work_fn)(struct io_wq_work *);
+typedef void (io_wq_work_fn)(struct io_wq_work **);
 
 struct io_wq_data {
        struct user_struct *user;
 
+       io_wq_work_fn *do_work;
        free_work_fn *free_work;
 };
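With the per-request ->func pointer gone, each io-wq gets exactly one submit handler, registered through io_wq_data when the workqueue is created. A hedged sketch of that wiring (my_do_work(), my_free_work() and create_example_wq() are stand-ins, not functions from the patch):

#include "io-wq.h"

static void my_do_work(struct io_wq_work **workptr)
{
        /* run *workptr; it may be replaced with follow-on work before returning */
}

static void my_free_work(struct io_wq_work *work)
{
        /* drop the reference taken when the work was queued */
}

static struct io_wq *create_example_wq(struct user_struct *user, unsigned int bounded)
{
        struct io_wq_data data = {
                .user      = user,
                .do_work   = my_do_work,
                .free_work = my_free_work,
        };

        return io_wq_create(bounded, &data);
}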
 
index 9fb0dc6..155f3d8 100644 (file)
@@ -55,7 +55,6 @@
 #include <linux/fdtable.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
-#include <linux/mmu_context.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
@@ -529,7 +528,6 @@ enum {
        REQ_F_INFLIGHT_BIT,
        REQ_F_CUR_POS_BIT,
        REQ_F_NOWAIT_BIT,
-       REQ_F_IOPOLL_COMPLETED_BIT,
        REQ_F_LINK_TIMEOUT_BIT,
        REQ_F_TIMEOUT_BIT,
        REQ_F_ISREG_BIT,
@@ -541,6 +539,8 @@ enum {
        REQ_F_POLLED_BIT,
        REQ_F_BUFFER_SELECTED_BIT,
        REQ_F_NO_FILE_TABLE_BIT,
+       REQ_F_QUEUE_TIMEOUT_BIT,
+       REQ_F_WORK_INITIALIZED_BIT,
 
        /* not a real bit, just to check we're not overflowing the space */
        __REQ_F_LAST_BIT,
@@ -572,8 +572,6 @@ enum {
        REQ_F_CUR_POS           = BIT(REQ_F_CUR_POS_BIT),
        /* must not punt to workers */
        REQ_F_NOWAIT            = BIT(REQ_F_NOWAIT_BIT),
-       /* polled IO has completed */
-       REQ_F_IOPOLL_COMPLETED  = BIT(REQ_F_IOPOLL_COMPLETED_BIT),
        /* has linked timeout */
        REQ_F_LINK_TIMEOUT      = BIT(REQ_F_LINK_TIMEOUT_BIT),
        /* timeout request */
@@ -596,6 +594,10 @@ enum {
        REQ_F_BUFFER_SELECTED   = BIT(REQ_F_BUFFER_SELECTED_BIT),
        /* doesn't need file table for this request */
        REQ_F_NO_FILE_TABLE     = BIT(REQ_F_NO_FILE_TABLE_BIT),
+       /* needs to queue linked timeout */
+       REQ_F_QUEUE_TIMEOUT     = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
+       /* io_wq_work is initialized */
+       REQ_F_WORK_INITIALIZED  = BIT(REQ_F_WORK_INITIALIZED_BIT),
 };
 
 struct async_poll {
@@ -634,6 +636,8 @@ struct io_kiocb {
        struct io_async_ctx             *io;
        int                             cflags;
        u8                              opcode;
+       /* polled IO has completed */
+       u8                              iopoll_completed;
 
        u16                             buf_index;
 
@@ -698,6 +702,8 @@ struct io_op_def {
        unsigned                needs_mm : 1;
        /* needs req->file assigned */
        unsigned                needs_file : 1;
+       /* don't fail if file grab fails */
+       unsigned                needs_file_no_error : 1;
        /* hash wq insertion if file is a regular file */
        unsigned                hash_reg_file : 1;
        /* unbound wq insertion if file is a non-regular file */
@@ -804,6 +810,8 @@ static const struct io_op_def io_op_defs[] = {
                .needs_fs               = 1,
        },
        [IORING_OP_CLOSE] = {
+               .needs_file             = 1,
+               .needs_file_no_error    = 1,
                .file_table             = 1,
        },
        [IORING_OP_FILES_UPDATE] = {
@@ -904,6 +912,19 @@ EXPORT_SYMBOL(io_uring_get_socket);
 
 static void io_file_put_work(struct work_struct *work);
 
+/*
+ * Note: io_req_init_async() must be called before any member of
+ * io_wq_work is touched for the first time.
+ */
+static inline void io_req_init_async(struct io_kiocb *req)
+{
+       if (req->flags & REQ_F_WORK_INITIALIZED)
+               return;
+
+       memset(&req->work, 0, sizeof(req->work));
+       req->flags |= REQ_F_WORK_INITIALIZED;
+}
+
 static inline bool io_async_submit(struct io_ring_ctx *ctx)
 {
        return ctx->flags & IORING_SETUP_SQPOLL;
@@ -1030,6 +1051,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
 
 static inline void io_req_work_drop_env(struct io_kiocb *req)
 {
+       if (!(req->flags & REQ_F_WORK_INITIALIZED))
+               return;
+
        if (req->work.mm) {
                mmdrop(req->work.mm);
                req->work.mm = NULL;
@@ -1576,16 +1600,6 @@ static void io_free_req(struct io_kiocb *req)
                io_queue_async_work(nxt);
 }
 
-static void io_link_work_cb(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-       struct io_kiocb *link;
-
-       link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
-       io_queue_linked_timeout(link);
-       io_wq_submit_work(workptr);
-}
-
 static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
 {
        struct io_kiocb *link;
@@ -1597,7 +1611,7 @@ static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
        *workptr = &nxt->work;
        link = io_prep_linked_timeout(nxt);
        if (link)
-               nxt->work.func = io_link_work_cb;
+               nxt->flags |= REQ_F_QUEUE_TIMEOUT;
 }
 
 /*
@@ -1782,7 +1796,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
                 * If we find a request that requires polling, break out
                 * and complete those lists first, if we have entries there.
                 */
-               if (req->flags & REQ_F_IOPOLL_COMPLETED) {
+               if (READ_ONCE(req->iopoll_completed)) {
                        list_move_tail(&req->list, &done);
                        continue;
                }
@@ -1963,7 +1977,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
                req_set_fail_links(req);
        req->result = res;
        if (res != -EAGAIN)
-               req->flags |= REQ_F_IOPOLL_COMPLETED;
+               WRITE_ONCE(req->iopoll_completed, 1);
 }
 
 /*
@@ -1996,7 +2010,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
         * For fast devices, IO may have already completed. If it has, add
         * it to the front so we find it first.
         */
-       if (req->flags & REQ_F_IOPOLL_COMPLETED)
+       if (READ_ONCE(req->iopoll_completed))
                list_add(&req->list, &ctx->poll_list);
        else
                list_add_tail(&req->list, &ctx->poll_list);
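Replacing the REQ_F_IOPOLL_COMPLETED flag with a byte that is written on the completion path and read from the poll loop is why the accesses are wrapped in WRITE_ONCE()/READ_ONCE(). A minimal sketch of the pattern, with made-up names:

#include <linux/compiler.h>
#include <linux/types.h>

struct polled_req {
        u8 completed;                   /* set by the completion callback */
};

static void on_complete(struct polled_req *req)
{
        WRITE_ONCE(req->completed, 1);  /* single store, never torn or elided */
}

static bool poll_done(struct polled_req *req)
{
        return READ_ONCE(req->completed); /* re-read every time, not cached in a register */
}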
@@ -2064,6 +2078,10 @@ static bool io_file_supports_async(struct file *file, int rw)
        if (S_ISREG(mode) && file->f_op != &io_uring_fops)
                return true;
 
+       /* any ->read/write should understand O_NONBLOCK */
+       if (file->f_flags & O_NONBLOCK)
+               return true;
+
        if (!(file->f_mode & FMODE_NOWAIT))
                return false;
 
@@ -2106,8 +2124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                kiocb->ki_ioprio = get_current_ioprio();
 
        /* don't allow async punt if RWF_NOWAIT was requested */
-       if ((kiocb->ki_flags & IOCB_NOWAIT) ||
-           (req->file->f_flags & O_NONBLOCK))
+       if (kiocb->ki_flags & IOCB_NOWAIT)
                req->flags |= REQ_F_NOWAIT;
 
        if (force_nonblock)
@@ -2121,6 +2138,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                kiocb->ki_flags |= IOCB_HIPRI;
                kiocb->ki_complete = io_complete_rw_iopoll;
                req->result = 0;
+               req->iopoll_completed = 0;
        } else {
                if (kiocb->ki_flags & IOCB_HIPRI)
                        return -EINVAL;
@@ -2359,8 +2377,14 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
 static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
                                    bool needs_lock)
 {
-       if (req->flags & REQ_F_BUFFER_SELECTED)
+       if (req->flags & REQ_F_BUFFER_SELECTED) {
+               struct io_buffer *kbuf;
+
+               kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+               iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
+               iov[0].iov_len = kbuf->len;
                return 0;
+       }
        if (!req->rw.len)
                return 0;
        else if (req->rw.len > 1)
@@ -2742,7 +2766,8 @@ copy_iov:
                        if (ret)
                                goto out_free;
                        /* any defer here is final, must blocking retry */
-                       if (!file_can_poll(req->file))
+                       if (!(req->flags & REQ_F_NOWAIT) &&
+                           !file_can_poll(req->file))
                                req->flags |= REQ_F_MUST_PUNT;
                        return -EAGAIN;
                }
@@ -2762,6 +2787,8 @@ static int __io_splice_prep(struct io_kiocb *req,
 
        if (req->flags & REQ_F_NEED_CLEANUP)
                return 0;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        sp->file_in = NULL;
        sp->len = READ_ONCE(sqe->len);
@@ -2776,8 +2803,14 @@ static int __io_splice_prep(struct io_kiocb *req,
                return ret;
        req->flags |= REQ_F_NEED_CLEANUP;
 
-       if (!S_ISREG(file_inode(sp->file_in)->i_mode))
+       if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
+               /*
+                * The splice operation will be punted async, and we need to
+                * modify io_wq_work.flags here, so initialize io_wq_work first.
+                */
+               io_req_init_async(req);
                req->work.flags |= IO_WQ_WORK_UNBOUND;
+       }
 
        return 0;
 }
@@ -2886,23 +2919,15 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static bool io_req_cancelled(struct io_kiocb *req)
-{
-       if (req->work.flags & IO_WQ_WORK_CANCEL) {
-               req_set_fail_links(req);
-               io_cqring_add_event(req, -ECANCELED);
-               io_put_req(req);
-               return true;
-       }
-
-       return false;
-}
-
-static void __io_fsync(struct io_kiocb *req)
+static int io_fsync(struct io_kiocb *req, bool force_nonblock)
 {
        loff_t end = req->sync.off + req->sync.len;
        int ret;
 
+       /* fsync always requires a blocking context */
+       if (force_nonblock)
+               return -EAGAIN;
+
        ret = vfs_fsync_range(req->file, req->sync.off,
                                end > 0 ? end : LLONG_MAX,
                                req->sync.flags & IORING_FSYNC_DATASYNC);
@@ -2910,58 +2935,16 @@ static void __io_fsync(struct io_kiocb *req)
                req_set_fail_links(req);
        io_cqring_add_event(req, ret);
        io_put_req(req);
-}
-
-static void io_fsync_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
-       if (io_req_cancelled(req))
-               return;
-       __io_fsync(req);
-       io_steal_work(req, workptr);
-}
-
-static int io_fsync(struct io_kiocb *req, bool force_nonblock)
-{
-       /* fsync always requires a blocking context */
-       if (force_nonblock) {
-               req->work.func = io_fsync_finish;
-               return -EAGAIN;
-       }
-       __io_fsync(req);
        return 0;
 }
 
-static void __io_fallocate(struct io_kiocb *req)
-{
-       int ret;
-
-       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
-       ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
-                               req->sync.len);
-       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
-       if (ret < 0)
-               req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
-}
-
-static void io_fallocate_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
-       if (io_req_cancelled(req))
-               return;
-       __io_fallocate(req);
-       io_steal_work(req, workptr);
-}
-
 static int io_fallocate_prep(struct io_kiocb *req,
                             const struct io_uring_sqe *sqe)
 {
        if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
                return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        req->sync.off = READ_ONCE(sqe->off);
        req->sync.len = READ_ONCE(sqe->addr);
@@ -2972,66 +2955,74 @@ static int io_fallocate_prep(struct io_kiocb *req,
 
 static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
 {
+       int ret;
+
        /* fallocate always requiring blocking context */
-       if (force_nonblock) {
-               req->work.func = io_fallocate_finish;
+       if (force_nonblock)
                return -EAGAIN;
-       }
 
-       __io_fallocate(req);
+       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
+       ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
+                               req->sync.len);
+       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+       if (ret < 0)
+               req_set_fail_links(req);
+       io_cqring_add_event(req, ret);
+       io_put_req(req);
        return 0;
 }
 
-static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        const char __user *fname;
        int ret;
 
-       if (sqe->ioprio || sqe->buf_index)
+       if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
                return -EINVAL;
-       if (req->flags & REQ_F_FIXED_FILE)
+       if (unlikely(sqe->ioprio || sqe->buf_index))
+               return -EINVAL;
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
-       if (req->flags & REQ_F_NEED_CLEANUP)
-               return 0;
 
-       req->open.dfd = READ_ONCE(sqe->fd);
-       req->open.how.mode = READ_ONCE(sqe->len);
-       fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
-       req->open.how.flags = READ_ONCE(sqe->open_flags);
-       if (force_o_largefile())
+       /* open.how should be already initialised */
+       if (!(req->open.how.flags & O_PATH) && force_o_largefile())
                req->open.how.flags |= O_LARGEFILE;
 
+       req->open.dfd = READ_ONCE(sqe->fd);
+       fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
        req->open.filename = getname(fname);
        if (IS_ERR(req->open.filename)) {
                ret = PTR_ERR(req->open.filename);
                req->open.filename = NULL;
                return ret;
        }
-
        req->open.nofile = rlimit(RLIMIT_NOFILE);
        req->flags |= REQ_F_NEED_CLEANUP;
        return 0;
 }
 
+static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       u64 flags, mode;
+
+       if (req->flags & REQ_F_NEED_CLEANUP)
+               return 0;
+       mode = READ_ONCE(sqe->len);
+       flags = READ_ONCE(sqe->open_flags);
+       req->open.how = build_open_how(flags, mode);
+       return __io_openat_prep(req, sqe);
+}
+
 static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct open_how __user *how;
-       const char __user *fname;
        size_t len;
        int ret;
 
-       if (sqe->ioprio || sqe->buf_index)
-               return -EINVAL;
-       if (req->flags & REQ_F_FIXED_FILE)
-               return -EBADF;
        if (req->flags & REQ_F_NEED_CLEANUP)
                return 0;
-
-       req->open.dfd = READ_ONCE(sqe->fd);
-       fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
        how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
        len = READ_ONCE(sqe->len);
-
        if (len < OPEN_HOW_SIZE_VER0)
                return -EINVAL;
 
@@ -3040,19 +3031,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (ret)
                return ret;
 
-       if (!(req->open.how.flags & O_PATH) && force_o_largefile())
-               req->open.how.flags |= O_LARGEFILE;
-
-       req->open.filename = getname(fname);
-       if (IS_ERR(req->open.filename)) {
-               ret = PTR_ERR(req->open.filename);
-               req->open.filename = NULL;
-               return ret;
-       }
-
-       req->open.nofile = rlimit(RLIMIT_NOFILE);
-       req->flags |= REQ_F_NEED_CLEANUP;
-       return 0;
+       return __io_openat_prep(req, sqe);
 }
 
 static int io_openat2(struct io_kiocb *req, bool force_nonblock)
@@ -3092,7 +3071,6 @@ err:
 
 static int io_openat(struct io_kiocb *req, bool force_nonblock)
 {
-       req->open.how = build_open_how(req->open.how.flags, req->open.how.mode);
        return io_openat2(req, force_nonblock);
 }
 
@@ -3181,7 +3159,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
        p->addr = READ_ONCE(sqe->addr);
        p->len = READ_ONCE(sqe->len);
 
-       if (!access_ok(u64_to_user_ptr(p->addr), p->len))
+       if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
                return -EFAULT;
 
        p->bgid = READ_ONCE(sqe->buf_group);
@@ -3259,6 +3237,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
 #if defined(CONFIG_EPOLL)
        if (sqe->ioprio || sqe->buf_index)
                return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        req->epoll.epfd = READ_ONCE(sqe->fd);
        req->epoll.op = READ_ONCE(sqe->len);
@@ -3303,6 +3283,8 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
        if (sqe->ioprio || sqe->buf_index || sqe->off)
                return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        req->madvise.addr = READ_ONCE(sqe->addr);
        req->madvise.len = READ_ONCE(sqe->len);
@@ -3337,6 +3319,8 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        if (sqe->ioprio || sqe->buf_index || sqe->addr)
                return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        req->fadvise.offset = READ_ONCE(sqe->off);
        req->fadvise.len = READ_ONCE(sqe->len);
@@ -3370,6 +3354,8 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
 
 static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
        if (sqe->ioprio || sqe->buf_index)
                return -EINVAL;
        if (req->flags & REQ_F_FIXED_FILE)
@@ -3410,10 +3396,14 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        /*
         * If we queue this for async, it must not be cancellable. That would
-        * leave the 'file' in an undeterminate state.
+        * leave the 'file' in an indeterminate state, and we need to modify
+        * io_wq_work.flags here, so initialize io_wq_work first.
         */
+       io_req_init_async(req);
        req->work.flags |= IO_WQ_WORK_NO_CANCEL;
 
+       if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+               return -EINVAL;
        if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
            sqe->rw_flags || sqe->buf_index)
                return -EINVAL;
@@ -3421,53 +3411,41 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return -EBADF;
 
        req->close.fd = READ_ONCE(sqe->fd);
-       return 0;
-}
-
-/* only called when __close_fd_get_file() is done */
-static void __io_close_finish(struct io_kiocb *req)
-{
-       int ret;
-
-       ret = filp_close(req->close.put_file, req->work.files);
-       if (ret < 0)
-               req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       fput(req->close.put_file);
-       io_put_req(req);
-}
-
-static void io_close_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+       if ((req->file && req->file->f_op == &io_uring_fops) ||
+           req->close.fd == req->ctx->ring_fd)
+               return -EBADF;
 
-       /* not cancellable, don't do io_req_cancelled() */
-       __io_close_finish(req);
-       io_steal_work(req, workptr);
+       req->close.put_file = NULL;
+       return 0;
 }
 
 static int io_close(struct io_kiocb *req, bool force_nonblock)
 {
+       struct io_close *close = &req->close;
        int ret;
 
-       req->close.put_file = NULL;
-       ret = __close_fd_get_file(req->close.fd, &req->close.put_file);
-       if (ret < 0)
-               return (ret == -ENOENT) ? -EBADF : ret;
+       /* might be already done during nonblock submission */
+       if (!close->put_file) {
+               ret = __close_fd_get_file(close->fd, &close->put_file);
+               if (ret < 0)
+                       return (ret == -ENOENT) ? -EBADF : ret;
+       }
 
        /* if the file has a flush method, be safe and punt to async */
-       if (req->close.put_file->f_op->flush && force_nonblock) {
+       if (close->put_file->f_op->flush && force_nonblock) {
                /* avoid grabbing files - we don't need the files */
                req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT;
-               req->work.func = io_close_finish;
                return -EAGAIN;
        }
 
-       /*
-        * No ->flush(), safely close from here and just punt the
-        * fput() to async context.
-        */
-       __io_close_finish(req);
+       /* No ->flush() or already async, safely close from here */
+       ret = filp_close(close->put_file, req->work.files);
+       if (ret < 0)
+               req_set_fail_links(req);
+       io_cqring_add_event(req, ret);
+       fput(close->put_file);
+       close->put_file = NULL;
+       io_put_req(req);
        return 0;
 }
 
@@ -3489,38 +3467,20 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static void __io_sync_file_range(struct io_kiocb *req)
+static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
 {
        int ret;
 
+       /* sync_file_range always requires a blocking context */
+       if (force_nonblock)
+               return -EAGAIN;
+
        ret = sync_file_range(req->file, req->sync.off, req->sync.len,
                                req->sync.flags);
        if (ret < 0)
                req_set_fail_links(req);
        io_cqring_add_event(req, ret);
        io_put_req(req);
-}
-
-
-static void io_sync_file_range_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
-       if (io_req_cancelled(req))
-               return;
-       __io_sync_file_range(req);
-       io_steal_work(req, workptr);
-}
-
-static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
-{
-       /* sync_file_range always requires a blocking context */
-       if (force_nonblock) {
-               req->work.func = io_sync_file_range_finish;
-               return -EAGAIN;
-       }
-
-       __io_sync_file_range(req);
        return 0;
 }
 
@@ -3546,6 +3506,9 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        struct io_async_ctx *io = req->io;
        int ret;
 
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+
        sr->msg_flags = READ_ONCE(sqe->msg_flags);
        sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
@@ -3575,9 +3538,6 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
        struct socket *sock;
        int ret;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sock = sock_from_file(req->file, &ret);
        if (sock) {
                struct io_async_ctx io;
@@ -3631,9 +3591,6 @@ static int io_send(struct io_kiocb *req, bool force_nonblock)
        struct socket *sock;
        int ret;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sock = sock_from_file(req->file, &ret);
        if (sock) {
                struct io_sr_msg *sr = &req->sr_msg;
@@ -3786,6 +3743,9 @@ static int io_recvmsg_prep(struct io_kiocb *req,
        struct io_async_ctx *io = req->io;
        int ret;
 
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+
        sr->msg_flags = READ_ONCE(sqe->msg_flags);
        sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
@@ -3814,9 +3774,6 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
        struct socket *sock;
        int ret, cflags = 0;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sock = sock_from_file(req->file, &ret);
        if (sock) {
                struct io_buffer *kbuf;
@@ -3878,9 +3835,6 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock)
        struct socket *sock;
        int ret, cflags = 0;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sock = sock_from_file(req->file, &ret);
        if (sock) {
                struct io_sr_msg *sr = &req->sr_msg;
@@ -3948,49 +3902,30 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static int __io_accept(struct io_kiocb *req, bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock)
 {
        struct io_accept *accept = &req->accept;
-       unsigned file_flags;
+       unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
        int ret;
 
-       file_flags = force_nonblock ? O_NONBLOCK : 0;
+       if (req->file->f_flags & O_NONBLOCK)
+               req->flags |= REQ_F_NOWAIT;
+
        ret = __sys_accept4_file(req->file, file_flags, accept->addr,
                                        accept->addr_len, accept->flags,
                                        accept->nofile);
        if (ret == -EAGAIN && force_nonblock)
                return -EAGAIN;
-       if (ret == -ERESTARTSYS)
-               ret = -EINTR;
-       if (ret < 0)
+       if (ret < 0) {
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
                req_set_fail_links(req);
+       }
        io_cqring_add_event(req, ret);
        io_put_req(req);
        return 0;
 }
 
-static void io_accept_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
-       if (io_req_cancelled(req))
-               return;
-       __io_accept(req, false);
-       io_steal_work(req, workptr);
-}
-
-static int io_accept(struct io_kiocb *req, bool force_nonblock)
-{
-       int ret;
-
-       ret = __io_accept(req, force_nonblock);
-       if (ret == -EAGAIN && force_nonblock) {
-               req->work.func = io_accept_finish;
-               return -EAGAIN;
-       }
-       return 0;
-}
-
 static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_connect *conn = &req->connect;
@@ -4329,7 +4264,8 @@ static void io_async_task_func(struct callback_head *cb)
        spin_unlock_irq(&ctx->completion_lock);
 
        /* restore ->work in case we need to retry again */
-       memcpy(&req->work, &apoll->work, sizeof(req->work));
+       if (req->flags & REQ_F_WORK_INITIALIZED)
+               memcpy(&req->work, &apoll->work, sizeof(req->work));
        kfree(apoll);
 
        if (!canceled) {
@@ -4426,7 +4362,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
                return false;
 
        req->flags |= REQ_F_POLLED;
-       memcpy(&apoll->work, &req->work, sizeof(req->work));
+       if (req->flags & REQ_F_WORK_INITIALIZED)
+               memcpy(&apoll->work, &req->work, sizeof(req->work));
        had_io = req->io != NULL;
 
        get_task_struct(current);
@@ -4451,7 +4388,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
                if (!had_io)
                        io_poll_remove_double(req);
                spin_unlock_irq(&ctx->completion_lock);
-               memcpy(&req->work, &apoll->work, sizeof(req->work));
+               if (req->flags & REQ_F_WORK_INITIALIZED)
+                       memcpy(&req->work, &apoll->work, sizeof(req->work));
                kfree(apoll);
                return false;
        }
@@ -4496,7 +4434,9 @@ static bool io_poll_remove_one(struct io_kiocb *req)
                         * io_req_work_drop_env below when dropping the
                         * final reference.
                         */
-                       memcpy(&req->work, &apoll->work, sizeof(req->work));
+                       if (req->flags & REQ_F_WORK_INITIALIZED)
+                               memcpy(&req->work, &apoll->work,
+                                      sizeof(req->work));
                        kfree(apoll);
                }
        }
@@ -4945,6 +4885,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
        if (!sqe)
                return 0;
 
+       io_req_init_async(req);
+
        if (io_op_defs[req->opcode].file_table) {
                ret = io_grab_files(req);
                if (unlikely(ret))
@@ -5382,12 +5324,26 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        return 0;
 }
 
+static void io_arm_async_linked_timeout(struct io_kiocb *req)
+{
+       struct io_kiocb *link;
+
+       /* link head's timeout is queued in io_queue_async_work() */
+       if (!(req->flags & REQ_F_QUEUE_TIMEOUT))
+               return;
+
+       link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
+       io_queue_linked_timeout(link);
+}
+
 static void io_wq_submit_work(struct io_wq_work **workptr)
 {
        struct io_wq_work *work = *workptr;
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
        int ret = 0;
 
+       io_arm_async_linked_timeout(req);
+
        /* if NO_CANCEL is set, we must still run the work */
        if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
                                IO_WQ_WORK_CANCEL) {
@@ -5438,19 +5394,20 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
                        return -EBADF;
                fd = array_index_nospec(fd, ctx->nr_user_files);
                file = io_file_from_index(ctx, fd);
-               if (!file)
-                       return -EBADF;
-               req->fixed_file_refs = ctx->file_data->cur_refs;
-               percpu_ref_get(req->fixed_file_refs);
+               if (file) {
+                       req->fixed_file_refs = ctx->file_data->cur_refs;
+                       percpu_ref_get(req->fixed_file_refs);
+               }
        } else {
                trace_io_uring_file_get(ctx, fd);
                file = __io_file_get(state, fd);
-               if (unlikely(!file))
-                       return -EBADF;
        }
 
-       *out_file = file;
-       return 0;
+       if (file || io_op_defs[req->opcode].needs_file_no_error) {
+               *out_file = file;
+               return 0;
+       }
+       return -EBADF;
 }
 
 static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
@@ -5584,7 +5541,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 again:
        linked_timeout = io_prep_linked_timeout(req);
 
-       if (req->work.creds && req->work.creds != current_cred()) {
+       if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
+           req->work.creds != current_cred()) {
                if (old_creds)
                        revert_creds(old_creds);
                if (old_creds == req->work.creds)
@@ -5607,6 +5565,8 @@ again:
                        goto exit;
                }
 punt:
+               io_req_init_async(req);
+
                if (io_op_defs[req->opcode].file_table) {
                        ret = io_grab_files(req);
                        if (ret)
@@ -5859,7 +5819,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        refcount_set(&req->refs, 2);
        req->task = NULL;
        req->result = 0;
-       INIT_IO_WORK(&req->work, io_wq_submit_work);
 
        if (unlikely(req->opcode >= IORING_OP_LAST))
                return -EINVAL;
@@ -5867,7 +5826,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        if (io_op_defs[req->opcode].needs_mm && !current->mm) {
                if (unlikely(!mmget_not_zero(ctx->sqo_mm)))
                        return -EFAULT;
-               use_mm(ctx->sqo_mm);
+               kthread_use_mm(ctx->sqo_mm);
        }
 
        sqe_flags = READ_ONCE(sqe->flags);
@@ -5881,6 +5840,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
        id = READ_ONCE(sqe->personality);
        if (id) {
+               io_req_init_async(req);
                req->work.creds = idr_find(&ctx->personality_idr, id);
                if (unlikely(!req->work.creds))
                        return -EINVAL;
@@ -5981,7 +5941,7 @@ static inline void io_sq_thread_drop_mm(struct io_ring_ctx *ctx)
        struct mm_struct *mm = current->mm;
 
        if (mm) {
-               unuse_mm(mm);
+               kthread_unuse_mm(mm);
                mmput(mm);
        }
 }
@@ -5990,15 +5950,12 @@ static int io_sq_thread(void *data)
 {
        struct io_ring_ctx *ctx = data;
        const struct cred *old_cred;
-       mm_segment_t old_fs;
        DEFINE_WAIT(wait);
        unsigned long timeout;
        int ret = 0;
 
        complete(&ctx->sq_thread_comp);
 
-       old_fs = get_fs();
-       set_fs(USER_DS);
        old_cred = override_creds(ctx->creds);
 
        timeout = jiffies + ctx->sq_thread_idle;
@@ -6103,7 +6060,6 @@ static int io_sq_thread(void *data)
        if (current->task_works)
                task_work_run();
 
-       set_fs(old_fs);
        io_sq_thread_drop_mm(ctx);
        revert_creds(old_cred);
 
@@ -6879,6 +6835,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx,
 
        data.user = ctx->user;
        data.free_work = io_free_work;
+       data.do_work = io_wq_submit_work;
 
        if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
                /* Do QD, or 4 * CPUS, whatever is smallest */
@@ -7160,8 +7117,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
 
                ret = 0;
                if (!pages || nr_pages > got_pages) {
-                       kfree(vmas);
-                       kfree(pages);
+                       kvfree(vmas);
+                       kvfree(pages);
                        pages = kvmalloc_array(nr_pages, sizeof(struct page *),
                                                GFP_KERNEL);
                        vmas = kvmalloc_array(nr_pages,
index 445eef4..91b58c8 100644 (file)
@@ -2780,6 +2780,8 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
        if (!nilfs->ns_writer)
                return -ENOMEM;
 
+       inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+
        err = nilfs_segctor_start_thread(nilfs->ns_writer);
        if (err) {
                kfree(nilfs->ns_writer);
index 1177c33..aca1662 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config OCFS2_FS
        tristate "OCFS2 file system support"
-       depends on NET && SYSFS && CONFIGFS_FS
+       depends on INET && SYSFS && CONFIGFS_FS
        select JBD2
        select CRC32
        select QUOTA
index 3a44e46..25cabbf 100644 (file)
@@ -62,7 +62,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
        last_index = (size - 1) >> PAGE_SHIFT;
 
        /*
-        * There are cases that lead to the page no longer bebongs to the
+        * There are cases that lead to the page no longer belonging to the
         * mapping.
         * 1) pagecache truncates locally due to memory pressure.
         * 2) pagecache truncates when another is taking EX lock against 
index e8730c6..379986e 100644 (file)
 #define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
 
 #include <linux/build_bug.h>
-#include <linux/kasan-checks.h>
+#include <linux/compiler.h>
+#include <linux/instrumented.h>
 
-static inline int
+static __always_inline int
 atomic_read(const atomic_t *v)
 {
-       kasan_check_read(v, sizeof(*v));
+       instrument_atomic_read(v, sizeof(*v));
        return arch_atomic_read(v);
 }
 #define atomic_read atomic_read
 
 #if defined(arch_atomic_read_acquire)
-static inline int
+static __always_inline int
 atomic_read_acquire(const atomic_t *v)
 {
-       kasan_check_read(v, sizeof(*v));
+       instrument_atomic_read(v, sizeof(*v));
        return arch_atomic_read_acquire(v);
 }
 #define atomic_read_acquire atomic_read_acquire
 #endif
 
-static inline void
+static __always_inline void
 atomic_set(atomic_t *v, int i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_set(v, i);
 }
 #define atomic_set atomic_set
 
 #if defined(arch_atomic_set_release)
-static inline void
+static __always_inline void
 atomic_set_release(atomic_t *v, int i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_set_release(v, i);
 }
 #define atomic_set_release atomic_set_release
 #endif
 
-static inline void
+static __always_inline void
 atomic_add(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_add(i, v);
 }
 #define atomic_add atomic_add
 
 #if !defined(arch_atomic_add_return_relaxed) || defined(arch_atomic_add_return)
-static inline int
+static __always_inline int
 atomic_add_return(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_add_return(i, v);
 }
 #define atomic_add_return atomic_add_return
 #endif
 
 #if defined(arch_atomic_add_return_acquire)
-static inline int
+static __always_inline int
 atomic_add_return_acquire(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_add_return_acquire(i, v);
 }
 #define atomic_add_return_acquire atomic_add_return_acquire
 #endif
 
 #if defined(arch_atomic_add_return_release)
-static inline int
+static __always_inline int
 atomic_add_return_release(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_add_return_release(i, v);
 }
 #define atomic_add_return_release atomic_add_return_release
 #endif
 
 #if defined(arch_atomic_add_return_relaxed)
-static inline int
+static __always_inline int
 atomic_add_return_relaxed(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_add_return_relaxed(i, v);
 }
 #define atomic_add_return_relaxed atomic_add_return_relaxed
 #endif
 
 #if !defined(arch_atomic_fetch_add_relaxed) || defined(arch_atomic_fetch_add)
-static inline int
+static __always_inline int
 atomic_fetch_add(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_add(i, v);
 }
 #define atomic_fetch_add atomic_fetch_add
 #endif
 
 #if defined(arch_atomic_fetch_add_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_add_acquire(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_add_acquire(i, v);
 }
 #define atomic_fetch_add_acquire atomic_fetch_add_acquire
 #endif
 
 #if defined(arch_atomic_fetch_add_release)
-static inline int
+static __always_inline int
 atomic_fetch_add_release(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_add_release(i, v);
 }
 #define atomic_fetch_add_release atomic_fetch_add_release
 #endif
 
 #if defined(arch_atomic_fetch_add_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_add_relaxed(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_add_relaxed(i, v);
 }
 #define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic_sub(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_sub(i, v);
 }
 #define atomic_sub atomic_sub
 
 #if !defined(arch_atomic_sub_return_relaxed) || defined(arch_atomic_sub_return)
-static inline int
+static __always_inline int
 atomic_sub_return(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_sub_return(i, v);
 }
 #define atomic_sub_return atomic_sub_return
 #endif
 
 #if defined(arch_atomic_sub_return_acquire)
-static inline int
+static __always_inline int
 atomic_sub_return_acquire(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_sub_return_acquire(i, v);
 }
 #define atomic_sub_return_acquire atomic_sub_return_acquire
 #endif
 
 #if defined(arch_atomic_sub_return_release)
-static inline int
+static __always_inline int
 atomic_sub_return_release(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_sub_return_release(i, v);
 }
 #define atomic_sub_return_release atomic_sub_return_release
 #endif
 
 #if defined(arch_atomic_sub_return_relaxed)
-static inline int
+static __always_inline int
 atomic_sub_return_relaxed(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_sub_return_relaxed(i, v);
 }
 #define atomic_sub_return_relaxed atomic_sub_return_relaxed
 #endif
 
 #if !defined(arch_atomic_fetch_sub_relaxed) || defined(arch_atomic_fetch_sub)
-static inline int
+static __always_inline int
 atomic_fetch_sub(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_sub(i, v);
 }
 #define atomic_fetch_sub atomic_fetch_sub
 #endif
 
 #if defined(arch_atomic_fetch_sub_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_sub_acquire(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_sub_acquire(i, v);
 }
 #define atomic_fetch_sub_acquire atomic_fetch_sub_acquire
 #endif
 
 #if defined(arch_atomic_fetch_sub_release)
-static inline int
+static __always_inline int
 atomic_fetch_sub_release(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_sub_release(i, v);
 }
 #define atomic_fetch_sub_release atomic_fetch_sub_release
 #endif
 
 #if defined(arch_atomic_fetch_sub_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_sub_relaxed(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_sub_relaxed(i, v);
 }
 #define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
 #endif
 
 #if defined(arch_atomic_inc)
-static inline void
+static __always_inline void
 atomic_inc(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_inc(v);
 }
 #define atomic_inc atomic_inc
 #endif
 
 #if defined(arch_atomic_inc_return)
-static inline int
+static __always_inline int
 atomic_inc_return(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_inc_return(v);
 }
 #define atomic_inc_return atomic_inc_return
 #endif
 
 #if defined(arch_atomic_inc_return_acquire)
-static inline int
+static __always_inline int
 atomic_inc_return_acquire(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_inc_return_acquire(v);
 }
 #define atomic_inc_return_acquire atomic_inc_return_acquire
 #endif
 
 #if defined(arch_atomic_inc_return_release)
-static inline int
+static __always_inline int
 atomic_inc_return_release(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_inc_return_release(v);
 }
 #define atomic_inc_return_release atomic_inc_return_release
 #endif
 
 #if defined(arch_atomic_inc_return_relaxed)
-static inline int
+static __always_inline int
 atomic_inc_return_relaxed(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_inc_return_relaxed(v);
 }
 #define atomic_inc_return_relaxed atomic_inc_return_relaxed
 #endif
 
 #if defined(arch_atomic_fetch_inc)
-static inline int
+static __always_inline int
 atomic_fetch_inc(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_inc(v);
 }
 #define atomic_fetch_inc atomic_fetch_inc
 #endif
 
 #if defined(arch_atomic_fetch_inc_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_inc_acquire(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_inc_acquire(v);
 }
 #define atomic_fetch_inc_acquire atomic_fetch_inc_acquire
 #endif
 
 #if defined(arch_atomic_fetch_inc_release)
-static inline int
+static __always_inline int
 atomic_fetch_inc_release(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_inc_release(v);
 }
 #define atomic_fetch_inc_release atomic_fetch_inc_release
 #endif
 
 #if defined(arch_atomic_fetch_inc_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_inc_relaxed(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_inc_relaxed(v);
 }
 #define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
 #endif
 
 #if defined(arch_atomic_dec)
-static inline void
+static __always_inline void
 atomic_dec(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_dec(v);
 }
 #define atomic_dec atomic_dec
 #endif
 
 #if defined(arch_atomic_dec_return)
-static inline int
+static __always_inline int
 atomic_dec_return(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_dec_return(v);
 }
 #define atomic_dec_return atomic_dec_return
 #endif
 
 #if defined(arch_atomic_dec_return_acquire)
-static inline int
+static __always_inline int
 atomic_dec_return_acquire(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_dec_return_acquire(v);
 }
 #define atomic_dec_return_acquire atomic_dec_return_acquire
 #endif
 
 #if defined(arch_atomic_dec_return_release)
-static inline int
+static __always_inline int
 atomic_dec_return_release(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_dec_return_release(v);
 }
 #define atomic_dec_return_release atomic_dec_return_release
 #endif
 
 #if defined(arch_atomic_dec_return_relaxed)
-static inline int
+static __always_inline int
 atomic_dec_return_relaxed(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_dec_return_relaxed(v);
 }
 #define atomic_dec_return_relaxed atomic_dec_return_relaxed
 #endif
 
 #if defined(arch_atomic_fetch_dec)
-static inline int
+static __always_inline int
 atomic_fetch_dec(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_dec(v);
 }
 #define atomic_fetch_dec atomic_fetch_dec
 #endif
 
 #if defined(arch_atomic_fetch_dec_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_dec_acquire(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_dec_acquire(v);
 }
 #define atomic_fetch_dec_acquire atomic_fetch_dec_acquire
 #endif
 
 #if defined(arch_atomic_fetch_dec_release)
-static inline int
+static __always_inline int
 atomic_fetch_dec_release(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_dec_release(v);
 }
 #define atomic_fetch_dec_release atomic_fetch_dec_release
 #endif
 
 #if defined(arch_atomic_fetch_dec_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_dec_relaxed(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_dec_relaxed(v);
 }
 #define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic_and(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_and(i, v);
 }
 #define atomic_and atomic_and
 
 #if !defined(arch_atomic_fetch_and_relaxed) || defined(arch_atomic_fetch_and)
-static inline int
+static __always_inline int
 atomic_fetch_and(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_and(i, v);
 }
 #define atomic_fetch_and atomic_fetch_and
 #endif
 
 #if defined(arch_atomic_fetch_and_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_and_acquire(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_and_acquire(i, v);
 }
 #define atomic_fetch_and_acquire atomic_fetch_and_acquire
 #endif
 
 #if defined(arch_atomic_fetch_and_release)
-static inline int
+static __always_inline int
 atomic_fetch_and_release(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_and_release(i, v);
 }
 #define atomic_fetch_and_release atomic_fetch_and_release
 #endif
 
 #if defined(arch_atomic_fetch_and_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_and_relaxed(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_and_relaxed(i, v);
 }
 #define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
 #endif
 
 #if defined(arch_atomic_andnot)
-static inline void
+static __always_inline void
 atomic_andnot(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_andnot(i, v);
 }
 #define atomic_andnot atomic_andnot
 #endif
 
 #if defined(arch_atomic_fetch_andnot)
-static inline int
+static __always_inline int
 atomic_fetch_andnot(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_andnot(i, v);
 }
 #define atomic_fetch_andnot atomic_fetch_andnot
 #endif
 
 #if defined(arch_atomic_fetch_andnot_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_andnot_acquire(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_andnot_acquire(i, v);
 }
 #define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
 #endif
 
 #if defined(arch_atomic_fetch_andnot_release)
-static inline int
+static __always_inline int
 atomic_fetch_andnot_release(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_andnot_release(i, v);
 }
 #define atomic_fetch_andnot_release atomic_fetch_andnot_release
 #endif
 
 #if defined(arch_atomic_fetch_andnot_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_andnot_relaxed(i, v);
 }
 #define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic_or(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_or(i, v);
 }
 #define atomic_or atomic_or
 
 #if !defined(arch_atomic_fetch_or_relaxed) || defined(arch_atomic_fetch_or)
-static inline int
+static __always_inline int
 atomic_fetch_or(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_or(i, v);
 }
 #define atomic_fetch_or atomic_fetch_or
 #endif
 
 #if defined(arch_atomic_fetch_or_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_or_acquire(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_or_acquire(i, v);
 }
 #define atomic_fetch_or_acquire atomic_fetch_or_acquire
 #endif
 
 #if defined(arch_atomic_fetch_or_release)
-static inline int
+static __always_inline int
 atomic_fetch_or_release(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_or_release(i, v);
 }
 #define atomic_fetch_or_release atomic_fetch_or_release
 #endif
 
 #if defined(arch_atomic_fetch_or_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_or_relaxed(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_or_relaxed(i, v);
 }
 #define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic_xor(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic_xor(i, v);
 }
 #define atomic_xor atomic_xor
 
 #if !defined(arch_atomic_fetch_xor_relaxed) || defined(arch_atomic_fetch_xor)
-static inline int
+static __always_inline int
 atomic_fetch_xor(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_xor(i, v);
 }
 #define atomic_fetch_xor atomic_fetch_xor
 #endif
 
 #if defined(arch_atomic_fetch_xor_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_xor_acquire(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_xor_acquire(i, v);
 }
 #define atomic_fetch_xor_acquire atomic_fetch_xor_acquire
 #endif
 
 #if defined(arch_atomic_fetch_xor_release)
-static inline int
+static __always_inline int
 atomic_fetch_xor_release(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_xor_release(i, v);
 }
 #define atomic_fetch_xor_release atomic_fetch_xor_release
 #endif
 
 #if defined(arch_atomic_fetch_xor_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_xor_relaxed(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_xor_relaxed(i, v);
 }
 #define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
 #endif
 
 #if !defined(arch_atomic_xchg_relaxed) || defined(arch_atomic_xchg)
-static inline int
+static __always_inline int
 atomic_xchg(atomic_t *v, int i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_xchg(v, i);
 }
 #define atomic_xchg atomic_xchg
 #endif
 
 #if defined(arch_atomic_xchg_acquire)
-static inline int
+static __always_inline int
 atomic_xchg_acquire(atomic_t *v, int i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_xchg_acquire(v, i);
 }
 #define atomic_xchg_acquire atomic_xchg_acquire
 #endif
 
 #if defined(arch_atomic_xchg_release)
-static inline int
+static __always_inline int
 atomic_xchg_release(atomic_t *v, int i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_xchg_release(v, i);
 }
 #define atomic_xchg_release atomic_xchg_release
 #endif
 
 #if defined(arch_atomic_xchg_relaxed)
-static inline int
+static __always_inline int
 atomic_xchg_relaxed(atomic_t *v, int i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_xchg_relaxed(v, i);
 }
 #define atomic_xchg_relaxed atomic_xchg_relaxed
 #endif
 
 #if !defined(arch_atomic_cmpxchg_relaxed) || defined(arch_atomic_cmpxchg)
-static inline int
+static __always_inline int
 atomic_cmpxchg(atomic_t *v, int old, int new)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_cmpxchg(v, old, new);
 }
 #define atomic_cmpxchg atomic_cmpxchg
 #endif
 
 #if defined(arch_atomic_cmpxchg_acquire)
-static inline int
+static __always_inline int
 atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_cmpxchg_acquire(v, old, new);
 }
 #define atomic_cmpxchg_acquire atomic_cmpxchg_acquire
 #endif
 
 #if defined(arch_atomic_cmpxchg_release)
-static inline int
+static __always_inline int
 atomic_cmpxchg_release(atomic_t *v, int old, int new)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_cmpxchg_release(v, old, new);
 }
 #define atomic_cmpxchg_release atomic_cmpxchg_release
 #endif
 
 #if defined(arch_atomic_cmpxchg_relaxed)
-static inline int
+static __always_inline int
 atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_cmpxchg_relaxed(v, old, new);
 }
 #define atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed
 #endif
 
 #if defined(arch_atomic_try_cmpxchg)
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
-       kasan_check_write(v, sizeof(*v));
-       kasan_check_write(old, sizeof(*old));
+       instrument_atomic_write(v, sizeof(*v));
+       instrument_atomic_write(old, sizeof(*old));
        return arch_atomic_try_cmpxchg(v, old, new);
 }
 #define atomic_try_cmpxchg atomic_try_cmpxchg
 #endif
 
 #if defined(arch_atomic_try_cmpxchg_acquire)
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 {
-       kasan_check_write(v, sizeof(*v));
-       kasan_check_write(old, sizeof(*old));
+       instrument_atomic_write(v, sizeof(*v));
+       instrument_atomic_write(old, sizeof(*old));
        return arch_atomic_try_cmpxchg_acquire(v, old, new);
 }
 #define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire
 #endif
 
 #if defined(arch_atomic_try_cmpxchg_release)
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 {
-       kasan_check_write(v, sizeof(*v));
-       kasan_check_write(old, sizeof(*old));
+       instrument_atomic_write(v, sizeof(*v));
+       instrument_atomic_write(old, sizeof(*old));
        return arch_atomic_try_cmpxchg_release(v, old, new);
 }
 #define atomic_try_cmpxchg_release atomic_try_cmpxchg_release
 #endif
 
 #if defined(arch_atomic_try_cmpxchg_relaxed)
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 {
-       kasan_check_write(v, sizeof(*v));
-       kasan_check_write(old, sizeof(*old));
+       instrument_atomic_write(v, sizeof(*v));
+       instrument_atomic_write(old, sizeof(*old));
        return arch_atomic_try_cmpxchg_relaxed(v, old, new);
 }
 #define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed
 #endif
 
 #if defined(arch_atomic_sub_and_test)
-static inline bool
+static __always_inline bool
 atomic_sub_and_test(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_sub_and_test(i, v);
 }
 #define atomic_sub_and_test atomic_sub_and_test
 #endif
 
 #if defined(arch_atomic_dec_and_test)
-static inline bool
+static __always_inline bool
 atomic_dec_and_test(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_dec_and_test(v);
 }
 #define atomic_dec_and_test atomic_dec_and_test
 #endif
 
 #if defined(arch_atomic_inc_and_test)
-static inline bool
+static __always_inline bool
 atomic_inc_and_test(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_inc_and_test(v);
 }
 #define atomic_inc_and_test atomic_inc_and_test
 #endif
 
 #if defined(arch_atomic_add_negative)
-static inline bool
+static __always_inline bool
 atomic_add_negative(int i, atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_add_negative(i, v);
 }
 #define atomic_add_negative atomic_add_negative
 #endif
 
 #if defined(arch_atomic_fetch_add_unless)
-static inline int
+static __always_inline int
 atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_fetch_add_unless(v, a, u);
 }
 #define atomic_fetch_add_unless atomic_fetch_add_unless
 #endif
 
 #if defined(arch_atomic_add_unless)
-static inline bool
+static __always_inline bool
 atomic_add_unless(atomic_t *v, int a, int u)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_add_unless(v, a, u);
 }
 #define atomic_add_unless atomic_add_unless
 #endif
 
 #if defined(arch_atomic_inc_not_zero)
-static inline bool
+static __always_inline bool
 atomic_inc_not_zero(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_inc_not_zero(v);
 }
 #define atomic_inc_not_zero atomic_inc_not_zero
 #endif
 
 #if defined(arch_atomic_inc_unless_negative)
-static inline bool
+static __always_inline bool
 atomic_inc_unless_negative(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_inc_unless_negative(v);
 }
 #define atomic_inc_unless_negative atomic_inc_unless_negative
 #endif
 
 #if defined(arch_atomic_dec_unless_positive)
-static inline bool
+static __always_inline bool
 atomic_dec_unless_positive(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_dec_unless_positive(v);
 }
 #define atomic_dec_unless_positive atomic_dec_unless_positive
 #endif
 
 #if defined(arch_atomic_dec_if_positive)
-static inline int
+static __always_inline int
 atomic_dec_if_positive(atomic_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic_dec_if_positive(v);
 }
 #define atomic_dec_if_positive atomic_dec_if_positive
 #endif
 
-static inline s64
+static __always_inline s64
 atomic64_read(const atomic64_t *v)
 {
-       kasan_check_read(v, sizeof(*v));
+       instrument_atomic_read(v, sizeof(*v));
        return arch_atomic64_read(v);
 }
 #define atomic64_read atomic64_read
 
 #if defined(arch_atomic64_read_acquire)
-static inline s64
+static __always_inline s64
 atomic64_read_acquire(const atomic64_t *v)
 {
-       kasan_check_read(v, sizeof(*v));
+       instrument_atomic_read(v, sizeof(*v));
        return arch_atomic64_read_acquire(v);
 }
 #define atomic64_read_acquire atomic64_read_acquire
 #endif
 
-static inline void
+static __always_inline void
 atomic64_set(atomic64_t *v, s64 i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_set(v, i);
 }
 #define atomic64_set atomic64_set
 
 #if defined(arch_atomic64_set_release)
-static inline void
+static __always_inline void
 atomic64_set_release(atomic64_t *v, s64 i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_set_release(v, i);
 }
 #define atomic64_set_release atomic64_set_release
 #endif
 
-static inline void
+static __always_inline void
 atomic64_add(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_add(i, v);
 }
 #define atomic64_add atomic64_add
 
 #if !defined(arch_atomic64_add_return_relaxed) || defined(arch_atomic64_add_return)
-static inline s64
+static __always_inline s64
 atomic64_add_return(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_add_return(i, v);
 }
 #define atomic64_add_return atomic64_add_return
 #endif
 
 #if defined(arch_atomic64_add_return_acquire)
-static inline s64
+static __always_inline s64
 atomic64_add_return_acquire(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_add_return_acquire(i, v);
 }
 #define atomic64_add_return_acquire atomic64_add_return_acquire
 #endif
 
 #if defined(arch_atomic64_add_return_release)
-static inline s64
+static __always_inline s64
 atomic64_add_return_release(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_add_return_release(i, v);
 }
 #define atomic64_add_return_release atomic64_add_return_release
 #endif
 
 #if defined(arch_atomic64_add_return_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_add_return_relaxed(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_add_return_relaxed(i, v);
 }
 #define atomic64_add_return_relaxed atomic64_add_return_relaxed
 #endif
 
 #if !defined(arch_atomic64_fetch_add_relaxed) || defined(arch_atomic64_fetch_add)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_add(i, v);
 }
 #define atomic64_fetch_add atomic64_fetch_add
 #endif
 
 #if defined(arch_atomic64_fetch_add_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_add_acquire(i, v);
 }
 #define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
 #endif
 
 #if defined(arch_atomic64_fetch_add_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_release(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_add_release(i, v);
 }
 #define atomic64_fetch_add_release atomic64_fetch_add_release
 #endif
 
 #if defined(arch_atomic64_fetch_add_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_relaxed(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_add_relaxed(i, v);
 }
 #define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic64_sub(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_sub(i, v);
 }
 #define atomic64_sub atomic64_sub
 
 #if !defined(arch_atomic64_sub_return_relaxed) || defined(arch_atomic64_sub_return)
-static inline s64
+static __always_inline s64
 atomic64_sub_return(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_sub_return(i, v);
 }
 #define atomic64_sub_return atomic64_sub_return
 #endif
 
 #if defined(arch_atomic64_sub_return_acquire)
-static inline s64
+static __always_inline s64
 atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_sub_return_acquire(i, v);
 }
 #define atomic64_sub_return_acquire atomic64_sub_return_acquire
 #endif
 
 #if defined(arch_atomic64_sub_return_release)
-static inline s64
+static __always_inline s64
 atomic64_sub_return_release(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_sub_return_release(i, v);
 }
 #define atomic64_sub_return_release atomic64_sub_return_release
 #endif
 
 #if defined(arch_atomic64_sub_return_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_sub_return_relaxed(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_sub_return_relaxed(i, v);
 }
 #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
 #endif
 
 #if !defined(arch_atomic64_fetch_sub_relaxed) || defined(arch_atomic64_fetch_sub)
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_sub(i, v);
 }
 #define atomic64_fetch_sub atomic64_fetch_sub
 #endif
 
 #if defined(arch_atomic64_fetch_sub_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_sub_acquire(i, v);
 }
 #define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
 #endif
 
 #if defined(arch_atomic64_fetch_sub_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_sub_release(i, v);
 }
 #define atomic64_fetch_sub_release atomic64_fetch_sub_release
 #endif
 
 #if defined(arch_atomic64_fetch_sub_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_sub_relaxed(i, v);
 }
 #define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
 #endif
 
 #if defined(arch_atomic64_inc)
-static inline void
+static __always_inline void
 atomic64_inc(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_inc(v);
 }
 #define atomic64_inc atomic64_inc
 #endif
 
 #if defined(arch_atomic64_inc_return)
-static inline s64
+static __always_inline s64
 atomic64_inc_return(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_inc_return(v);
 }
 #define atomic64_inc_return atomic64_inc_return
 #endif
 
 #if defined(arch_atomic64_inc_return_acquire)
-static inline s64
+static __always_inline s64
 atomic64_inc_return_acquire(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_inc_return_acquire(v);
 }
 #define atomic64_inc_return_acquire atomic64_inc_return_acquire
 #endif
 
 #if defined(arch_atomic64_inc_return_release)
-static inline s64
+static __always_inline s64
 atomic64_inc_return_release(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_inc_return_release(v);
 }
 #define atomic64_inc_return_release atomic64_inc_return_release
 #endif
 
 #if defined(arch_atomic64_inc_return_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_inc_return_relaxed(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_inc_return_relaxed(v);
 }
 #define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
 #endif
 
 #if defined(arch_atomic64_fetch_inc)
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_inc(v);
 }
 #define atomic64_fetch_inc atomic64_fetch_inc
 #endif
 
 #if defined(arch_atomic64_fetch_inc_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_acquire(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_inc_acquire(v);
 }
 #define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire
 #endif
 
 #if defined(arch_atomic64_fetch_inc_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_release(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_inc_release(v);
 }
 #define atomic64_fetch_inc_release atomic64_fetch_inc_release
 #endif
 
 #if defined(arch_atomic64_fetch_inc_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_relaxed(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_inc_relaxed(v);
 }
 #define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
 #endif
 
 #if defined(arch_atomic64_dec)
-static inline void
+static __always_inline void
 atomic64_dec(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_dec(v);
 }
 #define atomic64_dec atomic64_dec
 #endif
 
 #if defined(arch_atomic64_dec_return)
-static inline s64
+static __always_inline s64
 atomic64_dec_return(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_dec_return(v);
 }
 #define atomic64_dec_return atomic64_dec_return
 #endif
 
 #if defined(arch_atomic64_dec_return_acquire)
-static inline s64
+static __always_inline s64
 atomic64_dec_return_acquire(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_dec_return_acquire(v);
 }
 #define atomic64_dec_return_acquire atomic64_dec_return_acquire
 #endif
 
 #if defined(arch_atomic64_dec_return_release)
-static inline s64
+static __always_inline s64
 atomic64_dec_return_release(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_dec_return_release(v);
 }
 #define atomic64_dec_return_release atomic64_dec_return_release
 #endif
 
 #if defined(arch_atomic64_dec_return_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_dec_return_relaxed(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_dec_return_relaxed(v);
 }
 #define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
 #endif
 
 #if defined(arch_atomic64_fetch_dec)
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_dec(v);
 }
 #define atomic64_fetch_dec atomic64_fetch_dec
 #endif
 
 #if defined(arch_atomic64_fetch_dec_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_acquire(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_dec_acquire(v);
 }
 #define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire
 #endif
 
 #if defined(arch_atomic64_fetch_dec_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_release(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_dec_release(v);
 }
 #define atomic64_fetch_dec_release atomic64_fetch_dec_release
 #endif
 
 #if defined(arch_atomic64_fetch_dec_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_relaxed(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_dec_relaxed(v);
 }
 #define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic64_and(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_and(i, v);
 }
 #define atomic64_and atomic64_and
 
 #if !defined(arch_atomic64_fetch_and_relaxed) || defined(arch_atomic64_fetch_and)
-static inline s64
+static __always_inline s64
 atomic64_fetch_and(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_and(i, v);
 }
 #define atomic64_fetch_and atomic64_fetch_and
 #endif
 
 #if defined(arch_atomic64_fetch_and_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_and_acquire(i, v);
 }
 #define atomic64_fetch_and_acquire atomic64_fetch_and_acquire
 #endif
 
 #if defined(arch_atomic64_fetch_and_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_release(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_and_release(i, v);
 }
 #define atomic64_fetch_and_release atomic64_fetch_and_release
 #endif
 
 #if defined(arch_atomic64_fetch_and_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_relaxed(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_and_relaxed(i, v);
 }
 #define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
 #endif
 
 #if defined(arch_atomic64_andnot)
-static inline void
+static __always_inline void
 atomic64_andnot(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_andnot(i, v);
 }
 #define atomic64_andnot atomic64_andnot
 #endif
 
 #if defined(arch_atomic64_fetch_andnot)
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_andnot(i, v);
 }
 #define atomic64_fetch_andnot atomic64_fetch_andnot
 #endif
 
 #if defined(arch_atomic64_fetch_andnot_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_andnot_acquire(i, v);
 }
 #define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
 #endif
 
 #if defined(arch_atomic64_fetch_andnot_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_andnot_release(i, v);
 }
 #define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
 #endif
 
 #if defined(arch_atomic64_fetch_andnot_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_andnot_relaxed(i, v);
 }
 #define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic64_or(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_or(i, v);
 }
 #define atomic64_or atomic64_or
 
 #if !defined(arch_atomic64_fetch_or_relaxed) || defined(arch_atomic64_fetch_or)
-static inline s64
+static __always_inline s64
 atomic64_fetch_or(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_or(i, v);
 }
 #define atomic64_fetch_or atomic64_fetch_or
 #endif
 
 #if defined(arch_atomic64_fetch_or_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_or_acquire(i, v);
 }
 #define atomic64_fetch_or_acquire atomic64_fetch_or_acquire
 #endif
 
 #if defined(arch_atomic64_fetch_or_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_release(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_or_release(i, v);
 }
 #define atomic64_fetch_or_release atomic64_fetch_or_release
 #endif
 
 #if defined(arch_atomic64_fetch_or_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_relaxed(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_or_relaxed(i, v);
 }
 #define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic64_xor(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        arch_atomic64_xor(i, v);
 }
 #define atomic64_xor atomic64_xor
 
 #if !defined(arch_atomic64_fetch_xor_relaxed) || defined(arch_atomic64_fetch_xor)
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_xor(i, v);
 }
 #define atomic64_fetch_xor atomic64_fetch_xor
 #endif
 
 #if defined(arch_atomic64_fetch_xor_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_xor_acquire(i, v);
 }
 #define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
 #endif
 
 #if defined(arch_atomic64_fetch_xor_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_xor_release(i, v);
 }
 #define atomic64_fetch_xor_release atomic64_fetch_xor_release
 #endif
 
 #if defined(arch_atomic64_fetch_xor_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_xor_relaxed(i, v);
 }
 #define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
 #endif
 
 #if !defined(arch_atomic64_xchg_relaxed) || defined(arch_atomic64_xchg)
-static inline s64
+static __always_inline s64
 atomic64_xchg(atomic64_t *v, s64 i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_xchg(v, i);
 }
 #define atomic64_xchg atomic64_xchg
 #endif
 
 #if defined(arch_atomic64_xchg_acquire)
-static inline s64
+static __always_inline s64
 atomic64_xchg_acquire(atomic64_t *v, s64 i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_xchg_acquire(v, i);
 }
 #define atomic64_xchg_acquire atomic64_xchg_acquire
 #endif
 
 #if defined(arch_atomic64_xchg_release)
-static inline s64
+static __always_inline s64
 atomic64_xchg_release(atomic64_t *v, s64 i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_xchg_release(v, i);
 }
 #define atomic64_xchg_release atomic64_xchg_release
 #endif
 
 #if defined(arch_atomic64_xchg_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_xchg_relaxed(atomic64_t *v, s64 i)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_xchg_relaxed(v, i);
 }
 #define atomic64_xchg_relaxed atomic64_xchg_relaxed
 #endif
 
 #if !defined(arch_atomic64_cmpxchg_relaxed) || defined(arch_atomic64_cmpxchg)
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_cmpxchg(v, old, new);
 }
 #define atomic64_cmpxchg atomic64_cmpxchg
 #endif
 
 #if defined(arch_atomic64_cmpxchg_acquire)
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_cmpxchg_acquire(v, old, new);
 }
 #define atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire
 #endif
 
 #if defined(arch_atomic64_cmpxchg_release)
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_cmpxchg_release(v, old, new);
 }
 #define atomic64_cmpxchg_release atomic64_cmpxchg_release
 #endif
 
 #if defined(arch_atomic64_cmpxchg_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_cmpxchg_relaxed(v, old, new);
 }
 #define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed
 #endif
 
 #if defined(arch_atomic64_try_cmpxchg)
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
-       kasan_check_write(v, sizeof(*v));
-       kasan_check_write(old, sizeof(*old));
+       instrument_atomic_write(v, sizeof(*v));
+       instrument_atomic_write(old, sizeof(*old));
        return arch_atomic64_try_cmpxchg(v, old, new);
 }
 #define atomic64_try_cmpxchg atomic64_try_cmpxchg
 #endif
 
 #if defined(arch_atomic64_try_cmpxchg_acquire)
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 {
-       kasan_check_write(v, sizeof(*v));
-       kasan_check_write(old, sizeof(*old));
+       instrument_atomic_write(v, sizeof(*v));
+       instrument_atomic_write(old, sizeof(*old));
        return arch_atomic64_try_cmpxchg_acquire(v, old, new);
 }
 #define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire
 #endif
 
 #if defined(arch_atomic64_try_cmpxchg_release)
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
-       kasan_check_write(v, sizeof(*v));
-       kasan_check_write(old, sizeof(*old));
+       instrument_atomic_write(v, sizeof(*v));
+       instrument_atomic_write(old, sizeof(*old));
        return arch_atomic64_try_cmpxchg_release(v, old, new);
 }
 #define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release
 #endif
 
 #if defined(arch_atomic64_try_cmpxchg_relaxed)
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 {
-       kasan_check_write(v, sizeof(*v));
-       kasan_check_write(old, sizeof(*old));
+       instrument_atomic_write(v, sizeof(*v));
+       instrument_atomic_write(old, sizeof(*old));
        return arch_atomic64_try_cmpxchg_relaxed(v, old, new);
 }
 #define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed
 #endif
 
 #if defined(arch_atomic64_sub_and_test)
-static inline bool
+static __always_inline bool
 atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_sub_and_test(i, v);
 }
 #define atomic64_sub_and_test atomic64_sub_and_test
 #endif
 
 #if defined(arch_atomic64_dec_and_test)
-static inline bool
+static __always_inline bool
 atomic64_dec_and_test(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_dec_and_test(v);
 }
 #define atomic64_dec_and_test atomic64_dec_and_test
 #endif
 
 #if defined(arch_atomic64_inc_and_test)
-static inline bool
+static __always_inline bool
 atomic64_inc_and_test(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_inc_and_test(v);
 }
 #define atomic64_inc_and_test atomic64_inc_and_test
 #endif
 
 #if defined(arch_atomic64_add_negative)
-static inline bool
+static __always_inline bool
 atomic64_add_negative(s64 i, atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_add_negative(i, v);
 }
 #define atomic64_add_negative atomic64_add_negative
 #endif
 
 #if defined(arch_atomic64_fetch_add_unless)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_fetch_add_unless(v, a, u);
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 #endif
 
 #if defined(arch_atomic64_add_unless)
-static inline bool
+static __always_inline bool
 atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_add_unless(v, a, u);
 }
 #define atomic64_add_unless atomic64_add_unless
 #endif
 
 #if defined(arch_atomic64_inc_not_zero)
-static inline bool
+static __always_inline bool
 atomic64_inc_not_zero(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_inc_not_zero(v);
 }
 #define atomic64_inc_not_zero atomic64_inc_not_zero
 #endif
 
 #if defined(arch_atomic64_inc_unless_negative)
-static inline bool
+static __always_inline bool
 atomic64_inc_unless_negative(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_inc_unless_negative(v);
 }
 #define atomic64_inc_unless_negative atomic64_inc_unless_negative
 #endif
 
 #if defined(arch_atomic64_dec_unless_positive)
-static inline bool
+static __always_inline bool
 atomic64_dec_unless_positive(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_dec_unless_positive(v);
 }
 #define atomic64_dec_unless_positive atomic64_dec_unless_positive
 #endif
 
 #if defined(arch_atomic64_dec_if_positive)
-static inline s64
+static __always_inline s64
 atomic64_dec_if_positive(atomic64_t *v)
 {
-       kasan_check_write(v, sizeof(*v));
+       instrument_atomic_write(v, sizeof(*v));
        return arch_atomic64_dec_if_positive(v);
 }
 #define atomic64_dec_if_positive atomic64_dec_if_positive
@@ -1644,7 +1645,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define xchg(ptr, ...)                                         \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_xchg(__ai_ptr, __VA_ARGS__);                               \
 })
 #endif
@@ -1653,7 +1654,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define xchg_acquire(ptr, ...)                                         \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_xchg_acquire(__ai_ptr, __VA_ARGS__);                               \
 })
 #endif
@@ -1662,7 +1663,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define xchg_release(ptr, ...)                                         \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_xchg_release(__ai_ptr, __VA_ARGS__);                               \
 })
 #endif
@@ -1671,7 +1672,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define xchg_relaxed(ptr, ...)                                         \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_xchg_relaxed(__ai_ptr, __VA_ARGS__);                               \
 })
 #endif
@@ -1680,7 +1681,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg(ptr, ...)                                              \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg(__ai_ptr, __VA_ARGS__);                            \
 })
 #endif
@@ -1689,7 +1690,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg_acquire(ptr, ...)                                              \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg_acquire(__ai_ptr, __VA_ARGS__);                            \
 })
 #endif
@@ -1698,7 +1699,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg_release(ptr, ...)                                              \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg_release(__ai_ptr, __VA_ARGS__);                            \
 })
 #endif
@@ -1707,7 +1708,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg_relaxed(ptr, ...)                                              \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg_relaxed(__ai_ptr, __VA_ARGS__);                            \
 })
 #endif
@@ -1716,7 +1717,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg64(ptr, ...)                                            \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg64(__ai_ptr, __VA_ARGS__);                          \
 })
 #endif
@@ -1725,7 +1726,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg64_acquire(ptr, ...)                                            \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg64_acquire(__ai_ptr, __VA_ARGS__);                          \
 })
 #endif
@@ -1734,7 +1735,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg64_release(ptr, ...)                                            \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__);                          \
 })
 #endif
@@ -1743,7 +1744,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg64_relaxed(ptr, ...)                                            \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg64_relaxed(__ai_ptr, __VA_ARGS__);                          \
 })
 #endif
@@ -1751,28 +1752,28 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg_local(ptr, ...)                                                \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg_local(__ai_ptr, __VA_ARGS__);                              \
 })
 
 #define cmpxchg64_local(ptr, ...)                                              \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_cmpxchg64_local(__ai_ptr, __VA_ARGS__);                            \
 })
 
 #define sync_cmpxchg(ptr, ...)                                         \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));         \
+       instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr));           \
        arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__);                               \
 })
 
 #define cmpxchg_double(ptr, ...)                                               \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, 2 * sizeof(*__ai_ptr));             \
+       instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr));               \
        arch_cmpxchg_double(__ai_ptr, __VA_ARGS__);                             \
 })
 
@@ -1780,9 +1781,9 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define cmpxchg_double_local(ptr, ...)                                         \
 ({                                                                     \
        typeof(ptr) __ai_ptr = (ptr);                                   \
-       kasan_check_write(__ai_ptr, 2 * sizeof(*__ai_ptr));             \
+       instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr));               \
        arch_cmpxchg_double_local(__ai_ptr, __VA_ARGS__);                               \
 })
 
 #endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
-// b29b625d5de9280f680e42c7be859b55b15e5f6a
+// 89bf97f3a7509b740845e51ddf31055b48a81f40
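Every hunk in this instrumented-atomic header follows the same two-line pattern: record the access for the enabled sanitizers (KASAN, and now KCSAN) via an instrument_atomic_*() helper, then defer to the arch_ primitive; the wrappers also switch from inline to __always_inline. A minimal sketch of that shape, modelled on the hunks above rather than copied from the generated header (which is emitted by the gen-atomic-*.sh scripts under scripts/atomic/):

	/* Illustrative sketch only, following the pattern shown above. */
	static __always_inline void
	atomic64_add(s64 i, atomic64_t *v)
	{
		/* Report the atomic read-modify-write on *v to the sanitizers... */
		instrument_atomic_write(v, sizeof(*v));
		/* ...then perform the real architecture-level operation. */
		arch_atomic64_add(i, v);
	}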
index 881c7e2..073cf40 100644

@@ -6,6 +6,7 @@
 #ifndef _ASM_GENERIC_ATOMIC_LONG_H
 #define _ASM_GENERIC_ATOMIC_LONG_H
 
+#include <linux/compiler.h>
 #include <asm/types.h>
 
 #ifdef CONFIG_64BIT
@@ -22,493 +23,493 @@ typedef atomic_t atomic_long_t;
 
 #ifdef CONFIG_64BIT
 
-static inline long
+static __always_inline long
 atomic_long_read(const atomic_long_t *v)
 {
        return atomic64_read(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_read_acquire(const atomic_long_t *v)
 {
        return atomic64_read_acquire(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_set(atomic_long_t *v, long i)
 {
        atomic64_set(v, i);
 }
 
-static inline void
+static __always_inline void
 atomic_long_set_release(atomic_long_t *v, long i)
 {
        atomic64_set_release(v, i);
 }
 
-static inline void
+static __always_inline void
 atomic_long_add(long i, atomic_long_t *v)
 {
        atomic64_add(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return(long i, atomic_long_t *v)
 {
        return atomic64_add_return(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_acquire(long i, atomic_long_t *v)
 {
        return atomic64_add_return_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_release(long i, atomic_long_t *v)
 {
        return atomic64_add_return_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_relaxed(long i, atomic_long_t *v)
 {
        return atomic64_add_return_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add(long i, atomic_long_t *v)
 {
        return atomic64_fetch_add(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
 {
        return atomic64_fetch_add_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_release(long i, atomic_long_t *v)
 {
        return atomic64_fetch_add_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
 {
        return atomic64_fetch_add_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_sub(long i, atomic_long_t *v)
 {
        atomic64_sub(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return(long i, atomic_long_t *v)
 {
        return atomic64_sub_return(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_acquire(long i, atomic_long_t *v)
 {
        return atomic64_sub_return_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_release(long i, atomic_long_t *v)
 {
        return atomic64_sub_return_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
 {
        return atomic64_sub_return_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub(long i, atomic_long_t *v)
 {
        return atomic64_fetch_sub(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
 {
        return atomic64_fetch_sub_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_release(long i, atomic_long_t *v)
 {
        return atomic64_fetch_sub_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
 {
        return atomic64_fetch_sub_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_inc(atomic_long_t *v)
 {
        atomic64_inc(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return(atomic_long_t *v)
 {
        return atomic64_inc_return(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_acquire(atomic_long_t *v)
 {
        return atomic64_inc_return_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_release(atomic_long_t *v)
 {
        return atomic64_inc_return_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_relaxed(atomic_long_t *v)
 {
        return atomic64_inc_return_relaxed(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc(atomic_long_t *v)
 {
        return atomic64_fetch_inc(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_acquire(atomic_long_t *v)
 {
        return atomic64_fetch_inc_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_release(atomic_long_t *v)
 {
        return atomic64_fetch_inc_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_relaxed(atomic_long_t *v)
 {
        return atomic64_fetch_inc_relaxed(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_dec(atomic_long_t *v)
 {
        atomic64_dec(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return(atomic_long_t *v)
 {
        return atomic64_dec_return(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_acquire(atomic_long_t *v)
 {
        return atomic64_dec_return_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_release(atomic_long_t *v)
 {
        return atomic64_dec_return_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_relaxed(atomic_long_t *v)
 {
        return atomic64_dec_return_relaxed(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec(atomic_long_t *v)
 {
        return atomic64_fetch_dec(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_acquire(atomic_long_t *v)
 {
        return atomic64_fetch_dec_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_release(atomic_long_t *v)
 {
        return atomic64_fetch_dec_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_relaxed(atomic_long_t *v)
 {
        return atomic64_fetch_dec_relaxed(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_and(long i, atomic_long_t *v)
 {
        atomic64_and(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and(long i, atomic_long_t *v)
 {
        return atomic64_fetch_and(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
 {
        return atomic64_fetch_and_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_release(long i, atomic_long_t *v)
 {
        return atomic64_fetch_and_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
 {
        return atomic64_fetch_and_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_andnot(long i, atomic_long_t *v)
 {
        atomic64_andnot(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot(long i, atomic_long_t *v)
 {
        return atomic64_fetch_andnot(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
 {
        return atomic64_fetch_andnot_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
 {
        return atomic64_fetch_andnot_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
 {
        return atomic64_fetch_andnot_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_or(long i, atomic_long_t *v)
 {
        atomic64_or(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or(long i, atomic_long_t *v)
 {
        return atomic64_fetch_or(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
 {
        return atomic64_fetch_or_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_release(long i, atomic_long_t *v)
 {
        return atomic64_fetch_or_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
 {
        return atomic64_fetch_or_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_xor(long i, atomic_long_t *v)
 {
        atomic64_xor(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor(long i, atomic_long_t *v)
 {
        return atomic64_fetch_xor(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
 {
        return atomic64_fetch_xor_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_release(long i, atomic_long_t *v)
 {
        return atomic64_fetch_xor_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
 {
        return atomic64_fetch_xor_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg(atomic_long_t *v, long i)
 {
        return atomic64_xchg(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_acquire(atomic_long_t *v, long i)
 {
        return atomic64_xchg_acquire(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_release(atomic_long_t *v, long i)
 {
        return atomic64_xchg_release(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_relaxed(atomic_long_t *v, long i)
 {
        return atomic64_xchg_relaxed(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
 {
        return atomic64_cmpxchg(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
 {
        return atomic64_cmpxchg_acquire(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
 {
        return atomic64_cmpxchg_release(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
 {
        return atomic64_cmpxchg_relaxed(v, old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
 {
        return atomic64_try_cmpxchg(v, (s64 *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
 {
        return atomic64_try_cmpxchg_acquire(v, (s64 *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
 {
        return atomic64_try_cmpxchg_release(v, (s64 *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
 {
        return atomic64_try_cmpxchg_relaxed(v, (s64 *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_sub_and_test(long i, atomic_long_t *v)
 {
        return atomic64_sub_and_test(i, v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_dec_and_test(atomic_long_t *v)
 {
        return atomic64_dec_and_test(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_and_test(atomic_long_t *v)
 {
        return atomic64_inc_and_test(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_add_negative(long i, atomic_long_t *v)
 {
        return atomic64_add_negative(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
 {
        return atomic64_fetch_add_unless(v, a, u);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_add_unless(atomic_long_t *v, long a, long u)
 {
        return atomic64_add_unless(v, a, u);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_not_zero(atomic_long_t *v)
 {
        return atomic64_inc_not_zero(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_unless_negative(atomic_long_t *v)
 {
        return atomic64_inc_unless_negative(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_dec_unless_positive(atomic_long_t *v)
 {
        return atomic64_dec_unless_positive(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_if_positive(atomic_long_t *v)
 {
        return atomic64_dec_if_positive(v);
@@ -516,493 +517,493 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 
 #else /* CONFIG_64BIT */
 
-static inline long
+static __always_inline long
 atomic_long_read(const atomic_long_t *v)
 {
        return atomic_read(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_read_acquire(const atomic_long_t *v)
 {
        return atomic_read_acquire(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_set(atomic_long_t *v, long i)
 {
        atomic_set(v, i);
 }
 
-static inline void
+static __always_inline void
 atomic_long_set_release(atomic_long_t *v, long i)
 {
        atomic_set_release(v, i);
 }
 
-static inline void
+static __always_inline void
 atomic_long_add(long i, atomic_long_t *v)
 {
        atomic_add(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return(long i, atomic_long_t *v)
 {
        return atomic_add_return(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_acquire(long i, atomic_long_t *v)
 {
        return atomic_add_return_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_release(long i, atomic_long_t *v)
 {
        return atomic_add_return_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_relaxed(long i, atomic_long_t *v)
 {
        return atomic_add_return_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add(long i, atomic_long_t *v)
 {
        return atomic_fetch_add(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
 {
        return atomic_fetch_add_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_release(long i, atomic_long_t *v)
 {
        return atomic_fetch_add_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
 {
        return atomic_fetch_add_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_sub(long i, atomic_long_t *v)
 {
        atomic_sub(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return(long i, atomic_long_t *v)
 {
        return atomic_sub_return(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_acquire(long i, atomic_long_t *v)
 {
        return atomic_sub_return_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_release(long i, atomic_long_t *v)
 {
        return atomic_sub_return_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
 {
        return atomic_sub_return_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub(long i, atomic_long_t *v)
 {
        return atomic_fetch_sub(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
 {
        return atomic_fetch_sub_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_release(long i, atomic_long_t *v)
 {
        return atomic_fetch_sub_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
 {
        return atomic_fetch_sub_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_inc(atomic_long_t *v)
 {
        atomic_inc(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return(atomic_long_t *v)
 {
        return atomic_inc_return(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_acquire(atomic_long_t *v)
 {
        return atomic_inc_return_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_release(atomic_long_t *v)
 {
        return atomic_inc_return_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_relaxed(atomic_long_t *v)
 {
        return atomic_inc_return_relaxed(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc(atomic_long_t *v)
 {
        return atomic_fetch_inc(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_acquire(atomic_long_t *v)
 {
        return atomic_fetch_inc_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_release(atomic_long_t *v)
 {
        return atomic_fetch_inc_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_relaxed(atomic_long_t *v)
 {
        return atomic_fetch_inc_relaxed(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_dec(atomic_long_t *v)
 {
        atomic_dec(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return(atomic_long_t *v)
 {
        return atomic_dec_return(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_acquire(atomic_long_t *v)
 {
        return atomic_dec_return_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_release(atomic_long_t *v)
 {
        return atomic_dec_return_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_relaxed(atomic_long_t *v)
 {
        return atomic_dec_return_relaxed(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec(atomic_long_t *v)
 {
        return atomic_fetch_dec(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_acquire(atomic_long_t *v)
 {
        return atomic_fetch_dec_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_release(atomic_long_t *v)
 {
        return atomic_fetch_dec_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_relaxed(atomic_long_t *v)
 {
        return atomic_fetch_dec_relaxed(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_and(long i, atomic_long_t *v)
 {
        atomic_and(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and(long i, atomic_long_t *v)
 {
        return atomic_fetch_and(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
 {
        return atomic_fetch_and_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_release(long i, atomic_long_t *v)
 {
        return atomic_fetch_and_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
 {
        return atomic_fetch_and_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_andnot(long i, atomic_long_t *v)
 {
        atomic_andnot(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot(long i, atomic_long_t *v)
 {
        return atomic_fetch_andnot(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
 {
        return atomic_fetch_andnot_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
 {
        return atomic_fetch_andnot_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
 {
        return atomic_fetch_andnot_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_or(long i, atomic_long_t *v)
 {
        atomic_or(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or(long i, atomic_long_t *v)
 {
        return atomic_fetch_or(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
 {
        return atomic_fetch_or_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_release(long i, atomic_long_t *v)
 {
        return atomic_fetch_or_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
 {
        return atomic_fetch_or_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_xor(long i, atomic_long_t *v)
 {
        atomic_xor(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor(long i, atomic_long_t *v)
 {
        return atomic_fetch_xor(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
 {
        return atomic_fetch_xor_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_release(long i, atomic_long_t *v)
 {
        return atomic_fetch_xor_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
 {
        return atomic_fetch_xor_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg(atomic_long_t *v, long i)
 {
        return atomic_xchg(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_acquire(atomic_long_t *v, long i)
 {
        return atomic_xchg_acquire(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_release(atomic_long_t *v, long i)
 {
        return atomic_xchg_release(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_relaxed(atomic_long_t *v, long i)
 {
        return atomic_xchg_relaxed(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
 {
        return atomic_cmpxchg(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
 {
        return atomic_cmpxchg_acquire(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
 {
        return atomic_cmpxchg_release(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
 {
        return atomic_cmpxchg_relaxed(v, old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
 {
        return atomic_try_cmpxchg(v, (int *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
 {
        return atomic_try_cmpxchg_acquire(v, (int *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
 {
        return atomic_try_cmpxchg_release(v, (int *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
 {
        return atomic_try_cmpxchg_relaxed(v, (int *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_sub_and_test(long i, atomic_long_t *v)
 {
        return atomic_sub_and_test(i, v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_dec_and_test(atomic_long_t *v)
 {
        return atomic_dec_and_test(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_and_test(atomic_long_t *v)
 {
        return atomic_inc_and_test(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_add_negative(long i, atomic_long_t *v)
 {
        return atomic_add_negative(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
 {
        return atomic_fetch_add_unless(v, a, u);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_add_unless(atomic_long_t *v, long a, long u)
 {
        return atomic_add_unless(v, a, u);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_not_zero(atomic_long_t *v)
 {
        return atomic_inc_not_zero(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_unless_negative(atomic_long_t *v)
 {
        return atomic_inc_unless_negative(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_dec_unless_positive(atomic_long_t *v)
 {
        return atomic_dec_unless_positive(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_if_positive(atomic_long_t *v)
 {
        return atomic_dec_if_positive(v);
@@ -1010,4 +1011,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 
 #endif /* CONFIG_64BIT */
 #endif /* _ASM_GENERIC_ATOMIC_LONG_H */
-// 77558968132ce4f911ad53f6f52ce423006f6268
+// a624200981f552b2c6be4f32fe44da8289f30d87
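atomic_long_t is a thin compile-time switch: under CONFIG_64BIT every atomic_long_*() forwarder calls the atomic64_*() op, otherwise the atomic_*() op, and the only changes in this file are the <linux/compiler.h> include and the switch to __always_inline. A small usage sketch, assuming the usual <linux/atomic.h> environment (the counter and function names below are illustrative, not from the patch):

	#include <linux/atomic.h>
	#include <linux/printk.h>

	/* Compiles to an atomic64_t on 64-bit kernels, an atomic_t on 32-bit. */
	static atomic_long_t nr_items = ATOMIC_LONG_INIT(0);

	static void note_item(void)
	{
		atomic_long_inc(&nr_items);
		if (atomic_long_read(&nr_items) > 1000)
			pr_warn_once("nr_items is unexpectedly high\n");
	}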
index 18ce3c9..fb2cb33 100644
@@ -11,7 +11,7 @@
 #ifndef _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H
 #define _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H
 
-#include <linux/kasan-checks.h>
+#include <linux/instrumented.h>
 
 /**
  * set_bit - Atomically set a bit in memory
@@ -25,7 +25,7 @@
  */
 static inline void set_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        arch_set_bit(nr, addr);
 }
 
@@ -38,7 +38,7 @@ static inline void set_bit(long nr, volatile unsigned long *addr)
  */
 static inline void clear_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        arch_clear_bit(nr, addr);
 }
 
@@ -54,7 +54,7 @@ static inline void clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline void change_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        arch_change_bit(nr, addr);
 }
 
@@ -67,7 +67,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        return arch_test_and_set_bit(nr, addr);
 }
 
@@ -80,7 +80,7 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        return arch_test_and_clear_bit(nr, addr);
 }
 
@@ -93,7 +93,7 @@ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        return arch_test_and_change_bit(nr, addr);
 }
 
index ec53fde..b9bec46 100644
@@ -11,7 +11,7 @@
 #ifndef _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H
 #define _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H
 
-#include <linux/kasan-checks.h>
+#include <linux/instrumented.h>
 
 /**
  * clear_bit_unlock - Clear a bit in memory, for unlock
@@ -22,7 +22,7 @@
  */
 static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        arch_clear_bit_unlock(nr, addr);
 }
 
@@ -37,7 +37,7 @@ static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
  */
 static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        arch___clear_bit_unlock(nr, addr);
 }
 
@@ -52,7 +52,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
  */
 static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        return arch_test_and_set_bit_lock(nr, addr);
 }
 
@@ -71,7 +71,7 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
 static inline bool
 clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
        return arch_clear_bit_unlock_is_negative_byte(nr, addr);
 }
 /* Let everybody know we have it. */
index 95ff28d..20f788a 100644
@@ -11,7 +11,7 @@
 #ifndef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
 #define _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
 
-#include <linux/kasan-checks.h>
+#include <linux/instrumented.h>
 
 /**
  * __set_bit - Set a bit in memory
@@ -24,7 +24,7 @@
  */
 static inline void __set_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        arch___set_bit(nr, addr);
 }
 
@@ -39,7 +39,7 @@ static inline void __set_bit(long nr, volatile unsigned long *addr)
  */
 static inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        arch___clear_bit(nr, addr);
 }
 
@@ -54,7 +54,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline void __change_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        arch___change_bit(nr, addr);
 }
 
@@ -68,7 +68,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        return arch___test_and_set_bit(nr, addr);
 }
 
@@ -82,7 +82,7 @@ static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        return arch___test_and_clear_bit(nr, addr);
 }
 
@@ -96,7 +96,7 @@ static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-       kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        return arch___test_and_change_bit(nr, addr);
 }
 
@@ -107,7 +107,7 @@ static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool test_bit(long nr, const volatile unsigned long *addr)
 {
-       kasan_check_read(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long));
        return arch_test_bit(nr, addr);
 }
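The bitops conversion mirrors the atomic one: the atomic entry points (set_bit(), test_and_set_bit_lock(), ...) are annotated with instrument_atomic_write(), the non-atomic double-underscore variants with plain instrument_write(), and test_bit() with instrument_atomic_read(). Roughly, the helpers in <linux/instrumented.h> bundle the old KASAN check with the matching KCSAN check; a paraphrased sketch of that shape (not the exact header):

	static __always_inline void instrument_atomic_write(const volatile void *v, size_t size)
	{
		kasan_check_write(v, size);		/* address validity, as before */
		kcsan_check_atomic_write(v, size);	/* data-race check, access marked atomic */
	}

	static __always_inline void instrument_write(const volatile void *v, size_t size)
	{
		kasan_check_write(v, size);
		kcsan_check_write(v, size);		/* plain write: concurrent access is reportable */
	}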
 
diff --git a/include/linux/atomic-arch-fallback.h b/include/linux/atomic-arch-fallback.h
new file mode 100644
index 0000000..bcb6aa2
--- /dev/null
@@ -0,0 +1,2291 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Generated by scripts/atomic/gen-atomic-fallback.sh
+// DO NOT MODIFY THIS FILE DIRECTLY
+
+#ifndef _LINUX_ATOMIC_FALLBACK_H
+#define _LINUX_ATOMIC_FALLBACK_H
+
+#include <linux/compiler.h>
+
+#ifndef arch_xchg_relaxed
+#define arch_xchg_relaxed              arch_xchg
+#define arch_xchg_acquire              arch_xchg
+#define arch_xchg_release              arch_xchg
+#else /* arch_xchg_relaxed */
+
+#ifndef arch_xchg_acquire
+#define arch_xchg_acquire(...) \
+       __atomic_op_acquire(arch_xchg, __VA_ARGS__)
+#endif
+
+#ifndef arch_xchg_release
+#define arch_xchg_release(...) \
+       __atomic_op_release(arch_xchg, __VA_ARGS__)
+#endif
+
+#ifndef arch_xchg
+#define arch_xchg(...) \
+       __atomic_op_fence(arch_xchg, __VA_ARGS__)
+#endif
+
+#endif /* arch_xchg_relaxed */
+
+#ifndef arch_cmpxchg_relaxed
+#define arch_cmpxchg_relaxed           arch_cmpxchg
+#define arch_cmpxchg_acquire           arch_cmpxchg
+#define arch_cmpxchg_release           arch_cmpxchg
+#else /* arch_cmpxchg_relaxed */
+
+#ifndef arch_cmpxchg_acquire
+#define arch_cmpxchg_acquire(...) \
+       __atomic_op_acquire(arch_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef arch_cmpxchg_release
+#define arch_cmpxchg_release(...) \
+       __atomic_op_release(arch_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef arch_cmpxchg
+#define arch_cmpxchg(...) \
+       __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
+#endif
+
+#endif /* arch_cmpxchg_relaxed */
+
+#ifndef arch_cmpxchg64_relaxed
+#define arch_cmpxchg64_relaxed         arch_cmpxchg64
+#define arch_cmpxchg64_acquire         arch_cmpxchg64
+#define arch_cmpxchg64_release         arch_cmpxchg64
+#else /* arch_cmpxchg64_relaxed */
+
+#ifndef arch_cmpxchg64_acquire
+#define arch_cmpxchg64_acquire(...) \
+       __atomic_op_acquire(arch_cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef arch_cmpxchg64_release
+#define arch_cmpxchg64_release(...) \
+       __atomic_op_release(arch_cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef arch_cmpxchg64
+#define arch_cmpxchg64(...) \
+       __atomic_op_fence(arch_cmpxchg64, __VA_ARGS__)
+#endif
+
+#endif /* arch_cmpxchg64_relaxed */
+
+#ifndef arch_atomic_read_acquire
+static __always_inline int
+arch_atomic_read_acquire(const atomic_t *v)
+{
+       return smp_load_acquire(&(v)->counter);
+}
+#define arch_atomic_read_acquire arch_atomic_read_acquire
+#endif
+
+#ifndef arch_atomic_set_release
+static __always_inline void
+arch_atomic_set_release(atomic_t *v, int i)
+{
+       smp_store_release(&(v)->counter, i);
+}
+#define arch_atomic_set_release arch_atomic_set_release
+#endif
+
+#ifndef arch_atomic_add_return_relaxed
+#define arch_atomic_add_return_acquire arch_atomic_add_return
+#define arch_atomic_add_return_release arch_atomic_add_return
+#define arch_atomic_add_return_relaxed arch_atomic_add_return
+#else /* arch_atomic_add_return_relaxed */
+
+#ifndef arch_atomic_add_return_acquire
+static __always_inline int
+arch_atomic_add_return_acquire(int i, atomic_t *v)
+{
+       int ret = arch_atomic_add_return_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire
+#endif
+
+#ifndef arch_atomic_add_return_release
+static __always_inline int
+arch_atomic_add_return_release(int i, atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_add_return_relaxed(i, v);
+}
+#define arch_atomic_add_return_release arch_atomic_add_return_release
+#endif
+
+#ifndef arch_atomic_add_return
+static __always_inline int
+arch_atomic_add_return(int i, atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_add_return_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_add_return arch_atomic_add_return
+#endif
+
+#endif /* arch_atomic_add_return_relaxed */
+
+#ifndef arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add
+#else /* arch_atomic_fetch_add_relaxed */
+
+#ifndef arch_atomic_fetch_add_acquire
+static __always_inline int
+arch_atomic_fetch_add_acquire(int i, atomic_t *v)
+{
+       int ret = arch_atomic_fetch_add_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire
+#endif
+
+#ifndef arch_atomic_fetch_add_release
+static __always_inline int
+arch_atomic_fetch_add_release(int i, atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_fetch_add_relaxed(i, v);
+}
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add_release
+#endif
+
+#ifndef arch_atomic_fetch_add
+static __always_inline int
+arch_atomic_fetch_add(int i, atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_fetch_add_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#endif
+
+#endif /* arch_atomic_fetch_add_relaxed */
+
+#ifndef arch_atomic_sub_return_relaxed
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return
+#define arch_atomic_sub_return_release arch_atomic_sub_return
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return
+#else /* arch_atomic_sub_return_relaxed */
+
+#ifndef arch_atomic_sub_return_acquire
+static __always_inline int
+arch_atomic_sub_return_acquire(int i, atomic_t *v)
+{
+       int ret = arch_atomic_sub_return_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire
+#endif
+
+#ifndef arch_atomic_sub_return_release
+static __always_inline int
+arch_atomic_sub_return_release(int i, atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_sub_return_relaxed(i, v);
+}
+#define arch_atomic_sub_return_release arch_atomic_sub_return_release
+#endif
+
+#ifndef arch_atomic_sub_return
+static __always_inline int
+arch_atomic_sub_return(int i, atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_sub_return_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_sub_return arch_atomic_sub_return
+#endif
+
+#endif /* arch_atomic_sub_return_relaxed */
+
+#ifndef arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub
+#else /* arch_atomic_fetch_sub_relaxed */
+
+#ifndef arch_atomic_fetch_sub_acquire
+static __always_inline int
+arch_atomic_fetch_sub_acquire(int i, atomic_t *v)
+{
+       int ret = arch_atomic_fetch_sub_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire
+#endif
+
+#ifndef arch_atomic_fetch_sub_release
+static __always_inline int
+arch_atomic_fetch_sub_release(int i, atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_fetch_sub_relaxed(i, v);
+}
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release
+#endif
+
+#ifndef arch_atomic_fetch_sub
+static __always_inline int
+arch_atomic_fetch_sub(int i, atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_fetch_sub_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#endif
+
+#endif /* arch_atomic_fetch_sub_relaxed */
+
+#ifndef arch_atomic_inc
+static __always_inline void
+arch_atomic_inc(atomic_t *v)
+{
+       arch_atomic_add(1, v);
+}
+#define arch_atomic_inc arch_atomic_inc
+#endif
+
+#ifndef arch_atomic_inc_return_relaxed
+#ifdef arch_atomic_inc_return
+#define arch_atomic_inc_return_acquire arch_atomic_inc_return
+#define arch_atomic_inc_return_release arch_atomic_inc_return
+#define arch_atomic_inc_return_relaxed arch_atomic_inc_return
+#endif /* arch_atomic_inc_return */
+
+#ifndef arch_atomic_inc_return
+static __always_inline int
+arch_atomic_inc_return(atomic_t *v)
+{
+       return arch_atomic_add_return(1, v);
+}
+#define arch_atomic_inc_return arch_atomic_inc_return
+#endif
+
+#ifndef arch_atomic_inc_return_acquire
+static __always_inline int
+arch_atomic_inc_return_acquire(atomic_t *v)
+{
+       return arch_atomic_add_return_acquire(1, v);
+}
+#define arch_atomic_inc_return_acquire arch_atomic_inc_return_acquire
+#endif
+
+#ifndef arch_atomic_inc_return_release
+static __always_inline int
+arch_atomic_inc_return_release(atomic_t *v)
+{
+       return arch_atomic_add_return_release(1, v);
+}
+#define arch_atomic_inc_return_release arch_atomic_inc_return_release
+#endif
+
+#ifndef arch_atomic_inc_return_relaxed
+static __always_inline int
+arch_atomic_inc_return_relaxed(atomic_t *v)
+{
+       return arch_atomic_add_return_relaxed(1, v);
+}
+#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed
+#endif
+
+#else /* arch_atomic_inc_return_relaxed */
+
+#ifndef arch_atomic_inc_return_acquire
+static __always_inline int
+arch_atomic_inc_return_acquire(atomic_t *v)
+{
+       int ret = arch_atomic_inc_return_relaxed(v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_inc_return_acquire arch_atomic_inc_return_acquire
+#endif
+
+#ifndef arch_atomic_inc_return_release
+static __always_inline int
+arch_atomic_inc_return_release(atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_inc_return_relaxed(v);
+}
+#define arch_atomic_inc_return_release arch_atomic_inc_return_release
+#endif
+
+#ifndef arch_atomic_inc_return
+static __always_inline int
+arch_atomic_inc_return(atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_inc_return_relaxed(v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_inc_return arch_atomic_inc_return
+#endif
+
+#endif /* arch_atomic_inc_return_relaxed */
+
+#ifndef arch_atomic_fetch_inc_relaxed
+#ifdef arch_atomic_fetch_inc
+#define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc
+#define arch_atomic_fetch_inc_release arch_atomic_fetch_inc
+#define arch_atomic_fetch_inc_relaxed arch_atomic_fetch_inc
+#endif /* arch_atomic_fetch_inc */
+
+#ifndef arch_atomic_fetch_inc
+static __always_inline int
+arch_atomic_fetch_inc(atomic_t *v)
+{
+       return arch_atomic_fetch_add(1, v);
+}
+#define arch_atomic_fetch_inc arch_atomic_fetch_inc
+#endif
+
+#ifndef arch_atomic_fetch_inc_acquire
+static __always_inline int
+arch_atomic_fetch_inc_acquire(atomic_t *v)
+{
+       return arch_atomic_fetch_add_acquire(1, v);
+}
+#define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc_acquire
+#endif
+
+#ifndef arch_atomic_fetch_inc_release
+static __always_inline int
+arch_atomic_fetch_inc_release(atomic_t *v)
+{
+       return arch_atomic_fetch_add_release(1, v);
+}
+#define arch_atomic_fetch_inc_release arch_atomic_fetch_inc_release
+#endif
+
+#ifndef arch_atomic_fetch_inc_relaxed
+static __always_inline int
+arch_atomic_fetch_inc_relaxed(atomic_t *v)
+{
+       return arch_atomic_fetch_add_relaxed(1, v);
+}
+#define arch_atomic_fetch_inc_relaxed arch_atomic_fetch_inc_relaxed
+#endif
+
+#else /* arch_atomic_fetch_inc_relaxed */
+
+#ifndef arch_atomic_fetch_inc_acquire
+static __always_inline int
+arch_atomic_fetch_inc_acquire(atomic_t *v)
+{
+       int ret = arch_atomic_fetch_inc_relaxed(v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_fetch_inc_acquire arch_atomic_fetch_inc_acquire
+#endif
+
+#ifndef arch_atomic_fetch_inc_release
+static __always_inline int
+arch_atomic_fetch_inc_release(atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_fetch_inc_relaxed(v);
+}
+#define arch_atomic_fetch_inc_release arch_atomic_fetch_inc_release
+#endif
+
+#ifndef arch_atomic_fetch_inc
+static __always_inline int
+arch_atomic_fetch_inc(atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_fetch_inc_relaxed(v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_fetch_inc arch_atomic_fetch_inc
+#endif
+
+#endif /* arch_atomic_fetch_inc_relaxed */
+
+#ifndef arch_atomic_dec
+static __always_inline void
+arch_atomic_dec(atomic_t *v)
+{
+       arch_atomic_sub(1, v);
+}
+#define arch_atomic_dec arch_atomic_dec
+#endif
+
+#ifndef arch_atomic_dec_return_relaxed
+#ifdef arch_atomic_dec_return
+#define arch_atomic_dec_return_acquire arch_atomic_dec_return
+#define arch_atomic_dec_return_release arch_atomic_dec_return
+#define arch_atomic_dec_return_relaxed arch_atomic_dec_return
+#endif /* arch_atomic_dec_return */
+
+#ifndef arch_atomic_dec_return
+static __always_inline int
+arch_atomic_dec_return(atomic_t *v)
+{
+       return arch_atomic_sub_return(1, v);
+}
+#define arch_atomic_dec_return arch_atomic_dec_return
+#endif
+
+#ifndef arch_atomic_dec_return_acquire
+static __always_inline int
+arch_atomic_dec_return_acquire(atomic_t *v)
+{
+       return arch_atomic_sub_return_acquire(1, v);
+}
+#define arch_atomic_dec_return_acquire arch_atomic_dec_return_acquire
+#endif
+
+#ifndef arch_atomic_dec_return_release
+static __always_inline int
+arch_atomic_dec_return_release(atomic_t *v)
+{
+       return arch_atomic_sub_return_release(1, v);
+}
+#define arch_atomic_dec_return_release arch_atomic_dec_return_release
+#endif
+
+#ifndef arch_atomic_dec_return_relaxed
+static __always_inline int
+arch_atomic_dec_return_relaxed(atomic_t *v)
+{
+       return arch_atomic_sub_return_relaxed(1, v);
+}
+#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed
+#endif
+
+#else /* arch_atomic_dec_return_relaxed */
+
+#ifndef arch_atomic_dec_return_acquire
+static __always_inline int
+arch_atomic_dec_return_acquire(atomic_t *v)
+{
+       int ret = arch_atomic_dec_return_relaxed(v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_dec_return_acquire arch_atomic_dec_return_acquire
+#endif
+
+#ifndef arch_atomic_dec_return_release
+static __always_inline int
+arch_atomic_dec_return_release(atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_dec_return_relaxed(v);
+}
+#define arch_atomic_dec_return_release arch_atomic_dec_return_release
+#endif
+
+#ifndef arch_atomic_dec_return
+static __always_inline int
+arch_atomic_dec_return(atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_dec_return_relaxed(v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_dec_return arch_atomic_dec_return
+#endif
+
+#endif /* arch_atomic_dec_return_relaxed */
+
+#ifndef arch_atomic_fetch_dec_relaxed
+#ifdef arch_atomic_fetch_dec
+#define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec
+#define arch_atomic_fetch_dec_release arch_atomic_fetch_dec
+#define arch_atomic_fetch_dec_relaxed arch_atomic_fetch_dec
+#endif /* arch_atomic_fetch_dec */
+
+#ifndef arch_atomic_fetch_dec
+static __always_inline int
+arch_atomic_fetch_dec(atomic_t *v)
+{
+       return arch_atomic_fetch_sub(1, v);
+}
+#define arch_atomic_fetch_dec arch_atomic_fetch_dec
+#endif
+
+#ifndef arch_atomic_fetch_dec_acquire
+static __always_inline int
+arch_atomic_fetch_dec_acquire(atomic_t *v)
+{
+       return arch_atomic_fetch_sub_acquire(1, v);
+}
+#define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec_acquire
+#endif
+
+#ifndef arch_atomic_fetch_dec_release
+static __always_inline int
+arch_atomic_fetch_dec_release(atomic_t *v)
+{
+       return arch_atomic_fetch_sub_release(1, v);
+}
+#define arch_atomic_fetch_dec_release arch_atomic_fetch_dec_release
+#endif
+
+#ifndef arch_atomic_fetch_dec_relaxed
+static __always_inline int
+arch_atomic_fetch_dec_relaxed(atomic_t *v)
+{
+       return arch_atomic_fetch_sub_relaxed(1, v);
+}
+#define arch_atomic_fetch_dec_relaxed arch_atomic_fetch_dec_relaxed
+#endif
+
+#else /* arch_atomic_fetch_dec_relaxed */
+
+#ifndef arch_atomic_fetch_dec_acquire
+static __always_inline int
+arch_atomic_fetch_dec_acquire(atomic_t *v)
+{
+       int ret = arch_atomic_fetch_dec_relaxed(v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_fetch_dec_acquire arch_atomic_fetch_dec_acquire
+#endif
+
+#ifndef arch_atomic_fetch_dec_release
+static __always_inline int
+arch_atomic_fetch_dec_release(atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_fetch_dec_relaxed(v);
+}
+#define arch_atomic_fetch_dec_release arch_atomic_fetch_dec_release
+#endif
+
+#ifndef arch_atomic_fetch_dec
+static __always_inline int
+arch_atomic_fetch_dec(atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_fetch_dec_relaxed(v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_fetch_dec arch_atomic_fetch_dec
+#endif
+
+#endif /* arch_atomic_fetch_dec_relaxed */
+
+#ifndef arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and
+#else /* arch_atomic_fetch_and_relaxed */
+
+#ifndef arch_atomic_fetch_and_acquire
+static __always_inline int
+arch_atomic_fetch_and_acquire(int i, atomic_t *v)
+{
+       int ret = arch_atomic_fetch_and_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire
+#endif
+
+#ifndef arch_atomic_fetch_and_release
+static __always_inline int
+arch_atomic_fetch_and_release(int i, atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_fetch_and_relaxed(i, v);
+}
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and_release
+#endif
+
+#ifndef arch_atomic_fetch_and
+static __always_inline int
+arch_atomic_fetch_and(int i, atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_fetch_and_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#endif
+
+#endif /* arch_atomic_fetch_and_relaxed */
+
+#ifndef arch_atomic_andnot
+static __always_inline void
+arch_atomic_andnot(int i, atomic_t *v)
+{
+       arch_atomic_and(~i, v);
+}
+#define arch_atomic_andnot arch_atomic_andnot
+#endif
+
+#ifndef arch_atomic_fetch_andnot_relaxed
+#ifdef arch_atomic_fetch_andnot
+#define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot
+#define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot
+#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot
+#endif /* arch_atomic_fetch_andnot */
+
+#ifndef arch_atomic_fetch_andnot
+static __always_inline int
+arch_atomic_fetch_andnot(int i, atomic_t *v)
+{
+       return arch_atomic_fetch_and(~i, v);
+}
+#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#endif
+
+#ifndef arch_atomic_fetch_andnot_acquire
+static __always_inline int
+arch_atomic_fetch_andnot_acquire(int i, atomic_t *v)
+{
+       return arch_atomic_fetch_and_acquire(~i, v);
+}
+#define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot_acquire
+#endif
+
+#ifndef arch_atomic_fetch_andnot_release
+static __always_inline int
+arch_atomic_fetch_andnot_release(int i, atomic_t *v)
+{
+       return arch_atomic_fetch_and_release(~i, v);
+}
+#define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot_release
+#endif
+
+#ifndef arch_atomic_fetch_andnot_relaxed
+static __always_inline int
+arch_atomic_fetch_andnot_relaxed(int i, atomic_t *v)
+{
+       return arch_atomic_fetch_and_relaxed(~i, v);
+}
+#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
+#endif
+
+#else /* arch_atomic_fetch_andnot_relaxed */
+
+#ifndef arch_atomic_fetch_andnot_acquire
+static __always_inline int
+arch_atomic_fetch_andnot_acquire(int i, atomic_t *v)
+{
+       int ret = arch_atomic_fetch_andnot_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot_acquire
+#endif
+
+#ifndef arch_atomic_fetch_andnot_release
+static __always_inline int
+arch_atomic_fetch_andnot_release(int i, atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_fetch_andnot_relaxed(i, v);
+}
+#define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot_release
+#endif
+
+#ifndef arch_atomic_fetch_andnot
+static __always_inline int
+arch_atomic_fetch_andnot(int i, atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_fetch_andnot_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#endif
+
+#endif /* arch_atomic_fetch_andnot_relaxed */
+
+#ifndef arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or
+#else /* arch_atomic_fetch_or_relaxed */
+
+#ifndef arch_atomic_fetch_or_acquire
+static __always_inline int
+arch_atomic_fetch_or_acquire(int i, atomic_t *v)
+{
+       int ret = arch_atomic_fetch_or_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire
+#endif
+
+#ifndef arch_atomic_fetch_or_release
+static __always_inline int
+arch_atomic_fetch_or_release(int i, atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_fetch_or_relaxed(i, v);
+}
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or_release
+#endif
+
+#ifndef arch_atomic_fetch_or
+static __always_inline int
+arch_atomic_fetch_or(int i, atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_fetch_or_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#endif
+
+#endif /* arch_atomic_fetch_or_relaxed */
+
+#ifndef arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor
+#else /* arch_atomic_fetch_xor_relaxed */
+
+#ifndef arch_atomic_fetch_xor_acquire
+static __always_inline int
+arch_atomic_fetch_xor_acquire(int i, atomic_t *v)
+{
+       int ret = arch_atomic_fetch_xor_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire
+#endif
+
+#ifndef arch_atomic_fetch_xor_release
+static __always_inline int
+arch_atomic_fetch_xor_release(int i, atomic_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic_fetch_xor_relaxed(i, v);
+}
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
+#endif
+
+#ifndef arch_atomic_fetch_xor
+static __always_inline int
+arch_atomic_fetch_xor(int i, atomic_t *v)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_fetch_xor_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+#endif
+
+#endif /* arch_atomic_fetch_xor_relaxed */
+
+#ifndef arch_atomic_xchg_relaxed
+#define arch_atomic_xchg_acquire arch_atomic_xchg
+#define arch_atomic_xchg_release arch_atomic_xchg
+#define arch_atomic_xchg_relaxed arch_atomic_xchg
+#else /* arch_atomic_xchg_relaxed */
+
+#ifndef arch_atomic_xchg_acquire
+static __always_inline int
+arch_atomic_xchg_acquire(atomic_t *v, int i)
+{
+       int ret = arch_atomic_xchg_relaxed(v, i);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_xchg_acquire arch_atomic_xchg_acquire
+#endif
+
+#ifndef arch_atomic_xchg_release
+static __always_inline int
+arch_atomic_xchg_release(atomic_t *v, int i)
+{
+       __atomic_release_fence();
+       return arch_atomic_xchg_relaxed(v, i);
+}
+#define arch_atomic_xchg_release arch_atomic_xchg_release
+#endif
+
+#ifndef arch_atomic_xchg
+static __always_inline int
+arch_atomic_xchg(atomic_t *v, int i)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_xchg_relaxed(v, i);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_xchg arch_atomic_xchg
+#endif
+
+#endif /* arch_atomic_xchg_relaxed */
+
+#ifndef arch_atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg
+#define arch_atomic_cmpxchg_release arch_atomic_cmpxchg
+#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg
+#else /* arch_atomic_cmpxchg_relaxed */
+
+#ifndef arch_atomic_cmpxchg_acquire
+static __always_inline int
+arch_atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
+{
+       int ret = arch_atomic_cmpxchg_relaxed(v, old, new);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire
+#endif
+
+#ifndef arch_atomic_cmpxchg_release
+static __always_inline int
+arch_atomic_cmpxchg_release(atomic_t *v, int old, int new)
+{
+       __atomic_release_fence();
+       return arch_atomic_cmpxchg_relaxed(v, old, new);
+}
+#define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release
+#endif
+
+#ifndef arch_atomic_cmpxchg
+static __always_inline int
+arch_atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+       int ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_cmpxchg_relaxed(v, old, new);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
+#endif
+
+#endif /* arch_atomic_cmpxchg_relaxed */
+
+#ifndef arch_atomic_try_cmpxchg_relaxed
+#ifdef arch_atomic_try_cmpxchg
+#define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg
+#define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg
+#define arch_atomic_try_cmpxchg_relaxed arch_atomic_try_cmpxchg
+#endif /* arch_atomic_try_cmpxchg */
+
+#ifndef arch_atomic_try_cmpxchg
+static __always_inline bool
+arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+       int r, o = *old;
+       r = arch_atomic_cmpxchg(v, o, new);
+       if (unlikely(r != o))
+               *old = r;
+       return likely(r == o);
+}
+#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
+#endif
+
+#ifndef arch_atomic_try_cmpxchg_acquire
+static __always_inline bool
+arch_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
+{
+       int r, o = *old;
+       r = arch_atomic_cmpxchg_acquire(v, o, new);
+       if (unlikely(r != o))
+               *old = r;
+       return likely(r == o);
+}
+#define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg_acquire
+#endif
+
+#ifndef arch_atomic_try_cmpxchg_release
+static __always_inline bool
+arch_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
+{
+       int r, o = *old;
+       r = arch_atomic_cmpxchg_release(v, o, new);
+       if (unlikely(r != o))
+               *old = r;
+       return likely(r == o);
+}
+#define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg_release
+#endif
+
+#ifndef arch_atomic_try_cmpxchg_relaxed
+static __always_inline bool
+arch_atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
+{
+       int r, o = *old;
+       r = arch_atomic_cmpxchg_relaxed(v, o, new);
+       if (unlikely(r != o))
+               *old = r;
+       return likely(r == o);
+}
+#define arch_atomic_try_cmpxchg_relaxed arch_atomic_try_cmpxchg_relaxed
+#endif
+
+#else /* arch_atomic_try_cmpxchg_relaxed */
+
+#ifndef arch_atomic_try_cmpxchg_acquire
+static __always_inline bool
+arch_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
+{
+       bool ret = arch_atomic_try_cmpxchg_relaxed(v, old, new);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic_try_cmpxchg_acquire arch_atomic_try_cmpxchg_acquire
+#endif
+
+#ifndef arch_atomic_try_cmpxchg_release
+static __always_inline bool
+arch_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
+{
+       __atomic_release_fence();
+       return arch_atomic_try_cmpxchg_relaxed(v, old, new);
+}
+#define arch_atomic_try_cmpxchg_release arch_atomic_try_cmpxchg_release
+#endif
+
+#ifndef arch_atomic_try_cmpxchg
+static __always_inline bool
+arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+       bool ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic_try_cmpxchg_relaxed(v, old, new);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
+#endif
+
+#endif /* arch_atomic_try_cmpxchg_relaxed */
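
/*
 * Illustrative sketch (hypothetical helper, not from the kernel sources):
 * unlike cmpxchg(), try_cmpxchg() returns a bool and, on failure, writes the
 * value it observed back into *old, so a retry loop never re-reads @v by
 * hand.  Assumes <linux/atomic.h> and the atomic_*() wrappers generated on
 * top of the arch_ fallbacks above.
 */
static bool add_up_to_limit(atomic_t *v, int limit)
{
        int old = atomic_read(v);

        do {
                if (old >= limit)               /* nothing written; give up */
                        return false;
        } while (!atomic_try_cmpxchg(v, &old, old + 1));

        return true;
}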
+
+#ifndef arch_atomic_sub_and_test
+/**
+ * arch_atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static __always_inline bool
+arch_atomic_sub_and_test(int i, atomic_t *v)
+{
+       return arch_atomic_sub_return(i, v) == 0;
+}
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
+#endif
+
+#ifndef arch_atomic_dec_and_test
+/**
+ * arch_atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static __always_inline bool
+arch_atomic_dec_and_test(atomic_t *v)
+{
+       return arch_atomic_dec_return(v) == 0;
+}
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
+#endif
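
/*
 * Illustrative sketch (hypothetical 'struct foo', not from the kernel
 * sources): the classic user of dec_and_test() is reference counting --
 * exactly one caller sees the count hit zero and tears the object down.
 */
struct foo {
        atomic_t refcount;
        /* ... payload ... */
};

static void foo_destroy(struct foo *f);         /* hypothetical teardown */

static void foo_put(struct foo *f)
{
        if (atomic_dec_and_test(&f->refcount))
                foo_destroy(f);                 /* last reference dropped */
}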
+
+#ifndef arch_atomic_inc_and_test
+/**
+ * arch_atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static __always_inline bool
+arch_atomic_inc_and_test(atomic_t *v)
+{
+       return arch_atomic_inc_return(v) == 0;
+}
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
+#endif
+
+#ifndef arch_atomic_add_negative
+/**
+ * arch_atomic_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when the
+ * result is greater than or equal to zero.
+ */
+static __always_inline bool
+arch_atomic_add_negative(int i, atomic_t *v)
+{
+       return arch_atomic_add_return(i, v) < 0;
+}
+#define arch_atomic_add_negative arch_atomic_add_negative
+#endif
+
+#ifndef arch_atomic_fetch_add_unless
+/**
+ * arch_atomic_fetch_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns the original value of @v.
+ */
+static __always_inline int
+arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+       int c = arch_atomic_read(v);
+
+       do {
+               if (unlikely(c == u))
+                       break;
+       } while (!arch_atomic_try_cmpxchg(v, &c, c + a));
+
+       return c;
+}
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
+#endif
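
/*
 * Illustrative sketch (hypothetical quota counter, not from the kernel
 * sources): fetch_add_unless() implements the "leave a sentinel alone"
 * idiom.  If @v already holds the sentinel the add is skipped and the
 * sentinel is returned; otherwise the loop above retries until the add
 * lands on an unchanged value.
 */
#define QUOTA_FROZEN    (-1)                    /* hypothetical marker */

static bool charge_quota(atomic_t *quota_used, int pages)
{
        return atomic_fetch_add_unless(quota_used, pages, QUOTA_FROZEN) !=
               QUOTA_FROZEN;
}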
+
+#ifndef arch_atomic_add_unless
+/**
+ * arch_atomic_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns true if the addition was done.
+ */
+static __always_inline bool
+arch_atomic_add_unless(atomic_t *v, int a, int u)
+{
+       return arch_atomic_fetch_add_unless(v, a, u) != u;
+}
+#define arch_atomic_add_unless arch_atomic_add_unless
+#endif
+
+#ifndef arch_atomic_inc_not_zero
+/**
+ * arch_atomic_inc_not_zero - increment unless the number is zero
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1, if @v is non-zero.
+ * Returns true if the increment was done.
+ */
+static __always_inline bool
+arch_atomic_inc_not_zero(atomic_t *v)
+{
+       return arch_atomic_add_unless(v, 1, 0);
+}
+#define arch_atomic_inc_not_zero arch_atomic_inc_not_zero
+#endif
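
/*
 * Illustrative sketch (same hypothetical 'struct foo' as the dec_and_test()
 * example above): inc_not_zero() is the lookup-side counterpart of the
 * refcount release -- take a new reference only if the count has not
 * already dropped to zero and tear-down begun.
 */
static struct foo *foo_get(struct foo *f)
{
        if (f && !atomic_inc_not_zero(&f->refcount))
                return NULL;                    /* object already going away */
        return f;
}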
+
+#ifndef arch_atomic_inc_unless_negative
+static __always_inline bool
+arch_atomic_inc_unless_negative(atomic_t *v)
+{
+       int c = arch_atomic_read(v);
+
+       do {
+               if (unlikely(c < 0))
+                       return false;
+       } while (!arch_atomic_try_cmpxchg(v, &c, c + 1));
+
+       return true;
+}
+#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
+#endif
+
+#ifndef arch_atomic_dec_unless_positive
+static __always_inline bool
+arch_atomic_dec_unless_positive(atomic_t *v)
+{
+       int c = arch_atomic_read(v);
+
+       do {
+               if (unlikely(c > 0))
+                       return false;
+       } while (!arch_atomic_try_cmpxchg(v, &c, c - 1));
+
+       return true;
+}
+#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
+#endif
+
+#ifndef arch_atomic_dec_if_positive
+static __always_inline int
+arch_atomic_dec_if_positive(atomic_t *v)
+{
+       int dec, c = arch_atomic_read(v);
+
+       do {
+               dec = c - 1;
+               if (unlikely(dec < 0))
+                       break;
+       } while (!arch_atomic_try_cmpxchg(v, &c, dec));
+
+       return dec;
+}
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
+#endif
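
/*
 * Illustrative sketch (hypothetical resource pool, not from the kernel
 * sources): dec_if_positive() acts as a non-blocking "try down" -- it
 * returns the new count on success, or a negative value, without touching
 * @v, when nothing was left to claim.
 */
static bool try_claim_slot(atomic_t *pool_avail)
{
        return atomic_dec_if_positive(pool_avail) >= 0;
}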
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+#include <asm-generic/atomic64.h>
+#endif
+
+#ifndef arch_atomic64_read_acquire
+static __always_inline s64
+arch_atomic64_read_acquire(const atomic64_t *v)
+{
+       return smp_load_acquire(&(v)->counter);
+}
+#define arch_atomic64_read_acquire arch_atomic64_read_acquire
+#endif
+
+#ifndef arch_atomic64_set_release
+static __always_inline void
+arch_atomic64_set_release(atomic64_t *v, s64 i)
+{
+       smp_store_release(&(v)->counter, i);
+}
+#define arch_atomic64_set_release arch_atomic64_set_release
+#endif
+
+#ifndef arch_atomic64_add_return_relaxed
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return
+#define arch_atomic64_add_return_release arch_atomic64_add_return
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return
+#else /* arch_atomic64_add_return_relaxed */
+
+#ifndef arch_atomic64_add_return_acquire
+static __always_inline s64
+arch_atomic64_add_return_acquire(s64 i, atomic64_t *v)
+{
+       s64 ret = arch_atomic64_add_return_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire
+#endif
+
+#ifndef arch_atomic64_add_return_release
+static __always_inline s64
+arch_atomic64_add_return_release(s64 i, atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_add_return_relaxed(i, v);
+}
+#define arch_atomic64_add_return_release arch_atomic64_add_return_release
+#endif
+
+#ifndef arch_atomic64_add_return
+static __always_inline s64
+arch_atomic64_add_return(s64 i, atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_add_return_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_add_return arch_atomic64_add_return
+#endif
+
+#endif /* arch_atomic64_add_return_relaxed */
+
+#ifndef arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add
+#else /* arch_atomic64_fetch_add_relaxed */
+
+#ifndef arch_atomic64_fetch_add_acquire
+static __always_inline s64
+arch_atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
+{
+       s64 ret = arch_atomic64_fetch_add_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_add_release
+static __always_inline s64
+arch_atomic64_fetch_add_release(s64 i, atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_fetch_add_relaxed(i, v);
+}
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release
+#endif
+
+#ifndef arch_atomic64_fetch_add
+static __always_inline s64
+arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_fetch_add_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+#endif
+
+#endif /* arch_atomic64_fetch_add_relaxed */
+
+#ifndef arch_atomic64_sub_return_relaxed
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return
+#else /* arch_atomic64_sub_return_relaxed */
+
+#ifndef arch_atomic64_sub_return_acquire
+static __always_inline s64
+arch_atomic64_sub_return_acquire(s64 i, atomic64_t *v)
+{
+       s64 ret = arch_atomic64_sub_return_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire
+#endif
+
+#ifndef arch_atomic64_sub_return_release
+static __always_inline s64
+arch_atomic64_sub_return_release(s64 i, atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_sub_return_relaxed(i, v);
+}
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return_release
+#endif
+
+#ifndef arch_atomic64_sub_return
+static __always_inline s64
+arch_atomic64_sub_return(s64 i, atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_sub_return_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+#endif
+
+#endif /* arch_atomic64_sub_return_relaxed */
+
+#ifndef arch_atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub
+#else /* arch_atomic64_fetch_sub_relaxed */
+
+#ifndef arch_atomic64_fetch_sub_acquire
+static __always_inline s64
+arch_atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
+{
+       s64 ret = arch_atomic64_fetch_sub_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_sub_release
+static __always_inline s64
+arch_atomic64_fetch_sub_release(s64 i, atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_fetch_sub_relaxed(i, v);
+}
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release
+#endif
+
+#ifndef arch_atomic64_fetch_sub
+static __always_inline s64
+arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_fetch_sub_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+#endif
+
+#endif /* arch_atomic64_fetch_sub_relaxed */
+
+#ifndef arch_atomic64_inc
+static __always_inline void
+arch_atomic64_inc(atomic64_t *v)
+{
+       arch_atomic64_add(1, v);
+}
+#define arch_atomic64_inc arch_atomic64_inc
+#endif
+
+#ifndef arch_atomic64_inc_return_relaxed
+#ifdef arch_atomic64_inc_return
+#define arch_atomic64_inc_return_acquire arch_atomic64_inc_return
+#define arch_atomic64_inc_return_release arch_atomic64_inc_return
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return
+#endif /* arch_atomic64_inc_return */
+
+#ifndef arch_atomic64_inc_return
+static __always_inline s64
+arch_atomic64_inc_return(atomic64_t *v)
+{
+       return arch_atomic64_add_return(1, v);
+}
+#define arch_atomic64_inc_return arch_atomic64_inc_return
+#endif
+
+#ifndef arch_atomic64_inc_return_acquire
+static __always_inline s64
+arch_atomic64_inc_return_acquire(atomic64_t *v)
+{
+       return arch_atomic64_add_return_acquire(1, v);
+}
+#define arch_atomic64_inc_return_acquire arch_atomic64_inc_return_acquire
+#endif
+
+#ifndef arch_atomic64_inc_return_release
+static __always_inline s64
+arch_atomic64_inc_return_release(atomic64_t *v)
+{
+       return arch_atomic64_add_return_release(1, v);
+}
+#define arch_atomic64_inc_return_release arch_atomic64_inc_return_release
+#endif
+
+#ifndef arch_atomic64_inc_return_relaxed
+static __always_inline s64
+arch_atomic64_inc_return_relaxed(atomic64_t *v)
+{
+       return arch_atomic64_add_return_relaxed(1, v);
+}
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed
+#endif
+
+#else /* arch_atomic64_inc_return_relaxed */
+
+#ifndef arch_atomic64_inc_return_acquire
+static __always_inline s64
+arch_atomic64_inc_return_acquire(atomic64_t *v)
+{
+       s64 ret = arch_atomic64_inc_return_relaxed(v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_inc_return_acquire arch_atomic64_inc_return_acquire
+#endif
+
+#ifndef arch_atomic64_inc_return_release
+static __always_inline s64
+arch_atomic64_inc_return_release(atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_inc_return_relaxed(v);
+}
+#define arch_atomic64_inc_return_release arch_atomic64_inc_return_release
+#endif
+
+#ifndef arch_atomic64_inc_return
+static __always_inline s64
+arch_atomic64_inc_return(atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_inc_return_relaxed(v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_inc_return arch_atomic64_inc_return
+#endif
+
+#endif /* arch_atomic64_inc_return_relaxed */
+
+#ifndef arch_atomic64_fetch_inc_relaxed
+#ifdef arch_atomic64_fetch_inc
+#define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc
+#define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc
+#define arch_atomic64_fetch_inc_relaxed arch_atomic64_fetch_inc
+#endif /* arch_atomic64_fetch_inc */
+
+#ifndef arch_atomic64_fetch_inc
+static __always_inline s64
+arch_atomic64_fetch_inc(atomic64_t *v)
+{
+       return arch_atomic64_fetch_add(1, v);
+}
+#define arch_atomic64_fetch_inc arch_atomic64_fetch_inc
+#endif
+
+#ifndef arch_atomic64_fetch_inc_acquire
+static __always_inline s64
+arch_atomic64_fetch_inc_acquire(atomic64_t *v)
+{
+       return arch_atomic64_fetch_add_acquire(1, v);
+}
+#define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_inc_release
+static __always_inline s64
+arch_atomic64_fetch_inc_release(atomic64_t *v)
+{
+       return arch_atomic64_fetch_add_release(1, v);
+}
+#define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc_release
+#endif
+
+#ifndef arch_atomic64_fetch_inc_relaxed
+static __always_inline s64
+arch_atomic64_fetch_inc_relaxed(atomic64_t *v)
+{
+       return arch_atomic64_fetch_add_relaxed(1, v);
+}
+#define arch_atomic64_fetch_inc_relaxed arch_atomic64_fetch_inc_relaxed
+#endif
+
+#else /* arch_atomic64_fetch_inc_relaxed */
+
+#ifndef arch_atomic64_fetch_inc_acquire
+static __always_inline s64
+arch_atomic64_fetch_inc_acquire(atomic64_t *v)
+{
+       s64 ret = arch_atomic64_fetch_inc_relaxed(v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_inc_acquire arch_atomic64_fetch_inc_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_inc_release
+static __always_inline s64
+arch_atomic64_fetch_inc_release(atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_fetch_inc_relaxed(v);
+}
+#define arch_atomic64_fetch_inc_release arch_atomic64_fetch_inc_release
+#endif
+
+#ifndef arch_atomic64_fetch_inc
+static __always_inline s64
+arch_atomic64_fetch_inc(atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_fetch_inc_relaxed(v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_inc arch_atomic64_fetch_inc
+#endif
+
+#endif /* arch_atomic64_fetch_inc_relaxed */
+
+#ifndef arch_atomic64_dec
+static __always_inline void
+arch_atomic64_dec(atomic64_t *v)
+{
+       arch_atomic64_sub(1, v);
+}
+#define arch_atomic64_dec arch_atomic64_dec
+#endif
+
+#ifndef arch_atomic64_dec_return_relaxed
+#ifdef arch_atomic64_dec_return
+#define arch_atomic64_dec_return_acquire arch_atomic64_dec_return
+#define arch_atomic64_dec_return_release arch_atomic64_dec_return
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return
+#endif /* arch_atomic64_dec_return */
+
+#ifndef arch_atomic64_dec_return
+static __always_inline s64
+arch_atomic64_dec_return(atomic64_t *v)
+{
+       return arch_atomic64_sub_return(1, v);
+}
+#define arch_atomic64_dec_return arch_atomic64_dec_return
+#endif
+
+#ifndef arch_atomic64_dec_return_acquire
+static __always_inline s64
+arch_atomic64_dec_return_acquire(atomic64_t *v)
+{
+       return arch_atomic64_sub_return_acquire(1, v);
+}
+#define arch_atomic64_dec_return_acquire arch_atomic64_dec_return_acquire
+#endif
+
+#ifndef arch_atomic64_dec_return_release
+static __always_inline s64
+arch_atomic64_dec_return_release(atomic64_t *v)
+{
+       return arch_atomic64_sub_return_release(1, v);
+}
+#define arch_atomic64_dec_return_release arch_atomic64_dec_return_release
+#endif
+
+#ifndef arch_atomic64_dec_return_relaxed
+static __always_inline s64
+arch_atomic64_dec_return_relaxed(atomic64_t *v)
+{
+       return arch_atomic64_sub_return_relaxed(1, v);
+}
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed
+#endif
+
+#else /* arch_atomic64_dec_return_relaxed */
+
+#ifndef arch_atomic64_dec_return_acquire
+static __always_inline s64
+arch_atomic64_dec_return_acquire(atomic64_t *v)
+{
+       s64 ret = arch_atomic64_dec_return_relaxed(v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_dec_return_acquire arch_atomic64_dec_return_acquire
+#endif
+
+#ifndef arch_atomic64_dec_return_release
+static __always_inline s64
+arch_atomic64_dec_return_release(atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_dec_return_relaxed(v);
+}
+#define arch_atomic64_dec_return_release arch_atomic64_dec_return_release
+#endif
+
+#ifndef arch_atomic64_dec_return
+static __always_inline s64
+arch_atomic64_dec_return(atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_dec_return_relaxed(v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_dec_return arch_atomic64_dec_return
+#endif
+
+#endif /* arch_atomic64_dec_return_relaxed */
+
+#ifndef arch_atomic64_fetch_dec_relaxed
+#ifdef arch_atomic64_fetch_dec
+#define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec
+#define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec
+#define arch_atomic64_fetch_dec_relaxed arch_atomic64_fetch_dec
+#endif /* arch_atomic64_fetch_dec */
+
+#ifndef arch_atomic64_fetch_dec
+static __always_inline s64
+arch_atomic64_fetch_dec(atomic64_t *v)
+{
+       return arch_atomic64_fetch_sub(1, v);
+}
+#define arch_atomic64_fetch_dec arch_atomic64_fetch_dec
+#endif
+
+#ifndef arch_atomic64_fetch_dec_acquire
+static __always_inline s64
+arch_atomic64_fetch_dec_acquire(atomic64_t *v)
+{
+       return arch_atomic64_fetch_sub_acquire(1, v);
+}
+#define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_dec_release
+static __always_inline s64
+arch_atomic64_fetch_dec_release(atomic64_t *v)
+{
+       return arch_atomic64_fetch_sub_release(1, v);
+}
+#define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec_release
+#endif
+
+#ifndef arch_atomic64_fetch_dec_relaxed
+static __always_inline s64
+arch_atomic64_fetch_dec_relaxed(atomic64_t *v)
+{
+       return arch_atomic64_fetch_sub_relaxed(1, v);
+}
+#define arch_atomic64_fetch_dec_relaxed arch_atomic64_fetch_dec_relaxed
+#endif
+
+#else /* arch_atomic64_fetch_dec_relaxed */
+
+#ifndef arch_atomic64_fetch_dec_acquire
+static __always_inline s64
+arch_atomic64_fetch_dec_acquire(atomic64_t *v)
+{
+       s64 ret = arch_atomic64_fetch_dec_relaxed(v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_dec_acquire arch_atomic64_fetch_dec_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_dec_release
+static __always_inline s64
+arch_atomic64_fetch_dec_release(atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_fetch_dec_relaxed(v);
+}
+#define arch_atomic64_fetch_dec_release arch_atomic64_fetch_dec_release
+#endif
+
+#ifndef arch_atomic64_fetch_dec
+static __always_inline s64
+arch_atomic64_fetch_dec(atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_fetch_dec_relaxed(v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_dec arch_atomic64_fetch_dec
+#endif
+
+#endif /* arch_atomic64_fetch_dec_relaxed */
+
+#ifndef arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and
+#else /* arch_atomic64_fetch_and_relaxed */
+
+#ifndef arch_atomic64_fetch_and_acquire
+static __always_inline s64
+arch_atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
+{
+       s64 ret = arch_atomic64_fetch_and_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_and_release
+static __always_inline s64
+arch_atomic64_fetch_and_release(s64 i, atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_fetch_and_relaxed(i, v);
+}
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release
+#endif
+
+#ifndef arch_atomic64_fetch_and
+static __always_inline s64
+arch_atomic64_fetch_and(s64 i, atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_fetch_and_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+#endif
+
+#endif /* arch_atomic64_fetch_and_relaxed */
+
+#ifndef arch_atomic64_andnot
+static __always_inline void
+arch_atomic64_andnot(s64 i, atomic64_t *v)
+{
+       arch_atomic64_and(~i, v);
+}
+#define arch_atomic64_andnot arch_atomic64_andnot
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_relaxed
+#ifdef arch_atomic64_fetch_andnot
+#define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot
+#define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot
+#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot
+#endif /* arch_atomic64_fetch_andnot */
+
+#ifndef arch_atomic64_fetch_andnot
+static __always_inline s64
+arch_atomic64_fetch_andnot(s64 i, atomic64_t *v)
+{
+       return arch_atomic64_fetch_and(~i, v);
+}
+#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_acquire
+static __always_inline s64
+arch_atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
+{
+       return arch_atomic64_fetch_and_acquire(~i, v);
+}
+#define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_release
+static __always_inline s64
+arch_atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
+{
+       return arch_atomic64_fetch_and_release(~i, v);
+}
+#define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot_release
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_relaxed
+static __always_inline s64
+arch_atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
+{
+       return arch_atomic64_fetch_and_relaxed(~i, v);
+}
+#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
+#endif
+
+#else /* arch_atomic64_fetch_andnot_relaxed */
+
+#ifndef arch_atomic64_fetch_andnot_acquire
+static __always_inline s64
+arch_atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
+{
+       s64 ret = arch_atomic64_fetch_andnot_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_release
+static __always_inline s64
+arch_atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_fetch_andnot_relaxed(i, v);
+}
+#define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot_release
+#endif
+
+#ifndef arch_atomic64_fetch_andnot
+static __always_inline s64
+arch_atomic64_fetch_andnot(s64 i, atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_fetch_andnot_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot
+#endif
+
+#endif /* arch_atomic64_fetch_andnot_relaxed */
+
+#ifndef arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or
+#else /* arch_atomic64_fetch_or_relaxed */
+
+#ifndef arch_atomic64_fetch_or_acquire
+static __always_inline s64
+arch_atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
+{
+       s64 ret = arch_atomic64_fetch_or_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_or_release
+static __always_inline s64
+arch_atomic64_fetch_or_release(s64 i, atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_fetch_or_relaxed(i, v);
+}
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release
+#endif
+
+#ifndef arch_atomic64_fetch_or
+static __always_inline s64
+arch_atomic64_fetch_or(s64 i, atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_fetch_or_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+#endif
+
+#endif /* arch_atomic64_fetch_or_relaxed */
+
+#ifndef arch_atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor
+#else /* arch_atomic64_fetch_xor_relaxed */
+
+#ifndef arch_atomic64_fetch_xor_acquire
+static __always_inline s64
+arch_atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
+{
+       s64 ret = arch_atomic64_fetch_xor_relaxed(i, v);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_xor_release
+static __always_inline s64
+arch_atomic64_fetch_xor_release(s64 i, atomic64_t *v)
+{
+       __atomic_release_fence();
+       return arch_atomic64_fetch_xor_relaxed(i, v);
+}
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
+#endif
+
+#ifndef arch_atomic64_fetch_xor
+static __always_inline s64
+arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_fetch_xor_relaxed(i, v);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
+#endif
+
+#endif /* arch_atomic64_fetch_xor_relaxed */
+
+#ifndef arch_atomic64_xchg_relaxed
+#define arch_atomic64_xchg_acquire arch_atomic64_xchg
+#define arch_atomic64_xchg_release arch_atomic64_xchg
+#define arch_atomic64_xchg_relaxed arch_atomic64_xchg
+#else /* arch_atomic64_xchg_relaxed */
+
+#ifndef arch_atomic64_xchg_acquire
+static __always_inline s64
+arch_atomic64_xchg_acquire(atomic64_t *v, s64 i)
+{
+       s64 ret = arch_atomic64_xchg_relaxed(v, i);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_xchg_acquire arch_atomic64_xchg_acquire
+#endif
+
+#ifndef arch_atomic64_xchg_release
+static __always_inline s64
+arch_atomic64_xchg_release(atomic64_t *v, s64 i)
+{
+       __atomic_release_fence();
+       return arch_atomic64_xchg_relaxed(v, i);
+}
+#define arch_atomic64_xchg_release arch_atomic64_xchg_release
+#endif
+
+#ifndef arch_atomic64_xchg
+static __always_inline s64
+arch_atomic64_xchg(atomic64_t *v, s64 i)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_xchg_relaxed(v, i);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_xchg arch_atomic64_xchg
+#endif
+
+#endif /* arch_atomic64_xchg_relaxed */
+
+#ifndef arch_atomic64_cmpxchg_relaxed
+#define arch_atomic64_cmpxchg_acquire arch_atomic64_cmpxchg
+#define arch_atomic64_cmpxchg_release arch_atomic64_cmpxchg
+#define arch_atomic64_cmpxchg_relaxed arch_atomic64_cmpxchg
+#else /* arch_atomic64_cmpxchg_relaxed */
+
+#ifndef arch_atomic64_cmpxchg_acquire
+static __always_inline s64
+arch_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
+{
+       s64 ret = arch_atomic64_cmpxchg_relaxed(v, old, new);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_cmpxchg_acquire arch_atomic64_cmpxchg_acquire
+#endif
+
+#ifndef arch_atomic64_cmpxchg_release
+static __always_inline s64
+arch_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
+{
+       __atomic_release_fence();
+       return arch_atomic64_cmpxchg_relaxed(v, old, new);
+}
+#define arch_atomic64_cmpxchg_release arch_atomic64_cmpxchg_release
+#endif
+
+#ifndef arch_atomic64_cmpxchg
+static __always_inline s64
+arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+{
+       s64 ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_cmpxchg_relaxed(v, old, new);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+#endif
+
+#endif /* arch_atomic64_cmpxchg_relaxed */
+
+#ifndef arch_atomic64_try_cmpxchg_relaxed
+#ifdef arch_atomic64_try_cmpxchg
+#define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg
+#define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg
+#define arch_atomic64_try_cmpxchg_relaxed arch_atomic64_try_cmpxchg
+#endif /* arch_atomic64_try_cmpxchg */
+
+#ifndef arch_atomic64_try_cmpxchg
+static __always_inline bool
+arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+       s64 r, o = *old;
+       r = arch_atomic64_cmpxchg(v, o, new);
+       if (unlikely(r != o))
+               *old = r;
+       return likely(r == o);
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+#endif
+
+#ifndef arch_atomic64_try_cmpxchg_acquire
+static __always_inline bool
+arch_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
+{
+       s64 r, o = *old;
+       r = arch_atomic64_cmpxchg_acquire(v, o, new);
+       if (unlikely(r != o))
+               *old = r;
+       return likely(r == o);
+}
+#define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg_acquire
+#endif
+
+#ifndef arch_atomic64_try_cmpxchg_release
+static __always_inline bool
+arch_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
+{
+       s64 r, o = *old;
+       r = arch_atomic64_cmpxchg_release(v, o, new);
+       if (unlikely(r != o))
+               *old = r;
+       return likely(r == o);
+}
+#define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg_release
+#endif
+
+#ifndef arch_atomic64_try_cmpxchg_relaxed
+static __always_inline bool
+arch_atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
+{
+       s64 r, o = *old;
+       r = arch_atomic64_cmpxchg_relaxed(v, o, new);
+       if (unlikely(r != o))
+               *old = r;
+       return likely(r == o);
+}
+#define arch_atomic64_try_cmpxchg_relaxed arch_atomic64_try_cmpxchg_relaxed
+#endif
+
+#else /* arch_atomic64_try_cmpxchg_relaxed */
+
+#ifndef arch_atomic64_try_cmpxchg_acquire
+static __always_inline bool
+arch_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
+{
+       bool ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new);
+       __atomic_acquire_fence();
+       return ret;
+}
+#define arch_atomic64_try_cmpxchg_acquire arch_atomic64_try_cmpxchg_acquire
+#endif
+
+#ifndef arch_atomic64_try_cmpxchg_release
+static __always_inline bool
+arch_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
+{
+       __atomic_release_fence();
+       return arch_atomic64_try_cmpxchg_relaxed(v, old, new);
+}
+#define arch_atomic64_try_cmpxchg_release arch_atomic64_try_cmpxchg_release
+#endif
+
+#ifndef arch_atomic64_try_cmpxchg
+static __always_inline bool
+arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+       bool ret;
+       __atomic_pre_full_fence();
+       ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new);
+       __atomic_post_full_fence();
+       return ret;
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+#endif
+
+#endif /* arch_atomic64_try_cmpxchg_relaxed */
+
+#ifndef arch_atomic64_sub_and_test
+/**
+ * arch_atomic64_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static __always_inline bool
+arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
+{
+       return arch_atomic64_sub_return(i, v) == 0;
+}
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
+#endif
+
+#ifndef arch_atomic64_dec_and_test
+/**
+ * arch_atomic64_dec_and_test - decrement and test
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static __always_inline bool
+arch_atomic64_dec_and_test(atomic64_t *v)
+{
+       return arch_atomic64_dec_return(v) == 0;
+}
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
+#endif
+
+#ifndef arch_atomic64_inc_and_test
+/**
+ * arch_atomic64_inc_and_test - increment and test
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static __always_inline bool
+arch_atomic64_inc_and_test(atomic64_t *v)
+{
+       return arch_atomic64_inc_return(v) == 0;
+}
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
+#endif
+
+#ifndef arch_atomic64_add_negative
+/**
+ * arch_atomic64_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when the
+ * result is greater than or equal to zero.
+ */
+static __always_inline bool
+arch_atomic64_add_negative(s64 i, atomic64_t *v)
+{
+       return arch_atomic64_add_return(i, v) < 0;
+}
+#define arch_atomic64_add_negative arch_atomic64_add_negative
+#endif
+
+#ifndef arch_atomic64_fetch_add_unless
+/**
+ * arch_atomic64_fetch_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns the original value of @v.
+ */
+static __always_inline s64
+arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+       s64 c = arch_atomic64_read(v);
+
+       do {
+               if (unlikely(c == u))
+                       break;
+       } while (!arch_atomic64_try_cmpxchg(v, &c, c + a));
+
+       return c;
+}
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
+#endif
+
+#ifndef arch_atomic64_add_unless
+/**
+ * arch_atomic64_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns true if the addition was done.
+ */
+static __always_inline bool
+arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+       return arch_atomic64_fetch_add_unless(v, a, u) != u;
+}
+#define arch_atomic64_add_unless arch_atomic64_add_unless
+#endif
+
+#ifndef arch_atomic64_inc_not_zero
+/**
+ * arch_atomic64_inc_not_zero - increment unless the number is zero
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1, if @v is non-zero.
+ * Returns true if the increment was done.
+ */
+static __always_inline bool
+arch_atomic64_inc_not_zero(atomic64_t *v)
+{
+       return arch_atomic64_add_unless(v, 1, 0);
+}
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
+#endif
+
+#ifndef arch_atomic64_inc_unless_negative
+static __always_inline bool
+arch_atomic64_inc_unless_negative(atomic64_t *v)
+{
+       s64 c = arch_atomic64_read(v);
+
+       do {
+               if (unlikely(c < 0))
+                       return false;
+       } while (!arch_atomic64_try_cmpxchg(v, &c, c + 1));
+
+       return true;
+}
+#define arch_atomic64_inc_unless_negative arch_atomic64_inc_unless_negative
+#endif
+
+#ifndef arch_atomic64_dec_unless_positive
+static __always_inline bool
+arch_atomic64_dec_unless_positive(atomic64_t *v)
+{
+       s64 c = arch_atomic64_read(v);
+
+       do {
+               if (unlikely(c > 0))
+                       return false;
+       } while (!arch_atomic64_try_cmpxchg(v, &c, c - 1));
+
+       return true;
+}
+#define arch_atomic64_dec_unless_positive arch_atomic64_dec_unless_positive
+#endif
+
+#ifndef arch_atomic64_dec_if_positive
+static __always_inline s64
+arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+       s64 dec, c = arch_atomic64_read(v);
+
+       do {
+               dec = c - 1;
+               if (unlikely(dec < 0))
+                       break;
+       } while (!arch_atomic64_try_cmpxchg(v, &c, dec));
+
+       return dec;
+}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
+#endif
+
+#endif /* _LINUX_ATOMIC_FALLBACK_H */
+// 90cd26cfd69d2250303d654955a0cc12620fb91b
index a7d240e..2c4927b 100644
@@ -6,6 +6,8 @@
 #ifndef _LINUX_ATOMIC_FALLBACK_H
 #define _LINUX_ATOMIC_FALLBACK_H
 
+#include <linux/compiler.h>
+
 #ifndef xchg_relaxed
 #define xchg_relaxed           xchg
 #define xchg_acquire           xchg
@@ -76,7 +78,7 @@
 #endif /* cmpxchg64_relaxed */
 
 #ifndef atomic_read_acquire
-static inline int
+static __always_inline int
 atomic_read_acquire(const atomic_t *v)
 {
        return smp_load_acquire(&(v)->counter);
@@ -85,7 +87,7 @@ atomic_read_acquire(const atomic_t *v)
 #endif
 
 #ifndef atomic_set_release
-static inline void
+static __always_inline void
 atomic_set_release(atomic_t *v, int i)
 {
        smp_store_release(&(v)->counter, i);
@@ -100,7 +102,7 @@ atomic_set_release(atomic_t *v, int i)
 #else /* atomic_add_return_relaxed */
 
 #ifndef atomic_add_return_acquire
-static inline int
+static __always_inline int
 atomic_add_return_acquire(int i, atomic_t *v)
 {
        int ret = atomic_add_return_relaxed(i, v);
@@ -111,7 +113,7 @@ atomic_add_return_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_add_return_release
-static inline int
+static __always_inline int
 atomic_add_return_release(int i, atomic_t *v)
 {
        __atomic_release_fence();
@@ -121,7 +123,7 @@ atomic_add_return_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_add_return
-static inline int
+static __always_inline int
 atomic_add_return(int i, atomic_t *v)
 {
        int ret;
@@ -142,7 +144,7 @@ atomic_add_return(int i, atomic_t *v)
 #else /* atomic_fetch_add_relaxed */
 
 #ifndef atomic_fetch_add_acquire
-static inline int
+static __always_inline int
 atomic_fetch_add_acquire(int i, atomic_t *v)
 {
        int ret = atomic_fetch_add_relaxed(i, v);
@@ -153,7 +155,7 @@ atomic_fetch_add_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_add_release
-static inline int
+static __always_inline int
 atomic_fetch_add_release(int i, atomic_t *v)
 {
        __atomic_release_fence();
@@ -163,7 +165,7 @@ atomic_fetch_add_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_add
-static inline int
+static __always_inline int
 atomic_fetch_add(int i, atomic_t *v)
 {
        int ret;
@@ -184,7 +186,7 @@ atomic_fetch_add(int i, atomic_t *v)
 #else /* atomic_sub_return_relaxed */
 
 #ifndef atomic_sub_return_acquire
-static inline int
+static __always_inline int
 atomic_sub_return_acquire(int i, atomic_t *v)
 {
        int ret = atomic_sub_return_relaxed(i, v);
@@ -195,7 +197,7 @@ atomic_sub_return_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_sub_return_release
-static inline int
+static __always_inline int
 atomic_sub_return_release(int i, atomic_t *v)
 {
        __atomic_release_fence();
@@ -205,7 +207,7 @@ atomic_sub_return_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_sub_return
-static inline int
+static __always_inline int
 atomic_sub_return(int i, atomic_t *v)
 {
        int ret;
@@ -226,7 +228,7 @@ atomic_sub_return(int i, atomic_t *v)
 #else /* atomic_fetch_sub_relaxed */
 
 #ifndef atomic_fetch_sub_acquire
-static inline int
+static __always_inline int
 atomic_fetch_sub_acquire(int i, atomic_t *v)
 {
        int ret = atomic_fetch_sub_relaxed(i, v);
@@ -237,7 +239,7 @@ atomic_fetch_sub_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_sub_release
-static inline int
+static __always_inline int
 atomic_fetch_sub_release(int i, atomic_t *v)
 {
        __atomic_release_fence();
@@ -247,7 +249,7 @@ atomic_fetch_sub_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_sub
-static inline int
+static __always_inline int
 atomic_fetch_sub(int i, atomic_t *v)
 {
        int ret;
@@ -262,7 +264,7 @@ atomic_fetch_sub(int i, atomic_t *v)
 #endif /* atomic_fetch_sub_relaxed */
 
 #ifndef atomic_inc
-static inline void
+static __always_inline void
 atomic_inc(atomic_t *v)
 {
        atomic_add(1, v);
@@ -278,7 +280,7 @@ atomic_inc(atomic_t *v)
 #endif /* atomic_inc_return */
 
 #ifndef atomic_inc_return
-static inline int
+static __always_inline int
 atomic_inc_return(atomic_t *v)
 {
        return atomic_add_return(1, v);
@@ -287,7 +289,7 @@ atomic_inc_return(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return_acquire
-static inline int
+static __always_inline int
 atomic_inc_return_acquire(atomic_t *v)
 {
        return atomic_add_return_acquire(1, v);
@@ -296,7 +298,7 @@ atomic_inc_return_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return_release
-static inline int
+static __always_inline int
 atomic_inc_return_release(atomic_t *v)
 {
        return atomic_add_return_release(1, v);
@@ -305,7 +307,7 @@ atomic_inc_return_release(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return_relaxed
-static inline int
+static __always_inline int
 atomic_inc_return_relaxed(atomic_t *v)
 {
        return atomic_add_return_relaxed(1, v);
@@ -316,7 +318,7 @@ atomic_inc_return_relaxed(atomic_t *v)
 #else /* atomic_inc_return_relaxed */
 
 #ifndef atomic_inc_return_acquire
-static inline int
+static __always_inline int
 atomic_inc_return_acquire(atomic_t *v)
 {
        int ret = atomic_inc_return_relaxed(v);
@@ -327,7 +329,7 @@ atomic_inc_return_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return_release
-static inline int
+static __always_inline int
 atomic_inc_return_release(atomic_t *v)
 {
        __atomic_release_fence();
@@ -337,7 +339,7 @@ atomic_inc_return_release(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return
-static inline int
+static __always_inline int
 atomic_inc_return(atomic_t *v)
 {
        int ret;
@@ -359,7 +361,7 @@ atomic_inc_return(atomic_t *v)
 #endif /* atomic_fetch_inc */
 
 #ifndef atomic_fetch_inc
-static inline int
+static __always_inline int
 atomic_fetch_inc(atomic_t *v)
 {
        return atomic_fetch_add(1, v);
@@ -368,7 +370,7 @@ atomic_fetch_inc(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc_acquire
-static inline int
+static __always_inline int
 atomic_fetch_inc_acquire(atomic_t *v)
 {
        return atomic_fetch_add_acquire(1, v);
@@ -377,7 +379,7 @@ atomic_fetch_inc_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc_release
-static inline int
+static __always_inline int
 atomic_fetch_inc_release(atomic_t *v)
 {
        return atomic_fetch_add_release(1, v);
@@ -386,7 +388,7 @@ atomic_fetch_inc_release(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc_relaxed
-static inline int
+static __always_inline int
 atomic_fetch_inc_relaxed(atomic_t *v)
 {
        return atomic_fetch_add_relaxed(1, v);
@@ -397,7 +399,7 @@ atomic_fetch_inc_relaxed(atomic_t *v)
 #else /* atomic_fetch_inc_relaxed */
 
 #ifndef atomic_fetch_inc_acquire
-static inline int
+static __always_inline int
 atomic_fetch_inc_acquire(atomic_t *v)
 {
        int ret = atomic_fetch_inc_relaxed(v);
@@ -408,7 +410,7 @@ atomic_fetch_inc_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc_release
-static inline int
+static __always_inline int
 atomic_fetch_inc_release(atomic_t *v)
 {
        __atomic_release_fence();
@@ -418,7 +420,7 @@ atomic_fetch_inc_release(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc
-static inline int
+static __always_inline int
 atomic_fetch_inc(atomic_t *v)
 {
        int ret;
@@ -433,7 +435,7 @@ atomic_fetch_inc(atomic_t *v)
 #endif /* atomic_fetch_inc_relaxed */
 
 #ifndef atomic_dec
-static inline void
+static __always_inline void
 atomic_dec(atomic_t *v)
 {
        atomic_sub(1, v);
@@ -449,7 +451,7 @@ atomic_dec(atomic_t *v)
 #endif /* atomic_dec_return */
 
 #ifndef atomic_dec_return
-static inline int
+static __always_inline int
 atomic_dec_return(atomic_t *v)
 {
        return atomic_sub_return(1, v);
@@ -458,7 +460,7 @@ atomic_dec_return(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return_acquire
-static inline int
+static __always_inline int
 atomic_dec_return_acquire(atomic_t *v)
 {
        return atomic_sub_return_acquire(1, v);
@@ -467,7 +469,7 @@ atomic_dec_return_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return_release
-static inline int
+static __always_inline int
 atomic_dec_return_release(atomic_t *v)
 {
        return atomic_sub_return_release(1, v);
@@ -476,7 +478,7 @@ atomic_dec_return_release(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return_relaxed
-static inline int
+static __always_inline int
 atomic_dec_return_relaxed(atomic_t *v)
 {
        return atomic_sub_return_relaxed(1, v);
@@ -487,7 +489,7 @@ atomic_dec_return_relaxed(atomic_t *v)
 #else /* atomic_dec_return_relaxed */
 
 #ifndef atomic_dec_return_acquire
-static inline int
+static __always_inline int
 atomic_dec_return_acquire(atomic_t *v)
 {
        int ret = atomic_dec_return_relaxed(v);
@@ -498,7 +500,7 @@ atomic_dec_return_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return_release
-static inline int
+static __always_inline int
 atomic_dec_return_release(atomic_t *v)
 {
        __atomic_release_fence();
@@ -508,7 +510,7 @@ atomic_dec_return_release(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return
-static inline int
+static __always_inline int
 atomic_dec_return(atomic_t *v)
 {
        int ret;
@@ -530,7 +532,7 @@ atomic_dec_return(atomic_t *v)
 #endif /* atomic_fetch_dec */
 
 #ifndef atomic_fetch_dec
-static inline int
+static __always_inline int
 atomic_fetch_dec(atomic_t *v)
 {
        return atomic_fetch_sub(1, v);
@@ -539,7 +541,7 @@ atomic_fetch_dec(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec_acquire
-static inline int
+static __always_inline int
 atomic_fetch_dec_acquire(atomic_t *v)
 {
        return atomic_fetch_sub_acquire(1, v);
@@ -548,7 +550,7 @@ atomic_fetch_dec_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec_release
-static inline int
+static __always_inline int
 atomic_fetch_dec_release(atomic_t *v)
 {
        return atomic_fetch_sub_release(1, v);
@@ -557,7 +559,7 @@ atomic_fetch_dec_release(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec_relaxed
-static inline int
+static __always_inline int
 atomic_fetch_dec_relaxed(atomic_t *v)
 {
        return atomic_fetch_sub_relaxed(1, v);
@@ -568,7 +570,7 @@ atomic_fetch_dec_relaxed(atomic_t *v)
 #else /* atomic_fetch_dec_relaxed */
 
 #ifndef atomic_fetch_dec_acquire
-static inline int
+static __always_inline int
 atomic_fetch_dec_acquire(atomic_t *v)
 {
        int ret = atomic_fetch_dec_relaxed(v);
@@ -579,7 +581,7 @@ atomic_fetch_dec_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec_release
-static inline int
+static __always_inline int
 atomic_fetch_dec_release(atomic_t *v)
 {
        __atomic_release_fence();
@@ -589,7 +591,7 @@ atomic_fetch_dec_release(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec
-static inline int
+static __always_inline int
 atomic_fetch_dec(atomic_t *v)
 {
        int ret;
@@ -610,7 +612,7 @@ atomic_fetch_dec(atomic_t *v)
 #else /* atomic_fetch_and_relaxed */
 
 #ifndef atomic_fetch_and_acquire
-static inline int
+static __always_inline int
 atomic_fetch_and_acquire(int i, atomic_t *v)
 {
        int ret = atomic_fetch_and_relaxed(i, v);
@@ -621,7 +623,7 @@ atomic_fetch_and_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_and_release
-static inline int
+static __always_inline int
 atomic_fetch_and_release(int i, atomic_t *v)
 {
        __atomic_release_fence();
@@ -631,7 +633,7 @@ atomic_fetch_and_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_and
-static inline int
+static __always_inline int
 atomic_fetch_and(int i, atomic_t *v)
 {
        int ret;
@@ -646,7 +648,7 @@ atomic_fetch_and(int i, atomic_t *v)
 #endif /* atomic_fetch_and_relaxed */
 
 #ifndef atomic_andnot
-static inline void
+static __always_inline void
 atomic_andnot(int i, atomic_t *v)
 {
        atomic_and(~i, v);
@@ -662,7 +664,7 @@ atomic_andnot(int i, atomic_t *v)
 #endif /* atomic_fetch_andnot */
 
 #ifndef atomic_fetch_andnot
-static inline int
+static __always_inline int
 atomic_fetch_andnot(int i, atomic_t *v)
 {
        return atomic_fetch_and(~i, v);
@@ -671,7 +673,7 @@ atomic_fetch_andnot(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot_acquire
-static inline int
+static __always_inline int
 atomic_fetch_andnot_acquire(int i, atomic_t *v)
 {
        return atomic_fetch_and_acquire(~i, v);
@@ -680,7 +682,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot_release
-static inline int
+static __always_inline int
 atomic_fetch_andnot_release(int i, atomic_t *v)
 {
        return atomic_fetch_and_release(~i, v);
@@ -689,7 +691,7 @@ atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot_relaxed
-static inline int
+static __always_inline int
 atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 {
        return atomic_fetch_and_relaxed(~i, v);
@@ -700,7 +702,7 @@ atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 #else /* atomic_fetch_andnot_relaxed */
 
 #ifndef atomic_fetch_andnot_acquire
-static inline int
+static __always_inline int
 atomic_fetch_andnot_acquire(int i, atomic_t *v)
 {
        int ret = atomic_fetch_andnot_relaxed(i, v);
@@ -711,7 +713,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot_release
-static inline int
+static __always_inline int
 atomic_fetch_andnot_release(int i, atomic_t *v)
 {
        __atomic_release_fence();
@@ -721,7 +723,7 @@ atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot
-static inline int
+static __always_inline int
 atomic_fetch_andnot(int i, atomic_t *v)
 {
        int ret;
@@ -742,7 +744,7 @@ atomic_fetch_andnot(int i, atomic_t *v)
 #else /* atomic_fetch_or_relaxed */
 
 #ifndef atomic_fetch_or_acquire
-static inline int
+static __always_inline int
 atomic_fetch_or_acquire(int i, atomic_t *v)
 {
        int ret = atomic_fetch_or_relaxed(i, v);
@@ -753,7 +755,7 @@ atomic_fetch_or_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_or_release
-static inline int
+static __always_inline int
 atomic_fetch_or_release(int i, atomic_t *v)
 {
        __atomic_release_fence();
@@ -763,7 +765,7 @@ atomic_fetch_or_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_or
-static inline int
+static __always_inline int
 atomic_fetch_or(int i, atomic_t *v)
 {
        int ret;
@@ -784,7 +786,7 @@ atomic_fetch_or(int i, atomic_t *v)
 #else /* atomic_fetch_xor_relaxed */
 
 #ifndef atomic_fetch_xor_acquire
-static inline int
+static __always_inline int
 atomic_fetch_xor_acquire(int i, atomic_t *v)
 {
        int ret = atomic_fetch_xor_relaxed(i, v);
@@ -795,7 +797,7 @@ atomic_fetch_xor_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_xor_release
-static inline int
+static __always_inline int
 atomic_fetch_xor_release(int i, atomic_t *v)
 {
        __atomic_release_fence();
@@ -805,7 +807,7 @@ atomic_fetch_xor_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_xor
-static inline int
+static __always_inline int
 atomic_fetch_xor(int i, atomic_t *v)
 {
        int ret;
@@ -826,7 +828,7 @@ atomic_fetch_xor(int i, atomic_t *v)
 #else /* atomic_xchg_relaxed */
 
 #ifndef atomic_xchg_acquire
-static inline int
+static __always_inline int
 atomic_xchg_acquire(atomic_t *v, int i)
 {
        int ret = atomic_xchg_relaxed(v, i);
@@ -837,7 +839,7 @@ atomic_xchg_acquire(atomic_t *v, int i)
 #endif
 
 #ifndef atomic_xchg_release
-static inline int
+static __always_inline int
 atomic_xchg_release(atomic_t *v, int i)
 {
        __atomic_release_fence();
@@ -847,7 +849,7 @@ atomic_xchg_release(atomic_t *v, int i)
 #endif
 
 #ifndef atomic_xchg
-static inline int
+static __always_inline int
 atomic_xchg(atomic_t *v, int i)
 {
        int ret;
@@ -868,7 +870,7 @@ atomic_xchg(atomic_t *v, int i)
 #else /* atomic_cmpxchg_relaxed */
 
 #ifndef atomic_cmpxchg_acquire
-static inline int
+static __always_inline int
 atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 {
        int ret = atomic_cmpxchg_relaxed(v, old, new);
@@ -879,7 +881,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 #endif
 
 #ifndef atomic_cmpxchg_release
-static inline int
+static __always_inline int
 atomic_cmpxchg_release(atomic_t *v, int old, int new)
 {
        __atomic_release_fence();
@@ -889,7 +891,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new)
 #endif
 
 #ifndef atomic_cmpxchg
-static inline int
+static __always_inline int
 atomic_cmpxchg(atomic_t *v, int old, int new)
 {
        int ret;
@@ -911,7 +913,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new)
 #endif /* atomic_try_cmpxchg */
 
 #ifndef atomic_try_cmpxchg
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
        int r, o = *old;
@@ -924,7 +926,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg_acquire
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 {
        int r, o = *old;
@@ -937,7 +939,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg_release
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 {
        int r, o = *old;
@@ -950,7 +952,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg_relaxed
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 {
        int r, o = *old;
@@ -965,7 +967,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 #else /* atomic_try_cmpxchg_relaxed */
 
 #ifndef atomic_try_cmpxchg_acquire
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 {
        bool ret = atomic_try_cmpxchg_relaxed(v, old, new);
@@ -976,7 +978,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg_release
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 {
        __atomic_release_fence();
@@ -986,7 +988,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
        bool ret;
@@ -1010,7 +1012,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 atomic_sub_and_test(int i, atomic_t *v)
 {
        return atomic_sub_return(i, v) == 0;
@@ -1027,7 +1029,7 @@ atomic_sub_and_test(int i, atomic_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline bool
+static __always_inline bool
 atomic_dec_and_test(atomic_t *v)
 {
        return atomic_dec_return(v) == 0;
@@ -1044,7 +1046,7 @@ atomic_dec_and_test(atomic_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 atomic_inc_and_test(atomic_t *v)
 {
        return atomic_inc_return(v) == 0;
@@ -1062,7 +1064,7 @@ atomic_inc_and_test(atomic_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline bool
+static __always_inline bool
 atomic_add_negative(int i, atomic_t *v)
 {
        return atomic_add_return(i, v) < 0;
@@ -1080,7 +1082,7 @@ atomic_add_negative(int i, atomic_t *v)
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns original value of @v
  */
-static inline int
+static __always_inline int
 atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
        int c = atomic_read(v);
@@ -1105,7 +1107,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u)
  * Atomically adds @a to @v, if @v was not already @u.
  * Returns true if the addition was done.
  */
-static inline bool
+static __always_inline bool
 atomic_add_unless(atomic_t *v, int a, int u)
 {
        return atomic_fetch_add_unless(v, a, u) != u;
@@ -1121,7 +1123,7 @@ atomic_add_unless(atomic_t *v, int a, int u)
  * Atomically increments @v by 1, if @v is non-zero.
  * Returns true if the increment was done.
  */
-static inline bool
+static __always_inline bool
 atomic_inc_not_zero(atomic_t *v)
 {
        return atomic_add_unless(v, 1, 0);
@@ -1130,7 +1132,7 @@ atomic_inc_not_zero(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_unless_negative
-static inline bool
+static __always_inline bool
 atomic_inc_unless_negative(atomic_t *v)
 {
        int c = atomic_read(v);
@@ -1146,7 +1148,7 @@ atomic_inc_unless_negative(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_unless_positive
-static inline bool
+static __always_inline bool
 atomic_dec_unless_positive(atomic_t *v)
 {
        int c = atomic_read(v);
@@ -1162,7 +1164,7 @@ atomic_dec_unless_positive(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_if_positive
-static inline int
+static __always_inline int
 atomic_dec_if_positive(atomic_t *v)
 {
        int dec, c = atomic_read(v);
@@ -1178,15 +1180,12 @@ atomic_dec_if_positive(atomic_t *v)
 #define atomic_dec_if_positive atomic_dec_if_positive
 #endif
 
-#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
-#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
-
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
 
 #ifndef atomic64_read_acquire
-static inline s64
+static __always_inline s64
 atomic64_read_acquire(const atomic64_t *v)
 {
        return smp_load_acquire(&(v)->counter);
@@ -1195,7 +1194,7 @@ atomic64_read_acquire(const atomic64_t *v)
 #endif
 
 #ifndef atomic64_set_release
-static inline void
+static __always_inline void
 atomic64_set_release(atomic64_t *v, s64 i)
 {
        smp_store_release(&(v)->counter, i);
@@ -1210,7 +1209,7 @@ atomic64_set_release(atomic64_t *v, s64 i)
 #else /* atomic64_add_return_relaxed */
 
 #ifndef atomic64_add_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_add_return_acquire(s64 i, atomic64_t *v)
 {
        s64 ret = atomic64_add_return_relaxed(i, v);
@@ -1221,7 +1220,7 @@ atomic64_add_return_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_add_return_release
-static inline s64
+static __always_inline s64
 atomic64_add_return_release(s64 i, atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1231,7 +1230,7 @@ atomic64_add_return_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_add_return
-static inline s64
+static __always_inline s64
 atomic64_add_return(s64 i, atomic64_t *v)
 {
        s64 ret;
@@ -1252,7 +1251,7 @@ atomic64_add_return(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_add_relaxed */
 
 #ifndef atomic64_fetch_add_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 {
        s64 ret = atomic64_fetch_add_relaxed(i, v);
@@ -1263,7 +1262,7 @@ atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_add_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_release(s64 i, atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1273,7 +1272,7 @@ atomic64_fetch_add_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_add
-static inline s64
+static __always_inline s64
 atomic64_fetch_add(s64 i, atomic64_t *v)
 {
        s64 ret;
@@ -1294,7 +1293,7 @@ atomic64_fetch_add(s64 i, atomic64_t *v)
 #else /* atomic64_sub_return_relaxed */
 
 #ifndef atomic64_sub_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 {
        s64 ret = atomic64_sub_return_relaxed(i, v);
@@ -1305,7 +1304,7 @@ atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_sub_return_release
-static inline s64
+static __always_inline s64
 atomic64_sub_return_release(s64 i, atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1315,7 +1314,7 @@ atomic64_sub_return_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_sub_return
-static inline s64
+static __always_inline s64
 atomic64_sub_return(s64 i, atomic64_t *v)
 {
        s64 ret;
@@ -1336,7 +1335,7 @@ atomic64_sub_return(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_sub_relaxed */
 
 #ifndef atomic64_fetch_sub_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 {
        s64 ret = atomic64_fetch_sub_relaxed(i, v);
@@ -1347,7 +1346,7 @@ atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_sub_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1357,7 +1356,7 @@ atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_sub
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
        s64 ret;
@@ -1372,7 +1371,7 @@ atomic64_fetch_sub(s64 i, atomic64_t *v)
 #endif /* atomic64_fetch_sub_relaxed */
 
 #ifndef atomic64_inc
-static inline void
+static __always_inline void
 atomic64_inc(atomic64_t *v)
 {
        atomic64_add(1, v);
@@ -1388,7 +1387,7 @@ atomic64_inc(atomic64_t *v)
 #endif /* atomic64_inc_return */
 
 #ifndef atomic64_inc_return
-static inline s64
+static __always_inline s64
 atomic64_inc_return(atomic64_t *v)
 {
        return atomic64_add_return(1, v);
@@ -1397,7 +1396,7 @@ atomic64_inc_return(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_inc_return_acquire(atomic64_t *v)
 {
        return atomic64_add_return_acquire(1, v);
@@ -1406,7 +1405,7 @@ atomic64_inc_return_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return_release
-static inline s64
+static __always_inline s64
 atomic64_inc_return_release(atomic64_t *v)
 {
        return atomic64_add_return_release(1, v);
@@ -1415,7 +1414,7 @@ atomic64_inc_return_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return_relaxed
-static inline s64
+static __always_inline s64
 atomic64_inc_return_relaxed(atomic64_t *v)
 {
        return atomic64_add_return_relaxed(1, v);
@@ -1426,7 +1425,7 @@ atomic64_inc_return_relaxed(atomic64_t *v)
 #else /* atomic64_inc_return_relaxed */
 
 #ifndef atomic64_inc_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_inc_return_acquire(atomic64_t *v)
 {
        s64 ret = atomic64_inc_return_relaxed(v);
@@ -1437,7 +1436,7 @@ atomic64_inc_return_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return_release
-static inline s64
+static __always_inline s64
 atomic64_inc_return_release(atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1447,7 +1446,7 @@ atomic64_inc_return_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return
-static inline s64
+static __always_inline s64
 atomic64_inc_return(atomic64_t *v)
 {
        s64 ret;
@@ -1469,7 +1468,7 @@ atomic64_inc_return(atomic64_t *v)
 #endif /* atomic64_fetch_inc */
 
 #ifndef atomic64_fetch_inc
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc(atomic64_t *v)
 {
        return atomic64_fetch_add(1, v);
@@ -1478,7 +1477,7 @@ atomic64_fetch_inc(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_acquire(atomic64_t *v)
 {
        return atomic64_fetch_add_acquire(1, v);
@@ -1487,7 +1486,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_release(atomic64_t *v)
 {
        return atomic64_fetch_add_release(1, v);
@@ -1496,7 +1495,7 @@ atomic64_fetch_inc_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc_relaxed
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_relaxed(atomic64_t *v)
 {
        return atomic64_fetch_add_relaxed(1, v);
@@ -1507,7 +1506,7 @@ atomic64_fetch_inc_relaxed(atomic64_t *v)
 #else /* atomic64_fetch_inc_relaxed */
 
 #ifndef atomic64_fetch_inc_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_acquire(atomic64_t *v)
 {
        s64 ret = atomic64_fetch_inc_relaxed(v);
@@ -1518,7 +1517,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_release(atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1528,7 +1527,7 @@ atomic64_fetch_inc_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc(atomic64_t *v)
 {
        s64 ret;
@@ -1543,7 +1542,7 @@ atomic64_fetch_inc(atomic64_t *v)
 #endif /* atomic64_fetch_inc_relaxed */
 
 #ifndef atomic64_dec
-static inline void
+static __always_inline void
 atomic64_dec(atomic64_t *v)
 {
        atomic64_sub(1, v);
@@ -1559,7 +1558,7 @@ atomic64_dec(atomic64_t *v)
 #endif /* atomic64_dec_return */
 
 #ifndef atomic64_dec_return
-static inline s64
+static __always_inline s64
 atomic64_dec_return(atomic64_t *v)
 {
        return atomic64_sub_return(1, v);
@@ -1568,7 +1567,7 @@ atomic64_dec_return(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_dec_return_acquire(atomic64_t *v)
 {
        return atomic64_sub_return_acquire(1, v);
@@ -1577,7 +1576,7 @@ atomic64_dec_return_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return_release
-static inline s64
+static __always_inline s64
 atomic64_dec_return_release(atomic64_t *v)
 {
        return atomic64_sub_return_release(1, v);
@@ -1586,7 +1585,7 @@ atomic64_dec_return_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return_relaxed
-static inline s64
+static __always_inline s64
 atomic64_dec_return_relaxed(atomic64_t *v)
 {
        return atomic64_sub_return_relaxed(1, v);
@@ -1597,7 +1596,7 @@ atomic64_dec_return_relaxed(atomic64_t *v)
 #else /* atomic64_dec_return_relaxed */
 
 #ifndef atomic64_dec_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_dec_return_acquire(atomic64_t *v)
 {
        s64 ret = atomic64_dec_return_relaxed(v);
@@ -1608,7 +1607,7 @@ atomic64_dec_return_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return_release
-static inline s64
+static __always_inline s64
 atomic64_dec_return_release(atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1618,7 +1617,7 @@ atomic64_dec_return_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return
-static inline s64
+static __always_inline s64
 atomic64_dec_return(atomic64_t *v)
 {
        s64 ret;
@@ -1640,7 +1639,7 @@ atomic64_dec_return(atomic64_t *v)
 #endif /* atomic64_fetch_dec */
 
 #ifndef atomic64_fetch_dec
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec(atomic64_t *v)
 {
        return atomic64_fetch_sub(1, v);
@@ -1649,7 +1648,7 @@ atomic64_fetch_dec(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_acquire(atomic64_t *v)
 {
        return atomic64_fetch_sub_acquire(1, v);
@@ -1658,7 +1657,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_release(atomic64_t *v)
 {
        return atomic64_fetch_sub_release(1, v);
@@ -1667,7 +1666,7 @@ atomic64_fetch_dec_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec_relaxed
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_relaxed(atomic64_t *v)
 {
        return atomic64_fetch_sub_relaxed(1, v);
@@ -1678,7 +1677,7 @@ atomic64_fetch_dec_relaxed(atomic64_t *v)
 #else /* atomic64_fetch_dec_relaxed */
 
 #ifndef atomic64_fetch_dec_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_acquire(atomic64_t *v)
 {
        s64 ret = atomic64_fetch_dec_relaxed(v);
@@ -1689,7 +1688,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_release(atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1699,7 +1698,7 @@ atomic64_fetch_dec_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec(atomic64_t *v)
 {
        s64 ret;
@@ -1720,7 +1719,7 @@ atomic64_fetch_dec(atomic64_t *v)
 #else /* atomic64_fetch_and_relaxed */
 
 #ifndef atomic64_fetch_and_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 {
        s64 ret = atomic64_fetch_and_relaxed(i, v);
@@ -1731,7 +1730,7 @@ atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_and_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_release(s64 i, atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1741,7 +1740,7 @@ atomic64_fetch_and_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_and
-static inline s64
+static __always_inline s64
 atomic64_fetch_and(s64 i, atomic64_t *v)
 {
        s64 ret;
@@ -1756,7 +1755,7 @@ atomic64_fetch_and(s64 i, atomic64_t *v)
 #endif /* atomic64_fetch_and_relaxed */
 
 #ifndef atomic64_andnot
-static inline void
+static __always_inline void
 atomic64_andnot(s64 i, atomic64_t *v)
 {
        atomic64_and(~i, v);
@@ -1772,7 +1771,7 @@ atomic64_andnot(s64 i, atomic64_t *v)
 #endif /* atomic64_fetch_andnot */
 
 #ifndef atomic64_fetch_andnot
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
        return atomic64_fetch_and(~i, v);
@@ -1781,7 +1780,7 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 {
        return atomic64_fetch_and_acquire(~i, v);
@@ -1790,7 +1789,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
        return atomic64_fetch_and_release(~i, v);
@@ -1799,7 +1798,7 @@ atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot_relaxed
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 {
        return atomic64_fetch_and_relaxed(~i, v);
@@ -1810,7 +1809,7 @@ atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_andnot_relaxed */
 
 #ifndef atomic64_fetch_andnot_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 {
        s64 ret = atomic64_fetch_andnot_relaxed(i, v);
@@ -1821,7 +1820,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1831,7 +1830,7 @@ atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
        s64 ret;
@@ -1852,7 +1851,7 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_or_relaxed */
 
 #ifndef atomic64_fetch_or_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 {
        s64 ret = atomic64_fetch_or_relaxed(i, v);
@@ -1863,7 +1862,7 @@ atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_or_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_release(s64 i, atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1873,7 +1872,7 @@ atomic64_fetch_or_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_or
-static inline s64
+static __always_inline s64
 atomic64_fetch_or(s64 i, atomic64_t *v)
 {
        s64 ret;
@@ -1894,7 +1893,7 @@ atomic64_fetch_or(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_xor_relaxed */
 
 #ifndef atomic64_fetch_xor_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 {
        s64 ret = atomic64_fetch_xor_relaxed(i, v);
@@ -1905,7 +1904,7 @@ atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_xor_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 {
        __atomic_release_fence();
@@ -1915,7 +1914,7 @@ atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_xor
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor(s64 i, atomic64_t *v)
 {
        s64 ret;
@@ -1936,7 +1935,7 @@ atomic64_fetch_xor(s64 i, atomic64_t *v)
 #else /* atomic64_xchg_relaxed */
 
 #ifndef atomic64_xchg_acquire
-static inline s64
+static __always_inline s64
 atomic64_xchg_acquire(atomic64_t *v, s64 i)
 {
        s64 ret = atomic64_xchg_relaxed(v, i);
@@ -1947,7 +1946,7 @@ atomic64_xchg_acquire(atomic64_t *v, s64 i)
 #endif
 
 #ifndef atomic64_xchg_release
-static inline s64
+static __always_inline s64
 atomic64_xchg_release(atomic64_t *v, s64 i)
 {
        __atomic_release_fence();
@@ -1957,7 +1956,7 @@ atomic64_xchg_release(atomic64_t *v, s64 i)
 #endif
 
 #ifndef atomic64_xchg
-static inline s64
+static __always_inline s64
 atomic64_xchg(atomic64_t *v, s64 i)
 {
        s64 ret;
@@ -1978,7 +1977,7 @@ atomic64_xchg(atomic64_t *v, s64 i)
 #else /* atomic64_cmpxchg_relaxed */
 
 #ifndef atomic64_cmpxchg_acquire
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 {
        s64 ret = atomic64_cmpxchg_relaxed(v, old, new);
@@ -1989,7 +1988,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 #endif
 
 #ifndef atomic64_cmpxchg_release
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 {
        __atomic_release_fence();
@@ -1999,7 +1998,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 #endif
 
 #ifndef atomic64_cmpxchg
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
        s64 ret;
@@ -2021,7 +2020,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 #endif /* atomic64_try_cmpxchg */
 
 #ifndef atomic64_try_cmpxchg
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
        s64 r, o = *old;
@@ -2034,7 +2033,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg_acquire
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 {
        s64 r, o = *old;
@@ -2047,7 +2046,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg_release
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
        s64 r, o = *old;
@@ -2060,7 +2059,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg_relaxed
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 {
        s64 r, o = *old;
@@ -2075,7 +2074,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 #else /* atomic64_try_cmpxchg_relaxed */
 
 #ifndef atomic64_try_cmpxchg_acquire
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 {
        bool ret = atomic64_try_cmpxchg_relaxed(v, old, new);
@@ -2086,7 +2085,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg_release
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
        __atomic_release_fence();
@@ -2096,7 +2095,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
        bool ret;
@@ -2120,7 +2119,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
        return atomic64_sub_return(i, v) == 0;
@@ -2137,7 +2136,7 @@ atomic64_sub_and_test(s64 i, atomic64_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline bool
+static __always_inline bool
 atomic64_dec_and_test(atomic64_t *v)
 {
        return atomic64_dec_return(v) == 0;
@@ -2154,7 +2153,7 @@ atomic64_dec_and_test(atomic64_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 atomic64_inc_and_test(atomic64_t *v)
 {
        return atomic64_inc_return(v) == 0;
@@ -2172,7 +2171,7 @@ atomic64_inc_and_test(atomic64_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline bool
+static __always_inline bool
 atomic64_add_negative(s64 i, atomic64_t *v)
 {
        return atomic64_add_return(i, v) < 0;
@@ -2190,7 +2189,7 @@ atomic64_add_negative(s64 i, atomic64_t *v)
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns original value of @v
  */
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
        s64 c = atomic64_read(v);
@@ -2215,7 +2214,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
  * Atomically adds @a to @v, if @v was not already @u.
  * Returns true if the addition was done.
  */
-static inline bool
+static __always_inline bool
 atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
        return atomic64_fetch_add_unless(v, a, u) != u;
@@ -2231,7 +2230,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
  * Atomically increments @v by 1, if @v is non-zero.
  * Returns true if the increment was done.
  */
-static inline bool
+static __always_inline bool
 atomic64_inc_not_zero(atomic64_t *v)
 {
        return atomic64_add_unless(v, 1, 0);
@@ -2240,7 +2239,7 @@ atomic64_inc_not_zero(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_unless_negative
-static inline bool
+static __always_inline bool
 atomic64_inc_unless_negative(atomic64_t *v)
 {
        s64 c = atomic64_read(v);
@@ -2256,7 +2255,7 @@ atomic64_inc_unless_negative(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_unless_positive
-static inline bool
+static __always_inline bool
 atomic64_dec_unless_positive(atomic64_t *v)
 {
        s64 c = atomic64_read(v);
@@ -2272,7 +2271,7 @@ atomic64_dec_unless_positive(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_if_positive
-static inline s64
+static __always_inline s64
 atomic64_dec_if_positive(atomic64_t *v)
 {
        s64 dec, c = atomic64_read(v);
@@ -2288,8 +2287,5 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define atomic64_dec_if_positive atomic64_dec_if_positive
 #endif
 
-#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
-#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
-
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 25de4a2804d70f57e994fe3b419148658bb5378a
+// 1fac0941c79bf0ae100723cc2ac9b94061f0b67a
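
All of the hunks above are one mechanical change: each generated fallback moves
from a plain 'static inline' hint to '__always_inline'. A plausible reading is
that, once the fallbacks sit underneath instrumented wrappers, an out-of-line
copy emitted by the compiler would itself be instrumented and traced, which
callers such as uaccess or noinstr code do not expect. A minimal user-space
sketch of the difference between the two annotations (the helper names are
illustrative, not from this diff):

/* 'inline' is only a hint that the compiler may ignore (for example at
 * -O0 or under heavy instrumentation), whereas the always_inline
 * attribute is honoured; __always_inline in the kernel expands to
 * roughly the macro below. */
#define my_always_inline inline __attribute__((__always_inline__))

static inline int hinted_add(int a, int b) { return a + b; }
static my_always_inline int forced_add(int a, int b) { return a + b; }

int compute(int x)
{
	/* At -O0, hinted_add() typically remains a real call that a
	 * sanitizer could instrument; forced_add() is always folded into
	 * compute(), regardless of optimization level. */
	return hinted_add(x, 1) + forced_add(x, 1);
}
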
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 4c0d009..571a110 100644
  * See Documentation/memory-barriers.txt for ACQUIRE/RELEASE definitions.
  */
 
+#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
+
+#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
+
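
These helpers wrap smp_cond_load_acquire()/smp_cond_load_relaxed(), so a caller
can spin on an atomic_t until a condition on the loaded value, referred to as
VAL inside the condition, becomes true. A minimal sketch of typical use (the
function below is illustrative, not from this diff):

#include <linux/atomic.h>

/* Wait until *flag becomes non-zero; the load that satisfies the
 * condition has acquire semantics, ordering later reads after it. */
static int wait_for_ready(atomic_t *flag)
{
	return atomic_cond_read_acquire(flag, VAL != 0);
}
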
 /*
  * The idea here is to build acquire/release variants by adding explicit
  * barriers on top of the relaxed variant. In the case where the relaxed
        __ret;                                                          \
 })
 
+#ifdef ARCH_ATOMIC
+#include <linux/atomic-arch-fallback.h>
+#include <asm-generic/atomic-instrumented.h>
+#else
 #include <linux/atomic-fallback.h>
+#endif
 
 #include <asm-generic/atomic-long.h>
 
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 790c0c6..ee37256 100644
@@ -16,7 +16,7 @@
 #define KASAN_ABI_VERSION 5
 
 #if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer)
-/* emulate gcc's __SANITIZE_ADDRESS__ flag */
+/* Emulate GCC's __SANITIZE_ADDRESS__ flag */
 #define __SANITIZE_ADDRESS__
 #define __no_sanitize_address \
                __attribute__((no_sanitize("address", "hwaddress")))
 #define __no_sanitize_address
 #endif
 
+#if __has_feature(thread_sanitizer)
+/* Emulate GCC's __SANITIZE_THREAD__ flag */
+#define __SANITIZE_THREAD__
+#define __no_sanitize_thread \
+               __attribute__((no_sanitize("thread")))
+#else
+#define __no_sanitize_thread
+#endif
+
 /*
  * Not all versions of clang implement the type-generic versions
  * of the builtin overflow checkers. Fortunately, clang implements
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e2f7252..7dd4e03 100644
 #define __no_sanitize_address
 #endif
 
+#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__)
+#define __no_sanitize_thread __attribute__((no_sanitize_thread))
+#else
+#define __no_sanitize_thread
+#endif
+
 #if GCC_VERSION >= 50100
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 33d3a2e..30827f8 100644
@@ -250,6 +250,27 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  */
 #include <asm/barrier.h>
 #include <linux/kasan-checks.h>
+#include <linux/kcsan-checks.h>
+
+/**
+ * data_race - mark an expression as containing intentional data races
+ *
+ * This data_race() macro is useful for situations in which data races
+ * should be forgiven.  One example is diagnostic code that accesses
+ * shared variables but is not a part of the core synchronization design.
+ *
+ * This macro *does not* affect normal code generation, but is a hint
+ * to tooling that data races here are to be ignored.
+ */
+#define data_race(expr)                                                        \
+({                                                                     \
+       __unqual_scalar_typeof(({ expr; })) __v = ({                    \
+               __kcsan_disable_current();                              \
+               expr;                                                   \
+       });                                                             \
+       __kcsan_enable_current();                                       \
+       __v;                                                            \
+})
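
data_race() only documents the race for tooling; it is not a replacement for
READ_ONCE()/WRITE_ONCE() where load or store tearing must be prevented. A
minimal sketch of the intended use described in the comment above (the function
is illustrative, not from this diff):

#include <linux/compiler.h>

/* A diagnostic read that is racy by design: the race is forgiven by
 * KCSAN, but the code is generated exactly as written. */
static unsigned long snapshot_count(const unsigned long *counter)
{
	return data_race(*counter);
}
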
 
 /*
  * Use __READ_ONCE() instead of READ_ONCE() if you do not require any
@@ -271,30 +292,18 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
        __READ_ONCE_SCALAR(x);                                          \
 })
 
-#define __WRITE_ONCE(x, val)                           \
-do {                                                   \
-       *(volatile typeof(x) *)&(x) = (val);            \
+#define __WRITE_ONCE(x, val)                                           \
+do {                                                                   \
+       *(volatile typeof(x) *)&(x) = (val);                            \
 } while (0)
 
-#define WRITE_ONCE(x, val)                             \
-do {                                                   \
-       compiletime_assert_rwonce_type(x);              \
-       __WRITE_ONCE(x, val);                           \
+#define WRITE_ONCE(x, val)                                             \
+do {                                                                   \
+       compiletime_assert_rwonce_type(x);                              \
+       __WRITE_ONCE(x, val);                                           \
 } while (0)
 
-#ifdef CONFIG_KASAN
-/*
- * We can't declare function 'inline' because __no_sanitize_address conflicts
- * with inlining. Attempt to inline it may cause a build failure.
- *     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
- */
-# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
-#else
-# define __no_kasan_or_inline __always_inline
-#endif
-
-static __no_kasan_or_inline
+static __no_sanitize_or_inline
 unsigned long __read_once_word_nocheck(const void *addr)
 {
        return __READ_ONCE(*(unsigned long *)addr);
@@ -302,8 +311,8 @@ unsigned long __read_once_word_nocheck(const void *addr)
 
 /*
  * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a
- * word from memory atomically but without telling KASAN. This is usually
- * used by unwinding code when walking the stack of a running process.
+ * word from memory atomically but without telling KASAN/KCSAN. This is
+ * usually used by unwinding code when walking the stack of a running process.
  */
 #define READ_ONCE_NOCHECK(x)                                           \
 ({                                                                     \
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index d4e1956..21aed09 100644
@@ -171,6 +171,38 @@ struct ftrace_likely_data {
  */
 #define noinline_for_stack noinline
 
+/*
+ * Sanitizer helper attributes: Because __always_inline and
+ * __no_sanitize_* conflict, provide helper attributes that will either expand
+ * to __no_sanitize_* in compilation units where instrumentation is enabled
+ * (__SANITIZE_*__), or __always_inline in compilation units without
+ * instrumentation (__SANITIZE_*__ undefined).
+ */
+#ifdef __SANITIZE_ADDRESS__
+/*
+ * We can't declare the function 'inline' because __no_sanitize_address
+ * conflicts with inlining. Attempting to inline it may cause a build failure.
+ *     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
+ * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
+ */
+# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
+# define __no_sanitize_or_inline __no_kasan_or_inline
+#else
+# define __no_kasan_or_inline __always_inline
+#endif
+
+#define __no_kcsan __no_sanitize_thread
+#ifdef __SANITIZE_THREAD__
+# define __no_kcsan_or_inline __no_kcsan notrace __maybe_unused
+# define __no_sanitize_or_inline __no_kcsan_or_inline
+#else
+# define __no_kcsan_or_inline __always_inline
+#endif
+
+#ifndef __no_sanitize_or_inline
+#define __no_sanitize_or_inline __always_inline
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
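
A short sketch of how the new attributes are meant to be applied: a function
whose races are intentional can opt out of KCSAN instrumentation entirely (the
function and its race are illustrative assumptions):

#include <linux/compiler.h>

/* __no_kcsan always expands to __no_sanitize_thread; the *_or_inline
 * variants additionally fall back to __always_inline when the
 * compilation unit is built without the corresponding sanitizer. */
static __no_kcsan void stats_fold(unsigned long *dst, unsigned long *src)
{
	*dst += *src;	/* racy by design; not reported by KCSAN */
	*src = 0;
}
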
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8377afe..191772d 100644
@@ -102,6 +102,7 @@ enum cpuhp_state {
        CPUHP_AP_IRQ_ARMADA_XP_STARTING,
        CPUHP_AP_IRQ_BCM2836_STARTING,
        CPUHP_AP_IRQ_MIPS_GIC_STARTING,
+       CPUHP_AP_IRQ_RISCV_STARTING,
        CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
        CPUHP_AP_ARM_MVEBU_COHERENCY,
        CPUHP_AP_MICROCODE_LOADER,
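
The new CPUHP_AP_IRQ_RISCV_STARTING state gives a RISC-V interrupt controller
driver a per-CPU startup/teardown hook. A hedged sketch of how such a state is
typically registered; the callback names and the state string are assumptions,
not taken from this merge:

#include <linux/cpuhotplug.h>
#include <linux/init.h>

static int my_intc_starting_cpu(unsigned int cpu)
{
	/* enable interrupt delivery on this CPU */
	return 0;
}

static int my_intc_dying_cpu(unsigned int cpu)
{
	/* mask interrupt delivery before the CPU goes offline */
	return 0;
}

static int __init my_intc_init(void)
{
	return cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_STARTING,
				 "irqchip/riscv/intc:starting",
				 my_intc_starting_cpu, my_intc_dying_cpu);
}
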
diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h
new file mode 100644
index 0000000..43e6ea5
--- /dev/null
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This header provides generic wrappers for memory access instrumentation that
+ * the compiler cannot emit for: KASAN, KCSAN.
+ */
+#ifndef _LINUX_INSTRUMENTED_H
+#define _LINUX_INSTRUMENTED_H
+
+#include <linux/compiler.h>
+#include <linux/kasan-checks.h>
+#include <linux/kcsan-checks.h>
+#include <linux/types.h>
+
+/**
+ * instrument_read - instrument regular read access
+ *
+ * Instrument a regular read access. The instrumentation should be inserted
+ * before the actual read happens.
+ *
+ * @v: address of access
+ * @size: size of access
+ */
+static __always_inline void instrument_read(const volatile void *v, size_t size)
+{
+       kasan_check_read(v, size);
+       kcsan_check_read(v, size);
+}
+
+/**
+ * instrument_write - instrument regular write access
+ *
+ * Instrument a regular write access. The instrumentation should be inserted
+ * before the actual write happens.
+ *
+ * @v: address of access
+ * @size: size of access
+ */
+static __always_inline void instrument_write(const volatile void *v, size_t size)
+{
+       kasan_check_write(v, size);
+       kcsan_check_write(v, size);
+}
+
+/**
+ * instrument_atomic_read - instrument atomic read access
+ *
+ * Instrument an atomic read access. The instrumentation should be inserted
+ * before the actual read happens.
+ *
+ * @v: address of access
+ * @size: size of access
+ */
+static __always_inline void instrument_atomic_read(const volatile void *v, size_t size)
+{
+       kasan_check_read(v, size);
+       kcsan_check_atomic_read(v, size);
+}
+
+/**
+ * instrument_atomic_write - instrument atomic write access
+ *
+ * Instrument an atomic write access. The instrumentation should be inserted
+ * before the actual write happens.
+ *
+ * @v: address of access
+ * @size: size of access
+ */
+static __always_inline void instrument_atomic_write(const volatile void *v, size_t size)
+{
+       kasan_check_write(v, size);
+       kcsan_check_atomic_write(v, size);
+}
+
+/**
+ * instrument_copy_to_user - instrument reads of copy_to_user
+ *
+ * Instrument reads from kernel memory that are due to copy_to_user() (and
+ * variants). The instrumentation must be inserted before the accesses.
+ *
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ */
+static __always_inline void
+instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+       kasan_check_read(from, n);
+       kcsan_check_read(from, n);
+}
+
+/**
+ * instrument_copy_from_user - instrument writes of copy_from_user
+ *
+ * Instrument writes to kernel memory that are due to copy_from_user() (and
+ * variants). The instrumentation should be inserted before the accesses.
+ *
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ */
+static __always_inline void
+instrument_copy_from_user(const void *to, const void __user *from, unsigned long n)
+{
+       kasan_check_write(to, n);
+       kcsan_check_write(to, n);
+}
+
+#endif /* _LINUX_INSTRUMENTED_H */
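
These helpers are meant to be called from generic wrappers immediately before
the access they describe. A minimal sketch of the pattern (the wrapper name is
illustrative; arch_atomic_read() is assumed to be the architecture's
uninstrumented primitive):

#include <linux/instrumented.h>

static __always_inline int my_instrumented_atomic_read(const atomic_t *v)
{
	/* Report the atomic read to KASAN and KCSAN, then perform it. */
	instrument_atomic_read(v, sizeof(*v));
	return arch_atomic_read(v);
}
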
diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h
new file mode 100644 (file)
index 0000000..7b0b9c4
--- /dev/null
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_KCSAN_CHECKS_H
+#define _LINUX_KCSAN_CHECKS_H
+
+/* Note: Only include what is already included by compiler.h. */
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
+
+/*
+ * ACCESS TYPE MODIFIERS
+ *
+ *   <none>: normal read access;
+ *   WRITE : write access;
+ *   ATOMIC: access is atomic;
+ *   ASSERT: access is not a regular access, but an assertion;
+ *   SCOPED: access is a scoped access;
+ */
+#define KCSAN_ACCESS_WRITE  0x1
+#define KCSAN_ACCESS_ATOMIC 0x2
+#define KCSAN_ACCESS_ASSERT 0x4
+#define KCSAN_ACCESS_SCOPED 0x8
+
+/*
+ * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used
+ * even in compilation units that selectively disable KCSAN, but must use KCSAN
+ * to validate access to an address. Never use these in header files!
+ */
+#ifdef CONFIG_KCSAN
+/**
+ * __kcsan_check_access - check generic access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ * @type: access type modifier
+ */
+void __kcsan_check_access(const volatile void *ptr, size_t size, int type);
+
+/**
+ * kcsan_disable_current - disable KCSAN for the current context
+ *
+ * Supports nesting.
+ */
+void kcsan_disable_current(void);
+
+/**
+ * kcsan_enable_current - re-enable KCSAN for the current context
+ *
+ * Supports nesting.
+ */
+void kcsan_enable_current(void);
+void kcsan_enable_current_nowarn(void); /* Safe in uaccess regions. */
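
A minimal sketch of how the enable/disable pair is typically used: wrap a
short, known-benign racy sequence so that it is not reported (the function and
the racy read are illustrative assumptions):

#include <linux/kcsan-checks.h>
#include <linux/printk.h>

/* Dump an approximate value without triggering a KCSAN report. */
static void debug_dump_counter(const int *counter)
{
	kcsan_disable_current();
	pr_info("counter snapshot: %d\n", *counter);
	kcsan_enable_current();
}
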
+
+/**
+ * kcsan_nestable_atomic_begin - begin nestable atomic region
+ *
+ * Accesses within the atomic region may appear to race with other accesses but
+ * should be considered atomic.
+ */
+void kcsan_nestable_atomic_begin(void);
+
+/**
+ * kcsan_nestable_atomic_end - end nestable atomic region
+ */
+void kcsan_nestable_atomic_end(void);
+
+/**
+ * kcsan_flat_atomic_begin - begin flat atomic region
+ *
+ * Accesses within the atomic region may appear to race with other accesses but
+ * should be considered atomic.
+ */
+void kcsan_flat_atomic_begin(void);
+
+/**
+ * kcsan_flat_atomic_end - end flat atomic region
+ */
+void kcsan_flat_atomic_end(void);
+
+/**
+ * kcsan_atomic_next - consider following accesses as atomic
+ *
+ * Force treating the next n memory accesses for the current context as atomic
+ * operations.
+ *
+ * @n: number of following memory accesses to treat as atomic.
+ */
+void kcsan_atomic_next(int n);
+
+/**
+ * kcsan_set_access_mask - set access mask
+ *
+ * Set the access mask for all accesses for the current context if non-zero.
+ * Only value changes to bits set in the mask will be reported.
+ *
+ * @mask: bitmask
+ */
+void kcsan_set_access_mask(unsigned long mask);
+
+/* Scoped access information. */
+struct kcsan_scoped_access {
+       struct list_head list;
+       const volatile void *ptr;
+       size_t size;
+       int type;
+};
+/*
+ * Automatically call kcsan_end_scoped_access() when kcsan_scoped_access goes
+ * out of scope; relies on attribute "cleanup", which is supported by all
+ * compilers that support KCSAN.
+ */
+#define __kcsan_cleanup_scoped                                                 \
+       __maybe_unused __attribute__((__cleanup__(kcsan_end_scoped_access)))
+
+/**
+ * kcsan_begin_scoped_access - begin scoped access
+ *
+ * Begin scoped access and initialize @sa, which will cause KCSAN to
+ * continuously check the memory range in the current thread until
+ * kcsan_end_scoped_access() is called for @sa.
+ *
+ * Scoped accesses are implemented by appending @sa to an internal list for the
+ * current execution context, and then checked on every call into the KCSAN
+ * runtime.
+ *
+ * @ptr: address of access
+ * @size: size of access
+ * @type: access type modifier
+ * @sa: struct kcsan_scoped_access to use for the scope of the access
+ */
+struct kcsan_scoped_access *
+kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
+                         struct kcsan_scoped_access *sa);
+
+/**
+ * kcsan_end_scoped_access - end scoped access
+ *
+ * End a scoped access, which will stop KCSAN checking the memory range.
+ * Requires that kcsan_begin_scoped_access() was previously called once for @sa.
+ *
+ * @sa: a previously initialized struct kcsan_scoped_access
+ */
+void kcsan_end_scoped_access(struct kcsan_scoped_access *sa);
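
Scoped accesses can also be set up without the ASSERT_EXCLUSIVE_*_SCOPED()
wrappers defined further below. A hedged sketch of direct use, passing the same
type bits those wrappers pass (the function is illustrative):

#include <linux/kcsan-checks.h>

static void update_stat(int *stat)
{
	struct kcsan_scoped_access sa;

	kcsan_begin_scoped_access(stat, sizeof(*stat),
				  KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ASSERT,
				  &sa);
	*stat += 1;	/* concurrent writers in this window are reported */
	kcsan_end_scoped_access(&sa);
}
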
+
+
+#else /* CONFIG_KCSAN */
+
+static inline void __kcsan_check_access(const volatile void *ptr, size_t size,
+                                       int type) { }
+
+static inline void kcsan_disable_current(void)         { }
+static inline void kcsan_enable_current(void)          { }
+static inline void kcsan_enable_current_nowarn(void)   { }
+static inline void kcsan_nestable_atomic_begin(void)   { }
+static inline void kcsan_nestable_atomic_end(void)     { }
+static inline void kcsan_flat_atomic_begin(void)       { }
+static inline void kcsan_flat_atomic_end(void)         { }
+static inline void kcsan_atomic_next(int n)            { }
+static inline void kcsan_set_access_mask(unsigned long mask) { }
+
+struct kcsan_scoped_access { };
+#define __kcsan_cleanup_scoped __maybe_unused
+static inline struct kcsan_scoped_access *
+kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
+                         struct kcsan_scoped_access *sa) { return sa; }
+static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { }
+
+#endif /* CONFIG_KCSAN */
+
+#ifdef __SANITIZE_THREAD__
+/*
+ * Only calls into the runtime when the particular compilation unit has KCSAN
+ * instrumentation enabled. May be used in header files.
+ */
+#define kcsan_check_access __kcsan_check_access
+
+/*
+ * Only use these to disable KCSAN for accesses in the current compilation unit;
+ * calls into libraries may still perform KCSAN checks.
+ */
+#define __kcsan_disable_current kcsan_disable_current
+#define __kcsan_enable_current kcsan_enable_current_nowarn
+#else
+static inline void kcsan_check_access(const volatile void *ptr, size_t size,
+                                     int type) { }
+static inline void __kcsan_enable_current(void)  { }
+static inline void __kcsan_disable_current(void) { }
+#endif
+
+/**
+ * __kcsan_check_read - check regular read access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ */
+#define __kcsan_check_read(ptr, size) __kcsan_check_access(ptr, size, 0)
+
+/**
+ * __kcsan_check_write - check regular write access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ */
+#define __kcsan_check_write(ptr, size)                                         \
+       __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE)
+
+/**
+ * kcsan_check_read - check regular read access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ */
+#define kcsan_check_read(ptr, size) kcsan_check_access(ptr, size, 0)
+
+/**
+ * kcsan_check_write - check regular write access for races
+ *
+ * @ptr: address of access
+ * @size: size of access
+ */
+#define kcsan_check_write(ptr, size)                                           \
+       kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE)
+
+/*
+ * Check for atomic accesses: if atomic accesses are not ignored, these simply
+ * alias kcsan_check_access(); otherwise they become no-ops.
+ */
+#ifdef CONFIG_KCSAN_IGNORE_ATOMICS
+#define kcsan_check_atomic_read(...)   do { } while (0)
+#define kcsan_check_atomic_write(...)  do { } while (0)
+#else
+#define kcsan_check_atomic_read(ptr, size)                                     \
+       kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC)
+#define kcsan_check_atomic_write(ptr, size)                                    \
+       kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE)
+#endif
+
+/**
+ * ASSERT_EXCLUSIVE_WRITER - assert no concurrent writes to @var
+ *
+ * Assert that there are no concurrent writes to @var; other readers are
+ * allowed. This assertion can be used to specify properties of concurrent code,
+ * where violation cannot be detected as a normal data race.
+ *
+ * For example, if we only have a single writer, but multiple concurrent
+ * readers, to avoid data races, all these accesses must be marked; even
+ * concurrent marked writes racing with the single writer are bugs.
+ * Unfortunately, due to being marked, they are no longer data races. For cases
+ * like these, we can use the macro as follows:
+ *
+ * .. code-block:: c
+ *
+ *     void writer(void) {
+ *             spin_lock(&update_foo_lock);
+ *             ASSERT_EXCLUSIVE_WRITER(shared_foo);
+ *             WRITE_ONCE(shared_foo, ...);
+ *             spin_unlock(&update_foo_lock);
+ *     }
+ *     void reader(void) {
+ *             // update_foo_lock does not need to be held!
+ *             ... = READ_ONCE(shared_foo);
+ *     }
+ *
+ * Note: ASSERT_EXCLUSIVE_WRITER_SCOPED(), if applicable, performs more thorough
+ * checking when there is a clear scope in which no concurrent writes are expected.
+ *
+ * @var: variable to assert on
+ */
+#define ASSERT_EXCLUSIVE_WRITER(var)                                           \
+       __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT)
+
+/*
+ * Helper macros for the implementation of ASSERT_EXCLUSIVE_*_SCOPED(). @id is
+ * expected to be unique for the scope in which instances of kcsan_scoped_access
+ * are declared.
+ */
+#define __kcsan_scoped_name(c, suffix) __kcsan_scoped_##c##suffix
+#define __ASSERT_EXCLUSIVE_SCOPED(var, type, id)                               \
+       struct kcsan_scoped_access __kcsan_scoped_name(id, _)                  \
+               __kcsan_cleanup_scoped;                                        \
+       struct kcsan_scoped_access *__kcsan_scoped_name(id, _dummy_p)          \
+               __maybe_unused = kcsan_begin_scoped_access(                    \
+                       &(var), sizeof(var), KCSAN_ACCESS_SCOPED | (type),     \
+                       &__kcsan_scoped_name(id, _))
+
+/**
+ * ASSERT_EXCLUSIVE_WRITER_SCOPED - assert no concurrent writes to @var in scope
+ *
+ * Scoped variant of ASSERT_EXCLUSIVE_WRITER().
+ *
+ * Assert that there are no concurrent writes to @var for the duration of the
+ * scope in which it is introduced. This provides a better way to fully cover
+ * the enclosing scope, compared to multiple ASSERT_EXCLUSIVE_WRITER(), and
+ * increases the likelihood for KCSAN to detect racing accesses.
+ *
+ * For example, it allows finding race-condition bugs that only occur due to
+ * state changes within the scope itself:
+ *
+ * .. code-block:: c
+ *
+ *     void writer(void) {
+ *             spin_lock(&update_foo_lock);
+ *             {
+ *                     ASSERT_EXCLUSIVE_WRITER_SCOPED(shared_foo);
+ *                     WRITE_ONCE(shared_foo, 42);
+ *                     ...
+ *                     // shared_foo should still be 42 here!
+ *             }
+ *             spin_unlock(&update_foo_lock);
+ *     }
+ *     void buggy(void) {
+ *             if (READ_ONCE(shared_foo) == 42)
+ *                     WRITE_ONCE(shared_foo, 1); // bug!
+ *     }
+ *
+ * @var: variable to assert on
+ */
+#define ASSERT_EXCLUSIVE_WRITER_SCOPED(var)                                    \
+       __ASSERT_EXCLUSIVE_SCOPED(var, KCSAN_ACCESS_ASSERT, __COUNTER__)
+
+/**
+ * ASSERT_EXCLUSIVE_ACCESS - assert no concurrent accesses to @var
+ *
+ * Assert that there are no concurrent accesses to @var (no readers nor
+ * writers). This assertion can be used to specify properties of concurrent
+ * code, where violation cannot be detected as a normal data race.
+ *
+ * For example, exclusive access may be expected after determining that no other
+ * users of an object are left, but the object is not actually freed yet. We can
+ * check that this property actually holds as follows:
+ *
+ * .. code-block:: c
+ *
+ *     if (refcount_dec_and_test(&obj->refcnt)) {
+ *             ASSERT_EXCLUSIVE_ACCESS(*obj);
+ *             do_some_cleanup(obj);
+ *             release_for_reuse(obj);
+ *     }
+ *
+ * Note: If a clear scope exists in which no concurrent accesses are expected,
+ * ASSERT_EXCLUSIVE_ACCESS_SCOPED() performs more thorough checking.
+ *
+ * Note: For cases where the object is freed, `KASAN <kasan.html>`_ is a better
+ * fit to detect use-after-free bugs.
+ *
+ * @var: variable to assert on
+ */
+#define ASSERT_EXCLUSIVE_ACCESS(var)                                           \
+       __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT)
+
+/**
+ * ASSERT_EXCLUSIVE_ACCESS_SCOPED - assert no concurrent accesses to @var in scope
+ *
+ * Scoped variant of ASSERT_EXCLUSIVE_ACCESS().
+ *
+ * Assert that there are no concurrent accesses to @var (no readers nor writers)
+ * for the entire duration of the scope in which it is introduced. This provides
+ * a better way to fully cover the enclosing scope, compared to multiple
+ * ASSERT_EXCLUSIVE_ACCESS(), and increases the likelihood for KCSAN to detect
+ * racing accesses.
+ *
+ * @var: variable to assert on
+ */
+#define ASSERT_EXCLUSIVE_ACCESS_SCOPED(var)                                    \
+       __ASSERT_EXCLUSIVE_SCOPED(var, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT, __COUNTER__)
+
+/**
+ * ASSERT_EXCLUSIVE_BITS - assert no concurrent writes to subset of bits in @var
+ *
+ * Bit-granular variant of ASSERT_EXCLUSIVE_WRITER().
+ *
+ * Assert that there are no concurrent writes to a subset of bits in @var;
+ * concurrent readers are permitted. This assertion captures more detailed
+ * bit-level properties, compared to the other (word granularity) assertions.
+ * Only the bits set in @mask are checked for concurrent modifications;
+ * concurrent writes (or reads) to the remaining ~mask bits are ignored.
+ *
+ * Use this for variables where some bits must not be modified concurrently,
+ * yet other bits are expected to be modified concurrently.
+ *
+ * For example, consider variables where, after initialization, some bits are
+ * read-only, but other bits may still be modified concurrently. A reader may
+ * wish to assert that this is true as follows:
+ *
+ * .. code-block:: c
+ *
+ *     ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK);
+ *     foo = (READ_ONCE(flags) & READ_ONLY_MASK) >> READ_ONLY_SHIFT;
+ *
+ * Note: The access that immediately follows ASSERT_EXCLUSIVE_BITS() is assumed
+ * to access the masked bits only, and KCSAN optimistically assumes it is
+ * therefore safe, even in the presence of data races, and marking it with
+ * READ_ONCE() is optional from KCSAN's point-of-view. We caution, however, that
+ * it may still be advisable to do so, since we cannot reason about all compiler
+ * optimizations when it comes to bit manipulations (on the reader and writer
+ * side). If you are sure nothing can go wrong, we can write the above simply
+ * as:
+ *
+ * .. code-block:: c
+ *
+ *     ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK);
+ *     foo = (flags & READ_ONLY_MASK) >> READ_ONLY_SHIFT;
+ *
+ * Another example where this may be used is when certain bits of @var may only
+ * be modified when holding the appropriate lock, but other bits may still be
+ * modified concurrently. A writer, in the presence of such concurrent changes
+ * to other bits, could use the assertion as follows:
+ *
+ * .. code-block:: c
+ *
+ *     spin_lock(&foo_lock);
+ *     ASSERT_EXCLUSIVE_BITS(flags, FOO_MASK);
+ *     old_flags = flags;
+ *     new_flags = (old_flags & ~FOO_MASK) | (new_foo << FOO_SHIFT);
+ *     if (cmpxchg(&flags, old_flags, new_flags) != old_flags) { ... }
+ *     spin_unlock(&foo_lock);
+ *
+ * @var: variable to assert on
+ * @mask: only check for modifications to bits set in @mask
+ */
+#define ASSERT_EXCLUSIVE_BITS(var, mask)                                       \
+       do {                                                                   \
+               kcsan_set_access_mask(mask);                                   \
+               __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT);\
+               kcsan_set_access_mask(0);                                      \
+               kcsan_atomic_next(1);                                          \
+       } while (0)
+
+#endif /* _LINUX_KCSAN_CHECKS_H */
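For the scoped assertion variants above, which the header documents without a usage example, a minimal sketch (foo_lock, shared_foo and update_foo() are hypothetical names, not part of this patch) mirroring the ASSERT_EXCLUSIVE_WRITER_SCOPED() example might look like this, assuming every reader and writer holds foo_lock:

	#include <linux/kcsan-checks.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(foo_lock);
	static unsigned long shared_foo;

	static void update_foo(unsigned long new_val)
	{
		spin_lock(&foo_lock);
		{
			/* Neither readers nor writers may race within this scope. */
			ASSERT_EXCLUSIVE_ACCESS_SCOPED(shared_foo);
			shared_foo = new_val;
		}
		spin_unlock(&foo_lock);
	}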
diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h
new file mode 100644 (file)
index 0000000..53340d8
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_KCSAN_H
+#define _LINUX_KCSAN_H
+
+#include <linux/kcsan-checks.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KCSAN
+
+/*
+ * Context for each thread of execution: for tasks, this is stored in
+ * task_struct, and interrupts access internal per-CPU storage.
+ */
+struct kcsan_ctx {
+       int disable_count; /* disable counter */
+       int atomic_next; /* number of following atomic ops */
+
+       /*
+        * We distinguish between: (a) nestable atomic regions that may contain
+        * other nestable regions; and (b) flat atomic regions that do not keep
+        * track of nesting. Both (a) and (b) are entirely independent of each
+        * other, and a flat region may be started in a nestable region or
+        * vice-versa.
+        *
+        * This is required because, for example, in the annotations for
+        * seqlocks, we declare seqlock writer critical sections as (a) nestable
+        * atomic regions and reader critical sections as (b) flat atomic
+        * regions, yet we have encountered cases where seqlock reader critical
+        * sections are contained within writer critical sections (the opposite
+        * may be possible, too).
+        *
+        * To support these cases, we independently track the depth of nesting
+        * for (a), and whether the leaf level is flat for (b).
+        */
+       int atomic_nest_count;
+       bool in_flat_atomic;
+
+       /*
+        * Access mask for all accesses if non-zero.
+        */
+       unsigned long access_mask;
+
+       /* List of scoped accesses. */
+       struct list_head scoped_accesses;
+};
+
+/**
+ * kcsan_init - initialize KCSAN runtime
+ */
+void kcsan_init(void);
+
+#else /* CONFIG_KCSAN */
+
+static inline void kcsan_init(void)                    { }
+
+#endif /* CONFIG_KCSAN */
+
+#endif /* _LINUX_KCSAN_H */
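The nestable/flat distinction documented in struct kcsan_ctx corresponds to the kcsan_nestable_atomic_begin()/end() and kcsan_flat_atomic_begin()/end() annotations exported from kernel/kcsan/core.c further below; a rough sketch of an annotated pair of critical sections (the my_* helpers and any shared state are hypothetical):

	#include <linux/kcsan-checks.h>

	/* Writer section: nestable, so it may enclose other atomic regions. */
	static void my_write_section(void)
	{
		kcsan_nestable_atomic_begin();
		/* ... plain accesses in here are treated as atomic by KCSAN ... */
		kcsan_nestable_atomic_end();
	}

	/* Reader section: flat, no nesting is tracked. */
	static void my_read_section(void)
	{
		kcsan_flat_atomic_begin();
		/* ... may itself run inside a nestable (writer) region ... */
		kcsan_flat_atomic_end();
	}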
index c2a274b..65b81e0 100644 (file)
@@ -5,6 +5,8 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 
+struct mm_struct;
+
 __printf(4, 5)
 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
                                           void *data,
@@ -199,6 +201,9 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work);
 
 void kthread_destroy_worker(struct kthread_worker *worker);
 
+void kthread_use_mm(struct mm_struct *mm);
+void kthread_unuse_mm(struct mm_struct *mm);
+
 struct cgroup_subsys_state;
 
 #ifdef CONFIG_BLK_CGROUP
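kthread_use_mm() and kthread_unuse_mm() replace the old use_mm()/unuse_mm() declarations removed from mmu_context.h below; a hypothetical helper (copy_to_user_of() and its parameters are illustrative only) showing how a kernel thread might temporarily adopt a user mm:

	#include <linux/errno.h>
	#include <linux/kthread.h>
	#include <linux/mm_types.h>
	#include <linux/uaccess.h>

	static int copy_to_user_of(struct mm_struct *mm, void __user *dst,
				   const void *src, size_t len)
	{
		int ret;

		kthread_use_mm(mm);	/* make mm current for this kthread */
		ret = copy_to_user(dst, src, len) ? -EFAULT : 0;
		kthread_unuse_mm(mm);	/* switch back */
		return ret;
	}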
index f43b59b..62ec926 100644 (file)
@@ -206,6 +206,7 @@ struct kvm_async_pf {
        unsigned long addr;
        struct kvm_arch_async_pf arch;
        bool   wakeup_all;
+       bool notpresent_injected;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
@@ -318,7 +319,6 @@ struct kvm_vcpu {
        bool preempted;
        bool ready;
        struct kvm_vcpu_arch arch;
-       struct dentry *debugfs_dentry;
 };
 
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -888,7 +888,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
-void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu);
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
 #endif
 
 int kvm_arch_hardware_enable(void);
@@ -1421,8 +1421,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
 
-int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-               unsigned long start, unsigned long end, bool blockable);
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+                                           unsigned long start, unsigned long end);
 
 #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
 int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
index d9a543a..c51a841 100644 (file)
@@ -4,11 +4,6 @@
 
 #include <asm/mmu_context.h>
 
-struct mm_struct;
-
-void use_mm(struct mm_struct *mm);
-void unuse_mm(struct mm_struct *mm);
-
 /* Architectures that care about IRQ state in switch_mm can override this. */
 #ifndef switch_mm_irqs_off
 # define switch_mm_irqs_off switch_mm
index df1f084..c4c37fd 100644 (file)
@@ -660,9 +660,21 @@ struct deferred_split {
  * per-zone basis.
  */
 typedef struct pglist_data {
+       /*
+        * node_zones contains just the zones for THIS node. Not all of the
+        * zones may be populated, but it is the full list. It is referenced by
+        * this node's node_zonelists as well as other nodes' node_zonelists.
+        */
        struct zone node_zones[MAX_NR_ZONES];
+
+       /*
+        * node_zonelists contains references to all zones in all nodes.
+        * Generally the first zones will be references to this node's
+        * node_zones.
+        */
        struct zonelist node_zonelists[MAX_ZONELISTS];
-       int nr_zones;
+
+       int nr_zones; /* number of populated zones in this node */
 #ifdef CONFIG_FLAT_NODE_MEM_MAP        /* means !SPARSEMEM */
        struct page *node_mem_map;
 #ifdef CONFIG_PAGE_EXTENSION
index c5d96e3..4ea612e 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/task_io_accounting.h>
 #include <linux/posix-timers.h>
 #include <linux/rseq.h>
+#include <linux/kcsan.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -1197,6 +1198,9 @@ struct task_struct {
 #ifdef CONFIG_KASAN
        unsigned int                    kasan_depth;
 #endif
+#ifdef CONFIG_KCSAN
+       struct kcsan_ctx                kcsan_ctx;
+#endif
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        /* Index of current stored address in ret_stack: */
index 0491d96..8b97204 100644 (file)
 #include <linux/preempt.h>
 #include <linux/lockdep.h>
 #include <linux/compiler.h>
+#include <linux/kcsan-checks.h>
 #include <asm/processor.h>
 
+/*
+ * The seqlock interface does not prescribe a precise sequence of read
+ * begin/retry/end. For readers, typically there is a call to
+ * read_seqcount_begin() and read_seqcount_retry(); however, there are more
+ * esoteric cases which do not follow this pattern.
+ *
+ * As a consequence, we take the following best-effort approach for raw usage
+ * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
+ * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
+ * atomics; if there is a matching read_seqcount_retry() call, no following
+ * memory operations are considered atomic. Usage of seqlocks via the seqlock_t
+ * interface is not affected.
+ */
+#define KCSAN_SEQLOCK_REGION_MAX 1000
+
 /*
  * Version using sequence counter only.
  * This can be used when code has its own mutex protecting the
@@ -115,6 +131,7 @@ repeat:
                cpu_relax();
                goto repeat;
        }
+       kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
        return ret;
 }
 
@@ -131,6 +148,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
 {
        unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
+       kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
        return ret;
 }
 
@@ -183,6 +201,7 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 {
        unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
+       kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
        return ret & ~1;
 }
 
@@ -202,7 +221,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
  */
 static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
-       return unlikely(s->sequence != start);
+       kcsan_atomic_next(0);
+       return unlikely(READ_ONCE(s->sequence) != start);
 }
 
 /**
@@ -225,6 +245,7 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 
 static inline void raw_write_seqcount_begin(seqcount_t *s)
 {
+       kcsan_nestable_atomic_begin();
        s->sequence++;
        smp_wmb();
 }
@@ -233,6 +254,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 {
        smp_wmb();
        s->sequence++;
+       kcsan_nestable_atomic_end();
 }
 
 /**
@@ -243,6 +265,13 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
  * usual consistency guarantee. It is one wmb cheaper, because we can
  * collapse the two back-to-back wmb()s.
  *
+ * Note that writes surrounding the barrier should be declared atomic (e.g.
+ * via WRITE_ONCE): a) to ensure the writes become visible to other threads
+ * atomically, avoiding compiler optimizations; b) to document which writes are
+ * meant to propagate to the reader critical section. This is necessary because
+ * neither the writes before nor after the barrier are enclosed in a seq-writer
+ * critical section that would ensure readers are aware of ongoing writes.
+ *
  *      seqcount_t seq;
  *      bool X = true, Y = false;
  *
@@ -262,18 +291,20 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
  *
  *      void write(void)
  *      {
- *              Y = true;
+ *              WRITE_ONCE(Y, true);
  *
  *              raw_write_seqcount_barrier(seq);
  *
- *              X = false;
+ *              WRITE_ONCE(X, false);
  *      }
  */
 static inline void raw_write_seqcount_barrier(seqcount_t *s)
 {
+       kcsan_nestable_atomic_begin();
        s->sequence++;
        smp_wmb();
        s->sequence++;
+       kcsan_nestable_atomic_end();
 }
 
 static inline int raw_read_seqcount_latch(seqcount_t *s)
@@ -398,7 +429,9 @@ static inline void write_seqcount_end(seqcount_t *s)
 static inline void write_seqcount_invalidate(seqcount_t *s)
 {
        smp_wmb();
+       kcsan_nestable_atomic_begin();
        s->sequence+=2;
+       kcsan_nestable_atomic_end();
 }
 
 typedef struct {
@@ -430,11 +463,21 @@ typedef struct {
  */
 static inline unsigned read_seqbegin(const seqlock_t *sl)
 {
-       return read_seqcount_begin(&sl->seqcount);
+       unsigned ret = read_seqcount_begin(&sl->seqcount);
+
+       kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
+       kcsan_flat_atomic_begin();
+       return ret;
 }
 
 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 {
+       /*
+        * Assume not nested: read_seqretry() may be called multiple times when
+        * completing a read critical section.
+        */
+       kcsan_flat_atomic_end();
+
        return read_seqcount_retry(&sl->seqcount, start);
 }
 
index 83bd8cb..b7af8cc 100644 (file)
@@ -64,7 +64,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
 struct stack_trace {
        unsigned int nr_entries, max_entries;
        unsigned long *entries;
-       int skip;       /* input argument: How many entries to skip */
+       unsigned int skip;      /* input argument: How many entries to skip */
 };
 
 extern void save_stack_trace(struct stack_trace *trace);
index dac1db0..7bcadca 100644 (file)
@@ -2,9 +2,9 @@
 #ifndef __LINUX_UACCESS_H__
 #define __LINUX_UACCESS_H__
 
+#include <linux/instrumented.h>
 #include <linux/sched.h>
 #include <linux/thread_info.h>
-#include <linux/kasan-checks.h>
 
 #define uaccess_kernel() segment_eq(get_fs(), KERNEL_DS)
 
@@ -58,7 +58,7 @@
 static __always_inline __must_check unsigned long
 __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 {
-       kasan_check_write(to, n);
+       instrument_copy_from_user(to, from, n);
        check_object_size(to, n, false);
        return raw_copy_from_user(to, from, n);
 }
@@ -67,7 +67,7 @@ static __always_inline __must_check unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        might_fault();
-       kasan_check_write(to, n);
+       instrument_copy_from_user(to, from, n);
        check_object_size(to, n, false);
        return raw_copy_from_user(to, from, n);
 }
@@ -88,7 +88,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static __always_inline __must_check unsigned long
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
-       kasan_check_read(from, n);
+       instrument_copy_to_user(to, from, n);
        check_object_size(from, n, true);
        return raw_copy_to_user(to, from, n);
 }
@@ -97,7 +97,7 @@ static __always_inline __must_check unsigned long
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        might_fault();
-       kasan_check_read(from, n);
+       instrument_copy_to_user(to, from, n);
        check_object_size(from, n, true);
        return raw_copy_to_user(to, from, n);
 }
@@ -109,7 +109,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
        unsigned long res = n;
        might_fault();
        if (likely(access_ok(from, n))) {
-               kasan_check_write(to, n);
+               instrument_copy_from_user(to, from, n);
                res = raw_copy_from_user(to, from, n);
        }
        if (unlikely(res))
@@ -127,7 +127,7 @@ _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        might_fault();
        if (access_ok(to, n)) {
-               kasan_check_read(from, n);
+               instrument_copy_to_user(to, from, n);
                n = raw_copy_to_user(to, from, n);
        }
        return n;
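The instrument_copy_{from,to}_user() helpers come from <linux/instrumented.h>, which is added elsewhere in this series and not shown here; conceptually they bundle the previous KASAN check with the new KCSAN check, roughly along these lines (a sketch, not the exact upstream file):

	#include <linux/kasan-checks.h>
	#include <linux/kcsan-checks.h>

	static __always_inline void
	instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
	{
		kasan_check_read(from, n);	/* source must be valid kernel memory */
		kcsan_check_read(from, n);	/* and free of unmarked concurrent writes */
	}

	static __always_inline void
	instrument_copy_from_user(void *to, const void __user *from, unsigned long n)
	{
		kasan_check_write(to, n);
		kcsan_check_write(to, n);
	}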
index 81b43f5..1257f26 100644 (file)
@@ -261,9 +261,9 @@ TRACE_EVENT(block_bio_bounce,
  */
 TRACE_EVENT(block_bio_complete,
 
-       TP_PROTO(struct request_queue *q, struct bio *bio, int error),
+       TP_PROTO(struct request_queue *q, struct bio *bio),
 
-       TP_ARGS(q, bio, error),
+       TP_ARGS(q, bio),
 
        TP_STRUCT__entry(
                __field( dev_t,         dev             )
@@ -277,7 +277,7 @@ TRACE_EVENT(block_bio_complete,
                __entry->dev            = bio_dev(bio);
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio_sectors(bio);
-               __entry->error          = error;
+               __entry->error          = blk_status_to_errno(bio->bi_status);
                blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
        ),
 
index a90ac70..15089d1 100644 (file)
@@ -174,6 +174,16 @@ struct task_struct init_task
 #ifdef CONFIG_KASAN
        .kasan_depth    = 1,
 #endif
+#ifdef CONFIG_KCSAN
+       .kcsan_ctx = {
+               .disable_count          = 0,
+               .atomic_next            = 0,
+               .atomic_nest_count      = 0,
+               .in_flat_atomic         = false,
+               .access_mask            = 0,
+               .scoped_accesses        = {LIST_POISON1, NULL},
+       },
+#endif
 #ifdef CONFIG_TRACE_IRQFLAGS
        .softirqs_enabled = 1,
 #endif
index b59e093..0ead83e 100644 (file)
@@ -95,6 +95,7 @@
 #include <linux/rodata_test.h>
 #include <linux/jump_label.h>
 #include <linux/mem_encrypt.h>
+#include <linux/kcsan.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -1036,6 +1037,7 @@ asmlinkage __visible void __init start_kernel(void)
        acpi_subsystem_init();
        arch_post_acpi_subsys_init();
        sfi_init_late();
+       kcsan_init();
 
        /* Do the rest non-__init'ed, we're now alive */
        arch_call_rest_init();
index c332eb9..ce8716a 100644 (file)
@@ -23,6 +23,9 @@ endif
 # Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip()
 # in coverage traces.
 KCOV_INSTRUMENT_softirq.o := n
+# Avoid KCSAN instrumentation in softirq ("No shared variables, all the data
+# are CPU local" => assume no data races), to reduce overhead in interrupts.
+KCSAN_SANITIZE_softirq.o = n
 # These are called from save_stack_trace() on slub debug path,
 # and produce insane amounts of uninteresting coverage.
 KCOV_INSTRUMENT_module.o := n
@@ -31,6 +34,7 @@ KCOV_INSTRUMENT_stacktrace.o := n
 # Don't self-instrument.
 KCOV_INSTRUMENT_kcov.o := n
 KASAN_SANITIZE_kcov.o := n
+KCSAN_SANITIZE_kcov.o := n
 CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
 
 # cond_syscall is currently not LTO compatible
@@ -103,6 +107,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_KCSAN) += kcsan/
 obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
@@ -121,6 +126,7 @@ obj-$(CONFIG_SYSCTL_KUNIT_TEST) += sysctl-test.o
 
 obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
 KASAN_SANITIZE_stackleak.o := n
+KCSAN_SANITIZE_stackleak.o := n
 KCOV_INSTRUMENT_stackleak.o := n
 
 $(obj)/configs.o: $(obj)/config_data.gz
index 55c5d88..6afae0b 100644 (file)
@@ -427,7 +427,8 @@ void kcov_task_exit(struct task_struct *t)
         *        WARN_ON(!kcov->remote && kcov->t != t);
         *
         * For KCOV_REMOTE_ENABLE devices, the exiting task is either:
-        * 2. A remote task between kcov_remote_start() and kcov_remote_stop().
+        *
+        * 1. A remote task between kcov_remote_start() and kcov_remote_stop().
         *    In this case we should print a warning right away, since a task
         *    shouldn't be exiting when it's in a kcov coverage collection
         *    section. Here t points to the task that is collecting remote
@@ -437,7 +438,7 @@ void kcov_task_exit(struct task_struct *t)
         *        WARN_ON(kcov->remote && kcov->t != t);
         *
         * 2. The task that created kcov exiting without calling KCOV_DISABLE,
-        *    and then again we can make sure that t->kcov->t == t:
+        *    and then again we make sure that t->kcov->t == t:
         *        WARN_ON(kcov->remote && kcov->t != t);
         *
         * By combining all three checks into one we get:
@@ -764,7 +765,7 @@ static const struct file_operations kcov_fops = {
  * Internally, kcov_remote_start() looks up the kcov device associated with the
  * provided handle, allocates an area for coverage collection, and saves the
  * pointers to kcov and area into the current task_struct to allow coverage to
- * be collected via __sanitizer_cov_trace_pc()
+ * be collected via __sanitizer_cov_trace_pc().
 * In turn, kcov_remote_stop() clears those pointers from task_struct to stop
  * collecting coverage and copies all collected coverage into the kcov area.
  */
@@ -972,16 +973,25 @@ void kcov_remote_stop(void)
                local_irq_restore(flags);
                return;
        }
-       kcov = t->kcov;
-       area = t->kcov_area;
-       size = t->kcov_size;
-       sequence = t->kcov_sequence;
-
+       /*
+        * When in softirq, check if the corresponding kcov_remote_start()
+        * actually found the remote handle and started collecting coverage.
+        */
+       if (in_serving_softirq() && !t->kcov_softirq) {
+               local_irq_restore(flags);
+               return;
+       }
+       /* Make sure that kcov_softirq is only set when in softirq. */
        if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) {
                local_irq_restore(flags);
                return;
        }
 
+       kcov = t->kcov;
+       area = t->kcov_area;
+       size = t->kcov_size;
+       sequence = t->kcov_sequence;
+
        kcov_stop(t);
        if (in_serving_softirq()) {
                t->kcov_softirq = 0;
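For context on the remote-coverage paths adjusted above, a minimal sketch of how a subsystem brackets a remote section (my_remote_work() and its handle argument are hypothetical; the handle itself is whatever userspace registered via KCOV_REMOTE_ENABLE):

	#include <linux/kcov.h>
	#include <linux/types.h>

	static void my_remote_work(u64 kcov_handle)
	{
		kcov_remote_start(kcov_handle);
		/* ... coverage of this region is attributed to kcov_handle ... */
		kcov_remote_stop();
	}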
diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
new file mode 100644 (file)
index 0000000..d4999b3
--- /dev/null
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+KCSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+UBSAN_SANITIZE := n
+
+CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_debugfs.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE)
+
+CFLAGS_core.o := $(call cc-option,-fno-conserve-stack,) \
+       $(call cc-option,-fno-stack-protector,)
+
+obj-y := core.o debugfs.o report.o
+obj-$(CONFIG_KCSAN_SELFTEST) += test.o
diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
new file mode 100644 (file)
index 0000000..be9e625
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _KERNEL_KCSAN_ATOMIC_H
+#define _KERNEL_KCSAN_ATOMIC_H
+
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+
+/*
+ * Special rules for certain memory where concurrent conflicting accesses are
+ * common; however, the current convention is to not mark them. Returns true if
+ * access to @ptr should be considered atomic. Called from the slow-path.
+ */
+static bool kcsan_is_atomic_special(const volatile void *ptr)
+{
+       /* volatile globals that have been observed in data races. */
+       return ptr == &jiffies || ptr == &current->state;
+}
+
+#endif /* _KERNEL_KCSAN_ATOMIC_H */
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
new file mode 100644 (file)
index 0000000..15f6794
--- /dev/null
@@ -0,0 +1,850 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+
+#include "atomic.h"
+#include "encoding.h"
+#include "kcsan.h"
+
+static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
+unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
+unsigned int kcsan_udelay_interrupt = CONFIG_KCSAN_UDELAY_INTERRUPT;
+static long kcsan_skip_watch = CONFIG_KCSAN_SKIP_WATCH;
+static bool kcsan_interrupt_watcher = IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER);
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "kcsan."
+module_param_named(early_enable, kcsan_early_enable, bool, 0);
+module_param_named(udelay_task, kcsan_udelay_task, uint, 0644);
+module_param_named(udelay_interrupt, kcsan_udelay_interrupt, uint, 0644);
+module_param_named(skip_watch, kcsan_skip_watch, long, 0644);
+module_param_named(interrupt_watcher, kcsan_interrupt_watcher, bool, 0444);
+
+bool kcsan_enabled;
+
+/* Per-CPU kcsan_ctx for interrupts */
+static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = {
+       .disable_count          = 0,
+       .atomic_next            = 0,
+       .atomic_nest_count      = 0,
+       .in_flat_atomic         = false,
+       .access_mask            = 0,
+       .scoped_accesses        = {LIST_POISON1, NULL},
+};
+
+/*
+ * Helper macros to index into adjacent slots, starting from address slot
+ * itself, followed by the right and left slots.
+ *
+ * The purpose is 2-fold:
+ *
+ *     1. if during insertion the address slot is already occupied, check if
+ *        any adjacent slots are free;
+ *     2. accesses that straddle a slot boundary due to size that exceeds a
+ *        slot's range may check adjacent slots if any watchpoint matches.
+ *
+ * Note that accesses with very large size may still miss a watchpoint; however,
+ * given this should be rare, this is a reasonable trade-off to make, since this
+ * will:
+ *
+ *     1. avoid excessive contention between watchpoint checks and setup;
+ *     2. allow a larger number of simultaneous watchpoints without sacrificing
+ *        performance.
+ *
+ * Example: SLOT_IDX values for KCSAN_CHECK_ADJACENT=1, where i is [0, 1, 2]:
+ *
+ *   slot=0:  [ 1,  2,  0]
+ *   slot=9:  [10, 11,  9]
+ *   slot=63: [64, 65, 63]
+ */
+#define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS))
+
+/*
+ * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary
+ * slot (middle) is fine if we assume that races occur rarely. The set of
+ * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to
+ * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}.
+ */
+#define SLOT_IDX_FAST(slot, i) (slot + i)
+
+/*
+ * Watchpoints, with each entry encoded as defined in encoding.h: in order to be
+ * able to safely update and access a watchpoint without introducing locking
+ * overhead, we encode each watchpoint as a single atomic long. The initial
+ * zero-initialized state matches INVALID_WATCHPOINT.
+ *
+ * Add NUM_SLOTS-1 entries to account for overflow; this helps avoid having to
+ * use more complicated SLOT_IDX_FAST calculation with modulo in the fast-path.
+ */
+static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
+
+/*
+ * Instructions to skip watching counter, used in should_watch(). We use a
+ * per-CPU counter to avoid excessive contention.
+ */
+static DEFINE_PER_CPU(long, kcsan_skip);
+
+static __always_inline atomic_long_t *find_watchpoint(unsigned long addr,
+                                                     size_t size,
+                                                     bool expect_write,
+                                                     long *encoded_watchpoint)
+{
+       const int slot = watchpoint_slot(addr);
+       const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
+       atomic_long_t *watchpoint;
+       unsigned long wp_addr_masked;
+       size_t wp_size;
+       bool is_write;
+       int i;
+
+       BUILD_BUG_ON(CONFIG_KCSAN_NUM_WATCHPOINTS < NUM_SLOTS);
+
+       for (i = 0; i < NUM_SLOTS; ++i) {
+               watchpoint = &watchpoints[SLOT_IDX_FAST(slot, i)];
+               *encoded_watchpoint = atomic_long_read(watchpoint);
+               if (!decode_watchpoint(*encoded_watchpoint, &wp_addr_masked,
+                                      &wp_size, &is_write))
+                       continue;
+
+               if (expect_write && !is_write)
+                       continue;
+
+               /* Check if the watchpoint matches the access. */
+               if (matching_access(wp_addr_masked, wp_size, addr_masked, size))
+                       return watchpoint;
+       }
+
+       return NULL;
+}
+
+static inline atomic_long_t *
+insert_watchpoint(unsigned long addr, size_t size, bool is_write)
+{
+       const int slot = watchpoint_slot(addr);
+       const long encoded_watchpoint = encode_watchpoint(addr, size, is_write);
+       atomic_long_t *watchpoint;
+       int i;
+
+       /* Check slot index logic, ensuring we stay within array bounds. */
+       BUILD_BUG_ON(SLOT_IDX(0, 0) != KCSAN_CHECK_ADJACENT);
+       BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT+1) != 0);
+       BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT) != ARRAY_SIZE(watchpoints)-1);
+       BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT+1) != ARRAY_SIZE(watchpoints) - NUM_SLOTS);
+
+       for (i = 0; i < NUM_SLOTS; ++i) {
+               long expect_val = INVALID_WATCHPOINT;
+
+               /* Try to acquire this slot. */
+               watchpoint = &watchpoints[SLOT_IDX(slot, i)];
+               if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, encoded_watchpoint))
+                       return watchpoint;
+       }
+
+       return NULL;
+}
+
+/*
+ * Return true if watchpoint was successfully consumed, false otherwise.
+ *
+ * This may return false if:
+ *
+ *     1. another thread already consumed the watchpoint;
+ *     2. the thread that set up the watchpoint already removed it;
+ *     3. the watchpoint was removed and then re-used.
+ */
+static __always_inline bool
+try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
+{
+       return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
+}
+
+/* Return true if watchpoint was not touched, false if already consumed. */
+static inline bool consume_watchpoint(atomic_long_t *watchpoint)
+{
+       return atomic_long_xchg_relaxed(watchpoint, CONSUMED_WATCHPOINT) != CONSUMED_WATCHPOINT;
+}
+
+/* Remove the watchpoint -- its slot may be reused after. */
+static inline void remove_watchpoint(atomic_long_t *watchpoint)
+{
+       atomic_long_set(watchpoint, INVALID_WATCHPOINT);
+}
+
+static __always_inline struct kcsan_ctx *get_ctx(void)
+{
+       /*
+        * In interrupts, use raw_cpu_ptr to avoid unnecessary checks that would
+        * also result in calls that generate warnings in uaccess regions.
+        */
+       return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
+}
+
+/* Check scoped accesses; never inline because this is a slow-path! */
+static noinline void kcsan_check_scoped_accesses(void)
+{
+       struct kcsan_ctx *ctx = get_ctx();
+       struct list_head *prev_save = ctx->scoped_accesses.prev;
+       struct kcsan_scoped_access *scoped_access;
+
+       ctx->scoped_accesses.prev = NULL;  /* Avoid recursion. */
+       list_for_each_entry(scoped_access, &ctx->scoped_accesses, list)
+               __kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type);
+       ctx->scoped_accesses.prev = prev_save;
+}
+
+/* Rules for generic atomic accesses. Called from fast-path. */
+static __always_inline bool
+is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx)
+{
+       if (type & KCSAN_ACCESS_ATOMIC)
+               return true;
+
+       /*
+        * Unless explicitly declared atomic, never consider an assertion access
+        * as atomic. This allows using them also in atomic regions, such as
+        * seqlocks, without implicitly changing their semantics.
+        */
+       if (type & KCSAN_ACCESS_ASSERT)
+               return false;
+
+       if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) &&
+           (type & KCSAN_ACCESS_WRITE) && size <= sizeof(long) &&
+           IS_ALIGNED((unsigned long)ptr, size))
+               return true; /* Assume aligned writes up to word size are atomic. */
+
+       if (ctx->atomic_next > 0) {
+               /*
+                * Because we do not have separate contexts for nested
+                * interrupts, in case atomic_next is set, we simply assume that
+                * the outer interrupt set atomic_next. In the worst case, we
+                * will conservatively consider operations as atomic. This is a
+                * reasonable trade-off to make, since this case should be
+                * extremely rare; however, even a rare occurrence could
+                * otherwise lead to false positives.
+                */
+               if ((hardirq_count() >> HARDIRQ_SHIFT) < 2)
+                       --ctx->atomic_next; /* in task, or outer interrupt */
+               return true;
+       }
+
+       return ctx->atomic_nest_count > 0 || ctx->in_flat_atomic;
+}
+
+static __always_inline bool
+should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx)
+{
+       /*
+        * Never set up watchpoints when memory operations are atomic.
+        *
+        * Need to check this first, before kcsan_skip check below: (1) atomics
+        * should not count towards skipped instructions, and (2) to actually
+        * decrement kcsan_atomic_next for consecutive instruction stream.
+        */
+       if (is_atomic(ptr, size, type, ctx))
+               return false;
+
+       if (this_cpu_dec_return(kcsan_skip) >= 0)
+               return false;
+
+       /*
+        * NOTE: If we get here, kcsan_skip must always be reset in slow path
+        * via reset_kcsan_skip() to avoid underflow.
+        */
+
+       /* this operation should be watched */
+       return true;
+}
+
+static inline void reset_kcsan_skip(void)
+{
+       long skip_count = kcsan_skip_watch -
+                         (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ?
+                                  prandom_u32_max(kcsan_skip_watch) :
+                                  0);
+       this_cpu_write(kcsan_skip, skip_count);
+}
+
+static __always_inline bool kcsan_is_enabled(void)
+{
+       return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
+}
+
+static inline unsigned int get_delay(void)
+{
+       unsigned int delay = in_task() ? kcsan_udelay_task : kcsan_udelay_interrupt;
+       return delay - (IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ?
+                               prandom_u32_max(delay) :
+                               0);
+}
+
+/*
+ * Pull everything together: check_access() below contains the performance
+ * critical operations; the fast-path (including check_access) functions should
+ * all be inlinable by the instrumentation functions.
+ *
+ * The slow-path functions (kcsan_found_watchpoint, kcsan_setup_watchpoint) are
+ * non-inlinable -- note that we prefix these with "kcsan_" to ensure they can
+ * be filtered from the stacktrace, as well as give them unique names for the
+ * UACCESS whitelist of objtool. Each function uses user_access_save/restore(),
+ * since they do not access any user memory, but instrumentation is still
+ * emitted in UACCESS regions.
+ */
+
+static noinline void kcsan_found_watchpoint(const volatile void *ptr,
+                                           size_t size,
+                                           int type,
+                                           atomic_long_t *watchpoint,
+                                           long encoded_watchpoint)
+{
+       unsigned long flags;
+       bool consumed;
+
+       if (!kcsan_is_enabled())
+               return;
+
+       /*
+        * The access_mask check relies on value-change comparison. To avoid
+        * reporting a race where e.g. the writer set up the watchpoint, but the
+        * reader has access_mask!=0, we have to ignore the found watchpoint.
+        */
+       if (get_ctx()->access_mask != 0)
+               return;
+
+       /*
+        * Consume the watchpoint as soon as possible, to minimize the chances
+        * of !consumed. Consuming the watchpoint must always be guarded by the
+        * kcsan_is_enabled() check, as otherwise we might erroneously trigger
+        * reports when disabled.
+        */
+       consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint);
+
+       /* keep this after try_consume_watchpoint */
+       flags = user_access_save();
+
+       if (consumed) {
+               kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE,
+                            KCSAN_REPORT_CONSUMED_WATCHPOINT,
+                            watchpoint - watchpoints);
+       } else {
+               /*
+                * The other thread may not print any diagnostics, as it has
+                * already removed the watchpoint, or another thread consumed
+                * the watchpoint before this thread.
+                */
+               kcsan_counter_inc(KCSAN_COUNTER_REPORT_RACES);
+       }
+
+       if ((type & KCSAN_ACCESS_ASSERT) != 0)
+               kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES);
+       else
+               kcsan_counter_inc(KCSAN_COUNTER_DATA_RACES);
+
+       user_access_restore(flags);
+}
+
+static noinline void
+kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
+{
+       const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
+       const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
+       atomic_long_t *watchpoint;
+       union {
+               u8 _1;
+               u16 _2;
+               u32 _4;
+               u64 _8;
+       } expect_value;
+       unsigned long access_mask;
+       enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
+       unsigned long ua_flags = user_access_save();
+       unsigned long irq_flags = 0;
+
+       /*
+        * Always reset kcsan_skip counter in slow-path to avoid underflow; see
+        * should_watch().
+        */
+       reset_kcsan_skip();
+
+       if (!kcsan_is_enabled())
+               goto out;
+
+       /*
+        * Special atomic rules: unlikely to be true, so we check them here in
+        * the slow-path, and not in the fast-path in is_atomic(). Call after
+        * kcsan_is_enabled(), as we may access memory that is not yet
+        * initialized during early boot.
+        */
+       if (!is_assert && kcsan_is_atomic_special(ptr))
+               goto out;
+
+       if (!check_encodable((unsigned long)ptr, size)) {
+               kcsan_counter_inc(KCSAN_COUNTER_UNENCODABLE_ACCESSES);
+               goto out;
+       }
+
+       if (!kcsan_interrupt_watcher)
+               /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
+               raw_local_irq_save(irq_flags);
+
+       watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
+       if (watchpoint == NULL) {
+               /*
+                * Out of capacity: the size of 'watchpoints', and the frequency
+                * with which should_watch() returns true should be tweaked so
+                * that this case happens very rarely.
+                */
+               kcsan_counter_inc(KCSAN_COUNTER_NO_CAPACITY);
+               goto out_unlock;
+       }
+
+       kcsan_counter_inc(KCSAN_COUNTER_SETUP_WATCHPOINTS);
+       kcsan_counter_inc(KCSAN_COUNTER_USED_WATCHPOINTS);
+
+       /*
+        * Read the current value, to later check and infer a race if the data
+        * was modified via a non-instrumented access, e.g. from a device.
+        */
+       expect_value._8 = 0;
+       switch (size) {
+       case 1:
+               expect_value._1 = READ_ONCE(*(const u8 *)ptr);
+               break;
+       case 2:
+               expect_value._2 = READ_ONCE(*(const u16 *)ptr);
+               break;
+       case 4:
+               expect_value._4 = READ_ONCE(*(const u32 *)ptr);
+               break;
+       case 8:
+               expect_value._8 = READ_ONCE(*(const u64 *)ptr);
+               break;
+       default:
+               break; /* ignore; we do not diff the values */
+       }
+
+       if (IS_ENABLED(CONFIG_KCSAN_DEBUG)) {
+               kcsan_disable_current();
+               pr_err("KCSAN: watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n",
+                      is_write ? "write" : "read", size, ptr,
+                      watchpoint_slot((unsigned long)ptr),
+                      encode_watchpoint((unsigned long)ptr, size, is_write));
+               kcsan_enable_current();
+       }
+
+       /*
+        * Delay this thread, to increase probability of observing a racy
+        * conflicting access.
+        */
+       udelay(get_delay());
+
+       /*
+        * Re-read value, and check if it is as expected; if not, we infer a
+        * racy access.
+        */
+       access_mask = get_ctx()->access_mask;
+       switch (size) {
+       case 1:
+               expect_value._1 ^= READ_ONCE(*(const u8 *)ptr);
+               if (access_mask)
+                       expect_value._1 &= (u8)access_mask;
+               break;
+       case 2:
+               expect_value._2 ^= READ_ONCE(*(const u16 *)ptr);
+               if (access_mask)
+                       expect_value._2 &= (u16)access_mask;
+               break;
+       case 4:
+               expect_value._4 ^= READ_ONCE(*(const u32 *)ptr);
+               if (access_mask)
+                       expect_value._4 &= (u32)access_mask;
+               break;
+       case 8:
+               expect_value._8 ^= READ_ONCE(*(const u64 *)ptr);
+               if (access_mask)
+                       expect_value._8 &= (u64)access_mask;
+               break;
+       default:
+               break; /* ignore; we do not diff the values */
+       }
+
+       /* Were we able to observe a value-change? */
+       if (expect_value._8 != 0)
+               value_change = KCSAN_VALUE_CHANGE_TRUE;
+
+       /* Check if this access raced with another. */
+       if (!consume_watchpoint(watchpoint)) {
+               /*
+                * Depending on the access type, map a value_change of MAYBE to
+                * TRUE (always report) or FALSE (never report).
+                */
+               if (value_change == KCSAN_VALUE_CHANGE_MAYBE) {
+                       if (access_mask != 0) {
+                               /*
+                                * For access with access_mask, we require a
+                                * value-change, as it is likely that races on
+                                * ~access_mask bits are expected.
+                                */
+                               value_change = KCSAN_VALUE_CHANGE_FALSE;
+                       } else if (size > 8 || is_assert) {
+                               /* Always assume a value-change. */
+                               value_change = KCSAN_VALUE_CHANGE_TRUE;
+                       }
+               }
+
+               /*
+                * No need to increment 'data_races' counter, as the racing
+                * thread already did.
+                *
+                * Count 'assert_failures' for each failed ASSERT access,
+                * therefore both this thread and the racing thread may
+                * increment this counter.
+                */
+               if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE)
+                       kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES);
+
+               kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL,
+                            watchpoint - watchpoints);
+       } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) {
+               /* Inferring a race, since the value should not have changed. */
+
+               kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN);
+               if (is_assert)
+                       kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES);
+
+               if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert)
+                       kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE,
+                                    KCSAN_REPORT_RACE_UNKNOWN_ORIGIN,
+                                    watchpoint - watchpoints);
+       }
+
+       /*
+        * Remove watchpoint; must be after reporting, since the slot may be
+        * reused after this point.
+        */
+       remove_watchpoint(watchpoint);
+       kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
+out_unlock:
+       if (!kcsan_interrupt_watcher)
+               raw_local_irq_restore(irq_flags);
+out:
+       user_access_restore(ua_flags);
+}
+
+static __always_inline void check_access(const volatile void *ptr, size_t size,
+                                        int type)
+{
+       const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0;
+       atomic_long_t *watchpoint;
+       long encoded_watchpoint;
+
+       /*
+        * Do nothing for 0 sized check; this comparison will be optimized out
+        * for constant sized instrumentation (__tsan_{read,write}N).
+        */
+       if (unlikely(size == 0))
+               return;
+
+       /*
+        * Avoid user_access_save in fast-path: find_watchpoint is safe without
+        * user_access_save, as the address that ptr points to is only used to
+        * check if a watchpoint exists; ptr is never dereferenced.
+        */
+       watchpoint = find_watchpoint((unsigned long)ptr, size, !is_write,
+                                    &encoded_watchpoint);
+       /*
+        * It is safe to check kcsan_is_enabled() after find_watchpoint in the
+        * slow-path, as long as no state changes that cause a race to be
+        * detected and reported have occurred until kcsan_is_enabled() is
+        * checked.
+        */
+
+       if (unlikely(watchpoint != NULL))
+               kcsan_found_watchpoint(ptr, size, type, watchpoint,
+                                      encoded_watchpoint);
+       else {
+               struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */
+
+               if (unlikely(should_watch(ptr, size, type, ctx)))
+                       kcsan_setup_watchpoint(ptr, size, type);
+               else if (unlikely(ctx->scoped_accesses.prev))
+                       kcsan_check_scoped_accesses();
+       }
+}
+
+/* === Public interface ===================================================== */
+
+void __init kcsan_init(void)
+{
+       BUG_ON(!in_task());
+
+       kcsan_debugfs_init();
+
+       /*
+        * We are in the init task, and no other tasks should be running;
+        * WRITE_ONCE without memory barrier is sufficient.
+        */
+       if (kcsan_early_enable)
+               WRITE_ONCE(kcsan_enabled, true);
+}
+
+/* === Exported interface =================================================== */
+
+void kcsan_disable_current(void)
+{
+       ++get_ctx()->disable_count;
+}
+EXPORT_SYMBOL(kcsan_disable_current);
+
+void kcsan_enable_current(void)
+{
+       if (get_ctx()->disable_count-- == 0) {
+               /*
+                * Warn if kcsan_enable_current() calls are unbalanced with
+                * kcsan_disable_current() calls, which causes disable_count to
+                * become negative and should not happen.
+                */
+               kcsan_disable_current(); /* restore to 0, KCSAN still enabled */
+               kcsan_disable_current(); /* disable to generate warning */
+               WARN(1, "Unbalanced %s()", __func__);
+               kcsan_enable_current();
+       }
+}
+EXPORT_SYMBOL(kcsan_enable_current);
+
+void kcsan_enable_current_nowarn(void)
+{
+       if (get_ctx()->disable_count-- == 0)
+               kcsan_disable_current();
+}
+EXPORT_SYMBOL(kcsan_enable_current_nowarn);
+
+void kcsan_nestable_atomic_begin(void)
+{
+       /*
+        * Do *not* check and warn if we are in a flat atomic region: nestable
+        * and flat atomic regions are independent from each other.
+        * See the struct kcsan_ctx comments in include/linux/kcsan.h for more
+        * details.
+        */
+
+       ++get_ctx()->atomic_nest_count;
+}
+EXPORT_SYMBOL(kcsan_nestable_atomic_begin);
+
+void kcsan_nestable_atomic_end(void)
+{
+       if (get_ctx()->atomic_nest_count-- == 0) {
+               /*
+                * Warn if kcsan_nestable_atomic_end() calls are unbalanced with
+                * kcsan_nestable_atomic_begin() calls, which causes
+                * atomic_nest_count to become negative and should not happen.
+                */
+               kcsan_nestable_atomic_begin(); /* restore to 0 */
+               kcsan_disable_current(); /* disable to generate warning */
+               WARN(1, "Unbalanced %s()", __func__);
+               kcsan_enable_current();
+       }
+}
+EXPORT_SYMBOL(kcsan_nestable_atomic_end);
+
+void kcsan_flat_atomic_begin(void)
+{
+       get_ctx()->in_flat_atomic = true;
+}
+EXPORT_SYMBOL(kcsan_flat_atomic_begin);
+
+void kcsan_flat_atomic_end(void)
+{
+       get_ctx()->in_flat_atomic = false;
+}
+EXPORT_SYMBOL(kcsan_flat_atomic_end);
+
+void kcsan_atomic_next(int n)
+{
+       get_ctx()->atomic_next = n;
+}
+EXPORT_SYMBOL(kcsan_atomic_next);
+
+void kcsan_set_access_mask(unsigned long mask)
+{
+       get_ctx()->access_mask = mask;
+}
+EXPORT_SYMBOL(kcsan_set_access_mask);
+
+struct kcsan_scoped_access *
+kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
+                         struct kcsan_scoped_access *sa)
+{
+       struct kcsan_ctx *ctx = get_ctx();
+
+       __kcsan_check_access(ptr, size, type);
+
+       ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
+
+       INIT_LIST_HEAD(&sa->list);
+       sa->ptr = ptr;
+       sa->size = size;
+       sa->type = type;
+
+       if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */
+               INIT_LIST_HEAD(&ctx->scoped_accesses);
+       list_add(&sa->list, &ctx->scoped_accesses);
+
+       ctx->disable_count--;
+       return sa;
+}
+EXPORT_SYMBOL(kcsan_begin_scoped_access);
+
+void kcsan_end_scoped_access(struct kcsan_scoped_access *sa)
+{
+       struct kcsan_ctx *ctx = get_ctx();
+
+       if (WARN(!ctx->scoped_accesses.prev, "Unbalanced %s()?", __func__))
+               return;
+
+       ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */
+
+       list_del(&sa->list);
+       if (list_empty(&ctx->scoped_accesses))
+               /*
+                * Ensure we do not enter kcsan_check_scoped_accesses()
+                * slow-path if unnecessary, and avoid requiring a list_empty()
+                * check in the fast-path (to avoid a READ_ONCE() and potential
+                * uaccess warning).
+                */
+               ctx->scoped_accesses.prev = NULL;
+
+       ctx->disable_count--;
+
+       __kcsan_check_access(sa->ptr, sa->size, sa->type);
+}
+EXPORT_SYMBOL(kcsan_end_scoped_access);
+
+void __kcsan_check_access(const volatile void *ptr, size_t size, int type)
+{
+       check_access(ptr, size, type);
+}
+EXPORT_SYMBOL(__kcsan_check_access);
+
+/*
+ * KCSAN uses the same instrumentation that is emitted by supported compilers
+ * for ThreadSanitizer (TSAN).
+ *
+ * When enabled, the compiler emits instrumentation calls (the functions
+ * prefixed with "__tsan" below) for all loads and stores that it generated;
+ * inline asm is not instrumented.
+ *
+ * Note that not all supported compiler versions distinguish aligned/unaligned
+ * accesses, but e.g. recent versions of Clang do. We simply alias the unaligned
+ * version to the generic version, which can handle both.
+ */
+
+#define DEFINE_TSAN_READ_WRITE(size)                                           \
+       void __tsan_read##size(void *ptr)                                      \
+       {                                                                      \
+               check_access(ptr, size, 0);                                    \
+       }                                                                      \
+       EXPORT_SYMBOL(__tsan_read##size);                                      \
+       void __tsan_unaligned_read##size(void *ptr)                            \
+               __alias(__tsan_read##size);                                    \
+       EXPORT_SYMBOL(__tsan_unaligned_read##size);                            \
+       void __tsan_write##size(void *ptr)                                     \
+       {                                                                      \
+               check_access(ptr, size, KCSAN_ACCESS_WRITE);                   \
+       }                                                                      \
+       EXPORT_SYMBOL(__tsan_write##size);                                     \
+       void __tsan_unaligned_write##size(void *ptr)                           \
+               __alias(__tsan_write##size);                                   \
+       EXPORT_SYMBOL(__tsan_unaligned_write##size)
+
+DEFINE_TSAN_READ_WRITE(1);
+DEFINE_TSAN_READ_WRITE(2);
+DEFINE_TSAN_READ_WRITE(4);
+DEFINE_TSAN_READ_WRITE(8);
+DEFINE_TSAN_READ_WRITE(16);
+
+void __tsan_read_range(void *ptr, size_t size)
+{
+       check_access(ptr, size, 0);
+}
+EXPORT_SYMBOL(__tsan_read_range);
+
+void __tsan_write_range(void *ptr, size_t size)
+{
+       check_access(ptr, size, KCSAN_ACCESS_WRITE);
+}
+EXPORT_SYMBOL(__tsan_write_range);
+
+/*
+ * Use of explicit volatile is generally disallowed [1], however, volatile is
+ * still used in various concurrent contexts, whether in low-level
+ * synchronization primitives or for legacy reasons.
+ * [1] https://lwn.net/Articles/233479/
+ *
+ * We only consider volatile accesses atomic if they are aligned and would pass
+ * the size-check of compiletime_assert_rwonce_type().
+ */
+#define DEFINE_TSAN_VOLATILE_READ_WRITE(size)                                  \
+       void __tsan_volatile_read##size(void *ptr)                             \
+       {                                                                      \
+               const bool is_atomic = size <= sizeof(long long) &&            \
+                                      IS_ALIGNED((unsigned long)ptr, size);   \
+               if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic)      \
+                       return;                                                \
+               check_access(ptr, size, is_atomic ? KCSAN_ACCESS_ATOMIC : 0);  \
+       }                                                                      \
+       EXPORT_SYMBOL(__tsan_volatile_read##size);                             \
+       void __tsan_unaligned_volatile_read##size(void *ptr)                   \
+               __alias(__tsan_volatile_read##size);                           \
+       EXPORT_SYMBOL(__tsan_unaligned_volatile_read##size);                   \
+       void __tsan_volatile_write##size(void *ptr)                            \
+       {                                                                      \
+               const bool is_atomic = size <= sizeof(long long) &&            \
+                                      IS_ALIGNED((unsigned long)ptr, size);   \
+               if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS) && is_atomic)      \
+                       return;                                                \
+               check_access(ptr, size,                                        \
+                            KCSAN_ACCESS_WRITE |                              \
+                                    (is_atomic ? KCSAN_ACCESS_ATOMIC : 0));   \
+       }                                                                      \
+       EXPORT_SYMBOL(__tsan_volatile_write##size);                            \
+       void __tsan_unaligned_volatile_write##size(void *ptr)                  \
+               __alias(__tsan_volatile_write##size);                          \
+       EXPORT_SYMBOL(__tsan_unaligned_volatile_write##size)
+
+DEFINE_TSAN_VOLATILE_READ_WRITE(1);
+DEFINE_TSAN_VOLATILE_READ_WRITE(2);
+DEFINE_TSAN_VOLATILE_READ_WRITE(4);
+DEFINE_TSAN_VOLATILE_READ_WRITE(8);
+DEFINE_TSAN_VOLATILE_READ_WRITE(16);
+
+/*
+ * The below are not required by KCSAN, but can still be emitted by the
+ * compiler.
+ */
+void __tsan_func_entry(void *call_pc)
+{
+}
+EXPORT_SYMBOL(__tsan_func_entry);
+void __tsan_func_exit(void)
+{
+}
+EXPORT_SYMBOL(__tsan_func_exit);
+void __tsan_init(void)
+{
+}
+EXPORT_SYMBOL(__tsan_init);
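
For illustration only (not part of the patch above): a minimal userspace C sketch of the calls a TSAN-instrumented compiler conceptually emits for a plain 4-byte load and store, matching the __tsan_read4()/__tsan_write4() hooks defined above. The stub hook bodies and function names below are placeholders, not the kernel implementations.

#include <stdio.h>

/* Stand-ins for the kernel's __tsan_read4()/__tsan_write4() hooks above. */
static void __tsan_read4(void *ptr)  { printf("read4  %p\n", ptr); }
static void __tsan_write4(void *ptr) { printf("write4 %p\n", ptr); }

static int shared;

/* Source "shared = shared + 1;" as the compiler would instrument it. */
static void instrumented_increment(void)
{
	int tmp;

	__tsan_read4(&shared);		/* emitted before the 4-byte load */
	tmp = shared;
	__tsan_write4(&shared);		/* emitted before the 4-byte store */
	shared = tmp + 1;
}

int main(void)
{
	instrumented_increment();
	return 0;
}
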
diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c
new file mode 100644 (file)
index 0000000..023e49c
--- /dev/null
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/atomic.h>
+#include <linux/bsearch.h>
+#include <linux/bug.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+
+#include "kcsan.h"
+
+/*
+ * Statistics counters.
+ */
+static atomic_long_t counters[KCSAN_COUNTER_COUNT];
+
+/*
+ * Addresses for filtering functions from reporting. This list can be used as a
+ * whitelist or blacklist.
+ */
+static struct {
+       unsigned long   *addrs;         /* array of addresses */
+       size_t          size;           /* current size */
+       int             used;           /* number of elements used */
+       bool            sorted;         /* if elements are sorted */
+       bool            whitelist;      /* true: whitelist; false: blacklist */
+} report_filterlist = {
+       .addrs          = NULL,
+       .size           = 8,            /* small initial size */
+       .used           = 0,
+       .sorted         = false,
+       .whitelist      = false,        /* default is blacklist */
+};
+static DEFINE_SPINLOCK(report_filterlist_lock);
+
+static const char *counter_to_name(enum kcsan_counter_id id)
+{
+       switch (id) {
+       case KCSAN_COUNTER_USED_WATCHPOINTS:            return "used_watchpoints";
+       case KCSAN_COUNTER_SETUP_WATCHPOINTS:           return "setup_watchpoints";
+       case KCSAN_COUNTER_DATA_RACES:                  return "data_races";
+       case KCSAN_COUNTER_ASSERT_FAILURES:             return "assert_failures";
+       case KCSAN_COUNTER_NO_CAPACITY:                 return "no_capacity";
+       case KCSAN_COUNTER_REPORT_RACES:                return "report_races";
+       case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN:        return "races_unknown_origin";
+       case KCSAN_COUNTER_UNENCODABLE_ACCESSES:        return "unencodable_accesses";
+       case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES:    return "encoding_false_positives";
+       case KCSAN_COUNTER_COUNT:
+               BUG();
+       }
+       return NULL;
+}
+
+void kcsan_counter_inc(enum kcsan_counter_id id)
+{
+       atomic_long_inc(&counters[id]);
+}
+
+void kcsan_counter_dec(enum kcsan_counter_id id)
+{
+       atomic_long_dec(&counters[id]);
+}
+
+/*
+ * The microbenchmark allows benchmarking KCSAN core runtime only. To run
+ * multiple threads, pipe 'microbench=<iters>' from multiple tasks into the
+ * debugfs file. This will not generate any conflicts, and tests fast-path only.
+ */
+static noinline void microbenchmark(unsigned long iters)
+{
+       const struct kcsan_ctx ctx_save = current->kcsan_ctx;
+       const bool was_enabled = READ_ONCE(kcsan_enabled);
+       cycles_t cycles;
+
+       /* We may have been called from an atomic region; reset context. */
+       memset(&current->kcsan_ctx, 0, sizeof(current->kcsan_ctx));
+       /*
+        * Disable to benchmark the fast-path for all accesses, plus the (expectedly
+        * negligible) calls into the slow-path, which never set up watchpoints.
+        */
+       WRITE_ONCE(kcsan_enabled, false);
+
+       pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters);
+
+       cycles = get_cycles();
+       while (iters--) {
+               unsigned long addr = iters & ((PAGE_SIZE << 8) - 1);
+               int type = !(iters & 0x7f) ? KCSAN_ACCESS_ATOMIC :
+                               (!(iters & 0xf) ? KCSAN_ACCESS_WRITE : 0);
+               __kcsan_check_access((void *)addr, sizeof(long), type);
+       }
+       cycles = get_cycles() - cycles;
+
+       pr_info("KCSAN: %s end   | cycles: %llu\n", __func__, cycles);
+
+       WRITE_ONCE(kcsan_enabled, was_enabled);
+       /* restore context */
+       current->kcsan_ctx = ctx_save;
+}
+
+/*
+ * Simple test to create conflicting accesses. Write 'test=<iters>' to KCSAN's
+ * debugfs file from multiple tasks to generate real conflicts and show reports.
+ */
+static long test_dummy;
+static long test_flags;
+static long test_scoped;
+static noinline void test_thread(unsigned long iters)
+{
+       const long CHANGE_BITS = 0xff00ff00ff00ff00L;
+       const struct kcsan_ctx ctx_save = current->kcsan_ctx;
+       cycles_t cycles;
+
+       /* We may have been called from an atomic region; reset context. */
+       memset(&current->kcsan_ctx, 0, sizeof(current->kcsan_ctx));
+
+       pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters);
+       pr_info("test_dummy@%px, test_flags@%px, test_scoped@%px,\n",
+               &test_dummy, &test_flags, &test_scoped);
+
+       cycles = get_cycles();
+       while (iters--) {
+               /* These all should generate reports. */
+               __kcsan_check_read(&test_dummy, sizeof(test_dummy));
+               ASSERT_EXCLUSIVE_WRITER(test_dummy);
+               ASSERT_EXCLUSIVE_ACCESS(test_dummy);
+
+               ASSERT_EXCLUSIVE_BITS(test_flags, ~CHANGE_BITS); /* no report */
+               __kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */
+
+               ASSERT_EXCLUSIVE_BITS(test_flags, CHANGE_BITS); /* report */
+               __kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */
+
+               /* not actually instrumented */
+               WRITE_ONCE(test_dummy, iters);  /* to observe value-change */
+               __kcsan_check_write(&test_dummy, sizeof(test_dummy));
+
+               test_flags ^= CHANGE_BITS; /* generate value-change */
+               __kcsan_check_write(&test_flags, sizeof(test_flags));
+
+               BUG_ON(current->kcsan_ctx.scoped_accesses.prev);
+               {
+                       /* Should generate reports anywhere in this block. */
+                       ASSERT_EXCLUSIVE_WRITER_SCOPED(test_scoped);
+                       ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_scoped);
+                       BUG_ON(!current->kcsan_ctx.scoped_accesses.prev);
+                       /* Unrelated accesses. */
+                       __kcsan_check_access(&cycles, sizeof(cycles), 0);
+                       __kcsan_check_access(&cycles, sizeof(cycles), KCSAN_ACCESS_ATOMIC);
+               }
+               BUG_ON(current->kcsan_ctx.scoped_accesses.prev);
+       }
+       cycles = get_cycles() - cycles;
+
+       pr_info("KCSAN: %s end   | cycles: %llu\n", __func__, cycles);
+
+       /* restore context */
+       current->kcsan_ctx = ctx_save;
+}
+
+static int cmp_filterlist_addrs(const void *rhs, const void *lhs)
+{
+       const unsigned long a = *(const unsigned long *)rhs;
+       const unsigned long b = *(const unsigned long *)lhs;
+
+       return a < b ? -1 : a == b ? 0 : 1;
+}
+
+bool kcsan_skip_report_debugfs(unsigned long func_addr)
+{
+       unsigned long symbolsize, offset;
+       unsigned long flags;
+       bool ret = false;
+
+       if (!kallsyms_lookup_size_offset(func_addr, &symbolsize, &offset))
+               return false;
+       func_addr -= offset; /* Get function start */
+
+       spin_lock_irqsave(&report_filterlist_lock, flags);
+       if (report_filterlist.used == 0)
+               goto out;
+
+       /* Sort array if it is unsorted, and then do a binary search. */
+       if (!report_filterlist.sorted) {
+               sort(report_filterlist.addrs, report_filterlist.used,
+                    sizeof(unsigned long), cmp_filterlist_addrs, NULL);
+               report_filterlist.sorted = true;
+       }
+       ret = !!bsearch(&func_addr, report_filterlist.addrs,
+                       report_filterlist.used, sizeof(unsigned long),
+                       cmp_filterlist_addrs);
+       if (report_filterlist.whitelist)
+               ret = !ret;
+
+out:
+       spin_unlock_irqrestore(&report_filterlist_lock, flags);
+       return ret;
+}
+
+static void set_report_filterlist_whitelist(bool whitelist)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&report_filterlist_lock, flags);
+       report_filterlist.whitelist = whitelist;
+       spin_unlock_irqrestore(&report_filterlist_lock, flags);
+}
+
+/* Returns 0 on success, error-code otherwise. */
+static ssize_t insert_report_filterlist(const char *func)
+{
+       unsigned long flags;
+       unsigned long addr = kallsyms_lookup_name(func);
+       ssize_t ret = 0;
+
+       if (!addr) {
+               pr_err("KCSAN: could not find function: '%s'\n", func);
+               return -ENOENT;
+       }
+
+       spin_lock_irqsave(&report_filterlist_lock, flags);
+
+       if (report_filterlist.addrs == NULL) {
+               /* initial allocation */
+               report_filterlist.addrs =
+                       kmalloc_array(report_filterlist.size,
+                                     sizeof(unsigned long), GFP_ATOMIC);
+               if (report_filterlist.addrs == NULL) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       } else if (report_filterlist.used == report_filterlist.size) {
+               /* resize filterlist */
+               size_t new_size = report_filterlist.size * 2;
+               unsigned long *new_addrs =
+                       krealloc(report_filterlist.addrs,
+                                new_size * sizeof(unsigned long), GFP_ATOMIC);
+
+               if (new_addrs == NULL) {
+                       /* leave filterlist itself untouched */
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               report_filterlist.size = new_size;
+               report_filterlist.addrs = new_addrs;
+       }
+
+       /* Note: deduplicating should be done in userspace. */
+       report_filterlist.addrs[report_filterlist.used++] = addr;
+       report_filterlist.sorted = false;
+
+out:
+       spin_unlock_irqrestore(&report_filterlist_lock, flags);
+
+       return ret;
+}
+
+static int show_info(struct seq_file *file, void *v)
+{
+       int i;
+       unsigned long flags;
+
+       /* show stats */
+       seq_printf(file, "enabled: %i\n", READ_ONCE(kcsan_enabled));
+       for (i = 0; i < KCSAN_COUNTER_COUNT; ++i)
+               seq_printf(file, "%s: %ld\n", counter_to_name(i),
+                          atomic_long_read(&counters[i]));
+
+       /* show filter functions, and filter type */
+       spin_lock_irqsave(&report_filterlist_lock, flags);
+       seq_printf(file, "\n%s functions: %s\n",
+                  report_filterlist.whitelist ? "whitelisted" : "blacklisted",
+                  report_filterlist.used == 0 ? "none" : "");
+       for (i = 0; i < report_filterlist.used; ++i)
+               seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]);
+       spin_unlock_irqrestore(&report_filterlist_lock, flags);
+
+       return 0;
+}
+
+static int debugfs_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_info, NULL);
+}
+
+static ssize_t
+debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *off)
+{
+       char kbuf[KSYM_NAME_LEN];
+       char *arg;
+       int read_len = count < (sizeof(kbuf) - 1) ? count : (sizeof(kbuf) - 1);
+
+       if (copy_from_user(kbuf, buf, read_len))
+               return -EFAULT;
+       kbuf[read_len] = '\0';
+       arg = strstrip(kbuf);
+
+       if (!strcmp(arg, "on")) {
+               WRITE_ONCE(kcsan_enabled, true);
+       } else if (!strcmp(arg, "off")) {
+               WRITE_ONCE(kcsan_enabled, false);
+       } else if (!strncmp(arg, "microbench=", sizeof("microbench=") - 1)) {
+               unsigned long iters;
+
+               if (kstrtoul(&arg[sizeof("microbench=") - 1], 0, &iters))
+                       return -EINVAL;
+               microbenchmark(iters);
+       } else if (!strncmp(arg, "test=", sizeof("test=") - 1)) {
+               unsigned long iters;
+
+               if (kstrtoul(&arg[sizeof("test=") - 1], 0, &iters))
+                       return -EINVAL;
+               test_thread(iters);
+       } else if (!strcmp(arg, "whitelist")) {
+               set_report_filterlist_whitelist(true);
+       } else if (!strcmp(arg, "blacklist")) {
+               set_report_filterlist_whitelist(false);
+       } else if (arg[0] == '!') {
+               ssize_t ret = insert_report_filterlist(&arg[1]);
+
+               if (ret < 0)
+                       return ret;
+       } else {
+               return -EINVAL;
+       }
+
+       return count;
+}
+
+static const struct file_operations debugfs_ops =
+{
+       .read    = seq_read,
+       .open    = debugfs_open,
+       .write   = debugfs_write,
+       .release = single_release
+};
+
+void __init kcsan_debugfs_init(void)
+{
+       debugfs_create_file("kcsan", 0644, NULL, NULL, &debugfs_ops);
+}
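
For illustration only (not part of the patch): a small userspace sketch driving the debugfs interface implemented above. It assumes debugfs is mounted at /sys/kernel/debug; the symbol name passed to the '!' filter command is a hypothetical example, and kcsan_ctl() is a helper invented here.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write one command string to the "kcsan" debugfs file created above. */
static void kcsan_ctl(const char *cmd)
{
	int fd = open("/sys/kernel/debug/kcsan", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write");
	close(fd);
}

int main(void)
{
	kcsan_ctl("on");			/* enable KCSAN */
	kcsan_ctl("whitelist");			/* only report listed functions */
	kcsan_ctl("!some_function_name");	/* hypothetical symbol to filter on */
	kcsan_ctl("microbench=1000000");	/* benchmark the runtime fast-path */
	return 0;
}
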
diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
new file mode 100644 (file)
index 0000000..f03562a
--- /dev/null
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _KERNEL_KCSAN_ENCODING_H
+#define _KERNEL_KCSAN_ENCODING_H
+
+#include <linux/bits.h>
+#include <linux/log2.h>
+#include <linux/mm.h>
+
+#include "kcsan.h"
+
+#define SLOT_RANGE PAGE_SIZE
+
+#define INVALID_WATCHPOINT  0
+#define CONSUMED_WATCHPOINT 1
+
+/*
+ * The maximum useful size of accesses for which we set up watchpoints is the
+ * max range of slots we check on an access.
+ */
+#define MAX_ENCODABLE_SIZE (SLOT_RANGE * (1 + KCSAN_CHECK_ADJACENT))
+
+/*
+ * Number of bits we use to store size info.
+ */
+#define WATCHPOINT_SIZE_BITS bits_per(MAX_ENCODABLE_SIZE)
+/*
+ * This encoding discards the upper address bits (1 is-write bit + WATCHPOINT_SIZE_BITS);
+ * however, most 64-bit architectures do not use the full 64-bit address space.
+ * Also, for a false positive to be observable, two things need to happen:
+ *
+ *     1. different addresses but with the same encoded address race;
+ *     2. and both map onto the same watchpoint slots;
+ *
+ * Both of these are assumed to be very unlikely. However, in case it still
+ * happens, the report logic will filter out the false positive (see report.c).
+ */
+#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS)
+
+/*
+ * Masks to set/retrieve the encoded data.
+ */
+#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG-1)
+#define WATCHPOINT_SIZE_MASK                                                   \
+       GENMASK(BITS_PER_LONG-2, BITS_PER_LONG-2 - WATCHPOINT_SIZE_BITS)
+#define WATCHPOINT_ADDR_MASK                                                   \
+       GENMASK(BITS_PER_LONG-3 - WATCHPOINT_SIZE_BITS, 0)
+
+static inline bool check_encodable(unsigned long addr, size_t size)
+{
+       return size <= MAX_ENCODABLE_SIZE;
+}
+
+static inline long
+encode_watchpoint(unsigned long addr, size_t size, bool is_write)
+{
+       return (long)((is_write ? WATCHPOINT_WRITE_MASK : 0) |
+                     (size << WATCHPOINT_ADDR_BITS) |
+                     (addr & WATCHPOINT_ADDR_MASK));
+}
+
+static __always_inline bool decode_watchpoint(long watchpoint,
+                                             unsigned long *addr_masked,
+                                             size_t *size,
+                                             bool *is_write)
+{
+       if (watchpoint == INVALID_WATCHPOINT ||
+           watchpoint == CONSUMED_WATCHPOINT)
+               return false;
+
+       *addr_masked =    (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK;
+       *size        =   ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> WATCHPOINT_ADDR_BITS;
+       *is_write    = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK);
+
+       return true;
+}
+
+/*
+ * Return watchpoint slot for an address.
+ */
+static __always_inline int watchpoint_slot(unsigned long addr)
+{
+       return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS;
+}
+
+static __always_inline bool matching_access(unsigned long addr1, size_t size1,
+                                           unsigned long addr2, size_t size2)
+{
+       unsigned long end_range1 = addr1 + size1 - 1;
+       unsigned long end_range2 = addr2 + size2 - 1;
+
+       return addr1 <= end_range2 && addr2 <= end_range1;
+}
+
+#endif /* _KERNEL_KCSAN_ENCODING_H */
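
For illustration only (not part of the patch): a simplified, self-consistent userspace sketch of the watchpoint encoding above, assuming a 64-bit long, 4 KiB pages and KCSAN_CHECK_ADJACENT == 1. Under those assumptions MAX_ENCODABLE_SIZE is 8192, bits_per(8192) is 14, and 64 - 1 - 14 = 49 bits remain for the masked address. The mask layout below is a sketch of the same idea, not a copy of the GENMASK-based macros above.

#include <stdint.h>
#include <stdio.h>

#define SIZE_BITS	14			/* bits_per(8192) */
#define ADDR_BITS	(64 - 1 - SIZE_BITS)	/* 49 address bits */
#define WRITE_MASK	(1UL << 63)
#define SIZE_MASK	(((1UL << SIZE_BITS) - 1) << ADDR_BITS)
#define ADDR_MASK	((1UL << ADDR_BITS) - 1)

/* Pack is-write, size and masked address into one long, as above. */
static long encode(unsigned long addr, unsigned long size, int is_write)
{
	return (long)((is_write ? WRITE_MASK : 0) |
		      (size << ADDR_BITS) | (addr & ADDR_MASK));
}

int main(void)
{
	unsigned long wp = (unsigned long)encode(0xffff888012345678UL, 8, 1);

	printf("addr_masked=%#lx size=%lu is_write=%d\n",
	       wp & ADDR_MASK,			/* low 49 bits */
	       (wp & SIZE_MASK) >> ADDR_BITS,	/* next 14 bits */
	       !!(wp & WRITE_MASK));		/* top bit */
	return 0;
}
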
diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h
new file mode 100644 (file)
index 0000000..763d6d0
--- /dev/null
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * The Kernel Concurrency Sanitizer (KCSAN) infrastructure. For more info please
+ * see Documentation/dev-tools/kcsan.rst.
+ */
+
+#ifndef _KERNEL_KCSAN_KCSAN_H
+#define _KERNEL_KCSAN_KCSAN_H
+
+#include <linux/kcsan.h>
+
+/* The number of adjacent watchpoints to check. */
+#define KCSAN_CHECK_ADJACENT 1
+#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT)
+
+extern unsigned int kcsan_udelay_task;
+extern unsigned int kcsan_udelay_interrupt;
+
+/*
+ * Globally enable and disable KCSAN.
+ */
+extern bool kcsan_enabled;
+
+/*
+ * Initialize debugfs file.
+ */
+void kcsan_debugfs_init(void);
+
+enum kcsan_counter_id {
+       /*
+        * Number of watchpoints currently in use.
+        */
+       KCSAN_COUNTER_USED_WATCHPOINTS,
+
+       /*
+        * Total number of watchpoints set up.
+        */
+       KCSAN_COUNTER_SETUP_WATCHPOINTS,
+
+       /*
+        * Total number of data races.
+        */
+       KCSAN_COUNTER_DATA_RACES,
+
+       /*
+        * Total number of ASSERT failures due to races. If the observed race is
+        * due to two conflicting ASSERT type accesses, then both will be
+        * counted.
+        */
+       KCSAN_COUNTER_ASSERT_FAILURES,
+
+       /*
+        * Number of times no watchpoints were available.
+        */
+       KCSAN_COUNTER_NO_CAPACITY,
+
+       /*
+        * A thread checking a watchpoint raced with another checking thread;
+        * only one will be reported.
+        */
+       KCSAN_COUNTER_REPORT_RACES,
+
+       /*
+        * Observed data value change, but writer thread unknown.
+        */
+       KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN,
+
+       /*
+        * The access cannot be encoded to a valid watchpoint.
+        */
+       KCSAN_COUNTER_UNENCODABLE_ACCESSES,
+
+       /*
+        * Watchpoint encoding caused a watchpoint to fire on mismatching
+        * accesses.
+        */
+       KCSAN_COUNTER_ENCODING_FALSE_POSITIVES,
+
+       KCSAN_COUNTER_COUNT, /* number of counters */
+};
+
+/*
+ * Increment/decrement counter with given id; avoid calling these in fast-path.
+ */
+extern void kcsan_counter_inc(enum kcsan_counter_id id);
+extern void kcsan_counter_dec(enum kcsan_counter_id id);
+
+/*
+ * Returns true if data races in the function symbol that maps to func_addr
+ * (offsets are ignored) should *not* be reported.
+ */
+extern bool kcsan_skip_report_debugfs(unsigned long func_addr);
+
+/*
+ * Value-change states.
+ */
+enum kcsan_value_change {
+       /*
+        * Did not observe a value-change, however, it is valid to report the
+        * race, depending on preferences.
+        */
+       KCSAN_VALUE_CHANGE_MAYBE,
+
+       /*
+        * Did not observe a value-change, and it is invalid to report the race.
+        */
+       KCSAN_VALUE_CHANGE_FALSE,
+
+       /*
+        * The value was observed to change, and the race should be reported.
+        */
+       KCSAN_VALUE_CHANGE_TRUE,
+};
+
+enum kcsan_report_type {
+       /*
+        * The thread that set up the watchpoint and briefly stalled was
+        * signalled that another thread triggered the watchpoint.
+        */
+       KCSAN_REPORT_RACE_SIGNAL,
+
+       /*
+        * A thread found and consumed a matching watchpoint.
+        */
+       KCSAN_REPORT_CONSUMED_WATCHPOINT,
+
+       /*
+        * No other thread was observed to race with the access, but the data
+        * value before and after the stall differs.
+        */
+       KCSAN_REPORT_RACE_UNKNOWN_ORIGIN,
+};
+
+/*
+ * Print a race report from thread that encountered the race.
+ */
+extern void kcsan_report(const volatile void *ptr, size_t size, int access_type,
+                        enum kcsan_value_change value_change,
+                        enum kcsan_report_type type, int watchpoint_idx);
+
+#endif /* _KERNEL_KCSAN_KCSAN_H */
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
new file mode 100644 (file)
index 0000000..ac5f834
--- /dev/null
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/debug_locks.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/preempt.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/stacktrace.h>
+
+#include "kcsan.h"
+#include "encoding.h"
+
+/*
+ * Max. number of stack entries to show in the report.
+ */
+#define NUM_STACK_ENTRIES 64
+
+/* Common access info. */
+struct access_info {
+       const volatile void     *ptr;
+       size_t                  size;
+       int                     access_type;
+       int                     task_pid;
+       int                     cpu_id;
+};
+
+/*
+ * Other thread info: communicated from other racing thread to thread that set
+ * up the watchpoint, which then prints the complete report atomically.
+ */
+struct other_info {
+       struct access_info      ai;
+       unsigned long           stack_entries[NUM_STACK_ENTRIES];
+       int                     num_stack_entries;
+
+       /*
+        * Optionally pass @current. Typically we do not need to pass @current
+        * via @other_info since just @task_pid is sufficient. Passing @current
+        * has additional overhead.
+        *
+        * To safely pass @current, we must either use get_task_struct/
+        * put_task_struct, or stall the thread that populated @other_info.
+        *
+        * We cannot rely on get_task_struct/put_task_struct in case
+        * release_report() races with a task being released, and would have to
+        * free it in release_report(). This may result in deadlock if we want
+        * to use KCSAN on the allocators.
+        *
+        * Since we also want to reliably print held locks for
+        * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
+        * that populated @other_info until it has been consumed.
+        */
+       struct task_struct      *task;
+};
+
+/*
+ * To never block any producers of struct other_info, we need as many elements
+ * as we have watchpoints (upper bound on concurrent races to report).
+ */
+static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
+
+/*
+ * Information about reported races; used to rate limit reporting.
+ */
+struct report_time {
+       /*
+        * The last time the race was reported.
+        */
+       unsigned long time;
+
+       /*
+        * The frames of the 2 threads; if only 1 thread is known, one frame
+        * will be 0.
+        */
+       unsigned long frame1;
+       unsigned long frame2;
+};
+
+/*
+ * Since we also want to be able to debug allocators with KCSAN, to avoid
+ * deadlock, report_times cannot be dynamically resized with krealloc in
+ * rate_limit_report.
+ *
+ * Therefore, we use a fixed-size array, which at most will occupy a page. This
+ * still adequately rate limits reports, assuming that a) number of unique data
+ * races is not excessive, and b) occurrence of unique races within the
+ * same time window is limited.
+ */
+#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
+#define REPORT_TIMES_SIZE                                                      \
+       (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
+                REPORT_TIMES_MAX :                                            \
+                CONFIG_KCSAN_REPORT_ONCE_IN_MS)
+static struct report_time report_times[REPORT_TIMES_SIZE];
+
+/*
+ * Spinlock serializing report generation, and access to @other_infos. Although
+ * it could make sense to have a finer-grained locking story for @other_infos,
+ * report generation needs to be serialized either way, so not much is gained.
+ */
+static DEFINE_RAW_SPINLOCK(report_lock);
+
+/*
+ * Checks if the race identified by thread frames frame1 and frame2 has
+ * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
+ */
+static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
+{
+       struct report_time *use_entry = &report_times[0];
+       unsigned long invalid_before;
+       int i;
+
+       BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
+
+       if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
+               return false;
+
+       invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS);
+
+       /* Check if a matching race report exists. */
+       for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
+               struct report_time *rt = &report_times[i];
+
+               /*
+                * Must always select an entry for use to store info as we
+                * cannot resize report_times; at the end of the scan, use_entry
+                * will be the oldest entry, which ideally also happened before
+                * KCSAN_REPORT_ONCE_IN_MS ago.
+                */
+               if (time_before(rt->time, use_entry->time))
+                       use_entry = rt;
+
+               /*
+                * Initially, no need to check any further as this entry as well
+                * as following entries have never been used.
+                */
+               if (rt->time == 0)
+                       break;
+
+               /* Check if entry expired. */
+               if (time_before(rt->time, invalid_before))
+                       continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
+
+               /* Reported recently, check if race matches. */
+               if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
+                   (rt->frame1 == frame2 && rt->frame2 == frame1))
+                       return true;
+       }
+
+       use_entry->time = jiffies;
+       use_entry->frame1 = frame1;
+       use_entry->frame2 = frame2;
+       return false;
+}
+
+/*
+ * Special rules to skip reporting.
+ */
+static bool
+skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
+{
+       /* Should never get here if value_change==FALSE. */
+       WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE);
+
+       /*
+        * The first call to skip_report always has value_change==TRUE, since we
+        * cannot know the value written by an instrumented access. For the 2nd
+        * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
+        *
+        * 1. read watchpoint, conflicting write (value_change==TRUE): report;
+        * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
+        * 3. write watchpoint, conflicting write (value_change==TRUE): report;
+        * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
+        * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
+        * 6. write watchpoint, conflicting read (value_change==TRUE): report;
+        *
+        * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
+        * data races where the write may have rewritten the same value; case 6
+        * is possible either if the size is larger than what we check value
+        * changes for or the access type is KCSAN_ACCESS_ASSERT.
+        */
+       if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) &&
+           value_change == KCSAN_VALUE_CHANGE_MAYBE) {
+               /*
+                * The access is a write, but the data value did not change.
+                *
+                * We opt-out of this filter for certain functions at request of
+                * maintainers.
+                */
+               char buf[64];
+               int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame);
+
+               if (!strnstr(buf, "rcu_", len) &&
+                   !strnstr(buf, "_rcu", len) &&
+                   !strnstr(buf, "_srcu", len))
+                       return true;
+       }
+
+       return kcsan_skip_report_debugfs(top_frame);
+}
+
+static const char *get_access_type(int type)
+{
+       if (type & KCSAN_ACCESS_ASSERT) {
+               if (type & KCSAN_ACCESS_SCOPED) {
+                       if (type & KCSAN_ACCESS_WRITE)
+                               return "assert no accesses (scoped)";
+                       else
+                               return "assert no writes (scoped)";
+               } else {
+                       if (type & KCSAN_ACCESS_WRITE)
+                               return "assert no accesses";
+                       else
+                               return "assert no writes";
+               }
+       }
+
+       switch (type) {
+       case 0:
+               return "read";
+       case KCSAN_ACCESS_ATOMIC:
+               return "read (marked)";
+       case KCSAN_ACCESS_WRITE:
+               return "write";
+       case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
+               return "write (marked)";
+       case KCSAN_ACCESS_SCOPED:
+               return "read (scoped)";
+       case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
+               return "read (marked, scoped)";
+       case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE:
+               return "write (scoped)";
+       case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
+               return "write (marked, scoped)";
+       default:
+               BUG();
+       }
+}
+
+static const char *get_bug_type(int type)
+{
+       return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race";
+}
+
+/* Return thread description: in task or interrupt. */
+static const char *get_thread_desc(int task_id)
+{
+       if (task_id != -1) {
+               static char buf[32]; /* safe: protected by report_lock */
+
+               snprintf(buf, sizeof(buf), "task %i", task_id);
+               return buf;
+       }
+       return "interrupt";
+}
+
+/* Helper to skip KCSAN-related functions in stack-trace. */
+static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries)
+{
+       char buf[64];
+       char *cur;
+       int len, skip;
+
+       for (skip = 0; skip < num_entries; ++skip) {
+               len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);
+
+               /* Never show tsan_* or {read,write}_once_size. */
+               if (strnstr(buf, "tsan_", len) ||
+                   strnstr(buf, "_once_size", len))
+                       continue;
+
+               cur = strnstr(buf, "kcsan_", len);
+               if (cur) {
+                       cur += sizeof("kcsan_") - 1;
+                       if (strncmp(cur, "test", sizeof("test") - 1))
+                               continue; /* KCSAN runtime function. */
+                       /* KCSAN related test. */
+               }
+
+               /*
+                * No match for runtime functions -- @skip entries to skip to
+                * get to first frame of interest.
+                */
+               break;
+       }
+
+       return skip;
+}
+
+/* Compares symbolized strings of addr1 and addr2. */
+static int sym_strcmp(void *addr1, void *addr2)
+{
+       char buf1[64];
+       char buf2[64];
+
+       snprintf(buf1, sizeof(buf1), "%pS", addr1);
+       snprintf(buf2, sizeof(buf2), "%pS", addr2);
+
+       return strncmp(buf1, buf2, sizeof(buf1));
+}
+
+static void print_verbose_info(struct task_struct *task)
+{
+       if (!task)
+               return;
+
+       pr_err("\n");
+       debug_show_held_locks(task);
+       print_irqtrace_events(task);
+}
+
+/*
+ * Returns true if a report was generated, false otherwise.
+ */
+static bool print_report(enum kcsan_value_change value_change,
+                        enum kcsan_report_type type,
+                        const struct access_info *ai,
+                        const struct other_info *other_info)
+{
+       unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
+       int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
+       int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
+       unsigned long this_frame = stack_entries[skipnr];
+       unsigned long other_frame = 0;
+       int other_skipnr = 0; /* silence uninit warnings */
+
+       /*
+        * Must check report filter rules before starting to print.
+        */
+       if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr]))
+               return false;
+
+       if (type == KCSAN_REPORT_RACE_SIGNAL) {
+               other_skipnr = get_stack_skipnr(other_info->stack_entries,
+                                               other_info->num_stack_entries);
+               other_frame = other_info->stack_entries[other_skipnr];
+
+               /* @value_change is only known for the other thread */
+               if (skip_report(value_change, other_frame))
+                       return false;
+       }
+
+       if (rate_limit_report(this_frame, other_frame))
+               return false;
+
+       /* Print report header. */
+       pr_err("==================================================================\n");
+       switch (type) {
+       case KCSAN_REPORT_RACE_SIGNAL: {
+               int cmp;
+
+               /*
+                * Order functions lexicographically for consistent bug titles.
+                * Do not print offset of functions to keep title short.
+                */
+               cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
+               pr_err("BUG: KCSAN: %s in %ps / %ps\n",
+                      get_bug_type(ai->access_type | other_info->ai.access_type),
+                      (void *)(cmp < 0 ? other_frame : this_frame),
+                      (void *)(cmp < 0 ? this_frame : other_frame));
+       } break;
+
+       case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
+               pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type),
+                      (void *)this_frame);
+               break;
+
+       default:
+               BUG();
+       }
+
+       pr_err("\n");
+
+       /* Print information about the racing accesses. */
+       switch (type) {
+       case KCSAN_REPORT_RACE_SIGNAL:
+               pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
+                      get_access_type(other_info->ai.access_type), other_info->ai.ptr,
+                      other_info->ai.size, get_thread_desc(other_info->ai.task_pid),
+                      other_info->ai.cpu_id);
+
+               /* Print the other thread's stack trace. */
+               stack_trace_print(other_info->stack_entries + other_skipnr,
+                                 other_info->num_stack_entries - other_skipnr,
+                                 0);
+
+               if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+                       print_verbose_info(other_info->task);
+
+               pr_err("\n");
+               pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
+                      get_access_type(ai->access_type), ai->ptr, ai->size,
+                      get_thread_desc(ai->task_pid), ai->cpu_id);
+               break;
+
+       case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
+               pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
+                      get_access_type(ai->access_type), ai->ptr, ai->size,
+                      get_thread_desc(ai->task_pid), ai->cpu_id);
+               break;
+
+       default:
+               BUG();
+       }
+       /* Print stack trace of this thread. */
+       stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
+                         0);
+
+       if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+               print_verbose_info(current);
+
+       /* Print report footer. */
+       pr_err("\n");
+       pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
+       dump_stack_print_info(KERN_DEFAULT);
+       pr_err("==================================================================\n");
+
+       return true;
+}
+
+static void release_report(unsigned long *flags, struct other_info *other_info)
+{
+       if (other_info)
+               /*
+                * Use size to denote valid/invalid, since KCSAN entirely
+                * ignores 0-sized accesses.
+                */
+               other_info->ai.size = 0;
+
+       raw_spin_unlock_irqrestore(&report_lock, *flags);
+}
+
+/*
+ * Sets @other_info->task and awaits consumption of @other_info.
+ *
+ * Precondition: report_lock is held.
+ * Postcondition: report_lock is held.
+ */
+static void set_other_info_task_blocking(unsigned long *flags,
+                                        const struct access_info *ai,
+                                        struct other_info *other_info)
+{
+       /*
+        * We may be instrumenting a code-path where current->state is already
+        * something other than TASK_RUNNING.
+        */
+       const bool is_running = current->state == TASK_RUNNING;
+       /*
+        * To avoid deadlock in case we are in an interrupt here and this is a
+        * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
+        * timeout to ensure this works in all contexts.
+        *
+        * Await approximately the worst case delay of the reporting thread (if
+        * we are not interrupted).
+        */
+       int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);
+
+       other_info->task = current;
+       do {
+               if (is_running) {
+                       /*
+                        * Let lockdep know the real task is sleeping, to print
+                        * the held locks (recall we turned lockdep off, so
+                        * locking/unlocking @report_lock won't be recorded).
+                        */
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+               }
+               raw_spin_unlock_irqrestore(&report_lock, *flags);
+               /*
+                * We cannot call schedule() since we also cannot reliably
+                * determine if sleeping here is permitted -- see in_atomic().
+                */
+
+               udelay(1);
+               raw_spin_lock_irqsave(&report_lock, *flags);
+               if (timeout-- < 0) {
+                       /*
+                        * Abort. Reset @other_info->task to NULL, since it
+                        * appears the other thread is still going to consume
+                        * it. It will result in no verbose info printed for
+                        * this task.
+                        */
+                       other_info->task = NULL;
+                       break;
+               }
+               /*
+                * If invalid, or if neither @ptr nor @current matches, then
+                * @other_info has been consumed and we may continue. If not, retry.
+                */
+       } while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
+                other_info->task == current);
+       if (is_running)
+               set_current_state(TASK_RUNNING);
+}
+
+/* Populate @other_info; requires that the provided @other_info is not in use. */
+static void prepare_report_producer(unsigned long *flags,
+                                   const struct access_info *ai,
+                                   struct other_info *other_info)
+{
+       raw_spin_lock_irqsave(&report_lock, *flags);
+
+       /*
+        * The same @other_infos entry cannot be used concurrently, because
+        * there is a one-to-one mapping to watchpoint slots (@watchpoints in
+        * core.c), and a watchpoint is only released for reuse after reporting
+        * is done by the consumer of @other_info. Therefore, it is impossible
+        * for another concurrent prepare_report_producer() to set the same
+        * @other_info, and are guaranteed exclusivity for the @other_infos
+        * @other_info, and we are guaranteed exclusivity for the @other_infos
+        *
+        * To check this property holds, size should never be non-zero here,
+        * because every consumer of struct other_info resets size to 0 in
+        * release_report().
+        */
+       WARN_ON(other_info->ai.size);
+
+       other_info->ai = *ai;
+       other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);
+
+       if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
+               set_other_info_task_blocking(flags, ai, other_info);
+
+       raw_spin_unlock_irqrestore(&report_lock, *flags);
+}
+
+/* Awaits producer to fill @other_info and then returns. */
+static bool prepare_report_consumer(unsigned long *flags,
+                                   const struct access_info *ai,
+                                   struct other_info *other_info)
+{
+
+       raw_spin_lock_irqsave(&report_lock, *flags);
+       while (!other_info->ai.size) { /* Await valid @other_info. */
+               raw_spin_unlock_irqrestore(&report_lock, *flags);
+               cpu_relax();
+               raw_spin_lock_irqsave(&report_lock, *flags);
+       }
+
+       /* Should always have a matching access based on watchpoint encoding. */
+       if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
+                                    (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
+               goto discard;
+
+       if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
+                            (unsigned long)ai->ptr, ai->size)) {
+               /*
+                * If the actual accesses do not match, this was a false
+                * positive due to watchpoint encoding.
+                */
+               kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
+               goto discard;
+       }
+
+       return true;
+
+discard:
+       release_report(flags, other_info);
+       return false;
+}
+
+/*
+ * Depending on the report type either sets @other_info and returns false, or
+ * awaits @other_info and returns true. If @other_info is not required for the
+ * report type, simply acquires @report_lock and returns true.
+ */
+static noinline bool prepare_report(unsigned long *flags,
+                                   enum kcsan_report_type type,
+                                   const struct access_info *ai,
+                                   struct other_info *other_info)
+{
+       switch (type) {
+       case KCSAN_REPORT_CONSUMED_WATCHPOINT:
+               prepare_report_producer(flags, ai, other_info);
+               return false;
+       case KCSAN_REPORT_RACE_SIGNAL:
+               return prepare_report_consumer(flags, ai, other_info);
+       default:
+               /* @other_info not required; just acquire @report_lock. */
+               raw_spin_lock_irqsave(&report_lock, *flags);
+               return true;
+       }
+}
+
+void kcsan_report(const volatile void *ptr, size_t size, int access_type,
+                 enum kcsan_value_change value_change,
+                 enum kcsan_report_type type, int watchpoint_idx)
+{
+       unsigned long flags = 0;
+       const struct access_info ai = {
+               .ptr            = ptr,
+               .size           = size,
+               .access_type    = access_type,
+               .task_pid       = in_task() ? task_pid_nr(current) : -1,
+               .cpu_id         = raw_smp_processor_id()
+       };
+       struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
+                                       ? NULL : &other_infos[watchpoint_idx];
+
+       kcsan_disable_current();
+       if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
+               goto out;
+
+       /*
+        * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
+        * we do not turn off lockdep here; this could happen due to recursion
+        * into lockdep via KCSAN if we detect a race in utilities used by
+        * lockdep.
+        */
+       lockdep_off();
+
+       if (prepare_report(&flags, type, &ai, other_info)) {
+               /*
+                * Never report if value_change is FALSE, only if it is
+                * either TRUE or MAYBE. In case of MAYBE, further filtering may
+                * be done once we know the full stack trace in print_report().
+                */
+               bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE &&
+                               print_report(value_change, type, &ai, other_info);
+
+               if (reported && panic_on_warn)
+                       panic("panic_on_warn set ...\n");
+
+               release_report(&flags, other_info);
+       }
+
+       lockdep_on();
+out:
+       kcsan_enable_current();
+}
diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c
new file mode 100644 (file)
index 0000000..d26a052
--- /dev/null
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/types.h>
+
+#include "encoding.h"
+
+#define ITERS_PER_TEST 2000
+
+/* Test requirements. */
+static bool test_requires(void)
+{
+       /* random should be initialized for the below tests */
+       return prandom_u32() + prandom_u32() != 0;
+}
+
+/*
+ * Test watchpoint encode and decode: check that encoding some access's info,
+ * and then subsequent decode preserves the access's info.
+ */
+static bool test_encode_decode(void)
+{
+       int i;
+
+       for (i = 0; i < ITERS_PER_TEST; ++i) {
+               size_t size = prandom_u32_max(MAX_ENCODABLE_SIZE) + 1;
+               bool is_write = !!prandom_u32_max(2);
+               unsigned long addr;
+
+               prandom_bytes(&addr, sizeof(addr));
+               if (WARN_ON(!check_encodable(addr, size)))
+                       return false;
+
+               /* Encode and decode */
+               {
+                       const long encoded_watchpoint =
+                               encode_watchpoint(addr, size, is_write);
+                       unsigned long verif_masked_addr;
+                       size_t verif_size;
+                       bool verif_is_write;
+
+                       /* Check special watchpoints */
+                       if (WARN_ON(decode_watchpoint(
+                                   INVALID_WATCHPOINT, &verif_masked_addr,
+                                   &verif_size, &verif_is_write)))
+                               return false;
+                       if (WARN_ON(decode_watchpoint(
+                                   CONSUMED_WATCHPOINT, &verif_masked_addr,
+                                   &verif_size, &verif_is_write)))
+                               return false;
+
+                       /* Check decoding watchpoint returns same data */
+                       if (WARN_ON(!decode_watchpoint(
+                                   encoded_watchpoint, &verif_masked_addr,
+                                   &verif_size, &verif_is_write)))
+                               return false;
+                       if (WARN_ON(verif_masked_addr !=
+                                   (addr & WATCHPOINT_ADDR_MASK)))
+                               goto fail;
+                       if (WARN_ON(verif_size != size))
+                               goto fail;
+                       if (WARN_ON(is_write != verif_is_write))
+                               goto fail;
+
+                       continue;
+fail:
+                       pr_err("%s fail: %s %zu bytes @ %lx -> encoded: %lx -> %s %zu bytes @ %lx\n",
+                              __func__, is_write ? "write" : "read", size,
+                              addr, encoded_watchpoint,
+                              verif_is_write ? "write" : "read", verif_size,
+                              verif_masked_addr);
+                       return false;
+               }
+       }
+
+       return true;
+}
+
+/* Test access matching function. */
+static bool test_matching_access(void)
+{
+       if (WARN_ON(!matching_access(10, 1, 10, 1)))
+               return false;
+       if (WARN_ON(!matching_access(10, 2, 11, 1)))
+               return false;
+       if (WARN_ON(!matching_access(10, 1, 9, 2)))
+               return false;
+       if (WARN_ON(matching_access(10, 1, 11, 1)))
+               return false;
+       if (WARN_ON(matching_access(9, 1, 10, 1)))
+               return false;
+
+       /*
+        * An access of size 0 could match another access, as demonstrated here.
+        * Rather than add more comparisons to 'matching_access()', which would
+        * end up in the fast-path for *all* checks, check_access() simply
+        * returns for all accesses of size 0.
+        */
+       if (WARN_ON(!matching_access(8, 8, 12, 0)))
+               return false;
+
+       return true;
+}
+
+static int __init kcsan_selftest(void)
+{
+       int passed = 0;
+       int total = 0;
+
+#define RUN_TEST(do_test)                                                      \
+       do {                                                                   \
+               ++total;                                                       \
+               if (do_test())                                                 \
+                       ++passed;                                              \
+               else                                                           \
+                       pr_err("KCSAN selftest: " #do_test " failed");         \
+       } while (0)
+
+       RUN_TEST(test_requires);
+       RUN_TEST(test_encode_decode);
+       RUN_TEST(test_matching_access);
+
+       pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total);
+       if (passed != total)
+               panic("KCSAN selftests failed");
+       return 0;
+}
+postcore_initcall(kcsan_selftest);
index b84fc7e..8e3d2d7 100644 (file)
@@ -1,13 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Kernel thread helper functions.
  *   Copyright (C) 2004 IBM Corporation, Rusty Russell.
+ *   Copyright (C) 2009 Red Hat, Inc.
  *
  * Creation is done via kthreadd, so that we get a clean environment
  * even if we're invoked from userspace (think modprobe, hotplug cpu,
  * etc.).
  */
 #include <uapi/linux/sched/types.h>
+#include <linux/mm.h>
+#include <linux/mmu_context.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/sched/task.h>
 #include <linux/kthread.h>
 #include <linux/completion.h>
@@ -25,6 +29,7 @@
 #include <linux/numa.h>
 #include <trace/events/sched.h>
 
+
 static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
@@ -48,6 +53,7 @@ struct kthread {
        unsigned int cpu;
        int (*threadfn)(void *);
        void *data;
+       mm_segment_t oldfs;
        struct completion parked;
        struct completion exited;
 #ifdef CONFIG_BLK_CGROUP
@@ -1220,6 +1226,61 @@ void kthread_destroy_worker(struct kthread_worker *worker)
 }
 EXPORT_SYMBOL(kthread_destroy_worker);
 
+/**
+ * kthread_use_mm - make the calling kthread operate on an address space
+ * @mm: address space to operate on
+ */
+void kthread_use_mm(struct mm_struct *mm)
+{
+       struct mm_struct *active_mm;
+       struct task_struct *tsk = current;
+
+       WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD));
+       WARN_ON_ONCE(tsk->mm);
+
+       task_lock(tsk);
+       active_mm = tsk->active_mm;
+       if (active_mm != mm) {
+               mmgrab(mm);
+               tsk->active_mm = mm;
+       }
+       tsk->mm = mm;
+       switch_mm(active_mm, mm, tsk);
+       task_unlock(tsk);
+#ifdef finish_arch_post_lock_switch
+       finish_arch_post_lock_switch();
+#endif
+
+       if (active_mm != mm)
+               mmdrop(active_mm);
+
+       to_kthread(tsk)->oldfs = get_fs();
+       set_fs(USER_DS);
+}
+EXPORT_SYMBOL_GPL(kthread_use_mm);
+
+/**
+ * kthread_unuse_mm - reverse the effect of kthread_use_mm()
+ * @mm: address space to operate on
+ */
+void kthread_unuse_mm(struct mm_struct *mm)
+{
+       struct task_struct *tsk = current;
+
+       WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD));
+       WARN_ON_ONCE(!tsk->mm);
+
+       set_fs(to_kthread(tsk)->oldfs);
+
+       task_lock(tsk);
+       sync_mm_rss(mm);
+       tsk->mm = NULL;
+       /* active_mm is still 'mm' */
+       enter_lazy_tlb(mm, tsk);
+       task_unlock(tsk);
+}
+EXPORT_SYMBOL_GPL(kthread_unuse_mm);
+
 #ifdef CONFIG_BLK_CGROUP
 /**
  * kthread_associate_blkcg - associate blkcg to current kthread
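
For illustration only (not part of the patch): a minimal sketch of a kernel-thread body using the kthread_use_mm()/kthread_unuse_mm() helpers added above to temporarily adopt a user address space. The worker_ctx structure, its fields and worker_fn() are hypothetical; the caller is assumed to hold a reference on ctx->mm.

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm_types.h>
#include <linux/printk.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>

/* Hypothetical per-worker context; the caller holds a reference on ->mm. */
struct worker_ctx {
	struct mm_struct *mm;
	unsigned long __user *ubuf;
};

static int worker_fn(void *data)
{
	struct worker_ctx *ctx = data;

	while (!kthread_should_stop()) {
		unsigned long val = 0;

		kthread_use_mm(ctx->mm);	/* adopt the user address space */
		if (get_user(val, ctx->ubuf))
			val = 0;		/* user access may legitimately fail */
		kthread_unuse_mm(ctx->mm);	/* drop it again */

		pr_info("worker read %lu\n", val);
		msleep(100);
	}
	return 0;
}
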
index 45452fa..6d11cfb 100644 (file)
@@ -5,6 +5,9 @@ KCOV_INSTRUMENT         := n
 
 obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
+# Avoid recursion lockdep -> KCSAN -> ... -> lockdep.
+KCSAN_SANITIZE_lockdep.o := n
+
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE)
index 21fb5a5..5fc9c9b 100644 (file)
@@ -7,6 +7,12 @@ endif
 # that is not a function of syscall inputs. E.g. involuntary context switches.
 KCOV_INSTRUMENT := n
 
+# There are numerous data races here; however, most of them are due to plain accesses.
+# This would make it even harder for syzbot to find reproducers, because these
+# bugs trigger without specific input. Disabled by default; we should re-enable
+# KCSAN here eventually.
+KCSAN_SANITIZE := n
+
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only.  Why this used to be enabled for all architectures is beyond
index 222a7a9..5d4d9bb 100644 (file)
@@ -74,7 +74,7 @@ static void scs_check_usage(struct task_struct *tsk)
        for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
                if (!READ_ONCE_NOCHECK(*p))
                        break;
-               used++;
+               used += sizeof(*p);
        }
 
        while (used > curr) {
index 7cb09c4..02441ea 100644 (file)
@@ -928,14 +928,12 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 
        clocksource_arch_init(cs);
 
-#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE
        if (cs->vdso_clock_mode < 0 ||
            cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
                pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
                        cs->name, cs->vdso_clock_mode);
                cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
        }
-#endif
 
        /* Initialize mult/shift and max_idle_ns */
        __clocksource_update_freq_scale(cs, scale, freq);
index 1d8aaa5..6575bb0 100644 (file)
@@ -6,6 +6,9 @@ ifdef CONFIG_FUNCTION_TRACER
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
 
+# Avoid recursion due to instrumentation.
+KCSAN_SANITIZE := n
+
 ifdef CONFIG_FTRACE_SELFTEST
 # selftest needs instrumentation
 CFLAGS_trace_selftest_dynamic.o = $(CC_FLAGS_FTRACE)
index ea47f20..5773f0b 100644 (file)
@@ -885,10 +885,10 @@ static void blk_add_trace_bio_bounce(void *ignore,
 }
 
 static void blk_add_trace_bio_complete(void *ignore,
-                                      struct request_queue *q, struct bio *bio,
-                                      int error)
+                                      struct request_queue *q, struct bio *bio)
 {
-       blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
+       blk_add_trace_bio(q, bio, BLK_TA_COMPLETE,
+                         blk_status_to_errno(bio->bi_status));
 }
 
 static void blk_add_trace_bio_backmerge(void *ignore,
@@ -995,8 +995,10 @@ static void blk_add_trace_split(void *ignore,
 
                __blk_add_trace(bt, bio->bi_iter.bi_sector,
                                bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
-                               BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
-                               &rpdu, blk_trace_bio_get_cgid(q, bio));
+                               BLK_TA_SPLIT,
+                               blk_status_to_errno(bio->bi_status),
+                               sizeof(rpdu), &rpdu,
+                               blk_trace_bio_get_cgid(q, bio));
        }
        rcu_read_unlock();
 }
@@ -1033,7 +1035,8 @@ static void blk_add_trace_bio_remap(void *ignore,
        r.sector_from = cpu_to_be64(from);
 
        __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
-                       bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
+                       bio_op(bio), bio->bi_opf, BLK_TA_REMAP,
+                       blk_status_to_errno(bio->bi_status),
                        sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
        rcu_read_unlock();
 }
@@ -1253,21 +1256,10 @@ static inline __u16 t_error(const struct trace_entry *ent)
 
 static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg)
 {
-       const __u64 *val = pdu_start(ent, has_cg);
+       const __be64 *val = pdu_start(ent, has_cg);
        return be64_to_cpu(*val);
 }
 
-static void get_pdu_remap(const struct trace_entry *ent,
-                         struct blk_io_trace_remap *r, bool has_cg)
-{
-       const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);
-       __u64 sector_from = __r->sector_from;
-
-       r->device_from = be32_to_cpu(__r->device_from);
-       r->device_to   = be32_to_cpu(__r->device_to);
-       r->sector_from = be64_to_cpu(sector_from);
-}
-
 typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act,
        bool has_cg);
 
@@ -1407,13 +1399,13 @@ static void blk_log_with_error(struct trace_seq *s,
 
 static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
 {
-       struct blk_io_trace_remap r = { .device_from = 0, };
+       const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);
 
-       get_pdu_remap(ent, &r, has_cg);
        trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
                         t_sector(ent), t_sec(ent),
-                        MAJOR(r.device_from), MINOR(r.device_from),
-                        (unsigned long long)r.sector_from);
+                        MAJOR(be32_to_cpu(__r->device_from)),
+                        MINOR(be32_to_cpu(__r->device_from)),
+                        be64_to_cpu(__r->sector_from));
 }
 
 static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
index cb98741..ef675be 100644 (file)
@@ -1570,6 +1570,8 @@ config PROVIDE_OHCI1394_DMA_INIT
 
 source "samples/Kconfig"
 
+source "lib/Kconfig.kcsan"
+
 config ARCH_HAS_DEVMEM_IS_ALLOWED
        bool
 
@@ -2052,15 +2054,15 @@ config TEST_LKM
          If unsure, say N.
 
 config TEST_BITOPS
-       tristate "Test module for compilation of clear_bit/set_bit operations"
+       tristate "Test module for compilation of bitops operations"
        depends on m
        help
          This builds the "test_bitops" module that is much like the
          TEST_LKM module except that it does a basic exercise of the
-         clear_bit and set_bit macros to make sure there are no compiler
-         warnings from C=1 sparse checker or -Wextra compilations. It has
-         no dependencies and doesn't run or load unless explicitly requested
-         by name.  for example: modprobe test_bitops.
+         set/clear_bit macros and get_count_order/long to make sure there are
+         no compiler warnings from C=1 sparse checker or -Wextra
+         compilations. It has no dependencies and doesn't run or load unless
+         explicitly requested by name. For example: modprobe test_bitops.
 
          If unsure, say N.
 
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
new file mode 100644 (file)
index 0000000..5ee88e5
--- /dev/null
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config HAVE_ARCH_KCSAN
+       bool
+
+config HAVE_KCSAN_COMPILER
+       def_bool CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-distinguish-volatile=1)
+       help
+         For the list of compilers that support KCSAN, please see
+         <file:Documentation/dev-tools/kcsan.rst>.
+
+config KCSAN_KCOV_BROKEN
+       def_bool KCOV && CC_HAS_SANCOV_TRACE_PC
+       depends on CC_IS_CLANG
+       depends on !$(cc-option,-Werror=unused-command-line-argument -fsanitize=thread -fsanitize-coverage=trace-pc)
+       help
+         Some versions of clang support either KCSAN or KCOV but not the
+         combination of the two.
+         See https://bugs.llvm.org/show_bug.cgi?id=45831 for the status
+         in newer releases.
+
+menuconfig KCSAN
+       bool "KCSAN: dynamic data race detector"
+       depends on HAVE_ARCH_KCSAN && HAVE_KCSAN_COMPILER
+       depends on DEBUG_KERNEL && !KASAN
+       depends on !KCSAN_KCOV_BROKEN
+       select STACKTRACE
+       help
+         The Kernel Concurrency Sanitizer (KCSAN) is a dynamic
+         data-race detector that relies on compile-time instrumentation.
+         KCSAN uses a watchpoint-based sampling approach to detect races.
+
+         While KCSAN's primary purpose is to detect data races, it
+         also provides assertions to check data access constraints.
+         These assertions can expose bugs that do not manifest as
+         data races.
+
+         See <file:Documentation/dev-tools/kcsan.rst> for more details.
+
+if KCSAN
+
+config KCSAN_VERBOSE
+       bool "Show verbose reports with more information about system state"
+       depends on PROVE_LOCKING
+       help
+         If enabled, reports show more information about the system state that
+         may help better analyze and debug races. This includes held locks and
+         IRQ trace events.
+
+         While this option should generally be benign, we call into more
+         external functions on report generation; if a race report is
+         generated from any one of them, system stability may suffer due to
+         deadlocks or recursion.  If in doubt, say N.
+
+config KCSAN_DEBUG
+       bool "Debugging of KCSAN internals"
+
+config KCSAN_SELFTEST
+       bool "Perform short selftests on boot"
+       default y
+       help
+         Run KCSAN selftests on boot. On test failure, causes the kernel to panic.
+
+config KCSAN_EARLY_ENABLE
+       bool "Early enable during boot"
+       default y
+       help
+         If KCSAN should be enabled globally as soon as possible. KCSAN can
+         later be enabled/disabled via debugfs.
+
+config KCSAN_NUM_WATCHPOINTS
+       int "Number of available watchpoints"
+       default 64
+       help
+         Total number of available watchpoints. An address range maps into a
+         specific watchpoint slot as specified in kernel/kcsan/encoding.h.
+         Although a larger number of watchpoints may not be usable due to the
+         limited number of CPUs, a larger value helps to improve performance
+         by reducing cache-line contention. The chosen default is a
+         conservative value; we should almost never observe "no_capacity"
+         events (see /sys/kernel/debug/kcsan).
+
+config KCSAN_UDELAY_TASK
+       int "Delay in microseconds (for tasks)"
+       default 80
+       help
+         For tasks, the microsecond delay after setting up a watchpoint.
+
+config KCSAN_UDELAY_INTERRUPT
+       int "Delay in microseconds (for interrupts)"
+       default 20
+       help
+         For interrupts, the microsecond delay after setting up a watchpoint.
+         Interrupts have tighter latency requirements, and their delay should
+         be lower than for tasks.
+
+config KCSAN_DELAY_RANDOMIZE
+       bool "Randomize above delays"
+       default y
+       help
+         If delays should be randomized, where the maximum is KCSAN_UDELAY_*.
+         If false, the chosen delays are always the KCSAN_UDELAY_* values
+         as defined above.
+
+config KCSAN_SKIP_WATCH
+       int "Skip instructions before setting up watchpoint"
+       default 4000
+       help
+         The number of per-CPU memory operations to skip before another
+         watchpoint is set up, i.e. one in KCSAN_WATCH_SKIP per-CPU
+         memory operations is used to set up a watchpoint. A smaller value
+         results in more aggressive race detection, whereas a larger value
+         improves system performance at the cost of missing some races.
+
+config KCSAN_SKIP_WATCH_RANDOMIZE
+       bool "Randomize watchpoint instruction skip count"
+       default y
+       help
+         If instruction skip count should be randomized, where the maximum is
+         KCSAN_WATCH_SKIP. If false, the chosen value is always
+         KCSAN_WATCH_SKIP.
+
+config KCSAN_INTERRUPT_WATCHER
+       bool "Interruptible watchers"
+       help
+         If enabled, a task that set up a watchpoint may be interrupted while
+         delayed. This option will allow KCSAN to detect races between
+         interrupted tasks and other threads of execution on the same CPU.
+
+         Currently disabled by default, because not all safe per-CPU access
+         primitives and patterns may be accounted for, which could result in
+         false positives.
+
+config KCSAN_REPORT_ONCE_IN_MS
+       int "Duration in milliseconds, in which any given race is only reported once"
+       default 3000
+       help
+         Any given race is only reported once in the defined time window.
+         Different races may still generate reports within a duration that is
+         smaller than the duration defined here. This allows rate limiting
+         reporting to avoid flooding the console with reports.  Setting this
+         to 0 disables rate limiting.
+
+# The main purpose of the options below is to control reported data races (e.g.
+# in fuzzer configs); they are not expected to be switched frequently by other
+# users. We could turn some of them into boot parameters, but given that they
+# should not normally be switched, let's keep them here to simplify configuration.
+#
+# The defaults below are chosen to be very conservative, and may miss certain
+# bugs.
+
+config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
+       bool "Report races of unknown origin"
+       default y
+       help
+         If KCSAN should report races where only one access is known, and the
+         conflicting access is of unknown origin. This type of race is
+         reported if it was only possible to infer a race due to a data value
+         change while an access is being delayed on a watchpoint.
+
+config KCSAN_REPORT_VALUE_CHANGE_ONLY
+       bool "Only report races where watcher observed a data value change"
+       default y
+       help
+         If enabled and a conflicting write is observed via a watchpoint, but
+         the data value of the memory location was observed to remain
+         unchanged, do not report the data race.
+
+config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC
+       bool "Assume that plain aligned writes up to word size are atomic"
+       default y
+       help
+         Assume that plain aligned writes up to word size are atomic by
+         default, and also not subject to other unsafe compiler optimizations
+         resulting in data races. This will cause KCSAN to not report data
+         races due to conflicts where the only plain accesses are aligned
+         writes up to word size: conflicts between marked reads and plain
+         aligned writes up to word size will not be reported as data races;
+         notice that data races between two conflicting plain aligned writes
+         will also not be reported.
+
+config KCSAN_IGNORE_ATOMICS
+       bool "Do not instrument marked atomic accesses"
+       help
+         Never instrument marked atomic accesses. This option can be used for
+         additional filtering. Conflicting marked atomic reads and plain
+         writes will never be reported as a data race; however, this will cause
+         plain reads and marked writes to result in "unknown origin" reports.
+         If combined with CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=n, data
+         races where at least one access is marked atomic will never be
+         reported.
+
+         Similar to KCSAN_ASSUME_PLAIN_WRITES_ATOMIC, but including unaligned
+         accesses: conflicting marked atomic reads and plain writes will not
+         be reported as data races; however, unlike that option, data races
+         due to two conflicting plain writes will be reported (aligned and
+         unaligned, if CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n).
+
+endif # KCSAN
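To make the options above concrete, this is the kind of access pair KCSAN is built to catch; the names are hypothetical and the snippet is purely illustrative, not taken from this series:

/* An unsynchronized counter updated from several CPUs: KCSAN sets a
 * watchpoint on one access and, within the KCSAN_UDELAY_* window, reports
 * a data race if a conflicting plain access hits the same location. */
static unsigned long hits;              /* hypothetical statistics counter */

void on_event(void)                     /* may run concurrently on any CPU */
{
        hits++;                         /* plain read-modify-write: racy */
}

Marking both sides with READ_ONCE()/WRITE_ONCE(), switching to an atomic type, or annotating a tolerated race with data_race() (see the checkpatch change near the end of this series) is what keeps such code out of the reports.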
index 27bcc25..774315d 100644 (file)
@@ -26,9 +26,20 @@ config UBSAN_TRAP
          the system. For some system builders this is an acceptable
          trade-off.
 
+config UBSAN_KCOV_BROKEN
+       def_bool KCOV && CC_HAS_SANCOV_TRACE_PC
+       depends on CC_IS_CLANG
+       depends on !$(cc-option,-Werror=unused-command-line-argument -fsanitize=bounds -fsanitize-coverage=trace-pc)
+       help
+         Some versions of clang support either UBSAN or KCOV but not the
+         combination of the two.
+         See https://bugs.llvm.org/show_bug.cgi?id=45831 for the status
+         in newer releases.
+
 config UBSAN_BOUNDS
        bool "Perform array index bounds checking"
        default UBSAN
+       depends on !UBSAN_KCOV_BROKEN
        help
          This option enables detection of directly indexed out of bounds
          array accesses, where the array size is known at compile time.
index 315516f..b1c42c1 100644 (file)
@@ -25,6 +25,9 @@ KASAN_SANITIZE_string.o := n
 CFLAGS_string.o := $(call cc-option, -fno-stack-protector)
 endif
 
+# Used by KCSAN while enabled, avoid recursion.
+KCSAN_SANITIZE_random32.o := n
+
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
         rbtree.o radix-tree.o timerqueue.o xarray.o \
         idr.o extable.o sha1.o irq_regs.o argv_split.o \
@@ -296,6 +299,7 @@ endif
 
 UBSAN_SANITIZE_ubsan.o := n
 KASAN_SANITIZE_ubsan.o := n
+KCSAN_SANITIZE_ubsan.o := n
 CFLAGS_ubsan.o := $(call cc-option, -fno-stack-protector) $(DISABLE_STACKLEAK_PLUGIN)
 
 obj-$(CONFIG_SBITMAP) += sbitmap.o
index 21a7640..0364452 100644 (file)
@@ -741,8 +741,9 @@ int bitmap_parse(const char *start, unsigned int buflen,
        int chunks = BITS_TO_U32(nmaskbits);
        u32 *bitmap = (u32 *)maskp;
        int unset_bit;
+       int chunk;
 
-       while (1) {
+       for (chunk = 0; ; chunk++) {
                end = bitmap_find_region_reverse(start, end);
                if (start > end)
                        break;
@@ -750,7 +751,11 @@ int bitmap_parse(const char *start, unsigned int buflen,
                if (!chunks--)
                        return -EOVERFLOW;
 
-               end = bitmap_get_x32_reverse(start, end, bitmap++);
+#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
+               end = bitmap_get_x32_reverse(start, end, &bitmap[chunk ^ 1]);
+#else
+               end = bitmap_get_x32_reverse(start, end, &bitmap[chunk]);
+#endif
                if (IS_ERR(end))
                        return PTR_ERR(end);
        }
index 51595bf..bf538c2 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/splice.h>
 #include <net/checksum.h>
 #include <linux/scatterlist.h>
+#include <linux/instrumented.h>
 
 #define PIPE_PARANOIA /* for now */
 
 static int copyout(void __user *to, const void *from, size_t n)
 {
        if (access_ok(to, n)) {
-               kasan_check_read(from, n);
+               instrument_copy_to_user(to, from, n);
                n = raw_copy_to_user(to, from, n);
        }
        return n;
@@ -147,7 +148,7 @@ static int copyout(void __user *to, const void *from, size_t n)
 static int copyin(void *to, const void __user *from, size_t n)
 {
        if (access_ok(from, n)) {
-               kasan_check_write(to, n);
+               instrument_copy_from_user(to, from, n);
                n = raw_copy_from_user(to, from, n);
        }
        return n;
@@ -639,7 +640,7 @@ EXPORT_SYMBOL(_copy_to_iter);
 static int copyout_mcsafe(void __user *to, const void *from, size_t n)
 {
        if (access_ok(to, n)) {
-               kasan_check_read(from, n);
+               instrument_copy_to_user(to, from, n);
                n = copy_to_user_mcsafe((__force void *) to, from, n);
        }
        return n;
index 0c9d3ad..5371dab 100644 (file)
@@ -141,6 +141,9 @@ static FORCE_INLINE int LZ4_decompress_generic(
                 * space in the output for those 18 bytes earlier, upon
                 * entering the shortcut (in other words, there is a
                 * combined check for both stages).
+                *
+                * The & in the likely() below is intentionally not && so that
+                * some compilers can produce better parallelized runtime code
                 */
                if ((endOnInput ? length != RUN_MASK : length <= 8)
                   /*
index 717c940..8ad5ba2 100644 (file)
@@ -268,6 +268,19 @@ m_len_done:
                                *op++ = (M4_MARKER | ((m_off >> 11) & 8)
                                                | (m_len - 2));
                        else {
+                               if (unlikely(((m_off & 0x403f) == 0x403f)
+                                               && (m_len >= 261)
+                                               && (m_len <= 264))
+                                               && likely(bitstream_version)) {
+                                       // Under lzo-rle, block copies
+                                       // for 261 <= length <= 264 and
+                                       // (distance & 0x80f3) == 0x80f3
+                                       // can result in ambiguous
+                                       // output. Adjust length
+                                       // to 260 to prevent ambiguity.
+                                       ip -= m_len - 260;
+                                       m_len = 260;
+                               }
                                m_len -= M4_MAX_LEN;
                                *op++ = (M4_MARKER | ((m_off >> 11) & 8));
                                while (unlikely(m_len > 255)) {
index fd50b3a..ced25e3 100644 (file)
@@ -9,7 +9,11 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 
-/* a tiny module only meant to test set/clear_bit */
+/* a tiny module only meant to test
+ *
+ *   set/clear_bit
+ *   get_count_order/long
+ */
 
 /* use an enum because thats the most common BITMAP usage */
 enum bitops_fun {
@@ -24,14 +28,59 @@ enum bitops_fun {
 
 static DECLARE_BITMAP(g_bitmap, BITOPS_LENGTH);
 
+static unsigned int order_comb[][2] = {
+       {0x00000003,  2},
+       {0x00000004,  2},
+       {0x00001fff, 13},
+       {0x00002000, 13},
+       {0x50000000, 31},
+       {0x80000000, 31},
+       {0x80003000, 32},
+};
+
+#ifdef CONFIG_64BIT
+static unsigned long order_comb_long[][2] = {
+       {0x0000000300000000, 34},
+       {0x0000000400000000, 34},
+       {0x00001fff00000000, 45},
+       {0x0000200000000000, 45},
+       {0x5000000000000000, 63},
+       {0x8000000000000000, 63},
+       {0x8000300000000000, 64},
+};
+#endif
+
 static int __init test_bitops_startup(void)
 {
+       int i;
+
        pr_warn("Loaded test module\n");
        set_bit(BITOPS_4, g_bitmap);
        set_bit(BITOPS_7, g_bitmap);
        set_bit(BITOPS_11, g_bitmap);
        set_bit(BITOPS_31, g_bitmap);
        set_bit(BITOPS_88, g_bitmap);
+
+       for (i = 0; i < ARRAY_SIZE(order_comb); i++) {
+               if (order_comb[i][1] != get_count_order(order_comb[i][0]))
+                       pr_warn("get_count_order wrong for %x\n",
+                                      order_comb[i][0]);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(order_comb); i++) {
+               if (order_comb[i][1] != get_count_order_long(order_comb[i][0]))
+                       pr_warn("get_count_order_long wrong for %x\n",
+                                      order_comb[i][0]);
+       }
+
+#ifdef CONFIG_64BIT
+       for (i = 0; i < ARRAY_SIZE(order_comb_long); i++) {
+               if (order_comb_long[i][1] !=
+                              get_count_order_long(order_comb_long[i][0]))
+                       pr_warn("get_count_order_long wrong for %lx\n",
+                                      order_comb_long[i][0]);
+       }
+#endif
        return 0;
 }
 
@@ -55,6 +104,6 @@ static void __exit test_bitops_unstartup(void)
 module_init(test_bitops_startup);
 module_exit(test_bitops_unstartup);
 
-MODULE_AUTHOR("Jesse Brandeburg <jesse.brandeburg@intel.com>");
+MODULE_AUTHOR("Jesse Brandeburg <jesse.brandeburg@intel.com>, Wei Yang <richard.weiyang@gmail.com>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Bit testing module");
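As a cross-check on the order_comb tables above: get_count_order() and get_count_order_long() return the base-2 order of the argument rounded up to the next power of two, so (illustration only, assuming kernel context):

#include <linux/bitops.h>
#include <linux/bug.h>

static void order_examples(void)
{
        WARN_ON(get_count_order(0x00000003) != 2);      /* 3 rounds up to 4 == 1 << 2 */
        WARN_ON(get_count_order(0x00002000) != 13);     /* already a power of two */
        WARN_ON(get_count_order(0x80003000) != 32);     /* rounds up past bit 31 */
        WARN_ON(get_count_order_long(0x8000300000000000UL) != 64);     /* 64-bit only */
}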
index ca2a697..b26509f 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/uaccess.h>
 #include <linux/bitops.h>
+#include <linux/instrumented.h>
+#include <linux/uaccess.h>
 
 /* out-of-line parts */
 
@@ -10,7 +11,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n
        unsigned long res = n;
        might_fault();
        if (likely(access_ok(from, n))) {
-               kasan_check_write(to, n);
+               instrument_copy_from_user(to, from, n);
                res = raw_copy_from_user(to, from, n);
        }
        if (unlikely(res))
@@ -25,7 +26,7 @@ unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        might_fault();
        if (likely(access_ok(to, n))) {
-               kasan_check_read(from, n);
+               instrument_copy_to_user(to, from, n);
                n = raw_copy_to_user(to, from, n);
        }
        return n;
index a2909af..bcc9a98 100644 (file)
@@ -38,6 +38,13 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
 }
 #endif
 
+#ifndef vdso_cycles_ok
+static inline bool vdso_cycles_ok(u64 cycles)
+{
+       return true;
+}
+#endif
+
 #ifdef CONFIG_TIME_NS
 static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
                          struct __kernel_timespec *ts)
@@ -62,6 +69,8 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
                        return -1;
 
                cycles = __arch_get_hw_counter(vd->clock_mode);
+               if (unlikely(!vdso_cycles_ok(cycles)))
+                       return -1;
                ns = vdso_ts->nsec;
                last = vd->cycle_last;
                ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
@@ -130,6 +139,8 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
                        return -1;
 
                cycles = __arch_get_hw_counter(vd->clock_mode);
+               if (unlikely(!vdso_cycles_ok(cycles)))
+                       return -1;
                ns = vdso_ts->nsec;
                last = vd->cycle_last;
                ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
@@ -210,7 +221,7 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
        return 0;
 }
 
-static __maybe_unused int
+static __always_inline int
 __cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock,
                             struct __kernel_timespec *ts)
 {
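The weak vdso_cycles_ok() default above accepts every counter value; an architecture that needs the hook provides its own version (plus the matching #define) in its asm/vdso/gettimeofday.h before the generic code is included. A hypothetical override, not taken from this series, might look like:

/* Reject the sentinel value the hardware returns when the counter cannot be
 * read from userspace, so the vDSO falls back to the clock_gettime() syscall. */
static __always_inline bool vdso_cycles_ok(u64 cycles)
{
        return cycles != U64_MAX;       /* example policy only */
}
#define vdso_cycles_ok vdso_cycles_ok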
index 662fd15..6e9d46b 100644 (file)
@@ -8,6 +8,14 @@ KASAN_SANITIZE_slab.o := n
 KASAN_SANITIZE_slub.o := n
 KCSAN_SANITIZE_kmemleak.o := n
 
+# These produce frequent data race reports: most of them are due to races on
+# the same word but accesses to different bits of that word. Re-enable KCSAN
+# for these when we have more consensus on what to do about them.
+KCSAN_SANITIZE_slab_common.o := n
+KCSAN_SANITIZE_slab.o := n
+KCSAN_SANITIZE_slub.o := n
+KCSAN_SANITIZE_page_alloc.o := n
+
 # These files are disabled because they produce non-interesting and/or
 # flaky coverage that is not a function of syscall inputs. E.g. slab is out of
 # free pages, or a task is migrated between nodes.
@@ -41,7 +49,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o fadvise.o \
                           maccess.o page-writeback.o \
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
-                          mm_init.o mmu_context.o percpu.o slab_common.o \
+                          mm_init.o percpu.o slab_common.o \
                           compaction.o vmacache.o \
                           interval_tree.o list_lru.o workingset.o \
                           debug.o gup.o $(mmu-y)
index 9ec59c3..e456230 100644 (file)
@@ -60,6 +60,9 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
 {
        pmd_t pmd = pfn_pmd(pfn, prot);
 
+       if (!has_transparent_hugepage())
+               return;
+
        WARN_ON(!pmd_same(pmd, pmd));
        WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
        WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
@@ -79,6 +82,9 @@ static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot)
 {
        pud_t pud = pfn_pud(pfn, prot);
 
+       if (!has_transparent_hugepage())
+               return;
+
        WARN_ON(!pud_same(pud, pud));
        WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud))));
        WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud))));
index ababa36..47b8ccb 100644 (file)
@@ -212,15 +212,13 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
        short addr_lsb = tk->size_shift;
        int ret = 0;
 
-       if ((t->mm == current->mm) || !(flags & MF_ACTION_REQUIRED))
-               pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
+       pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
                        pfn, t->comm, t->pid);
 
        if (flags & MF_ACTION_REQUIRED) {
-               if (t->mm == current->mm)
-                       ret = force_sig_mceerr(BUS_MCEERR_AR,
+               WARN_ON_ONCE(t != current);
+               ret = force_sig_mceerr(BUS_MCEERR_AR,
                                         (void __user *)tk->addr, addr_lsb);
-               /* send no signal to non-current processes */
        } else {
                /*
                 * Don't use force here, it's convenient if the signal
@@ -402,9 +400,15 @@ static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
 {
        struct task_struct *t;
 
-       for_each_thread(tsk, t)
-               if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY))
-                       return t;
+       for_each_thread(tsk, t) {
+               if (t->flags & PF_MCE_PROCESS) {
+                       if (t->flags & PF_MCE_EARLY)
+                               return t;
+               } else {
+                       if (sysctl_memory_failure_early_kill)
+                               return t;
+               }
+       }
        return NULL;
 }
 
@@ -413,21 +417,26 @@ static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
  * to be signaled when some page under the process is hwpoisoned.
  * Return task_struct of the dedicated thread (main thread unless explicitly
  * specified) if the process is "early kill," and otherwise returns NULL.
+ *
+ * Note that the above is true for the Action Optional case, but not for the
+ * Action Required case, where SIGBUS should be sent only to the current thread.
  */
 static struct task_struct *task_early_kill(struct task_struct *tsk,
                                           int force_early)
 {
-       struct task_struct *t;
        if (!tsk->mm)
                return NULL;
-       if (force_early)
-               return tsk;
-       t = find_early_kill_thread(tsk);
-       if (t)
-               return t;
-       if (sysctl_memory_failure_early_kill)
-               return tsk;
-       return NULL;
+       if (force_early) {
+               /*
+                * Comparing ->mm here because current task might represent
+                * a subthread, while tsk always points to the main thread.
+                */
+               if (tsk->mm == current->mm)
+                       return current;
+               else
+                       return NULL;
+       }
+       return find_early_kill_thread(tsk);
 }
 
 /*
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
deleted file mode 100644 (file)
index 3e612ae..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Copyright (C) 2009 Red Hat, Inc.
- *
- * See ../COPYING for licensing terms.
- */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/sched/mm.h>
-#include <linux/sched/task.h>
-#include <linux/mmu_context.h>
-#include <linux/export.h>
-
-#include <asm/mmu_context.h>
-
-/*
- * use_mm
- *     Makes the calling kernel thread take on the specified
- *     mm context.
- *     (Note: this routine is intended to be called only
- *     from a kernel thread context)
- */
-void use_mm(struct mm_struct *mm)
-{
-       struct mm_struct *active_mm;
-       struct task_struct *tsk = current;
-
-       task_lock(tsk);
-       active_mm = tsk->active_mm;
-       if (active_mm != mm) {
-               mmgrab(mm);
-               tsk->active_mm = mm;
-       }
-       tsk->mm = mm;
-       switch_mm(active_mm, mm, tsk);
-       task_unlock(tsk);
-#ifdef finish_arch_post_lock_switch
-       finish_arch_post_lock_switch();
-#endif
-
-       if (active_mm != mm)
-               mmdrop(active_mm);
-}
-EXPORT_SYMBOL_GPL(use_mm);
-
-/*
- * unuse_mm
- *     Reverses the effect of use_mm, i.e. releases the
- *     specified mm context which was earlier taken on
- *     by the calling kernel thread
- *     (Note: this routine is intended to be called only
- *     from a kernel thread context)
- */
-void unuse_mm(struct mm_struct *mm)
-{
-       struct task_struct *tsk = current;
-
-       task_lock(tsk);
-       sync_mm_rss(mm);
-       tsk->mm = NULL;
-       /* active_mm is still 'mm' */
-       enter_lazy_tlb(mm, tsk);
-       task_unlock(tsk);
-}
-EXPORT_SYMBOL_GPL(unuse_mm);
index b4e9491..6e94962 100644 (file)
@@ -126,7 +126,7 @@ static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc)
 
 /*
  * The process p may have detached its own ->mm while exiting or through
- * use_mm(), but one or more of its subthreads may still have a valid
+ * kthread_use_mm(), but one or more of its subthreads may still have a valid
  * pointer.  Return p, or any of its subthreads with a valid ->mm, with
  * task_lock() held.
  */
@@ -919,8 +919,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
                        continue;
                }
                /*
-                * No use_mm() user needs to read from the userspace so we are
-                * ok to reap it.
+                * No kthread_use_mm() user needs to read from the userspace so
+                * we are ok to reap it.
                 */
                if (unlikely(p->flags & PF_KTHREAD))
                        continue;
index d909281..01a6e66 100644 (file)
@@ -24,8 +24,8 @@
  * task's vmacache pertains to a different mm (ie, its own).  There is
  * nothing we can do here.
  *
- * Also handle the case where a kernel thread has adopted this mm via use_mm().
- * That kernel thread's vmacache is not applicable to this mm.
+ * Also handle the case where a kernel thread has adopted this mm via
+ * kthread_use_mm(). That kernel thread's vmacache is not applicable to this mm.
  */
 static inline bool vmacache_valid_mm(struct mm_struct *mm)
 {
diff --git a/scripts/Makefile.kcsan b/scripts/Makefile.kcsan
new file mode 100644 (file)
index 0000000..bd4da1a
--- /dev/null
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+ifdef CONFIG_KCSAN
+
+# GCC and Clang accept backend options differently. Do not wrap in cc-option,
+# because Clang accepts "--param" even if it is unused.
+ifdef CONFIG_CC_IS_CLANG
+cc-param = -mllvm -$(1)
+else
+cc-param = --param -$(1)
+endif
+
+# Keep most options here optional, to allow enabling more compilers if the
+# absence of some options does not break KCSAN or cause false positive reports.
+CFLAGS_KCSAN := -fsanitize=thread \
+       $(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0) -fno-optimize-sibling-calls) \
+       $(call cc-option,$(call cc-param,tsan-instrument-read-before-write=1)) \
+       $(call cc-param,tsan-distinguish-volatile=1)
+
+endif # CONFIG_KCSAN
index 127f2a7..e3f0476 100644 (file)
@@ -152,6 +152,16 @@ _c_flags += $(if $(patsubst n%,, \
        $(CFLAGS_KCOV))
 endif
 
+#
+# Enable KCSAN flags except some files or directories we don't want to check
+# (depends on variables KCSAN_SANITIZE_obj.o, KCSAN_SANITIZE)
+#
+ifeq ($(CONFIG_KCSAN),y)
+_c_flags += $(if $(patsubst n%,, \
+       $(KCSAN_SANITIZE_$(basetarget).o)$(KCSAN_SANITIZE)y), \
+       $(CFLAGS_KCSAN))
+endif
+
 # $(srctree)/$(src) for including checkin headers from generated source files
 # $(objtree)/$(obj) for including generated headers from checkin source files
 ifeq ($(KBUILD_EXTMOD),)
index e38871e..59c0052 100755 (executable)
@@ -1,8 +1,8 @@
 cat <<EOF
-static inline ${ret}
-${atomic}_${pfx}${name}${sfx}_acquire(${params})
+static __always_inline ${ret}
+${arch}${atomic}_${pfx}${name}${sfx}_acquire(${params})
 {
-       ${ret} ret = ${atomic}_${pfx}${name}${sfx}_relaxed(${args});
+       ${ret} ret = ${arch}${atomic}_${pfx}${name}${sfx}_relaxed(${args});
        __atomic_acquire_fence();
        return ret;
 }
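To see what this template emits after the change: with ${arch} set to "arch_" (the new second script argument wired up in gen-atomic-fallback.sh below), an operation an architecture only implements in _relaxed form gets an acquire fallback along these lines (sketch of the generated code; the exact output may differ):

static __always_inline int
arch_atomic_fetch_add_acquire(int i, atomic_t *v)
{
        int ret = arch_atomic_fetch_add_relaxed(i, v);
        __atomic_acquire_fence();
        return ret;
}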
index e6f4815..a66635b 100755 (executable)
@@ -1,6 +1,6 @@
 cat <<EOF
 /**
- * ${atomic}_add_negative - add and test if negative
+ * ${arch}${atomic}_add_negative - add and test if negative
  * @i: integer value to add
  * @v: pointer of type ${atomic}_t
  *
@@ -8,9 +8,9 @@ cat <<EOF
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline bool
-${atomic}_add_negative(${int} i, ${atomic}_t *v)
+static __always_inline bool
+${arch}${atomic}_add_negative(${int} i, ${atomic}_t *v)
 {
-       return ${atomic}_add_return(i, v) < 0;
+       return ${arch}${atomic}_add_return(i, v) < 0;
 }
 EOF
index 7925338..2ff598a 100755 (executable)
@@ -1,6 +1,6 @@
 cat << EOF
 /**
- * ${atomic}_add_unless - add unless the number is already a given value
+ * ${arch}${atomic}_add_unless - add unless the number is already a given value
  * @v: pointer of type ${atomic}_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -8,9 +8,9 @@ cat << EOF
  * Atomically adds @a to @v, if @v was not already @u.
  * Returns true if the addition was done.
  */
-static inline bool
-${atomic}_add_unless(${atomic}_t *v, ${int} a, ${int} u)
+static __always_inline bool
+${arch}${atomic}_add_unless(${atomic}_t *v, ${int} a, ${int} u)
 {
-       return ${atomic}_fetch_add_unless(v, a, u) != u;
+       return ${arch}${atomic}_fetch_add_unless(v, a, u) != u;
 }
 EOF
index 9f3a321..3f18663 100755 (executable)
@@ -1,7 +1,7 @@
 cat <<EOF
-static inline ${ret}
-${atomic}_${pfx}andnot${sfx}${order}(${int} i, ${atomic}_t *v)
+static __always_inline ${ret}
+${arch}${atomic}_${pfx}andnot${sfx}${order}(${int} i, ${atomic}_t *v)
 {
-       ${retstmt}${atomic}_${pfx}and${sfx}${order}(~i, v);
+       ${retstmt}${arch}${atomic}_${pfx}and${sfx}${order}(~i, v);
 }
 EOF
index 10bbc82..e2e01f0 100755 (executable)
@@ -1,7 +1,7 @@
 cat <<EOF
-static inline ${ret}
-${atomic}_${pfx}dec${sfx}${order}(${atomic}_t *v)
+static __always_inline ${ret}
+${arch}${atomic}_${pfx}dec${sfx}${order}(${atomic}_t *v)
 {
-       ${retstmt}${atomic}_${pfx}sub${sfx}${order}(1, v);
+       ${retstmt}${arch}${atomic}_${pfx}sub${sfx}${order}(1, v);
 }
 EOF
index 0ce7103..e8a5e49 100755 (executable)
@@ -1,15 +1,15 @@
 cat <<EOF
 /**
- * ${atomic}_dec_and_test - decrement and test
+ * ${arch}${atomic}_dec_and_test - decrement and test
  * @v: pointer of type ${atomic}_t
  *
  * Atomically decrements @v by 1 and
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline bool
-${atomic}_dec_and_test(${atomic}_t *v)
+static __always_inline bool
+${arch}${atomic}_dec_and_test(${atomic}_t *v)
 {
-       return ${atomic}_dec_return(v) == 0;
+       return ${arch}${atomic}_dec_return(v) == 0;
 }
 EOF
index c52eace..527adec 100755 (executable)
@@ -1,14 +1,14 @@
 cat <<EOF
-static inline ${ret}
-${atomic}_dec_if_positive(${atomic}_t *v)
+static __always_inline ${ret}
+${arch}${atomic}_dec_if_positive(${atomic}_t *v)
 {
-       ${int} dec, c = ${atomic}_read(v);
+       ${int} dec, c = ${arch}${atomic}_read(v);
 
        do {
                dec = c - 1;
                if (unlikely(dec < 0))
                        break;
-       } while (!${atomic}_try_cmpxchg(v, &c, dec));
+       } while (!${arch}${atomic}_try_cmpxchg(v, &c, dec));
 
        return dec;
 }
index 8a2578f..dcab684 100755 (executable)
@@ -1,13 +1,13 @@
 cat <<EOF
-static inline bool
-${atomic}_dec_unless_positive(${atomic}_t *v)
+static __always_inline bool
+${arch}${atomic}_dec_unless_positive(${atomic}_t *v)
 {
-       ${int} c = ${atomic}_read(v);
+       ${int} c = ${arch}${atomic}_read(v);
 
        do {
                if (unlikely(c > 0))
                        return false;
-       } while (!${atomic}_try_cmpxchg(v, &c, c - 1));
+       } while (!${arch}${atomic}_try_cmpxchg(v, &c, c - 1));
 
        return true;
 }
index 82f68fa..3764fc8 100755 (executable)
@@ -1,10 +1,10 @@
 cat <<EOF
-static inline ${ret}
-${atomic}_${pfx}${name}${sfx}(${params})
+static __always_inline ${ret}
+${arch}${atomic}_${pfx}${name}${sfx}(${params})
 {
        ${ret} ret;
        __atomic_pre_full_fence();
-       ret = ${atomic}_${pfx}${name}${sfx}_relaxed(${args});
+       ret = ${arch}${atomic}_${pfx}${name}${sfx}_relaxed(${args});
        __atomic_post_full_fence();
        return ret;
 }
index d2c091d..0e0b9ae 100755 (executable)
@@ -1,6 +1,6 @@
 cat << EOF
 /**
- * ${atomic}_fetch_add_unless - add unless the number is already a given value
+ * ${arch}${atomic}_fetch_add_unless - add unless the number is already a given value
  * @v: pointer of type ${atomic}_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -8,15 +8,15 @@ cat << EOF
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns original value of @v
  */
-static inline ${int}
-${atomic}_fetch_add_unless(${atomic}_t *v, ${int} a, ${int} u)
+static __always_inline ${int}
+${arch}${atomic}_fetch_add_unless(${atomic}_t *v, ${int} a, ${int} u)
 {
-       ${int} c = ${atomic}_read(v);
+       ${int} c = ${arch}${atomic}_read(v);
 
        do {
                if (unlikely(c == u))
                        break;
-       } while (!${atomic}_try_cmpxchg(v, &c, c + a));
+       } while (!${arch}${atomic}_try_cmpxchg(v, &c, c + a));
 
        return c;
 }
index f866b3a..15ec629 100755 (executable)
@@ -1,7 +1,7 @@
 cat <<EOF
-static inline ${ret}
-${atomic}_${pfx}inc${sfx}${order}(${atomic}_t *v)
+static __always_inline ${ret}
+${arch}${atomic}_${pfx}inc${sfx}${order}(${atomic}_t *v)
 {
-       ${retstmt}${atomic}_${pfx}add${sfx}${order}(1, v);
+       ${retstmt}${arch}${atomic}_${pfx}add${sfx}${order}(1, v);
 }
 EOF
index 4e20688..cecc832 100755 (executable)
@@ -1,15 +1,15 @@
 cat <<EOF
 /**
- * ${atomic}_inc_and_test - increment and test
+ * ${arch}${atomic}_inc_and_test - increment and test
  * @v: pointer of type ${atomic}_t
  *
  * Atomically increments @v by 1
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
-${atomic}_inc_and_test(${atomic}_t *v)
+static __always_inline bool
+${arch}${atomic}_inc_and_test(${atomic}_t *v)
 {
-       return ${atomic}_inc_return(v) == 0;
+       return ${arch}${atomic}_inc_return(v) == 0;
 }
 EOF
index a7c45c8..50f2d4d 100755 (executable)
@@ -1,14 +1,14 @@
 cat <<EOF
 /**
- * ${atomic}_inc_not_zero - increment unless the number is zero
+ * ${arch}${atomic}_inc_not_zero - increment unless the number is zero
  * @v: pointer of type ${atomic}_t
  *
  * Atomically increments @v by 1, if @v is non-zero.
  * Returns true if the increment was done.
  */
-static inline bool
-${atomic}_inc_not_zero(${atomic}_t *v)
+static __always_inline bool
+${arch}${atomic}_inc_not_zero(${atomic}_t *v)
 {
-       return ${atomic}_add_unless(v, 1, 0);
+       return ${arch}${atomic}_add_unless(v, 1, 0);
 }
 EOF
index 0c266e7..87629e0 100755 (executable)
@@ -1,13 +1,13 @@
 cat <<EOF
-static inline bool
-${atomic}_inc_unless_negative(${atomic}_t *v)
+static __always_inline bool
+${arch}${atomic}_inc_unless_negative(${atomic}_t *v)
 {
-       ${int} c = ${atomic}_read(v);
+       ${int} c = ${arch}${atomic}_read(v);
 
        do {
                if (unlikely(c < 0))
                        return false;
-       } while (!${atomic}_try_cmpxchg(v, &c, c + 1));
+       } while (!${arch}${atomic}_try_cmpxchg(v, &c, c + 1));
 
        return true;
 }
index 75863b5..341a88d 100755 (executable)
@@ -1,6 +1,6 @@
 cat <<EOF
-static inline ${ret}
-${atomic}_read_acquire(const ${atomic}_t *v)
+static __always_inline ${ret}
+${arch}${atomic}_read_acquire(const ${atomic}_t *v)
 {
        return smp_load_acquire(&(v)->counter);
 }
index 3f628a3..f8906d5 100755 (executable)
@@ -1,8 +1,8 @@
 cat <<EOF
-static inline ${ret}
-${atomic}_${pfx}${name}${sfx}_release(${params})
+static __always_inline ${ret}
+${arch}${atomic}_${pfx}${name}${sfx}_release(${params})
 {
        __atomic_release_fence();
-       ${retstmt}${atomic}_${pfx}${name}${sfx}_relaxed(${args});
+       ${retstmt}${arch}${atomic}_${pfx}${name}${sfx}_relaxed(${args});
 }
 EOF
index 45bb5e0..7606827 100755 (executable)
@@ -1,6 +1,6 @@
 cat <<EOF
-static inline void
-${atomic}_set_release(${atomic}_t *v, ${int} i)
+static __always_inline void
+${arch}${atomic}_set_release(${atomic}_t *v, ${int} i)
 {
        smp_store_release(&(v)->counter, i);
 }
index 289ef17..c580f4c 100755 (executable)
@@ -1,6 +1,6 @@
 cat <<EOF
 /**
- * ${atomic}_sub_and_test - subtract value from variable and test result
+ * ${arch}${atomic}_sub_and_test - subtract value from variable and test result
  * @i: integer value to subtract
  * @v: pointer of type ${atomic}_t
  *
@@ -8,9 +8,9 @@ cat <<EOF
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
-${atomic}_sub_and_test(${int} i, ${atomic}_t *v)
+static __always_inline bool
+${arch}${atomic}_sub_and_test(${int} i, ${atomic}_t *v)
 {
-       return ${atomic}_sub_return(i, v) == 0;
+       return ${arch}${atomic}_sub_return(i, v) == 0;
 }
 EOF
index 4ed85e2..06db0f7 100755 (executable)
@@ -1,9 +1,9 @@
 cat <<EOF
-static inline bool
-${atomic}_try_cmpxchg${order}(${atomic}_t *v, ${int} *old, ${int} new)
+static __always_inline bool
+${arch}${atomic}_try_cmpxchg${order}(${atomic}_t *v, ${int} *old, ${int} new)
 {
        ${int} r, o = *old;
-       r = ${atomic}_cmpxchg${order}(v, o, new);
+       r = ${arch}${atomic}_cmpxchg${order}(v, o, new);
        if (unlikely(r != o))
                *old = r;
        return likely(r == o);
index 1bd7c17..0fd1cf0 100755 (executable)
@@ -2,10 +2,11 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ATOMICDIR=$(dirname $0)
+ARCH=$2
 
 . ${ATOMICDIR}/atomic-tbl.sh
 
-#gen_template_fallback(template, meta, pfx, name, sfx, order, atomic, int, args...)
+#gen_template_fallback(template, meta, pfx, name, sfx, order, arch, atomic, int, args...)
 gen_template_fallback()
 {
        local template="$1"; shift
@@ -14,10 +15,11 @@ gen_template_fallback()
        local name="$1"; shift
        local sfx="$1"; shift
        local order="$1"; shift
+       local arch="$1"; shift
        local atomic="$1"; shift
        local int="$1"; shift
 
-       local atomicname="${atomic}_${pfx}${name}${sfx}${order}"
+       local atomicname="${arch}${atomic}_${pfx}${name}${sfx}${order}"
 
        local ret="$(gen_ret_type "${meta}" "${int}")"
        local retstmt="$(gen_ret_stmt "${meta}")"
@@ -32,7 +34,7 @@ gen_template_fallback()
        fi
 }
 
-#gen_proto_fallback(meta, pfx, name, sfx, order, atomic, int, args...)
+#gen_proto_fallback(meta, pfx, name, sfx, order, arch, atomic, int, args...)
 gen_proto_fallback()
 {
        local meta="$1"; shift
@@ -56,16 +58,17 @@ cat << EOF
 EOF
 }
 
-#gen_proto_order_variants(meta, pfx, name, sfx, atomic, int, args...)
+#gen_proto_order_variants(meta, pfx, name, sfx, arch, atomic, int, args...)
 gen_proto_order_variants()
 {
        local meta="$1"; shift
        local pfx="$1"; shift
        local name="$1"; shift
        local sfx="$1"; shift
-       local atomic="$1"
+       local arch="$1"
+       local atomic="$2"
 
-       local basename="${atomic}_${pfx}${name}${sfx}"
+       local basename="${arch}${atomic}_${pfx}${name}${sfx}"
 
        local template="$(find_fallback_template "${pfx}" "${name}" "${sfx}" "${order}")"
 
@@ -94,7 +97,7 @@ gen_proto_order_variants()
        gen_basic_fallbacks "${basename}"
 
        if [ ! -z "${template}" ]; then
-               printf "#endif /* ${atomic}_${pfx}${name}${sfx} */\n\n"
+               printf "#endif /* ${arch}${atomic}_${pfx}${name}${sfx} */\n\n"
                gen_proto_fallback "${meta}" "${pfx}" "${name}" "${sfx}" "" "$@"
                gen_proto_fallback "${meta}" "${pfx}" "${name}" "${sfx}" "_acquire" "$@"
                gen_proto_fallback "${meta}" "${pfx}" "${name}" "${sfx}" "_release" "$@"
@@ -149,20 +152,19 @@ cat << EOF
 #ifndef _LINUX_ATOMIC_FALLBACK_H
 #define _LINUX_ATOMIC_FALLBACK_H
 
+#include <linux/compiler.h>
+
 EOF
 
-for xchg in "xchg" "cmpxchg" "cmpxchg64"; do
+for xchg in "${ARCH}xchg" "${ARCH}cmpxchg" "${ARCH}cmpxchg64"; do
        gen_xchg_fallbacks "${xchg}"
 done
 
 grep '^[a-z]' "$1" | while read name meta args; do
-       gen_proto "${meta}" "${name}" "atomic" "int" ${args}
+       gen_proto "${meta}" "${name}" "${ARCH}" "atomic" "int" ${args}
 done
 
 cat <<EOF
-#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
-#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
-
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
@@ -170,12 +172,9 @@ cat <<EOF
 EOF
 
 grep '^[a-z]' "$1" | while read name meta args; do
-       gen_proto "${meta}" "${name}" "atomic64" "s64" ${args}
+       gen_proto "${meta}" "${name}" "${ARCH}" "atomic64" "s64" ${args}
 done
 
 cat <<EOF
-#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
-#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
-
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
 EOF
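With the new ARCH=arch_ invocation (see the gen-atomics.sh hunk below), the same templates are run a second time to emit include/linux/atomic-arch-fallback.h, where every generated name carries the arch_ prefix; the andnot fallback above, for instance, comes out roughly as:

static __always_inline int
arch_atomic_fetch_andnot(int i, atomic_t *v)
{
        return arch_atomic_fetch_and(~i, v);
}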
index e098123..6afadf7 100755 (executable)
@@ -20,7 +20,7 @@ gen_param_check()
        # We don't write to constant parameters
        [ ${type#c} != ${type} ] && rw="read"
 
-       printf "\tkasan_check_${rw}(${name}, sizeof(*${name}));\n"
+       printf "\tinstrument_atomic_${rw}(${name}, sizeof(*${name}));\n"
 }
 
 #gen_param_check(arg...)
@@ -84,7 +84,7 @@ gen_proto_order_variant()
        [ ! -z "${guard}" ] && printf "#if ${guard}\n"
 
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomicname}(${params})
 {
 ${checks}
@@ -107,7 +107,7 @@ cat <<EOF
 #define ${xchg}(ptr, ...)                                              \\
 ({                                                                     \\
        typeof(ptr) __ai_ptr = (ptr);                                   \\
-       kasan_check_write(__ai_ptr, ${mult}sizeof(*__ai_ptr));          \\
+       instrument_atomic_write(__ai_ptr, ${mult}sizeof(*__ai_ptr));            \\
        arch_${xchg}(__ai_ptr, __VA_ARGS__);                            \\
 })
 EOF
@@ -147,7 +147,8 @@ cat << EOF
 #define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
 
 #include <linux/build_bug.h>
-#include <linux/kasan-checks.h>
+#include <linux/compiler.h>
+#include <linux/instrumented.h>
 
 EOF
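On the generated side (asm-generic/atomic-instrumented.h), each wrapper now issues an instrument_atomic_*() call, which covers both KASAN and KCSAN, before handing off to the arch_ implementation; for a const parameter the read variant is used, e.g. (sketch of the generated output):

static __always_inline int
atomic_read(const atomic_t *v)
{
        instrument_atomic_read(v, sizeof(*v));  /* was kasan_check_read() */
        return arch_atomic_read(v);
}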
 
index c240a72..e318d3f 100755 (executable)
@@ -46,7 +46,7 @@ gen_proto_order_variant()
        local retstmt="$(gen_ret_stmt "${meta}")"
 
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 atomic_long_${name}(${params})
 {
        ${retstmt}${atomic}_${name}(${argscast});
@@ -64,6 +64,7 @@ cat << EOF
 #ifndef _ASM_GENERIC_ATOMIC_LONG_H
 #define _ASM_GENERIC_ATOMIC_LONG_H
 
+#include <linux/compiler.h>
 #include <asm/types.h>
 
 #ifdef CONFIG_64BIT
index 000dc64..d29e159 100644 (file)
@@ -10,10 +10,11 @@ LINUXDIR=${ATOMICDIR}/../..
 cat <<EOF |
 gen-atomic-instrumented.sh      asm-generic/atomic-instrumented.h
 gen-atomic-long.sh              asm-generic/atomic-long.h
+gen-atomic-fallback.sh          linux/atomic-arch-fallback.h           arch_
 gen-atomic-fallback.sh          linux/atomic-fallback.h
 EOF
-while read script header; do
-       /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header}
+while read script header args; do
+       /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} ${args} > ${LINUXDIR}/include/${header}
        HASH="$(sha1sum ${LINUXDIR}/include/${header})"
        HASH="${HASH%% *}"
        printf "// %s\n" "${HASH}" >> ${LINUXDIR}/include/${header}
index 197436b..4c82060 100755 (executable)
@@ -2407,7 +2407,7 @@ sub process {
 
                if ($rawline=~/^\+\+\+\s+(\S+)/) {
                        $setup_docs = 0;
-                       if ($1 =~ m@Documentation/admin-guide/kernel-parameters.rst$@) {
+                       if ($1 =~ m@Documentation/admin-guide/kernel-parameters.txt$@) {
                                $setup_docs = 1;
                        }
                        #next;
@@ -5945,6 +5945,14 @@ sub process {
                        }
                }
 
+# check for data_race without a comment.
+               if ($line =~ /\bdata_race\s*\(/) {
+                       if (!ctx_has_comment($first_line, $linenr)) {
+                               WARN("DATA_RACE",
+                                    "data_race without comment\n" . $herecurr);
+                       }
+               }
+
 # check for smp_read_barrier_depends and read_barrier_depends
                if (!$file && $line =~ /\b(smp_|)read_barrier_depends\s*\(/) {
                        WARN("READ_BARRIER_DEPENDS",
@@ -6388,7 +6396,7 @@ sub process {
 
                        if (!grep(/$name/, @setup_docs)) {
                                CHK("UNDOCUMENTED_SETUP",
-                                   "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.rst\n" . $herecurr);
+                                   "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.txt\n" . $herecurr);
                        }
                }
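The new DATA_RACE check above expects every data_race() use to sit next to a comment explaining why the race is tolerated; an annotation that satisfies it would look something like this (hypothetical names):

        /* Snapshot of ->poll_state is inherently racy; a stale value only
         * delays the next poll, so the race is annotated and tolerated. */
        if (data_race(dev->poll_state) == POLL_IDLE)
                schedule_next_poll(dev);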
 
index d9cd24c..c45e9af 100644 (file)
@@ -59,6 +59,7 @@ actualy||actually
 acumulating||accumulating
 acumulative||accumulative
 acumulator||accumulator
+acutally||actually
 adapater||adapter
 addional||additional
 additionaly||additionally
@@ -249,6 +250,7 @@ calescing||coalescing
 calle||called
 callibration||calibration
 callled||called
+callser||caller
 calucate||calculate
 calulate||calculate
 cancelation||cancellation
@@ -671,6 +673,7 @@ hanlde||handle
 hanled||handled
 happend||happened
 harware||hardware
+havind||having
 heirarchically||hierarchically
 helpfull||helpful
 hexdecimal||hexadecimal
@@ -845,6 +848,7 @@ logile||logfile
 loobpack||loopback
 loosing||losing
 losted||lost
+maangement||management
 machinary||machinery
 maibox||mailbox
 maintainance||maintenance
@@ -905,6 +909,7 @@ modfiy||modify
 modulues||modules
 momery||memory
 memomry||memory
+monitring||monitoring
 monochorome||monochrome
 monochromo||monochrome
 monocrome||monochrome
@@ -1010,6 +1015,7 @@ partiton||partition
 pased||passed
 passin||passing
 pathes||paths
+pattrns||patterns
 pecularities||peculiarities
 peformance||performance
 peforming||performing
@@ -1256,6 +1262,7 @@ shoule||should
 shrinked||shrunk
 siginificantly||significantly
 signabl||signal
+significanly||significantly
 similary||similarly
 similiar||similar
 simlar||similar
@@ -1371,6 +1378,7 @@ thead||thread
 therfore||therefore
 thier||their
 threds||threads
+threee||three
 threshhold||threshold
 thresold||threshold
 throught||through
@@ -1410,6 +1418,7 @@ tyep||type
 udpate||update
 uesd||used
 uknown||unknown
+usccess||success
 usupported||unsupported
 uncommited||uncommitted
 unconditionaly||unconditionally
index 63d65a7..5fbb90a 100644 (file)
@@ -505,6 +505,28 @@ static const char *uaccess_safe_builtin[] = {
        "__asan_report_store4_noabort",
        "__asan_report_store8_noabort",
        "__asan_report_store16_noabort",
+       /* KCSAN */
+       "__kcsan_check_access",
+       "kcsan_found_watchpoint",
+       "kcsan_setup_watchpoint",
+       "kcsan_check_scoped_accesses",
+       "kcsan_disable_current",
+       "kcsan_enable_current_nowarn",
+       /* KCSAN/TSAN */
+       "__tsan_func_entry",
+       "__tsan_func_exit",
+       "__tsan_read_range",
+       "__tsan_write_range",
+       "__tsan_read1",
+       "__tsan_read2",
+       "__tsan_read4",
+       "__tsan_read8",
+       "__tsan_read16",
+       "__tsan_write1",
+       "__tsan_write2",
+       "__tsan_write4",
+       "__tsan_write8",
+       "__tsan_write16",
        /* KCOV */
        "write_comp_data",
        "check_kcov_mode",
index f159718..4527871 100644 (file)
@@ -3,6 +3,7 @@
 /s390x/resets
 /s390x/sync_regs_test
 /x86_64/cr4_cpuid_sync_test
+/x86_64/debug_regs
 /x86_64/evmcs_test
 /x86_64/hyperv_cpuid
 /x86_64/mmio_warning_test
index b4ff112..4a16658 100644 (file)
@@ -83,7 +83,11 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
 LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+ifeq ($(ARCH),x86_64)
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
+else
 LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
        -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
index 674151d..b7531c8 100644 (file)
@@ -33,6 +33,7 @@ struct svm_test_data {
 struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
 void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
 void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+bool nested_svm_supported(void);
 void nested_svm_check_supported(void);
 
 static inline bool cpu_has_svm(void)
index ccff3e6..16fa21e 100644 (file)
@@ -598,15 +598,12 @@ union vmx_ctrl_msr {
        };
 };
 
-union vmx_basic basic;
-union vmx_ctrl_msr ctrl_pin_rev;
-union vmx_ctrl_msr ctrl_exit_rev;
-
 struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
 bool prepare_for_vmx_operation(struct vmx_pages *vmx);
 void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
 bool load_vmcs(struct vmx_pages *vmx);
 
+bool nested_vmx_supported(void);
 void nested_vmx_check_supported(void);
 
 void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
index c9cede5..74776ee 100644 (file)
@@ -195,11 +195,18 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
        case VM_MODE_PXXV48_4K:
 #ifdef __x86_64__
                kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
-               TEST_ASSERT(vm->va_bits == 48, "Linear address width "
-                           "(%d bits) not supported", vm->va_bits);
+               /*
+                * Ignore KVM support for 5-level paging (vm->va_bits == 57),
+                * it doesn't take effect unless CR4.LA57 is set, which it
+                * isn't for this VM_MODE.
+                */
+               TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
+                           "Linear address width (%d bits) not supported",
+                           vm->va_bits);
                pr_debug("Guest physical address width detected: %d\n",
                         vm->pa_bits);
                vm->pgtable_levels = 4;
+               vm->va_bits = 48;
 #else
                TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
 #endif
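
For context, va_bits comes from kvm_get_cpu_address_width(), i.e. the host's reported linear address width; 57 simply means the host and KVM advertise 5-level paging, which this VM mode never enables, hence va_bits is pinned back to 48 after the assert. A hedged sketch of where those numbers originate (CPUID leaf 0x80000008; function name hypothetical):

    /* CPUID.80000008H:EAX[7:0] reports the physical address width and
     * EAX[15:8] the linear (virtual) address width; 48 corresponds to
     * 4-level paging, 57 to 5-level paging once CR4.LA57 is enabled.
     */
    static void get_address_widths(unsigned int *pa_bits, unsigned int *va_bits)
    {
            unsigned int eax, ebx, ecx, edx;

            __asm__ volatile("cpuid"
                             : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                             : "a"(0x80000008u), "c"(0u));
            *pa_bits = eax & 0xff;
            *va_bits = (eax >> 8) & 0xff;
    }
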
index c424010..3a5c72e 100644 (file)
@@ -148,14 +148,18 @@ void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
                : "r15", "memory");
 }
 
-void nested_svm_check_supported(void)
+bool nested_svm_supported(void)
 {
        struct kvm_cpuid_entry2 *entry =
                kvm_get_supported_cpuid_entry(0x80000001);
 
-       if (!(entry->ecx & CPUID_SVM)) {
+       return entry->ecx & CPUID_SVM;
+}
+
+void nested_svm_check_supported(void)
+{
+       if (!nested_svm_supported()) {
                print_skip("nested SVM not enabled");
                exit(KSFT_SKIP);
        }
 }
-
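
Splitting the check into a bool predicate plus the exiting wrapper (here for SVM, and analogously for nested_vmx_supported() below) lets callers degrade gracefully instead of skipping the whole test. A fragment-level sketch of the intended call pattern, matching the smm_test/state_test hunks further below:

    /* Optional user: only allocate nested SVM state when it is available. */
    if (nested_svm_supported())
            vcpu_alloc_svm(vm, &nested_gva);

    /* Mandatory user: tests that cannot run at all without nested SVM keep
     * calling the wrapper, which print_skip()s and exit(KSFT_SKIP)s.
     */
    nested_svm_check_supported();
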
index 4ae104f..f1e00d4 100644 (file)
@@ -379,11 +379,16 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
        init_vmcs_guest_state(guest_rip, guest_rsp);
 }
 
-void nested_vmx_check_supported(void)
+bool nested_vmx_supported(void)
 {
        struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 
-       if (!(entry->ecx & CPUID_VMX)) {
+       return entry->ecx & CPUID_VMX;
+}
+
+void nested_vmx_check_supported(void)
+{
+       if (!nested_vmx_supported()) {
                print_skip("nested VMX not enabled");
                exit(KSFT_SKIP);
        }
index e6e62e5..7579281 100644 (file)
@@ -94,9 +94,10 @@ int main(int argc, char *argv[])
 
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
-       if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
+       if (!nested_vmx_supported() ||
+           !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
            !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-               print_skip("capabilities not available");
+               print_skip("Enlightened VMCS is unsupported");
                exit(KSFT_SKIP);
        }
 
index 4a7967c..745b708 100644 (file)
@@ -170,7 +170,8 @@ int main(int argc, char *argv[])
                case 1:
                        break;
                case 2:
-                       if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+                       if (!nested_vmx_supported() ||
+                           !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
                                print_skip("Enlightened VMCS is unsupported");
                                continue;
                        }
index 6f8f478..ae39a22 100644 (file)
@@ -47,10 +47,10 @@ uint8_t smi_handler[] = {
        0x0f, 0xaa,           /* rsm */
 };
 
-void sync_with_host(uint64_t phase)
+static inline void sync_with_host(uint64_t phase)
 {
        asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
-                    : : "a" (phase));
+                    : "+a" (phase));
 }
 
 void self_smi(void)
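
The constraint change above matters because the in instruction overwrites %al: binding phase as an input-only "a" operand told the compiler the register was untouched, while "+a" correctly marks it read-write. A minimal sketch of the same idea, using a hypothetical port number:

    /* "in" loads a byte from the I/O port into %al, so the C variable tied
     * to %al must be a read-write ("+a") operand, not an input-only one.
     */
    static inline unsigned char sync_port_read(unsigned char val)
    {
            asm volatile("in $0xe0, %%al"   /* hypothetical port */
                         : "+a"(val));
            return val;
    }
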
@@ -118,16 +118,17 @@ int main(int argc, char *argv[])
        vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
 
        if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-               if (kvm_get_supported_cpuid_entry(0x80000001)->ecx & CPUID_SVM)
+               if (nested_svm_supported())
                        vcpu_alloc_svm(vm, &nested_gva);
-               else
+               else if (nested_vmx_supported())
                        vcpu_alloc_vmx(vm, &nested_gva);
-               vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
-       } else {
-               pr_info("will skip SMM test with VMX enabled\n");
-               vcpu_args_set(vm, VCPU_ID, 1, 0);
        }
 
+       if (!nested_gva)
+               pr_info("will skip SMM test with VMX enabled\n");
+
+       vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+
        for (stage = 1;; stage++) {
                _vcpu_run(vm, VCPU_ID);
                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
index d43b6f9..f6c8b90 100644 (file)
@@ -171,16 +171,17 @@ int main(int argc, char *argv[])
        vcpu_regs_get(vm, VCPU_ID, &regs1);
 
        if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-               if (kvm_get_supported_cpuid_entry(0x80000001)->ecx & CPUID_SVM)
+               if (nested_svm_supported())
                        vcpu_alloc_svm(vm, &nested_gva);
-               else
+               else if (nested_vmx_supported())
                        vcpu_alloc_vmx(vm, &nested_gva);
-               vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
-       } else {
-               pr_info("will skip nested state checks\n");
-               vcpu_args_set(vm, VCPU_ID, 1, 0);
        }
 
+       if (!nested_gva)
+               pr_info("will skip nested state checks\n");
+
+       vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+
        for (stage = 1;; stage++) {
                _vcpu_run(vm, VCPU_ID);
                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
index cc72b61..a7737af 100644 (file)
@@ -31,6 +31,10 @@ bool l2_save_restore_done;
 static u64 l2_vmx_pt_start;
 volatile u64 l2_vmx_pt_finish;
 
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
 void l2_guest_code(void)
 {
        u64 vmx_pt_delta;
index 51b89ce..8b75821 100644 (file)
@@ -502,7 +502,7 @@ static bool wait_for_scan(const char *msg, char *p)
 
        madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
 
-       return !timeout;
+       return timeout == -1;
 }
 
 static void alloc_at_fault(void)
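
The returned condition changes because the retry loop uses a post-decrementing counter: when every retry is consumed the counter ends at -1, whereas breaking out on success can leave it at 0, so !timeout could misreport a last-iteration success as a timeout. A small sketch of that loop shape (helper names hypothetical, not the test's exact code):

    #include <unistd.h>

    int work_is_done(void);     /* hypothetical completion check */

    static int scan_timed_out(void)
    {
            int timeout = 6;

            while (timeout--) {
                    if (work_is_done())
                            break;
                    sleep(1);
            }
            /* timeout == -1: every retry used up (timed out);
             * timeout >= 0: completion observed, possibly on the last try.
             */
            return timeout == -1;
    }
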
index 5f8f3e8..4579960 100644 (file)
@@ -164,7 +164,9 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
                return 0;
 
-       /* setup delayed work */
+       /* Arch specific code should not do async PF in this case */
+       if (unlikely(kvm_is_error_hva(hva)))
+               return 0;
 
        /*
         * do alloc nowait since if we are going to sleep anyway we
@@ -183,24 +185,15 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        mmget(work->mm);
        kvm_get_kvm(work->vcpu->kvm);
 
-       /* this can't really happen otherwise gfn_to_pfn_async
-          would succeed */
-       if (unlikely(kvm_is_error_hva(work->addr)))
-               goto retry_sync;
-
        INIT_WORK(&work->work, async_pf_execute);
-       if (!schedule_work(&work->work))
-               goto retry_sync;
 
        list_add_tail(&work->queue, &vcpu->async_pf.queue);
        vcpu->async_pf.queued++;
-       kvm_arch_async_page_not_present(vcpu, work);
+       work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work);
+
+       schedule_work(&work->work);
+
        return 1;
-retry_sync:
-       kvm_put_kvm(work->vcpu->kvm);
-       mmput(work->mm);
-       kmem_cache_free(async_pf_cache, work);
-       return 0;
 }
 
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
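
The net effect of the hunk above is a simpler setup path: the hva is validated before anything is allocated, the work item is queued and the "page not present" injection result recorded, and only then is the worker scheduled, which removes the retry_sync unwind entirely. A condensed sketch of the resulting ordering (simplified from the lines shown above, not the full function):

    /* Arch-specific code handles the fault synchronously instead. */
    if (kvm_is_error_hva(hva))
            return 0;

    /* ... allocate the work item, take the mm/kvm references ... */

    INIT_WORK(&work->work, async_pf_execute);
    list_add_tail(&work->queue, &vcpu->async_pf.queue);
    vcpu->async_pf.queued++;
    work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work);

    schedule_work(&work->work);
    return 1;
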
index 0dfee75..a852af5 100644 (file)
@@ -154,10 +154,9 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
 static unsigned long long kvm_createvm_count;
 static unsigned long long kvm_active_vms;
 
-__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-               unsigned long start, unsigned long end, bool blockable)
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+                                                  unsigned long start, unsigned long end)
 {
-       return 0;
 }
 
 bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
@@ -383,6 +382,18 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
        return container_of(mn, struct kvm, mmu_notifier);
 }
 
+static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
+                                             struct mm_struct *mm,
+                                             unsigned long start, unsigned long end)
+{
+       struct kvm *kvm = mmu_notifier_to_kvm(mn);
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+       srcu_read_unlock(&kvm->srcu, idx);
+}
+
 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long address,
@@ -407,7 +418,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
        int need_tlb_flush = 0, idx;
-       int ret;
 
        idx = srcu_read_lock(&kvm->srcu);
        spin_lock(&kvm->mmu_lock);
@@ -424,14 +434,9 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
                kvm_flush_remote_tlbs(kvm);
 
        spin_unlock(&kvm->mmu_lock);
-
-       ret = kvm_arch_mmu_notifier_invalidate_range(kvm, range->start,
-                                       range->end,
-                                       mmu_notifier_range_blockable(range));
-
        srcu_read_unlock(&kvm->srcu, idx);
 
-       return ret;
+       return 0;
 }
 
 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -537,6 +542,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 }
 
 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+       .invalidate_range       = kvm_mmu_notifier_invalidate_range,
        .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
        .invalidate_range_end   = kvm_mmu_notifier_invalidate_range_end,
        .clear_flush_young      = kvm_mmu_notifier_clear_flush_young,
@@ -2970,7 +2976,6 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 {
        struct kvm_vcpu *vcpu = filp->private_data;
 
-       debugfs_remove_recursive(vcpu->debugfs_dentry);
        kvm_put_kvm(vcpu->kvm);
        return 0;
 }
@@ -2997,16 +3002,17 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
+       struct dentry *debugfs_dentry;
        char dir_name[ITOA_MAX_LEN * 2];
 
        if (!debugfs_initialized())
                return;
 
        snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
-       vcpu->debugfs_dentry = debugfs_create_dir(dir_name,
-                                                 vcpu->kvm->debugfs_dentry);
+       debugfs_dentry = debugfs_create_dir(dir_name,
+                                           vcpu->kvm->debugfs_dentry);
 
-       kvm_arch_create_vcpu_debugfs(vcpu);
+       kvm_arch_create_vcpu_debugfs(vcpu, debugfs_dentry);
 #endif
 }
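
With this change the per-vCPU dentry is only needed locally while creating the directory and handing it to the arch hook; it no longer has to be cached in the vCPU or removed one by one on release, presumably because the vcpuN directories live under the VM's debugfs directory, whose recursive removal (done elsewhere in kvm_main.c) cleans them up. A small sketch of that debugfs behaviour, with hypothetical directory names:

    #include <linux/debugfs.h>

    static void debugfs_tree_demo(void)
    {
            struct dentry *vm_dir, *vcpu_dir;

            vm_dir   = debugfs_create_dir("12345-67", NULL);    /* VM directory */
            vcpu_dir = debugfs_create_dir("vcpu0", vm_dir);     /* child of it  */
            (void)vcpu_dir;

            /* One recursive removal of the parent is enough; the vcpu0
             * subdirectory goes away with it.
             */
            debugfs_remove_recursive(vm_dir);
    }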
 
@@ -3743,21 +3749,18 @@ static long kvm_vm_ioctl(struct file *filp,
                if (routing.flags)
                        goto out;
                if (routing.nr) {
-                       r = -ENOMEM;
-                       entries = vmalloc(array_size(sizeof(*entries),
-                                                    routing.nr));
-                       if (!entries)
-                               goto out;
-                       r = -EFAULT;
                        urouting = argp;
-                       if (copy_from_user(entries, urouting->entries,
-                                          routing.nr * sizeof(*entries)))
-                               goto out_free_irq_routing;
+                       entries = vmemdup_user(urouting->entries,
+                                              array_size(sizeof(*entries),
+                                                         routing.nr));
+                       if (IS_ERR(entries)) {
+                               r = PTR_ERR(entries);
+                               goto out;
+                       }
                }
                r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                        routing.flags);
-out_free_irq_routing:
-               vfree(entries);
+               kvfree(entries);
                break;
        }
 #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
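
The last hunk replaces the open-coded vmalloc()+copy_from_user() pair with vmemdup_user(), which reports failure as an ERR_PTR rather than NULL and whose result must be released with kvfree(), since the copy can be either kmalloc- or vmalloc-backed. A generic sketch of the pattern (everything except the kernel helpers is hypothetical):

    #include <linux/err.h>
    #include <linux/mm.h>
    #include <linux/overflow.h>
    #include <linux/string.h>
    #include <linux/types.h>
    #include <linux/uaccess.h>

    struct demo_entry { u32 a, b; };                   /* hypothetical payload  */
    long apply_entries(struct demo_entry *e, u32 nr);  /* hypothetical consumer */

    static long demo_set_entries(const void __user *uentries, u32 nr)
    {
            struct demo_entry *entries;
            long ret;

            /* One call duplicates the user buffer; array_size() guards the
             * multiplication against overflow.
             */
            entries = vmemdup_user(uentries, array_size(sizeof(*entries), nr));
            if (IS_ERR(entries))
                    return PTR_ERR(entries);

            ret = apply_entries(entries, nr);
            kvfree(entries);
            return ret;
    }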