Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Jul 2019 23:12:03 +0000 (16:12 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Jul 2019 23:12:03 +0000 (16:12 -0700)
Pull locking updates from Ingo Molnar:
 "The main changes in this cycle are:

   - rwsem scalability improvements, phase #2, by Waiman Long, which are
     rather impressive:

       "On a 2-socket 40-core 80-thread Skylake system with 40 reader
        and writer locking threads, the min/mean/max locking operations
        done in a 5-second testing window before the patchset were:

         40 readers, Iterations Min/Mean/Max = 1,807/1,808/1,810
         40 writers, Iterations Min/Mean/Max = 1,807/50,344/151,255

        After the patchset, they became:

         40 readers, Iterations Min/Mean/Max = 30,057/31,359/32,741
         40 writers, Iterations Min/Mean/Max = 94,466/95,845/97,098"

     There are a lot of changes to the locking implementation that make
     it similar to qrwlock, including owner handoff for fairer locking
     (see the count-layout sketch after this quoted summary).

     Another microbenchmark shows how the improvements hold up across
     the spectrum of thread counts:

       "With a locking microbenchmark running on 5.1 based kernel, the
        total locking rates (in kops/s) on a 2-socket Skylake system
        with equal numbers of readers and writers (mixed) before and
        after this patchset were:

        # of Threads   Before Patch      After Patch
        ------------   ------------      -----------
             2            2,618             4,193
             4            1,202             3,726
             8              802             3,622
            16              729             3,359
            32              319             2,826
            64              102             2,744"

     The changes are extensive and the patchset has been through
     several iterations addressing various locking workloads. There
     might be more regressions, but unless they are pathological I
     believe we want to use this new implementation as the baseline
     going forward.

   - jump-label optimizations by Daniel Bristot de Oliveira: the primary
     motivation was to remove IPI disturbance of isolated RT-workload
     CPUs, which resulted in the implementation of batched jump-label
     updates. Beyond improving the kernel's real-time characteristics,
     in one test this patchset reduced static key update overhead from
     57 msecs to just 1.4 msecs - a nice speedup as well.

   - atomic64_t cross-arch type cleanups by Mark Rutland: over the last
     ~10 years of atomic64_t's existence, the various types used by the
     APIs only had to be self-consistent within each architecture -
     which means they became wildly inconsistent across architectures.
     Mark puts an end to this by reworking all the atomic64
     implementations to use 's64' as the base type for atomic64_t, and
     by ensuring that this type is consistently used for parameters and
     return values in the API, avoiding further problems in this area
     (see the signature sketch after this quoted summary).

   - A large set of small improvements to lockdep by Yuyang Du: type
     cleanups, output cleanups, function return type and other cleanups
     all over the place.

   - A set of percpu ops cleanups and fixes by Peter Zijlstra.

   - Misc other changes - please see the Git log for more details"
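
The rwsem rework summarized above keeps the reader count, the writer bit and
a handoff flag in a single count word. Below is a minimal user-space sketch
of that idea; the names, bit positions and the trylock helper are
illustrative assumptions for this sketch, not code taken from the patch set:

  /*
   * Sketch: one count word carries writer/waiters/handoff flag bits plus a
   * reader count in the upper bits. Once a starving waiter sets the handoff
   * bit, new readers back out of the fast path, which is the mechanism that
   * prevents lock starvation.
   */
  #include <stdatomic.h>
  #include <stdbool.h>

  #define SKETCH_WRITER_LOCKED  (1UL << 0)  /* a writer holds the lock    */
  #define SKETCH_FLAG_WAITERS   (1UL << 1)  /* wait queue is non-empty    */
  #define SKETCH_FLAG_HANDOFF   (1UL << 2)  /* next waiter gets the lock  */
  #define SKETCH_READER_SHIFT   8
  #define SKETCH_READER_BIAS    (1UL << SKETCH_READER_SHIFT)

  struct sketch_rwsem {
          atomic_ulong count;
  };

  /* Reader fast path: optimistically add a reader bias, then back out if a
   * writer holds the lock or a handoff is pending. */
  static bool sketch_down_read_trylock(struct sketch_rwsem *sem)
  {
          unsigned long c = atomic_fetch_add(&sem->count, SKETCH_READER_BIAS);

          if (!(c & (SKETCH_WRITER_LOCKED | SKETCH_FLAG_HANDOFF)))
                  return true;    /* the reader bias now holds the lock   */

          atomic_fetch_sub(&sem->count, SKETCH_READER_BIAS);
          return false;           /* slow path: queue up, possibly spin   */
  }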
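
The atomic64_t cleanup, in turn, converges every architecture on the same
s64-based signatures, as the per-architecture hunks further below show. A
rough user-space model of the resulting API shape, using C11 atomics purely
for illustration (the kernel versions are per-arch assembly or the generic
spinlock-based fallback in lib/atomic64.c):

  #include <stdatomic.h>
  #include <stdint.h>

  typedef int64_t s64;
  typedef struct { _Atomic s64 counter; } atomic64_t;

  static inline s64 atomic64_read(const atomic64_t *v)
  {
          /* the cast only drops const for this user-space model */
          return atomic_load((_Atomic s64 *)&v->counter);
  }

  static inline void atomic64_set(atomic64_t *v, s64 i)
  {
          atomic_store(&v->counter, i);
  }

  static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v)
  {
          return atomic_fetch_add(&v->counter, i);  /* returns the old value */
  }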

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (82 commits)
  locking/lockdep: increase size of counters for lockdep statistics
  locking/atomics: Use sed(1) instead of non-standard head(1) option
  locking/lockdep: Move mark_lock() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING
  x86/jump_label: Make tp_vec_nr static
  x86/percpu: Optimize raw_cpu_xchg()
  x86/percpu, sched/fair: Avoid local_clock()
  x86/percpu, x86/irq: Relax {set,get}_irq_regs()
  x86/percpu: Relax smp_processor_id()
  x86/percpu: Differentiate this_cpu_{}() and __this_cpu_{}()
  locking/rwsem: Guard against making count negative
  locking/rwsem: Adaptive disabling of reader optimistic spinning
  locking/rwsem: Enable time-based spinning on reader-owned rwsem
  locking/rwsem: Make rwsem->owner an atomic_long_t
  locking/rwsem: Enable readers spinning on writer
  locking/rwsem: Clarify usage of owner's nonspinnable bit
  locking/rwsem: Wake up almost all readers in wait queue
  locking/rwsem: More optimal RT task handling of null owner
  locking/rwsem: Always release wait_lock before waking up tasks
  locking/rwsem: Implement lock handoff to prevent lock starvation
  locking/rwsem: Make rwsem_spin_on_owner() return owner state
  ...

55 files changed:
Documentation/atomic_t.txt
Documentation/locking/lockdep-design.txt
arch/alpha/include/asm/atomic.h
arch/arc/include/asm/atomic.h
arch/arm/include/asm/atomic.h
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/atomic_lse.h
arch/ia64/include/asm/atomic.h
arch/mips/include/asm/atomic.h
arch/powerpc/include/asm/atomic.h
arch/riscv/include/asm/atomic.h
arch/s390/include/asm/atomic.h
arch/s390/pci/pci_debug.c
arch/sparc/include/asm/atomic_64.h
arch/x86/events/core.c
arch/x86/include/asm/atomic.h
arch/x86/include/asm/atomic64_32.h
arch/x86/include/asm/atomic64_64.h
arch/x86/include/asm/barrier.h
arch/x86/include/asm/irq_regs.h
arch/x86/include/asm/jump_label.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/text-patching.h
arch/x86/kernel/alternative.c
arch/x86/kernel/jump_label.c
drivers/crypto/nx/nx-842-pseries.c
drivers/infiniband/core/device.c
drivers/tty/tty_ldisc.c
fs/dax.c
include/asm-generic/atomic64.h
include/linux/jump_label.h
include/linux/lockdep.h
include/linux/percpu-rwsem.h
include/linux/rwsem.h
include/linux/sched/wake_q.h
include/linux/smp.h
include/linux/types.h
init/init_task.c
kernel/fork.c
kernel/futex.c
kernel/jump_label.c
kernel/locking/Makefile
kernel/locking/lock_events.h
kernel/locking/lock_events_list.h
kernel/locking/lockdep.c
kernel/locking/lockdep_internals.h
kernel/locking/rwsem-xadd.c [deleted file]
kernel/locking/rwsem.c
kernel/locking/rwsem.h
kernel/sched/fair.c
lib/Kconfig.debug
lib/atomic64.c
scripts/atomic/check-atomics.sh
security/apparmor/label.c

index b3afe69..0ab747e 100644 (file)
@@ -81,9 +81,11 @@ Non-RMW ops:
 
 The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
 implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
-smp_store_release() respectively.
+smp_store_release() respectively. Therefore, if you find yourself only using
+the Non-RMW operations of atomic_t, you do not in fact need atomic_t at all
+and are doing it wrong.
 
-The one detail to this is that atomic_set{}() should be observable to the RMW
+A subtle detail of atomic_set{}() is that it should be observable to the RMW
 ops. That is:
 
   C atomic-set
@@ -200,6 +202,9 @@ These helper barriers exist because architectures have varying implicit
 ordering on their SMP atomic primitives. For example our TSO architectures
 provide full ordered atomics and these barriers are no-ops.
 
+NOTE: when the atomic RmW ops are fully ordered, they should also imply a
+compiler barrier.
+
 Thus:
 
   atomic_fetch_add();
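
The first hunk in this file notes that the non-RMW atomic_t accessors are
(typically) just annotated plain loads and stores; as a simplified user-space
model of the generic definitions (not a copy of any particular header), that
amounts to:

  #define READ_ONCE(x)        (*(const volatile typeof(x) *)&(x))
  #define WRITE_ONCE(x, val)  (*(volatile typeof(x) *)&(x) = (val))

  typedef struct { int counter; } atomic_t;

  static inline int atomic_read(const atomic_t *v)
  {
          return READ_ONCE(v->counter);   /* plain load, no extra ordering  */
  }

  static inline void atomic_set(atomic_t *v, int i)
  {
          WRITE_ONCE(v->counter, i);      /* plain store, no extra ordering */
  }
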
index 39fae14..f189d13 100644 (file)
@@ -15,34 +15,48 @@ tens of thousands of) instantiations. For example a lock in the inode
 struct is one class, while each inode has its own instantiation of that
 lock class.
 
-The validator tracks the 'state' of lock-classes, and it tracks
-dependencies between different lock-classes. The validator maintains a
-rolling proof that the state and the dependencies are correct.
-
-Unlike an lock instantiation, the lock-class itself never goes away: when
-a lock-class is used for the first time after bootup it gets registered,
-and all subsequent uses of that lock-class will be attached to this
-lock-class.
+The validator tracks the 'usage state' of lock-classes, and it tracks
+the dependencies between different lock-classes. Lock usage indicates
+how a lock is used with regard to its IRQ contexts, while lock
+dependency can be understood as lock order, where L1 -> L2 suggests that
+a task is attempting to acquire L2 while holding L1. From lockdep's
+perspective, the two locks (L1 and L2) are not necessarily related; that
+dependency just means the order ever happened. The validator maintains a
+continuing effort to prove lock usages and dependencies are correct or
+the validator will shoot a splat if incorrect.
+
+A lock-class's behavior is constructed by its instances collectively:
+when the first instance of a lock-class is used after bootup the class
+gets registered, then all (subsequent) instances will be mapped to the
+class and hence their usages and dependencies will contribute to those of
+the class. A lock-class does not go away when a lock instance does, but
+it can be removed if the memory space of the lock class (static or
+dynamic) is reclaimed, this happens for example when a module is
+unloaded or a workqueue is destroyed.
 
 State
 -----
 
-The validator tracks lock-class usage history into 4 * nSTATEs + 1 separate
-state bits:
+The validator tracks lock-class usage history and divides the usage into
+(4 usages * n STATEs + 1) categories:
 
+where the 4 usages can be:
 - 'ever held in STATE context'
 - 'ever held as readlock in STATE context'
 - 'ever held with STATE enabled'
 - 'ever held as readlock with STATE enabled'
 
-Where STATE can be either one of (kernel/locking/lockdep_states.h)
- - hardirq
- - softirq
+where the n STATEs are coded in kernel/locking/lockdep_states.h and as of
+now they include:
+- hardirq
+- softirq
 
+where the last 1 category is:
 - 'ever used'                                       [ == !unused        ]
 
-When locking rules are violated, these state bits are presented in the
-locking error messages, inside curlies. A contrived example:
+When locking rules are violated, these usage bits are presented in the
+locking error messages, inside curlies, with a total of 2 * n STATEs bits.
+A contrived example:
 
    modprobe/2287 is trying to acquire lock:
     (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
@@ -51,28 +65,67 @@ locking error messages, inside curlies. A contrived example:
     (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
 
 
-The bit position indicates STATE, STATE-read, for each of the states listed
-above, and the character displayed in each indicates:
+For a given lock, the bit positions from left to right indicate the usage
+of the lock and readlock (if exists), for each of the n STATEs listed
+above respectively, and the character displayed at each bit position
+indicates:
 
    '.'  acquired while irqs disabled and not in irq context
    '-'  acquired in irq context
    '+'  acquired with irqs enabled
    '?'  acquired in irq context with irqs enabled.
 
-Unused mutexes cannot be part of the cause of an error.
+The bits are illustrated with an example:
+
+    (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+                         ||||
+                         ||| \-> softirq disabled and not in softirq context
+                         || \--> acquired in softirq context
+                         | \---> hardirq disabled and not in hardirq context
+                          \----> acquired in hardirq context
+
+
+For a given STATE, whether the lock is ever acquired in that STATE
+context and whether that STATE is enabled yields four possible cases as
+shown in the table below. The bit character is able to indicate which
+exact case is for the lock as of the reporting time.
+
+   -------------------------------------------
+  |              | irq enabled | irq disabled |
+  |-------------------------------------------|
+  | ever in irq  |      ?      |       -      |
+  |-------------------------------------------|
+  | never in irq |      +      |       .      |
+   -------------------------------------------
+
+The character '-' suggests irq is disabled because if otherwise the
+character '?' would have been shown instead. Similar deduction can be
+applied for '+' too.
+
+Unused locks (e.g., mutexes) cannot be part of the cause of an error.
 
 
 Single-lock state rules:
 ------------------------
 
+A lock being irq-safe means it was ever used in an irq context, while a
+lock being irq-unsafe means it was ever acquired with irq enabled.
+
 A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
-following states are exclusive, and only one of them is allowed to be
-set for any lock-class:
+following states must be exclusive: only one of them is allowed to be set
+for any lock-class based on its usage:
+
+ <hardirq-safe> or <hardirq-unsafe>
+ <softirq-safe> or <softirq-unsafe>
 
- <hardirq-safe> and <hardirq-unsafe>
- <softirq-safe> and <softirq-unsafe>
+This is because if a lock can be used in irq context (irq-safe) then it
+cannot be ever acquired with irq enabled (irq-unsafe). Otherwise, a
+deadlock may happen. For example, in the scenario that after this lock
+was acquired but before released, if the context is interrupted this
+lock will be attempted to acquire twice, which creates a deadlock,
+referred to as lock recursion deadlock.
 
-The validator detects and reports lock usage that violate these
+The validator detects and reports lock usage that violates these
 single-lock state rules.
 
 Multi-lock dependency rules:
@@ -81,15 +134,18 @@ Multi-lock dependency rules:
 The same lock-class must not be acquired twice, because this could lead
 to lock recursion deadlocks.
 
-Furthermore, two locks may not be taken in different order:
+Furthermore, two locks can not be taken in inverse order:
 
  <L1> -> <L2>
  <L2> -> <L1>
 
-because this could lead to lock inversion deadlocks. (The validator
-finds such dependencies in arbitrary complexity, i.e. there can be any
-other locking sequence between the acquire-lock operations, the
-validator will still track all dependencies between locks.)
+because this could lead to a deadlock - referred to as lock inversion
+deadlock - as attempts to acquire the two locks form a circle which
+could lead to the two contexts waiting for each other permanently. The
+validator will find such dependency circle in arbitrary complexity,
+i.e., there can be any other locking sequence between the acquire-lock
+operations; the validator will still find whether these locks can be
+acquired in a circular fashion.
 
 Furthermore, the following usage based lock dependencies are not allowed
 between any two lock-classes:
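
To make the inversion rule above concrete, here is the AB-BA pattern in a
self-contained user-space form, with pthread mutexes standing in for two
kernel lock classes (names are made up for the example); in the kernel, two
code paths taking the same pair of lock classes in inverse order like this
is exactly what the validator records as a dependency circle and reports:

  #include <pthread.h>

  static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
  static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

  static void *path_one(void *arg)
  {
          (void)arg;
          pthread_mutex_lock(&lock_a);    /* kernel analogue: A -> B recorded */
          pthread_mutex_lock(&lock_b);
          pthread_mutex_unlock(&lock_b);
          pthread_mutex_unlock(&lock_a);
          return NULL;
  }

  static void *path_two(void *arg)
  {
          (void)arg;
          pthread_mutex_lock(&lock_b);    /* kernel analogue: B -> A recorded */
          pthread_mutex_lock(&lock_a);    /* inverse order: closes the circle */
          pthread_mutex_unlock(&lock_a);
          pthread_mutex_unlock(&lock_b);
          return NULL;
  }
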
index 150a1c5..2144530 100644 (file)
@@ -93,9 +93,9 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)     \
 }
 
 #define ATOMIC64_OP(op, asm_op)                                                \
-static __inline__ void atomic64_##op(long i, atomic64_t * v)           \
+static __inline__ void atomic64_##op(s64 i, atomic64_t * v)            \
 {                                                                      \
-       unsigned long temp;                                             \
+       s64 temp;                                                       \
        __asm__ __volatile__(                                           \
        "1:     ldq_l %0,%1\n"                                          \
        "       " #asm_op " %0,%2,%0\n"                                 \
@@ -109,9 +109,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)                \
 }                                                                      \
 
 #define ATOMIC64_OP_RETURN(op, asm_op)                                 \
-static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v)  \
+static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v)    \
 {                                                                      \
-       long temp, result;                                              \
+       s64 temp, result;                                               \
        __asm__ __volatile__(                                           \
        "1:     ldq_l %0,%1\n"                                          \
        "       " #asm_op " %0,%3,%2\n"                                 \
@@ -128,9 +128,9 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v)       \
 }
 
 #define ATOMIC64_FETCH_OP(op, asm_op)                                  \
-static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v)   \
+static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v)     \
 {                                                                      \
-       long temp, result;                                              \
+       s64 temp, result;                                               \
        __asm__ __volatile__(                                           \
        "1:     ldq_l %2,%1\n"                                          \
        "       " #asm_op " %2,%3,%0\n"                                 \
@@ -246,9 +246,9 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
-       long c, new, old;
+       s64 c, new, old;
        smp_mb();
        __asm__ __volatile__(
        "1:     ldq_l   %[old],%[mem]\n"
@@ -276,9 +276,9 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
  * The function returns the old value of *v minus 1, even if
  * the atomic variable, v, was not decremented.
  */
-static inline long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
 {
-       long old, tmp;
+       s64 old, tmp;
        smp_mb();
        __asm__ __volatile__(
        "1:     ldq_l   %[old],%[mem]\n"
index 17cf1c6..7298ce8 100644 (file)
@@ -321,14 +321,14 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
  */
 
 typedef struct {
-       aligned_u64 counter;
+       s64 __aligned(8) counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(a) { (a) }
 
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
 {
-       unsigned long long val;
+       s64 val;
 
        __asm__ __volatile__(
        "       ldd   %0, [%1]  \n"
@@ -338,7 +338,7 @@ static inline long long atomic64_read(const atomic64_t *v)
        return val;
 }
 
-static inline void atomic64_set(atomic64_t *v, long long a)
+static inline void atomic64_set(atomic64_t *v, s64 a)
 {
        /*
         * This could have been a simple assignment in "C" but would need
@@ -359,9 +359,9 @@ static inline void atomic64_set(atomic64_t *v, long long a)
 }
 
 #define ATOMIC64_OP(op, op1, op2)                                      \
-static inline void atomic64_##op(long long a, atomic64_t *v)           \
+static inline void atomic64_##op(s64 a, atomic64_t *v)                 \
 {                                                                      \
-       unsigned long long val;                                         \
+       s64 val;                                                        \
                                                                        \
        __asm__ __volatile__(                                           \
        "1:                             \n"                             \
@@ -372,13 +372,13 @@ static inline void atomic64_##op(long long a, atomic64_t *v)              \
        "       bnz     1b              \n"                             \
        : "=&r"(val)                                                    \
        : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");                                                \
+       : "cc");                                                        \
 }                                                                      \
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)                               \
-static inline long long atomic64_##op##_return(long long a, atomic64_t *v)     \
+static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v)         \
 {                                                                      \
-       unsigned long long val;                                         \
+       s64 val;                                                        \
                                                                        \
        smp_mb();                                                       \
                                                                        \
@@ -399,9 +399,9 @@ static inline long long atomic64_##op##_return(long long a, atomic64_t *v)  \
 }
 
 #define ATOMIC64_FETCH_OP(op, op1, op2)                                        \
-static inline long long atomic64_fetch_##op(long long a, atomic64_t *v)        \
+static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v)            \
 {                                                                      \
-       unsigned long long val, orig;                                   \
+       s64 val, orig;                                                  \
                                                                        \
        smp_mb();                                                       \
                                                                        \
@@ -441,10 +441,10 @@ ATOMIC64_OPS(xor, xor, xor)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-static inline long long
-atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
+static inline s64
+atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
 {
-       long long prev;
+       s64 prev;
 
        smp_mb();
 
@@ -464,9 +464,9 @@ atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
        return prev;
 }
 
-static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
+static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new)
 {
-       long long prev;
+       s64 prev;
 
        smp_mb();
 
@@ -492,9 +492,9 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
  * the atomic variable, v, was not decremented.
  */
 
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
 {
-       long long val;
+       s64 val;
 
        smp_mb();
 
@@ -525,10 +525,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
  * Atomically adds @a to @v, if it was not @u.
  * Returns the old value of @v
  */
-static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
-                                                 long long u)
+static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
-       long long old, temp;
+       s64 old, temp;
 
        smp_mb();
 
index 50c3ac5..75bb2c5 100644 (file)
@@ -246,15 +246,15 @@ ATOMIC_OPS(xor, ^=, eor)
 
 #ifndef CONFIG_GENERIC_ATOMIC64
 typedef struct {
-       long long counter;
+       s64 counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(i) { (i) }
 
 #ifdef CONFIG_ARM_LPAE
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
 {
-       long long result;
+       s64 result;
 
        __asm__ __volatile__("@ atomic64_read\n"
 "      ldrd    %0, %H0, [%1]"
@@ -265,7 +265,7 @@ static inline long long atomic64_read(const atomic64_t *v)
        return result;
 }
 
-static inline void atomic64_set(atomic64_t *v, long long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
 {
        __asm__ __volatile__("@ atomic64_set\n"
 "      strd    %2, %H2, [%1]"
@@ -274,9 +274,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
        );
 }
 #else
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
 {
-       long long result;
+       s64 result;
 
        __asm__ __volatile__("@ atomic64_read\n"
 "      ldrexd  %0, %H0, [%1]"
@@ -287,9 +287,9 @@ static inline long long atomic64_read(const atomic64_t *v)
        return result;
 }
 
-static inline void atomic64_set(atomic64_t *v, long long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
 {
-       long long tmp;
+       s64 tmp;
 
        prefetchw(&v->counter);
        __asm__ __volatile__("@ atomic64_set\n"
@@ -304,9 +304,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
 #endif
 
 #define ATOMIC64_OP(op, op1, op2)                                      \
-static inline void atomic64_##op(long long i, atomic64_t *v)           \
+static inline void atomic64_##op(s64 i, atomic64_t *v)                 \
 {                                                                      \
-       long long result;                                               \
+       s64 result;                                                     \
        unsigned long tmp;                                              \
                                                                        \
        prefetchw(&v->counter);                                         \
@@ -323,10 +323,10 @@ static inline void atomic64_##op(long long i, atomic64_t *v)              \
 }                                                                      \
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)                               \
-static inline long long                                                        \
-atomic64_##op##_return_relaxed(long long i, atomic64_t *v)             \
+static inline s64                                                      \
+atomic64_##op##_return_relaxed(s64 i, atomic64_t *v)                   \
 {                                                                      \
-       long long result;                                               \
+       s64 result;                                                     \
        unsigned long tmp;                                              \
                                                                        \
        prefetchw(&v->counter);                                         \
@@ -346,10 +346,10 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v)                \
 }
 
 #define ATOMIC64_FETCH_OP(op, op1, op2)                                        \
-static inline long long                                                        \
-atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v)              \
+static inline s64                                                      \
+atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v)                    \
 {                                                                      \
-       long long result, val;                                          \
+       s64 result, val;                                                \
        unsigned long tmp;                                              \
                                                                        \
        prefetchw(&v->counter);                                         \
@@ -403,10 +403,9 @@ ATOMIC64_OPS(xor, eor, eor)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-static inline long long
-atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
+static inline s64 atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new)
 {
-       long long oldval;
+       s64 oldval;
        unsigned long res;
 
        prefetchw(&ptr->counter);
@@ -427,9 +426,9 @@ atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
 }
 #define atomic64_cmpxchg_relaxed       atomic64_cmpxchg_relaxed
 
-static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
+static inline s64 atomic64_xchg_relaxed(atomic64_t *ptr, s64 new)
 {
-       long long result;
+       s64 result;
        unsigned long tmp;
 
        prefetchw(&ptr->counter);
@@ -447,9 +446,9 @@ static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
 }
 #define atomic64_xchg_relaxed          atomic64_xchg_relaxed
 
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
 {
-       long long result;
+       s64 result;
        unsigned long tmp;
 
        smp_mb();
@@ -475,10 +474,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 }
 #define atomic64_dec_if_positive atomic64_dec_if_positive
 
-static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
-                                                 long long u)
+static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
-       long long oldval, newval;
+       s64 oldval, newval;
        unsigned long tmp;
 
        smp_mb();
index 23c3786..c8c850b 100644 (file)
@@ -122,9 +122,9 @@ ATOMIC_OPS(xor, eor)
 
 #define ATOMIC64_OP(op, asm_op)                                                \
 __LL_SC_INLINE void                                                    \
-__LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v))              \
+__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v))               \
 {                                                                      \
-       long result;                                                    \
+       s64 result;                                                     \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "\n"                            \
@@ -139,10 +139,10 @@ __LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v))         \
 __LL_SC_EXPORT(arch_atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)         \
-__LL_SC_INLINE long                                                    \
-__LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\
+__LL_SC_INLINE s64                                                     \
+__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
 {                                                                      \
-       long result;                                                    \
+       s64 result;                                                     \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "_return" #name "\n"            \
@@ -161,10 +161,10 @@ __LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\
 __LL_SC_EXPORT(arch_atomic64_##op##_return##name);
 
 #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op)          \
-__LL_SC_INLINE long                                                    \
-__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(long i, atomic64_t *v))  \
+__LL_SC_INLINE s64                                                     \
+__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v))   \
 {                                                                      \
-       long result, val;                                               \
+       s64 result, val;                                                \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_fetch_" #op #name "\n"                \
@@ -214,10 +214,10 @@ ATOMIC64_OPS(xor, eor)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-__LL_SC_INLINE long
+__LL_SC_INLINE s64
 __LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
 {
-       long result;
+       s64 result;
        unsigned long tmp;
 
        asm volatile("// atomic64_dec_if_positive\n"
index 45e030d..69acb1c 100644 (file)
@@ -213,9 +213,9 @@ ATOMIC_FETCH_OP_SUB(        , al, "memory")
 
 #define __LL_SC_ATOMIC64(op)   __LL_SC_CALL(arch_atomic64_##op)
 #define ATOMIC64_OP(op, asm_op)                                                \
-static inline void arch_atomic64_##op(long i, atomic64_t *v)           \
+static inline void arch_atomic64_##op(s64 i, atomic64_t *v)            \
 {                                                                      \
-       register long x0 asm ("x0") = i;                                \
+       register s64 x0 asm ("x0") = i;                                 \
        register atomic64_t *x1 asm ("x1") = v;                         \
                                                                        \
        asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op),        \
@@ -233,9 +233,9 @@ ATOMIC64_OP(add, stadd)
 #undef ATOMIC64_OP
 
 #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...)                 \
-static inline long arch_atomic64_fetch_##op##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
 {                                                                      \
-       register long x0 asm ("x0") = i;                                \
+       register s64 x0 asm ("x0") = i;                                 \
        register atomic64_t *x1 asm ("x1") = v;                         \
                                                                        \
        asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
@@ -265,9 +265,9 @@ ATOMIC64_FETCH_OPS(add, ldadd)
 #undef ATOMIC64_FETCH_OPS
 
 #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)                                \
-static inline long arch_atomic64_add_return##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \
 {                                                                      \
-       register long x0 asm ("x0") = i;                                \
+       register s64 x0 asm ("x0") = i;                                 \
        register atomic64_t *x1 asm ("x1") = v;                         \
                                                                        \
        asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
@@ -291,9 +291,9 @@ ATOMIC64_OP_ADD_RETURN(        , al, "memory")
 
 #undef ATOMIC64_OP_ADD_RETURN
 
-static inline void arch_atomic64_and(long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
 {
-       register long x0 asm ("x0") = i;
+       register s64 x0 asm ("x0") = i;
        register atomic64_t *x1 asm ("x1") = v;
 
        asm volatile(ARM64_LSE_ATOMIC_INSN(
@@ -309,9 +309,9 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
 }
 
 #define ATOMIC64_FETCH_OP_AND(name, mb, cl...)                         \
-static inline long arch_atomic64_fetch_and##name(long i, atomic64_t *v)        \
+static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v)  \
 {                                                                      \
-       register long x0 asm ("x0") = i;                                \
+       register s64 x0 asm ("x0") = i;                                 \
        register atomic64_t *x1 asm ("x1") = v;                         \
                                                                        \
        asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
@@ -335,9 +335,9 @@ ATOMIC64_FETCH_OP_AND(        , al, "memory")
 
 #undef ATOMIC64_FETCH_OP_AND
 
-static inline void arch_atomic64_sub(long i, atomic64_t *v)
+static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
 {
-       register long x0 asm ("x0") = i;
+       register s64 x0 asm ("x0") = i;
        register atomic64_t *x1 asm ("x1") = v;
 
        asm volatile(ARM64_LSE_ATOMIC_INSN(
@@ -353,9 +353,9 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
 }
 
 #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)                                \
-static inline long arch_atomic64_sub_return##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \
 {                                                                      \
-       register long x0 asm ("x0") = i;                                \
+       register s64 x0 asm ("x0") = i;                                 \
        register atomic64_t *x1 asm ("x1") = v;                         \
                                                                        \
        asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
@@ -381,9 +381,9 @@ ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 #undef ATOMIC64_OP_SUB_RETURN
 
 #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...)                         \
-static inline long arch_atomic64_fetch_sub##name(long i, atomic64_t *v)        \
+static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v)  \
 {                                                                      \
-       register long x0 asm ("x0") = i;                                \
+       register s64 x0 asm ("x0") = i;                                 \
        register atomic64_t *x1 asm ("x1") = v;                         \
                                                                        \
        asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
@@ -407,7 +407,7 @@ ATOMIC64_FETCH_OP_SUB(        , al, "memory")
 
 #undef ATOMIC64_FETCH_OP_SUB
 
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 {
        register long x0 asm ("x0") = (long)v;
 
index 206530d..50440f3 100644 (file)
@@ -124,10 +124,10 @@ ATOMIC_FETCH_OP(xor, ^)
 #undef ATOMIC_OP
 
 #define ATOMIC64_OP(op, c_op)                                          \
-static __inline__ long                                                 \
-ia64_atomic64_##op (__s64 i, atomic64_t *v)                            \
+static __inline__ s64                                                  \
+ia64_atomic64_##op (s64 i, atomic64_t *v)                              \
 {                                                                      \
-       __s64 old, new;                                                 \
+       s64 old, new;                                                   \
        CMPXCHG_BUGCHECK_DECL                                           \
                                                                        \
        do {                                                            \
@@ -139,10 +139,10 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v)                               \
 }
 
 #define ATOMIC64_FETCH_OP(op, c_op)                                    \
-static __inline__ long                                                 \
-ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v)                      \
+static __inline__ s64                                                  \
+ia64_atomic64_fetch_##op (s64 i, atomic64_t *v)                                \
 {                                                                      \
-       __s64 old, new;                                                 \
+       s64 old, new;                                                   \
        CMPXCHG_BUGCHECK_DECL                                           \
                                                                        \
        do {                                                            \
@@ -162,7 +162,7 @@ ATOMIC64_OPS(sub, -)
 
 #define atomic64_add_return(i,v)                                       \
 ({                                                                     \
-       long __ia64_aar_i = (i);                                        \
+       s64 __ia64_aar_i = (i);                                         \
        __ia64_atomic_const(i)                                          \
                ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)       \
                : ia64_atomic64_add(__ia64_aar_i, v);                   \
@@ -170,7 +170,7 @@ ATOMIC64_OPS(sub, -)
 
 #define atomic64_sub_return(i,v)                                       \
 ({                                                                     \
-       long __ia64_asr_i = (i);                                        \
+       s64 __ia64_asr_i = (i);                                         \
        __ia64_atomic_const(i)                                          \
                ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)      \
                : ia64_atomic64_sub(__ia64_asr_i, v);                   \
@@ -178,7 +178,7 @@ ATOMIC64_OPS(sub, -)
 
 #define atomic64_fetch_add(i,v)                                                \
 ({                                                                     \
-       long __ia64_aar_i = (i);                                        \
+       s64 __ia64_aar_i = (i);                                         \
        __ia64_atomic_const(i)                                          \
                ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)       \
                : ia64_atomic64_fetch_add(__ia64_aar_i, v);             \
@@ -186,7 +186,7 @@ ATOMIC64_OPS(sub, -)
 
 #define atomic64_fetch_sub(i,v)                                                \
 ({                                                                     \
-       long __ia64_asr_i = (i);                                        \
+       s64 __ia64_asr_i = (i);                                         \
        __ia64_atomic_const(i)                                          \
                ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)      \
                : ia64_atomic64_fetch_sub(__ia64_asr_i, v);             \
index 9409629..9a82dd1 100644 (file)
@@ -254,10 +254,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic64_set(v, i)     WRITE_ONCE((v)->counter, (i))
 
 #define ATOMIC64_OP(op, c_op, asm_op)                                        \
-static __inline__ void atomic64_##op(long i, atomic64_t * v)                 \
+static __inline__ void atomic64_##op(s64 i, atomic64_t * v)                  \
 {                                                                            \
        if (kernel_uses_llsc) {                                               \
-               long temp;                                                    \
+               s64 temp;                                                     \
                                                                              \
                loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
@@ -280,12 +280,12 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)                    \
 }
 
 #define ATOMIC64_OP_RETURN(op, c_op, asm_op)                                 \
-static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v)   \
 {                                                                            \
-       long result;                                                          \
+       s64 result;                                                           \
                                                                              \
        if (kernel_uses_llsc) {                                               \
-               long temp;                                                    \
+               s64 temp;                                                     \
                                                                              \
                loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
@@ -314,12 +314,12 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
 }
 
 #define ATOMIC64_FETCH_OP(op, c_op, asm_op)                                  \
-static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v)  \
+static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v)    \
 {                                                                            \
-       long result;                                                          \
+       s64 result;                                                           \
                                                                              \
        if (kernel_uses_llsc) {                                               \
-               long temp;                                                    \
+               s64 temp;                                                     \
                                                                              \
                loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
@@ -386,14 +386,14 @@ ATOMIC64_OPS(xor, ^=, xor)
  * Atomically test @v and subtract @i if @v is greater or equal than @i.
  * The function returns the old value of @v minus @i.
  */
-static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
+static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v)
 {
-       long result;
+       s64 result;
 
        smp_mb__before_llsc();
 
        if (kernel_uses_llsc) {
-               long temp;
+               s64 temp;
 
                __asm__ __volatile__(
                "       .set    push                                    \n"
index 52eafaf..31c231e 100644 (file)
@@ -297,24 +297,24 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 
 #define ATOMIC64_INIT(i)       { (i) }
 
-static __inline__ long atomic64_read(const atomic64_t *v)
+static __inline__ s64 atomic64_read(const atomic64_t *v)
 {
-       long t;
+       s64 t;
 
        __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
 
        return t;
 }
 
-static __inline__ void atomic64_set(atomic64_t *v, long i)
+static __inline__ void atomic64_set(atomic64_t *v, s64 i)
 {
        __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
 }
 
 #define ATOMIC64_OP(op, asm_op)                                                \
-static __inline__ void atomic64_##op(long a, atomic64_t *v)            \
+static __inline__ void atomic64_##op(s64 a, atomic64_t *v)             \
 {                                                                      \
-       long t;                                                         \
+       s64 t;                                                          \
                                                                        \
        __asm__ __volatile__(                                           \
 "1:    ldarx   %0,0,%3         # atomic64_" #op "\n"                   \
@@ -327,10 +327,10 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v)               \
 }
 
 #define ATOMIC64_OP_RETURN_RELAXED(op, asm_op)                         \
-static inline long                                                     \
-atomic64_##op##_return_relaxed(long a, atomic64_t *v)                  \
+static inline s64                                                      \
+atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)                   \
 {                                                                      \
-       long t;                                                         \
+       s64 t;                                                          \
                                                                        \
        __asm__ __volatile__(                                           \
 "1:    ldarx   %0,0,%3         # atomic64_" #op "_return_relaxed\n"    \
@@ -345,10 +345,10 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v)                     \
 }
 
 #define ATOMIC64_FETCH_OP_RELAXED(op, asm_op)                          \
-static inline long                                                     \
-atomic64_fetch_##op##_relaxed(long a, atomic64_t *v)                   \
+static inline s64                                                      \
+atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)                    \
 {                                                                      \
-       long res, t;                                                    \
+       s64 res, t;                                                     \
                                                                        \
        __asm__ __volatile__(                                           \
 "1:    ldarx   %0,0,%4         # atomic64_fetch_" #op "_relaxed\n"     \
@@ -396,7 +396,7 @@ ATOMIC64_OPS(xor, xor)
 
 static __inline__ void atomic64_inc(atomic64_t *v)
 {
-       long t;
+       s64 t;
 
        __asm__ __volatile__(
 "1:    ldarx   %0,0,%2         # atomic64_inc\n\
@@ -409,9 +409,9 @@ static __inline__ void atomic64_inc(atomic64_t *v)
 }
 #define atomic64_inc atomic64_inc
 
-static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
+static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
 {
-       long t;
+       s64 t;
 
        __asm__ __volatile__(
 "1:    ldarx   %0,0,%2         # atomic64_inc_return_relaxed\n"
@@ -427,7 +427,7 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
 
 static __inline__ void atomic64_dec(atomic64_t *v)
 {
-       long t;
+       s64 t;
 
        __asm__ __volatile__(
 "1:    ldarx   %0,0,%2         # atomic64_dec\n\
@@ -440,9 +440,9 @@ static __inline__ void atomic64_dec(atomic64_t *v)
 }
 #define atomic64_dec atomic64_dec
 
-static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
+static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
 {
-       long t;
+       s64 t;
 
        __asm__ __volatile__(
 "1:    ldarx   %0,0,%2         # atomic64_dec_return_relaxed\n"
@@ -463,9 +463,9 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1.
  */
-static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
+static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
 {
-       long t;
+       s64 t;
 
        __asm__ __volatile__(
        PPC_ATOMIC_ENTRY_BARRIER
@@ -502,9 +502,9 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
-       long t;
+       s64 t;
 
        __asm__ __volatile__ (
        PPC_ATOMIC_ENTRY_BARRIER
@@ -534,7 +534,7 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
  */
 static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
 {
-       long t1, t2;
+       s64 t1, t2;
 
        __asm__ __volatile__ (
        PPC_ATOMIC_ENTRY_BARRIER
index 9038aeb..96f95c9 100644 (file)
@@ -38,11 +38,11 @@ static __always_inline void atomic_set(atomic_t *v, int i)
 
 #ifndef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC64_INIT(i) { (i) }
-static __always_inline long atomic64_read(const atomic64_t *v)
+static __always_inline s64 atomic64_read(const atomic64_t *v)
 {
        return READ_ONCE(v->counter);
 }
-static __always_inline void atomic64_set(atomic64_t *v, long i)
+static __always_inline void atomic64_set(atomic64_t *v, s64 i)
 {
        WRITE_ONCE(v->counter, i);
 }
@@ -66,11 +66,11 @@ void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)         \
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, I)                                      \
-        ATOMIC_OP (op, asm_op, I, w,  int,   )
+        ATOMIC_OP (op, asm_op, I, w, int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, I)                                      \
-        ATOMIC_OP (op, asm_op, I, w,  int,   )                         \
-        ATOMIC_OP (op, asm_op, I, d, long, 64)
+        ATOMIC_OP (op, asm_op, I, w, int,   )                          \
+        ATOMIC_OP (op, asm_op, I, d, s64, 64)
 #endif
 
 ATOMIC_OPS(add, add,  i)
@@ -127,14 +127,14 @@ c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v)    \
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, c_op, I)                                        \
-        ATOMIC_FETCH_OP( op, asm_op,       I, w,  int,   )             \
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I, w,  int,   )
+        ATOMIC_FETCH_OP( op, asm_op,       I, w, int,   )              \
+        ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, c_op, I)                                        \
-        ATOMIC_FETCH_OP( op, asm_op,       I, w,  int,   )             \
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I, w,  int,   )             \
-        ATOMIC_FETCH_OP( op, asm_op,       I, d, long, 64)             \
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64)
+        ATOMIC_FETCH_OP( op, asm_op,       I, w, int,   )              \
+        ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int,   )              \
+        ATOMIC_FETCH_OP( op, asm_op,       I, d, s64, 64)              \
+        ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, s64, 64)
 #endif
 
 ATOMIC_OPS(add, add, +,  i)
@@ -166,11 +166,11 @@ ATOMIC_OPS(sub, add, +, -i)
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, I)                                      \
-        ATOMIC_FETCH_OP(op, asm_op, I, w,  int,   )
+        ATOMIC_FETCH_OP(op, asm_op, I, w, int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, I)                                      \
-        ATOMIC_FETCH_OP(op, asm_op, I, w,  int,   )                    \
-        ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64)
+        ATOMIC_FETCH_OP(op, asm_op, I, w, int,   )                     \
+        ATOMIC_FETCH_OP(op, asm_op, I, d, s64, 64)
 #endif
 
 ATOMIC_OPS(and, and, i)
@@ -219,9 +219,10 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 #define atomic_fetch_add_unless atomic_fetch_add_unless
 
 #ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
-       long prev, rc;
+       s64 prev;
+       long rc;
 
        __asm__ __volatile__ (
                "0:     lr.d     %[p],  %[c]\n"
@@ -290,11 +291,11 @@ c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS()                                                   \
-       ATOMIC_OP( int,   , 4)
+       ATOMIC_OP(int,   , 4)
 #else
 #define ATOMIC_OPS()                                                   \
-       ATOMIC_OP( int,   , 4)                                          \
-       ATOMIC_OP(long, 64, 8)
+       ATOMIC_OP(int,   , 4)                                           \
+       ATOMIC_OP(s64, 64, 8)
 #endif
 
 ATOMIC_OPS()
@@ -332,9 +333,10 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
 #define atomic_dec_if_positive(v)      atomic_sub_if_positive(v, 1)
 
 #ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset)
+static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset)
 {
-       long prev, rc;
+       s64 prev;
+       long rc;
 
        __asm__ __volatile__ (
                "0:     lr.d     %[p],  %[c]\n"
index fd20ab5..491ad53 100644 (file)
@@ -84,9 +84,9 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 
 #define ATOMIC64_INIT(i)  { (i) }
 
-static inline long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
 {
-       long c;
+       s64 c;
 
        asm volatile(
                "       lg      %0,%1\n"
@@ -94,49 +94,49 @@ static inline long atomic64_read(const atomic64_t *v)
        return c;
 }
 
-static inline void atomic64_set(atomic64_t *v, long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
 {
        asm volatile(
                "       stg     %1,%0\n"
                : "=Q" (v->counter) : "d" (i));
 }
 
-static inline long atomic64_add_return(long i, atomic64_t *v)
+static inline s64 atomic64_add_return(s64 i, atomic64_t *v)
 {
-       return __atomic64_add_barrier(i, &v->counter) + i;
+       return __atomic64_add_barrier(i, (long *)&v->counter) + i;
 }
 
-static inline long atomic64_fetch_add(long i, atomic64_t *v)
+static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v)
 {
-       return __atomic64_add_barrier(i, &v->counter);
+       return __atomic64_add_barrier(i, (long *)&v->counter);
 }
 
-static inline void atomic64_add(long i, atomic64_t *v)
+static inline void atomic64_add(s64 i, atomic64_t *v)
 {
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
        if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
-               __atomic64_add_const(i, &v->counter);
+               __atomic64_add_const(i, (long *)&v->counter);
                return;
        }
 #endif
-       __atomic64_add(i, &v->counter);
+       __atomic64_add(i, (long *)&v->counter);
 }
 
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
-       return __atomic64_cmpxchg(&v->counter, old, new);
+       return __atomic64_cmpxchg((long *)&v->counter, old, new);
 }
 
 #define ATOMIC64_OPS(op)                                               \
-static inline void atomic64_##op(long i, atomic64_t *v)                        \
+static inline void atomic64_##op(s64 i, atomic64_t *v)                 \
 {                                                                      \
-       __atomic64_##op(i, &v->counter);                                \
+       __atomic64_##op(i, (long *)&v->counter);                        \
 }                                                                      \
-static inline long atomic64_fetch_##op(long i, atomic64_t *v)          \
+static inline long atomic64_fetch_##op(s64 i, atomic64_t *v)           \
 {                                                                      \
-       return __atomic64_##op##_barrier(i, &v->counter);               \
+       return __atomic64_##op##_barrier(i, (long *)&v->counter);       \
 }
 
 ATOMIC64_OPS(and)
@@ -145,8 +145,8 @@ ATOMIC64_OPS(xor)
 
 #undef ATOMIC64_OPS
 
-#define atomic64_sub_return(_i, _v)    atomic64_add_return(-(long)(_i), _v)
-#define atomic64_fetch_sub(_i, _v)     atomic64_fetch_add(-(long)(_i), _v)
-#define atomic64_sub(_i, _v)           atomic64_add(-(long)(_i), _v)
+#define atomic64_sub_return(_i, _v)    atomic64_add_return(-(s64)(_i), _v)
+#define atomic64_fetch_sub(_i, _v)     atomic64_fetch_add(-(s64)(_i), _v)
+#define atomic64_sub(_i, _v)           atomic64_add(-(s64)(_i), _v)
 
 #endif /* __ARCH_S390_ATOMIC__  */
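
The recurring pattern in the s390 hunks above: atomic64_t's counter is now an s64, while the low-level __atomic64_*() helpers keep taking long pointers, so the wrappers bridge the two with a (long *) cast -- harmless because both types are 64 bits wide on s390x. A rough standalone sketch of that shape using GCC's __atomic builtins, with made-up names (sketch_atomic64_t, low_level_add64) in place of the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    typedef int64_t s64;
    typedef struct { s64 counter; } sketch_atomic64_t;

    /* stand-in for an arch-level helper that still operates on long */
    static inline void low_level_add64(long i, long *ptr)
    {
            __atomic_fetch_add(ptr, i, __ATOMIC_SEQ_CST);
    }

    static inline void sketch_atomic64_add(s64 i, sketch_atomic64_t *v)
    {
            /* valid only because long and s64 have the same width here */
            _Static_assert(sizeof(long) == sizeof(s64), "LP64 target assumed");
            low_level_add64(i, (long *)&v->counter);
    }

    int main(void)
    {
            sketch_atomic64_t v = { .counter = 40 };

            sketch_atomic64_add(2, &v);
            printf("%lld\n", (long long)v.counter);  /* 42 */
            return 0;
    }
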
index 6b48ca7..3408c0d 100644
@@ -74,7 +74,7 @@ static void pci_sw_counter_show(struct seq_file *m)
        int i;
 
        for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
-               seq_printf(m, "%26s:\t%lu\n", pci_sw_names[i],
+               seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
                           atomic64_read(counter));
 }
 
index 6963482..b604483 100644
 
 #define ATOMIC_OP(op)                                                  \
 void atomic_##op(int, atomic_t *);                                     \
-void atomic64_##op(long, atomic64_t *);
+void atomic64_##op(s64, atomic64_t *);
 
 #define ATOMIC_OP_RETURN(op)                                           \
 int atomic_##op##_return(int, atomic_t *);                             \
-long atomic64_##op##_return(long, atomic64_t *);
+s64 atomic64_##op##_return(s64, atomic64_t *);
 
 #define ATOMIC_FETCH_OP(op)                                            \
 int atomic_fetch_##op(int, atomic_t *);                                        \
-long atomic64_fetch_##op(long, atomic64_t *);
+s64 atomic64_fetch_##op(s64, atomic64_t *);
 
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
@@ -61,7 +61,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
        ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-long atomic64_dec_if_positive(atomic64_t *v);
+s64 atomic64_dec_if_positive(atomic64_t *v);
 #define atomic64_dec_if_positive atomic64_dec_if_positive
 
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
index 3cd94a2..ceb712b 100644
@@ -2179,7 +2179,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
         * For now, this can't happen because all callers hold mmap_sem
         * for write.  If this changes, we'll need a different solution.
         */
-       lockdep_assert_held_exclusive(&mm->mmap_sem);
+       lockdep_assert_held_write(&mm->mmap_sem);
 
        if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
                on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
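
lockdep_assert_held_write() is the renamed form of lockdep_assert_held_exclusive(), making the "held for write" requirement explicit and pairing naturally with the existing lockdep_assert_held_read(). A brief kernel-style fragment of the intended pattern -- not buildable on its own; set_rdpmc_allowed() and example_caller() are hypothetical helpers for illustration:

    /* callers are expected to hold mmap_sem for write */
    static void set_rdpmc_allowed(struct mm_struct *mm, int val)
    {
            lockdep_assert_held_write(&mm->mmap_sem);
            atomic_set(&mm->context.perf_rdpmc_allowed, val);
    }

    /* a writer context satisfies the assertion */
    static void example_caller(struct mm_struct *mm)
    {
            down_write(&mm->mmap_sem);
            set_rdpmc_allowed(mm, 1);
            up_write(&mm->mmap_sem);
    }
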
index ea3d952..115127c 100644
@@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
 {
        asm volatile(LOCK_PREFIX "addl %1,%0"
                     : "+m" (v->counter)
-                    : "ir" (i));
+                    : "ir" (i) : "memory");
 }
 
 /**
@@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
 {
        asm volatile(LOCK_PREFIX "subl %1,%0"
                     : "+m" (v->counter)
-                    : "ir" (i));
+                    : "ir" (i) : "memory");
 }
 
 /**
@@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
 static __always_inline void arch_atomic_inc(atomic_t *v)
 {
        asm volatile(LOCK_PREFIX "incl %0"
-                    : "+m" (v->counter));
+                    : "+m" (v->counter) :: "memory");
 }
 #define arch_atomic_inc arch_atomic_inc
 
@@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
 static __always_inline void arch_atomic_dec(atomic_t *v)
 {
        asm volatile(LOCK_PREFIX "decl %0"
-                    : "+m" (v->counter));
+                    : "+m" (v->counter) :: "memory");
 }
 #define arch_atomic_dec arch_atomic_dec
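
The only change to these helpers is the added "memory" clobber, which makes each LOCK-prefixed instruction a compiler barrier as well, so surrounding memory accesses can no longer be cached in registers or moved across the atomic. A standalone x86/GCC illustration of the idea (locked_add_barrier()/locked_add_no_barrier() merely copy the pattern, they are not the kernel's arch_atomic_add()):

    #include <stdio.h>

    /* no clobber: the compiler may reorder or combine surrounding
     * memory accesses across this statement */
    static inline void locked_add_no_barrier(int i, int *p)
    {
            asm volatile("lock addl %1,%0" : "+m" (*p) : "ir" (i));
    }

    /* "memory" clobber: the asm also acts as a compiler barrier */
    static inline void locked_add_barrier(int i, int *p)
    {
            asm volatile("lock addl %1,%0" : "+m" (*p) : "ir" (i) : "memory");
    }

    int counter;
    int ready;

    int main(void)
    {
            ready = 0;
            locked_add_barrier(1, &counter);
            /* with the clobber, this store cannot be hoisted above the add */
            ready = 1;

            printf("counter=%d ready=%d\n", counter, ready);
            return 0;
    }
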
 
index 6a5b0ec..52cfaec 100644
@@ -9,7 +9,7 @@
 /* A 64-bit atomic type */
 
 typedef struct {
-       u64 __aligned(8) counter;
+       s64 __aligned(8) counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(val)     { (val) }
@@ -71,8 +71,7 @@ ATOMIC64_DECL(add_unless);
  * the old value.
  */
 
-static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
-                                             long long n)
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 {
        return arch_cmpxchg64(&v->counter, o, n);
 }
@@ -85,9 +84,9 @@ static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
  * Atomically xchgs the value of @v to @n and returns
  * the old value.
  */
-static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
+static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
 {
-       long long o;
+       s64 o;
        unsigned high = (unsigned)(n >> 32);
        unsigned low = (unsigned)n;
        alternative_atomic64(xchg, "=&A" (o),
@@ -103,7 +102,7 @@ static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
  *
  * Atomically sets the value of @v to @n.
  */
-static inline void arch_atomic64_set(atomic64_t *v, long long i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
 {
        unsigned high = (unsigned)(i >> 32);
        unsigned low = (unsigned)i;
@@ -118,9 +117,9 @@ static inline void arch_atomic64_set(atomic64_t *v, long long i)
  *
  * Atomically reads the value of @v and returns it.
  */
-static inline long long arch_atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
 {
-       long long r;
+       s64 r;
        alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
        return r;
 }
@@ -132,7 +131,7 @@ static inline long long arch_atomic64_read(const atomic64_t *v)
  *
  * Atomically adds @i to @v and returns @i + *@v
  */
-static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
 {
        alternative_atomic64(add_return,
                             ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -143,7 +142,7 @@ static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
 /*
  * Other variants with different arithmetic operators:
  */
-static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
 {
        alternative_atomic64(sub_return,
                             ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -151,18 +150,18 @@ static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
        return i;
 }
 
-static inline long long arch_atomic64_inc_return(atomic64_t *v)
+static inline s64 arch_atomic64_inc_return(atomic64_t *v)
 {
-       long long a;
+       s64 a;
        alternative_atomic64(inc_return, "=&A" (a),
                             "S" (v) : "memory", "ecx");
        return a;
 }
 #define arch_atomic64_inc_return arch_atomic64_inc_return
 
-static inline long long arch_atomic64_dec_return(atomic64_t *v)
+static inline s64 arch_atomic64_dec_return(atomic64_t *v)
 {
-       long long a;
+       s64 a;
        alternative_atomic64(dec_return, "=&A" (a),
                             "S" (v) : "memory", "ecx");
        return a;
@@ -176,7 +175,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
  *
  * Atomically adds @i to @v.
  */
-static inline long long arch_atomic64_add(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
 {
        __alternative_atomic64(add, add_return,
                               ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -191,7 +190,7 @@ static inline long long arch_atomic64_add(long long i, atomic64_t *v)
  *
  * Atomically subtracts @i from @v.
  */
-static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
 {
        __alternative_atomic64(sub, sub_return,
                               ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -234,8 +233,7 @@ static inline void arch_atomic64_dec(atomic64_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns non-zero if the add was done, zero otherwise.
  */
-static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
-                                          long long u)
+static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
        unsigned low = (unsigned)u;
        unsigned high = (unsigned)(u >> 32);
@@ -254,9 +252,9 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
 }
 #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
 
-static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 {
-       long long r;
+       s64 r;
        alternative_atomic64(dec_if_positive, "=&A" (r),
                             "S" (v) : "ecx", "memory");
        return r;
@@ -266,17 +264,17 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
 #undef alternative_atomic64
 #undef __alternative_atomic64
 
-static inline void arch_atomic64_and(long long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
 {
-       long long old, c = 0;
+       s64 old, c = 0;
 
        while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
                c = old;
 }
 
-static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
 {
-       long long old, c = 0;
+       s64 old, c = 0;
 
        while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
                c = old;
@@ -284,17 +282,17 @@ static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
        return old;
 }
 
-static inline void arch_atomic64_or(long long i, atomic64_t *v)
+static inline void arch_atomic64_or(s64 i, atomic64_t *v)
 {
-       long long old, c = 0;
+       s64 old, c = 0;
 
        while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
                c = old;
 }
 
-static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
 {
-       long long old, c = 0;
+       s64 old, c = 0;
 
        while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
                c = old;
@@ -302,17 +300,17 @@ static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
        return old;
 }
 
-static inline void arch_atomic64_xor(long long i, atomic64_t *v)
+static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
 {
-       long long old, c = 0;
+       s64 old, c = 0;
 
        while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
                c = old;
 }
 
-static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
 {
-       long long old, c = 0;
+       s64 old, c = 0;
 
        while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
                c = old;
@@ -320,9 +318,9 @@ static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
        return old;
 }
 
-static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
 {
-       long long old, c = 0;
+       s64 old, c = 0;
 
        while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
                c = old;
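
The 32-bit fallbacks above all share one shape: start from a guessed value, attempt cmpxchg8b with it, and retry with whatever value the cmpxchg reported until it succeeds. A userspace sketch of the same loop with C11 atomics (fetch_or64() is an illustrative stand-in, not the kernel helper):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static int64_t fetch_or64(_Atomic int64_t *v, int64_t i)
    {
            int64_t old = 0;    /* initial guess, as in the kernel loop */

            /* on failure, `old` is updated to the current value and the
             * desired value is recomputed from it on the next iteration */
            while (!atomic_compare_exchange_strong(v, &old, old | i))
                    ;
            return old;
    }

    int main(void)
    {
            _Atomic int64_t flags = 0x1;
            int64_t old = fetch_or64(&flags, 0x4);

            printf("old=%#llx new=%#llx\n", (unsigned long long)old,
                   (unsigned long long)atomic_load(&flags));  /* old=0x1 new=0x5 */
            return 0;
    }
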
index dadc20a..95c6cea 100644
@@ -17,7 +17,7 @@
  * Atomically reads the value of @v.
  * Doesn't imply a read memory barrier.
  */
-static inline long arch_atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
 {
        return READ_ONCE((v)->counter);
 }
@@ -29,7 +29,7 @@ static inline long arch_atomic64_read(const atomic64_t *v)
  *
  * Atomically sets the value of @v to @i.
  */
-static inline void arch_atomic64_set(atomic64_t *v, long i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
 {
        WRITE_ONCE(v->counter, i);
 }
@@ -41,11 +41,11 @@ static inline void arch_atomic64_set(atomic64_t *v, long i)
  *
  * Atomically adds @i to @v.
  */
-static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
 {
        asm volatile(LOCK_PREFIX "addq %1,%0"
                     : "=m" (v->counter)
-                    : "er" (i), "m" (v->counter));
+                    : "er" (i), "m" (v->counter) : "memory");
 }
 
 /**
@@ -55,11 +55,11 @@ static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
  *
  * Atomically subtracts @i from @v.
  */
-static inline void arch_atomic64_sub(long i, atomic64_t *v)
+static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
 {
        asm volatile(LOCK_PREFIX "subq %1,%0"
                     : "=m" (v->counter)
-                    : "er" (i), "m" (v->counter));
+                    : "er" (i), "m" (v->counter) : "memory");
 }
 
 /**
@@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
+static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
        return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
 }
@@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
 {
        asm volatile(LOCK_PREFIX "incq %0"
                     : "=m" (v->counter)
-                    : "m" (v->counter));
+                    : "m" (v->counter) : "memory");
 }
 #define arch_atomic64_inc arch_atomic64_inc
 
@@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
 {
        asm volatile(LOCK_PREFIX "decq %0"
                     : "=m" (v->counter)
-                    : "m" (v->counter));
+                    : "m" (v->counter) : "memory");
 }
 #define arch_atomic64_dec arch_atomic64_dec
 
@@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
+static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
 {
        return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
 }
@@ -155,43 +155,43 @@ static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
  *
  * Atomically adds @i to @v and returns @i + @v
  */
-static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
 {
        return i + xadd(&v->counter, i);
 }
 
-static inline long arch_atomic64_sub_return(long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
 {
        return arch_atomic64_add_return(-i, v);
 }
 
-static inline long arch_atomic64_fetch_add(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
 {
        return xadd(&v->counter, i);
 }
 
-static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
        return xadd(&v->counter, -i);
 }
 
-static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
        return arch_cmpxchg(&v->counter, old, new);
 }
 
 #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
-static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
        return try_cmpxchg(&v->counter, old, new);
 }
 
-static inline long arch_atomic64_xchg(atomic64_t *v, long new)
+static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
 {
        return arch_xchg(&v->counter, new);
 }
 
-static inline void arch_atomic64_and(long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
 {
        asm volatile(LOCK_PREFIX "andq %1,%0"
                        : "+m" (v->counter)
@@ -199,7 +199,7 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
                        : "memory");
 }
 
-static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
 {
        s64 val = arch_atomic64_read(v);
 
@@ -208,7 +208,7 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
        return val;
 }
 
-static inline void arch_atomic64_or(long i, atomic64_t *v)
+static inline void arch_atomic64_or(s64 i, atomic64_t *v)
 {
        asm volatile(LOCK_PREFIX "orq %1,%0"
                        : "+m" (v->counter)
@@ -216,7 +216,7 @@ static inline void arch_atomic64_or(long i, atomic64_t *v)
                        : "memory");
 }
 
-static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
 {
        s64 val = arch_atomic64_read(v);
 
@@ -225,7 +225,7 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
        return val;
 }
 
-static inline void arch_atomic64_xor(long i, atomic64_t *v)
+static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
 {
        asm volatile(LOCK_PREFIX "xorq %1,%0"
                        : "+m" (v->counter)
@@ -233,7 +233,7 @@ static inline void arch_atomic64_xor(long i, atomic64_t *v)
                        : "memory");
 }
 
-static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
 {
        s64 val = arch_atomic64_read(v);
 
index 14de043..84f848c 100644
@@ -80,8 +80,8 @@ do {                                                                  \
 })
 
 /* Atomic operations are already serializing on x86 */
-#define __smp_mb__before_atomic()      barrier()
-#define __smp_mb__after_atomic()       barrier()
+#define __smp_mb__before_atomic()      do { } while (0)
+#define __smp_mb__after_atomic()       do { } while (0)
 
 #include <asm-generic/barrier.h>
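
Because the x86 atomics in this series now carry an explicit "memory" clobber, smp_mb__{before,after}_atomic() no longer need to supply their own compiler barrier and can expand to nothing here. The generic usage pattern is unchanged; a kernel-style fragment (not a standalone program) of the canonical pairing:

    struct obj {
            int dead;
            atomic_t ref;
    };

    static void obj_kill(struct obj *obj)
    {
            obj->dead = 1;
            /*
             * Order the plain store above against the following RMW.  On
             * x86 this now compiles to nothing extra: atomic_dec() is both
             * a hardware barrier and, with the new "memory" clobber, a
             * compiler barrier.
             */
            smp_mb__before_atomic();
            atomic_dec(&obj->ref);
    }
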
 
index 8f3bee8..187ce59 100644
@@ -16,7 +16,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-       return this_cpu_read(irq_regs);
+       return __this_cpu_read(irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -24,7 +24,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
        struct pt_regs *old_regs;
 
        old_regs = get_irq_regs();
-       this_cpu_write(irq_regs, new_regs);
+       __this_cpu_write(irq_regs, new_regs);
 
        return old_regs;
 }
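
get_irq_regs()/set_irq_regs() only run with IRQs -- and therefore preemption -- already disabled, so the cheaper __this_cpu_*() accessors are sufficient: they assume a non-preemptible caller (only verifying it under CONFIG_DEBUG_PREEMPT), whereas this_cpu_*() must be safe from any context and, after this series, uses the volatile-qualified percpu asm. A kernel-style sketch of when each form applies (my_counter is a hypothetical per-CPU variable, not something from this patch):

    static DEFINE_PER_CPU(int, my_counter);

    /* hard-IRQ context: preemption is already off, the raw form is enough */
    static void count_from_irq(void)
    {
            __this_cpu_inc(my_counter);
    }

    /* possibly preemptible context: use the self-protecting form */
    static void count_from_task(void)
    {
            this_cpu_inc(my_counter);
    }
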
index 65191ce..06c3cc2 100644
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
+#define HAVE_JUMP_LABEL_BATCH
+
 #define JUMP_LABEL_NOP_SIZE 5
 
 #ifdef CONFIG_X86_64
index 1a19d11..2278797 100644
@@ -87,7 +87,7 @@
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
 
-#define percpu_to_op(op, var, val)                     \
+#define percpu_to_op(qual, op, var, val)               \
 do {                                                   \
        typedef typeof(var) pto_T__;                    \
        if (0) {                                        \
@@ -97,22 +97,22 @@ do {                                                        \
        }                                               \
        switch (sizeof(var)) {                          \
        case 1:                                         \
-               asm(op "b %1,"__percpu_arg(0)           \
+               asm qual (op "b %1,"__percpu_arg(0)     \
                    : "+m" (var)                        \
                    : "qi" ((pto_T__)(val)));           \
                break;                                  \
        case 2:                                         \
-               asm(op "w %1,"__percpu_arg(0)           \
+               asm qual (op "w %1,"__percpu_arg(0)     \
                    : "+m" (var)                        \
                    : "ri" ((pto_T__)(val)));           \
                break;                                  \
        case 4:                                         \
-               asm(op "l %1,"__percpu_arg(0)           \
+               asm qual (op "l %1,"__percpu_arg(0)     \
                    : "+m" (var)                        \
                    : "ri" ((pto_T__)(val)));           \
                break;                                  \
        case 8:                                         \
-               asm(op "q %1,"__percpu_arg(0)           \
+               asm qual (op "q %1,"__percpu_arg(0)     \
                    : "+m" (var)                        \
                    : "re" ((pto_T__)(val)));           \
                break;                                  \
@@ -124,7 +124,7 @@ do {                                                        \
  * Generate a percpu add to memory instruction and optimize code
  * if one is added or subtracted.
  */
-#define percpu_add_op(var, val)                                                \
+#define percpu_add_op(qual, var, val)                                  \
 do {                                                                   \
        typedef typeof(var) pao_T__;                                    \
        const int pao_ID__ = (__builtin_constant_p(val) &&              \
@@ -138,41 +138,41 @@ do {                                                                      \
        switch (sizeof(var)) {                                          \
        case 1:                                                         \
                if (pao_ID__ == 1)                                      \
-                       asm("incb "__percpu_arg(0) : "+m" (var));       \
+                       asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
                else if (pao_ID__ == -1)                                \
-                       asm("decb "__percpu_arg(0) : "+m" (var));       \
+                       asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
                else                                                    \
-                       asm("addb %1, "__percpu_arg(0)                  \
+                       asm qual ("addb %1, "__percpu_arg(0)            \
                            : "+m" (var)                                \
                            : "qi" ((pao_T__)(val)));                   \
                break;                                                  \
        case 2:                                                         \
                if (pao_ID__ == 1)                                      \
-                       asm("incw "__percpu_arg(0) : "+m" (var));       \
+                       asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
                else if (pao_ID__ == -1)                                \
-                       asm("decw "__percpu_arg(0) : "+m" (var));       \
+                       asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
                else                                                    \
-                       asm("addw %1, "__percpu_arg(0)                  \
+                       asm qual ("addw %1, "__percpu_arg(0)            \
                            : "+m" (var)                                \
                            : "ri" ((pao_T__)(val)));                   \
                break;                                                  \
        case 4:                                                         \
                if (pao_ID__ == 1)                                      \
-                       asm("incl "__percpu_arg(0) : "+m" (var));       \
+                       asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
                else if (pao_ID__ == -1)                                \
-                       asm("decl "__percpu_arg(0) : "+m" (var));       \
+                       asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
                else                                                    \
-                       asm("addl %1, "__percpu_arg(0)                  \
+                       asm qual ("addl %1, "__percpu_arg(0)            \
                            : "+m" (var)                                \
                            : "ri" ((pao_T__)(val)));                   \
                break;                                                  \
        case 8:                                                         \
                if (pao_ID__ == 1)                                      \
-                       asm("incq "__percpu_arg(0) : "+m" (var));       \
+                       asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
                else if (pao_ID__ == -1)                                \
-                       asm("decq "__percpu_arg(0) : "+m" (var));       \
+                       asm qual ("decq "__percpu_arg(0) : "+m" (var)); \
                else                                                    \
-                       asm("addq %1, "__percpu_arg(0)                  \
+                       asm qual ("addq %1, "__percpu_arg(0)            \
                            : "+m" (var)                                \
                            : "re" ((pao_T__)(val)));                   \
                break;                                                  \
@@ -180,27 +180,27 @@ do {                                                                      \
        }                                                               \
 } while (0)
 
-#define percpu_from_op(op, var)                                \
+#define percpu_from_op(qual, op, var)                  \
 ({                                                     \
        typeof(var) pfo_ret__;                          \
        switch (sizeof(var)) {                          \
        case 1:                                         \
-               asm volatile(op "b "__percpu_arg(1)",%0"\
+               asm qual (op "b "__percpu_arg(1)",%0"   \
                    : "=q" (pfo_ret__)                  \
                    : "m" (var));                       \
                break;                                  \
        case 2:                                         \
-               asm volatile(op "w "__percpu_arg(1)",%0"\
+               asm qual (op "w "__percpu_arg(1)",%0"   \
                    : "=r" (pfo_ret__)                  \
                    : "m" (var));                       \
                break;                                  \
        case 4:                                         \
-               asm volatile(op "l "__percpu_arg(1)",%0"\
+               asm qual (op "l "__percpu_arg(1)",%0"   \
                    : "=r" (pfo_ret__)                  \
                    : "m" (var));                       \
                break;                                  \
        case 8:                                         \
-               asm volatile(op "q "__percpu_arg(1)",%0"\
+               asm qual (op "q "__percpu_arg(1)",%0"   \
                    : "=r" (pfo_ret__)                  \
                    : "m" (var));                       \
                break;                                  \
@@ -238,23 +238,23 @@ do {                                                                      \
        pfo_ret__;                                      \
 })
 
-#define percpu_unary_op(op, var)                       \
+#define percpu_unary_op(qual, op, var)                 \
 ({                                                     \
        switch (sizeof(var)) {                          \
        case 1:                                         \
-               asm(op "b "__percpu_arg(0)              \
+               asm qual (op "b "__percpu_arg(0)        \
                    : "+m" (var));                      \
                break;                                  \
        case 2:                                         \
-               asm(op "w "__percpu_arg(0)              \
+               asm qual (op "w "__percpu_arg(0)        \
                    : "+m" (var));                      \
                break;                                  \
        case 4:                                         \
-               asm(op "l "__percpu_arg(0)              \
+               asm qual (op "l "__percpu_arg(0)        \
                    : "+m" (var));                      \
                break;                                  \
        case 8:                                         \
-               asm(op "q "__percpu_arg(0)              \
+               asm qual (op "q "__percpu_arg(0)        \
                    : "+m" (var));                      \
                break;                                  \
        default: __bad_percpu_size();                   \
@@ -264,27 +264,27 @@ do {                                                                      \
 /*
  * Add return operation
  */
-#define percpu_add_return_op(var, val)                                 \
+#define percpu_add_return_op(qual, var, val)                           \
 ({                                                                     \
        typeof(var) paro_ret__ = val;                                   \
        switch (sizeof(var)) {                                          \
        case 1:                                                         \
-               asm("xaddb %0, "__percpu_arg(1)                         \
+               asm qual ("xaddb %0, "__percpu_arg(1)                   \
                            : "+q" (paro_ret__), "+m" (var)             \
                            : : "memory");                              \
                break;                                                  \
        case 2:                                                         \
-               asm("xaddw %0, "__percpu_arg(1)                         \
+               asm qual ("xaddw %0, "__percpu_arg(1)                   \
                            : "+r" (paro_ret__), "+m" (var)             \
                            : : "memory");                              \
                break;                                                  \
        case 4:                                                         \
-               asm("xaddl %0, "__percpu_arg(1)                         \
+               asm qual ("xaddl %0, "__percpu_arg(1)                   \
                            : "+r" (paro_ret__), "+m" (var)             \
                            : : "memory");                              \
                break;                                                  \
        case 8:                                                         \
-               asm("xaddq %0, "__percpu_arg(1)                         \
+               asm qual ("xaddq %0, "__percpu_arg(1)                   \
                            : "+re" (paro_ret__), "+m" (var)            \
                            : : "memory");                              \
                break;                                                  \
@@ -299,13 +299,13 @@ do {                                                                      \
  * expensive due to the implied lock prefix.  The processor cannot prefetch
  * cachelines if xchg is used.
  */
-#define percpu_xchg_op(var, nval)                                      \
+#define percpu_xchg_op(qual, var, nval)                                        \
 ({                                                                     \
        typeof(var) pxo_ret__;                                          \
        typeof(var) pxo_new__ = (nval);                                 \
        switch (sizeof(var)) {                                          \
        case 1:                                                         \
-               asm("\n\tmov "__percpu_arg(1)",%%al"                    \
+               asm qual ("\n\tmov "__percpu_arg(1)",%%al"              \
                    "\n1:\tcmpxchgb %2, "__percpu_arg(1)                \
                    "\n\tjnz 1b"                                        \
                            : "=&a" (pxo_ret__), "+m" (var)             \
@@ -313,7 +313,7 @@ do {                                                                        \
                            : "memory");                                \
                break;                                                  \
        case 2:                                                         \
-               asm("\n\tmov "__percpu_arg(1)",%%ax"                    \
+               asm qual ("\n\tmov "__percpu_arg(1)",%%ax"              \
                    "\n1:\tcmpxchgw %2, "__percpu_arg(1)                \
                    "\n\tjnz 1b"                                        \
                            : "=&a" (pxo_ret__), "+m" (var)             \
@@ -321,7 +321,7 @@ do {                                                                        \
                            : "memory");                                \
                break;                                                  \
        case 4:                                                         \
-               asm("\n\tmov "__percpu_arg(1)",%%eax"                   \
+               asm qual ("\n\tmov "__percpu_arg(1)",%%eax"             \
                    "\n1:\tcmpxchgl %2, "__percpu_arg(1)                \
                    "\n\tjnz 1b"                                        \
                            : "=&a" (pxo_ret__), "+m" (var)             \
@@ -329,7 +329,7 @@ do {                                                                        \
                            : "memory");                                \
                break;                                                  \
        case 8:                                                         \
-               asm("\n\tmov "__percpu_arg(1)",%%rax"                   \
+               asm qual ("\n\tmov "__percpu_arg(1)",%%rax"             \
                    "\n1:\tcmpxchgq %2, "__percpu_arg(1)                \
                    "\n\tjnz 1b"                                        \
                            : "=&a" (pxo_ret__), "+m" (var)             \
@@ -345,32 +345,32 @@ do {                                                                      \
  * cmpxchg has no such implied lock semantics; as a result it is much
  * more efficient for cpu local operations.
  */
-#define percpu_cmpxchg_op(var, oval, nval)                             \
+#define percpu_cmpxchg_op(qual, var, oval, nval)                       \
 ({                                                                     \
        typeof(var) pco_ret__;                                          \
        typeof(var) pco_old__ = (oval);                                 \
        typeof(var) pco_new__ = (nval);                                 \
        switch (sizeof(var)) {                                          \
        case 1:                                                         \
-               asm("cmpxchgb %2, "__percpu_arg(1)                      \
+               asm qual ("cmpxchgb %2, "__percpu_arg(1)                \
                            : "=a" (pco_ret__), "+m" (var)              \
                            : "q" (pco_new__), "0" (pco_old__)          \
                            : "memory");                                \
                break;                                                  \
        case 2:                                                         \
-               asm("cmpxchgw %2, "__percpu_arg(1)                      \
+               asm qual ("cmpxchgw %2, "__percpu_arg(1)                \
                            : "=a" (pco_ret__), "+m" (var)              \
                            : "r" (pco_new__), "0" (pco_old__)          \
                            : "memory");                                \
                break;                                                  \
        case 4:                                                         \
-               asm("cmpxchgl %2, "__percpu_arg(1)                      \
+               asm qual ("cmpxchgl %2, "__percpu_arg(1)                \
                            : "=a" (pco_ret__), "+m" (var)              \
                            : "r" (pco_new__), "0" (pco_old__)          \
                            : "memory");                                \
                break;                                                  \
        case 8:                                                         \
-               asm("cmpxchgq %2, "__percpu_arg(1)                      \
+               asm qual ("cmpxchgq %2, "__percpu_arg(1)                \
                            : "=a" (pco_ret__), "+m" (var)              \
                            : "r" (pco_new__), "0" (pco_old__)          \
                            : "memory");                                \
@@ -391,58 +391,70 @@ do {                                                                      \
  */
 #define this_cpu_read_stable(var)      percpu_stable_op("mov", var)
 
-#define raw_cpu_read_1(pcp)            percpu_from_op("mov", pcp)
-#define raw_cpu_read_2(pcp)            percpu_from_op("mov", pcp)
-#define raw_cpu_read_4(pcp)            percpu_from_op("mov", pcp)
-
-#define raw_cpu_write_1(pcp, val)      percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val)      percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val)      percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val)                percpu_add_op((pcp), val)
-#define raw_cpu_add_2(pcp, val)                percpu_add_op((pcp), val)
-#define raw_cpu_add_4(pcp, val)                percpu_add_op((pcp), val)
-#define raw_cpu_and_1(pcp, val)                percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_2(pcp, val)                percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_4(pcp, val)                percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_1(pcp, val)         percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_2(pcp, val)         percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_4(pcp, val)         percpu_to_op("or", (pcp), val)
-#define raw_cpu_xchg_1(pcp, val)       percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_2(pcp, val)       percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_4(pcp, val)       percpu_xchg_op(pcp, val)
-
-#define this_cpu_read_1(pcp)           percpu_from_op("mov", pcp)
-#define this_cpu_read_2(pcp)           percpu_from_op("mov", pcp)
-#define this_cpu_read_4(pcp)           percpu_from_op("mov", pcp)
-#define this_cpu_write_1(pcp, val)     percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_2(pcp, val)     percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_4(pcp, val)     percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_1(pcp, val)       percpu_add_op((pcp), val)
-#define this_cpu_add_2(pcp, val)       percpu_add_op((pcp), val)
-#define this_cpu_add_4(pcp, val)       percpu_add_op((pcp), val)
-#define this_cpu_and_1(pcp, val)       percpu_to_op("and", (pcp), val)
-#define this_cpu_and_2(pcp, val)       percpu_to_op("and", (pcp), val)
-#define this_cpu_and_4(pcp, val)       percpu_to_op("and", (pcp), val)
-#define this_cpu_or_1(pcp, val)                percpu_to_op("or", (pcp), val)
-#define this_cpu_or_2(pcp, val)                percpu_to_op("or", (pcp), val)
-#define this_cpu_or_4(pcp, val)                percpu_to_op("or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval)     percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval)     percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval)     percpu_xchg_op(pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val)         percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_2(pcp, val)         percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_4(pcp, val)         percpu_add_return_op(pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval)     percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval)     percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval)     percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_add_return_1(pcp, val)                percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_2(pcp, val)                percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_4(pcp, val)                percpu_add_return_op(pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval)    percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval)    percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval)    percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_1(pcp)            percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_2(pcp)            percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_4(pcp)            percpu_from_op(, "mov", pcp)
+
+#define raw_cpu_write_1(pcp, val)      percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val)      percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val)      percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val)                percpu_add_op(, (pcp), val)
+#define raw_cpu_add_2(pcp, val)                percpu_add_op(, (pcp), val)
+#define raw_cpu_add_4(pcp, val)                percpu_add_op(, (pcp), val)
+#define raw_cpu_and_1(pcp, val)                percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val)                percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val)                percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val)         percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val)         percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val)         percpu_to_op(, "or", (pcp), val)
+
+/*
+ * raw_cpu_xchg() can use a load-store since it is not required to be
+ * IRQ-safe.
+ */
+#define raw_percpu_xchg_op(var, nval)                                  \
+({                                                                     \
+       typeof(var) pxo_ret__ = raw_cpu_read(var);                      \
+       raw_cpu_write(var, (nval));                                     \
+       pxo_ret__;                                                      \
+})
+
+#define raw_cpu_xchg_1(pcp, val)       raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val)       raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val)       raw_percpu_xchg_op(pcp, val)
+
+#define this_cpu_read_1(pcp)           percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_2(pcp)           percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_4(pcp)           percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_1(pcp, val)     percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_2(pcp, val)     percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_4(pcp, val)     percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_1(pcp, val)       percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val)       percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val)       percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val)       percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val)       percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val)       percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val)                percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val)                percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val)                percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval)     percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)     percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)     percpu_xchg_op(volatile, pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val)         percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_2(pcp, val)         percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_4(pcp, val)         percpu_add_return_op(, pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval)     percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval)     percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval)     percpu_cmpxchg_op(, pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val)                percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val)                percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val)                percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval)    percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval)    percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval)    percpu_cmpxchg_op(volatile, pcp, oval, nval)
 
 #ifdef CONFIG_X86_CMPXCHG64
 #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)            \
@@ -466,23 +478,23 @@ do {                                                                      \
  * 32 bit must fall back to generic operations.
  */
 #ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp)                    percpu_from_op("mov", pcp)
-#define raw_cpu_write_8(pcp, val)              percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val)                        percpu_add_op((pcp), val)
-#define raw_cpu_and_8(pcp, val)                        percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_8(pcp, val)                 percpu_to_op("or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val)         percpu_add_return_op(pcp, val)
-#define raw_cpu_xchg_8(pcp, nval)              percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval)     percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_read_8(pcp)                   percpu_from_op("mov", pcp)
-#define this_cpu_write_8(pcp, val)             percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val)               percpu_add_op((pcp), val)
-#define this_cpu_and_8(pcp, val)               percpu_to_op("and", (pcp), val)
-#define this_cpu_or_8(pcp, val)                        percpu_to_op("or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val)                percpu_add_return_op(pcp, val)
-#define this_cpu_xchg_8(pcp, nval)             percpu_xchg_op(pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval)    percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_8(pcp)                    percpu_from_op(, "mov", pcp)
+#define raw_cpu_write_8(pcp, val)              percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val)                        percpu_add_op(, (pcp), val)
+#define raw_cpu_and_8(pcp, val)                        percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val)                 percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val)         percpu_add_return_op(, pcp, val)
+#define raw_cpu_xchg_8(pcp, nval)              raw_percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval)     percpu_cmpxchg_op(, pcp, oval, nval)
+
+#define this_cpu_read_8(pcp)                   percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val)             percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val)               percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val)               percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val)                        percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val)                percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval)             percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval)    percpu_cmpxchg_op(volatile, pcp, oval, nval)
 
 /*
  * Pretty complex macro to generate cmpxchg16 instruction.  The instruction
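
The theme of this file's changes is the new leading parameter of the percpu asm macros: raw_cpu_*() passes an empty qualifier while this_cpu_*() passes volatile, so only the forms that must be IRQ/preemption-safe pin their asm down with asm volatile; raw_cpu_xchg() additionally becomes a plain read-plus-write since it has no IRQ-safety requirement. A small standalone illustration (GCC on x86) of how an empty or volatile macro argument splices into an asm statement -- READ_OP() is a made-up macro, not the kernel's:

    #include <stdio.h>

    #define READ_OP(qual, var)                                          \
    ({                                                                  \
            typeof(var) ret__;                                          \
            asm qual ("mov %1, %0" : "=r" (ret__) : "r" (var));         \
            ret__;                                                      \
    })

    int main(void)
    {
            int x = 7;
            int a = READ_OP(, x);          /* expands to plain asm(...) */
            int b = READ_OP(volatile, x);  /* expands to asm volatile(...) */

            printf("%d %d\n", a, b);
            return 0;
    }
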
index da545df..0d3fe06 100644
@@ -162,7 +162,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define raw_smp_processor_id() (this_cpu_read(cpu_number))
+#define raw_smp_processor_id()  this_cpu_read(cpu_number)
+#define __smp_processor_id() __this_cpu_read(cpu_number)
 
 #ifdef CONFIG_X86_32
 extern int safe_smp_processor_id(void);
index 880b551..d83e9f7 100644
@@ -18,6 +18,20 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
 #define __parainstructions_end NULL
 #endif
 
+/*
+ * Currently, the max observed size in the kernel code is
+ * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, both of which are 5.
+ * Raise it if needed.
+ */
+#define POKE_MAX_OPCODE_SIZE   5
+
+struct text_poke_loc {
+       void *detour;
+       void *addr;
+       size_t len;
+       const char opcode[POKE_MAX_OPCODE_SIZE];
+};
+
 extern void text_poke_early(void *addr, const void *opcode, size_t len);
 
 /*
@@ -38,6 +52,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
 extern int poke_int3_handler(struct pt_regs *regs);
 extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
 extern int after_bootmem;
 extern __ro_after_init struct mm_struct *poking_mm;
 extern __ro_after_init unsigned long poking_addr;
index 390596b..bd542f9 100644
@@ -14,6 +14,7 @@
 #include <linux/kdebug.h>
 #include <linux/kprobes.h>
 #include <linux/mmu_context.h>
+#include <linux/bsearch.h>
 #include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
@@ -848,81 +849,133 @@ static void do_sync_core(void *info)
        sync_core();
 }
 
-static bool bp_patching_in_progress;
-static void *bp_int3_handler, *bp_int3_addr;
+static struct bp_patching_desc {
+       struct text_poke_loc *vec;
+       int nr_entries;
+} bp_patching;
+
+static int patch_cmp(const void *key, const void *elt)
+{
+       struct text_poke_loc *tp = (struct text_poke_loc *) elt;
+
+       if (key < tp->addr)
+               return -1;
+       if (key > tp->addr)
+               return 1;
+       return 0;
+}
+NOKPROBE_SYMBOL(patch_cmp);
 
 int poke_int3_handler(struct pt_regs *regs)
 {
+       struct text_poke_loc *tp;
+       unsigned char int3 = 0xcc;
+       void *ip;
+
        /*
         * Having observed our INT3 instruction, we now must observe
-        * bp_patching_in_progress.
+        * bp_patching.nr_entries.
         *
-        *      in_progress = TRUE              INT3
+        *      nr_entries != 0                 INT3
         *      WMB                             RMB
-        *      write INT3                      if (in_progress)
+        *      write INT3                      if (nr_entries)
         *
-        * Idem for bp_int3_handler.
+        * Idem for other elements in bp_patching.
         */
        smp_rmb();
 
-       if (likely(!bp_patching_in_progress))
+       if (likely(!bp_patching.nr_entries))
                return 0;
 
-       if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
+       if (user_mode(regs))
                return 0;
 
-       /* set up the specified breakpoint handler */
-       regs->ip = (unsigned long) bp_int3_handler;
+       /*
+        * Discount the sizeof(int3). See text_poke_bp_batch().
+        */
+       ip = (void *) regs->ip - sizeof(int3);
+
+       /*
+        * Skip the binary search if there is a single member in the vector.
+        */
+       if (unlikely(bp_patching.nr_entries > 1)) {
+               tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
+                            sizeof(struct text_poke_loc),
+                            patch_cmp);
+               if (!tp)
+                       return 0;
+       } else {
+               tp = bp_patching.vec;
+               if (tp->addr != ip)
+                       return 0;
+       }
+
+       /* set up the specified breakpoint detour */
+       regs->ip = (unsigned long) tp->detour;
 
        return 1;
 }
 NOKPROBE_SYMBOL(poke_int3_handler);
 
 /**
- * text_poke_bp() -- update instructions on live kernel on SMP
- * @addr:      address to patch
- * @opcode:    opcode of new instruction
- * @len:       length to copy
- * @handler:   address to jump to when the temporary breakpoint is hit
+ * text_poke_bp_batch() -- update instructions on live kernel on SMP
+ * @tp:                        vector of instructions to patch
+ * @nr_entries:                number of entries in the vector
  *
  * Modify multi-byte instruction by using int3 breakpoint on SMP.
  * We completely avoid stop_machine() here, and achieve the
  * synchronization using int3 breakpoint.
  *
  * The way it is done:
- *     - add a int3 trap to the address that will be patched
+ *     - For each entry in the vector:
+ *             - add an int3 trap to the address that will be patched
  *     - sync cores
- *     - update all but the first byte of the patched range
+ *     - For each entry in the vector:
+ *             - update all but the first byte of the patched range
  *     - sync cores
- *     - replace the first byte (int3) by the first byte of
- *       replacing opcode
+ *     - For each entry in the vector:
+ *             - replace the first byte (int3) by the first byte of
+ *               replacing opcode
  *     - sync cores
  */
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 {
+       int patched_all_but_first = 0;
        unsigned char int3 = 0xcc;
-
-       bp_int3_handler = handler;
-       bp_int3_addr = (u8 *)addr + sizeof(int3);
-       bp_patching_in_progress = true;
+       unsigned int i;
 
        lockdep_assert_held(&text_mutex);
 
+       bp_patching.vec = tp;
+       bp_patching.nr_entries = nr_entries;
+
        /*
         * Corresponding read barrier in int3 notifier for making sure the
-        * in_progress and handler are correctly ordered wrt. patching.
+        * nr_entries and handler are correctly ordered wrt. patching.
         */
        smp_wmb();
 
-       text_poke(addr, &int3, sizeof(int3));
+       /*
+        * First step: add an int3 trap to the address that will be patched.
+        */
+       for (i = 0; i < nr_entries; i++)
+               text_poke(tp[i].addr, &int3, sizeof(int3));
 
        on_each_cpu(do_sync_core, NULL, 1);
 
-       if (len - sizeof(int3) > 0) {
-               /* patch all but the first byte */
-               text_poke((char *)addr + sizeof(int3),
-                         (const char *) opcode + sizeof(int3),
-                         len - sizeof(int3));
+       /*
+        * Second step: update all but the first byte of the patched range.
+        */
+       for (i = 0; i < nr_entries; i++) {
+               if (tp[i].len - sizeof(int3) > 0) {
+                       text_poke((char *)tp[i].addr + sizeof(int3),
+                                 (const char *)tp[i].opcode + sizeof(int3),
+                                 tp[i].len - sizeof(int3));
+                       patched_all_but_first++;
+               }
+       }
+
+       if (patched_all_but_first) {
                /*
                 * According to Intel, this core syncing is very likely
                 * not necessary and we'd be safe even without it. But
@@ -931,14 +984,47 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
                on_each_cpu(do_sync_core, NULL, 1);
        }
 
-       /* patch the first byte */
-       text_poke(addr, opcode, sizeof(int3));
+       /*
+        * Third step: replace the first byte (int3) by the first byte of
+        * replacing opcode.
+        */
+       for (i = 0; i < nr_entries; i++)
+               text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
 
        on_each_cpu(do_sync_core, NULL, 1);
        /*
         * sync_core() implies an smp_mb() and orders this store against
         * the writing of the new instruction.
         */
-       bp_patching_in_progress = false;
+       bp_patching.vec = NULL;
+       bp_patching.nr_entries = 0;
 }
 
+/**
+ * text_poke_bp() -- update instructions on live kernel on SMP
+ * @addr:      address to patch
+ * @opcode:    opcode of new instruction
+ * @len:       length to copy
+ * @handler:   address to jump to when the temporary breakpoint is hit
+ *
+ * Update a single instruction using a vector on the stack, avoiding
+ * dynamically allocated memory. This function should be used when it is
+ * not possible to allocate memory.
+ */
+void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+{
+       struct text_poke_loc tp = {
+               .detour = handler,
+               .addr = addr,
+               .len = len,
+       };
+
+       if (len > POKE_MAX_OPCODE_SIZE) {
+               WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
+               return;
+       }
+
+       memcpy((void *)tp.opcode, opcode, len);
+
+       text_poke_bp_batch(&tp, 1);
+}
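
A rough usage sketch of the batch interface above (editorial illustration, not part of the patch): the caller fills a vector of text_poke_loc entries sorted by address and flushes them in a single int3 round trip. The helper name patch_two_sites(), the addresses and the resume-after-the-instruction detour are assumptions; only text_poke_bp_batch(), text_mutex, POKE_MAX_OPCODE_SIZE and the field names come from the code shown here.

static void patch_two_sites(void *addr0, const void *insn0,
                            void *addr1, const void *insn1, size_t len)
{
        struct text_poke_loc vec[2] = {
                { .addr = addr0, .detour = (char *)addr0 + len, .len = len },
                { .addr = addr1, .detour = (char *)addr1 + len, .len = len },
        };

        /* poke_int3_handler() bsearches the vector, so it must stay sorted by addr */
        if (WARN_ON_ONCE(addr0 >= addr1 || len > POKE_MAX_OPCODE_SIZE))
                return;

        memcpy((void *)vec[0].opcode, insn0, len);
        memcpy((void *)vec[1].opcode, insn1, len);

        mutex_lock(&text_mutex);                /* text_poke_bp_batch() asserts this */
        text_poke_bp_batch(vec, 2);
        mutex_unlock(&text_mutex);
}
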
index e631c35..0440532 100644 (file)
@@ -35,41 +35,43 @@ static void bug_at(unsigned char *ip, int line)
        BUG();
 }
 
-static void __ref __jump_label_transform(struct jump_entry *entry,
-                                        enum jump_label_type type,
-                                        int init)
+static void __jump_label_set_jump_code(struct jump_entry *entry,
+                                      enum jump_label_type type,
+                                      union jump_code_union *code,
+                                      int init)
 {
-       union jump_code_union jmp;
        const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
        const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
-       const void *expect, *code;
+       const void *expect;
        int line;
 
-       jmp.jump = 0xe9;
-       jmp.offset = jump_entry_target(entry) -
-                    (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+       code->jump = 0xe9;
+       code->offset = jump_entry_target(entry) -
+                      (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
 
-       if (type == JUMP_LABEL_JMP) {
-               if (init) {
-                       expect = default_nop; line = __LINE__;
-               } else {
-                       expect = ideal_nop; line = __LINE__;
-               }
-
-               code = &jmp.code;
+       if (init) {
+               expect = default_nop; line = __LINE__;
+       } else if (type == JUMP_LABEL_JMP) {
+               expect = ideal_nop; line = __LINE__;
        } else {
-               if (init) {
-                       expect = default_nop; line = __LINE__;
-               } else {
-                       expect = &jmp.code; line = __LINE__;
-               }
-
-               code = ideal_nop;
+               expect = code->code; line = __LINE__;
        }
 
        if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
                bug_at((void *)jump_entry_code(entry), line);
 
+       if (type == JUMP_LABEL_NOP)
+               memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
+}
+
+static void __ref __jump_label_transform(struct jump_entry *entry,
+                                        enum jump_label_type type,
+                                        int init)
+{
+       union jump_code_union code;
+
+       __jump_label_set_jump_code(entry, type, &code, init);
+
        /*
         * As long as only a single processor is running and the code is still
         * not marked as RO, text_poke_early() can be used; Checking that
@@ -82,12 +84,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
         * always nop being the 'currently valid' instruction
         */
        if (init || system_state == SYSTEM_BOOTING) {
-               text_poke_early((void *)jump_entry_code(entry), code,
+               text_poke_early((void *)jump_entry_code(entry), &code,
                                JUMP_LABEL_NOP_SIZE);
                return;
        }
 
-       text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
+       text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
                     (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
 }
 
@@ -99,6 +101,75 @@ void arch_jump_label_transform(struct jump_entry *entry,
        mutex_unlock(&text_mutex);
 }
 
+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
+static struct text_poke_loc tp_vec[TP_VEC_MAX];
+static int tp_vec_nr;
+
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+                                    enum jump_label_type type)
+{
+       struct text_poke_loc *tp;
+       void *entry_code;
+
+       if (system_state == SYSTEM_BOOTING) {
+               /*
+                * Fall back to the non-batching mode.
+                */
+               arch_jump_label_transform(entry, type);
+               return true;
+       }
+
+       /*
+        * No more space in the vector; tell the upper layer to apply
+        * the queue before continuing.
+        */
+       if (tp_vec_nr == TP_VEC_MAX)
+               return false;
+
+       tp = &tp_vec[tp_vec_nr];
+
+       entry_code = (void *)jump_entry_code(entry);
+
+       /*
+        * The INT3 handler will do a bsearch in the queue, so we need entries
+        * to be sorted. We can survive an unsorted list by rejecting the entry,
+        * forcing the generic jump_label code to apply the queue. Warn once to
+        * draw attention to an unsorted entry, which should not happen; in the
+        * worst case we simply behave as we do without batching, with some
+        * extra overhead.
+        */
+       if (tp_vec_nr > 0) {
+               int prev = tp_vec_nr - 1;
+               struct text_poke_loc *prev_tp = &tp_vec[prev];
+
+               if (WARN_ON_ONCE(prev_tp->addr > entry_code))
+                       return false;
+       }
+
+       __jump_label_set_jump_code(entry, type,
+                                  (union jump_code_union *) &tp->opcode, 0);
+
+       tp->addr = entry_code;
+       tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
+       tp->len = JUMP_LABEL_NOP_SIZE;
+
+       tp_vec_nr++;
+
+       return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+       if (!tp_vec_nr)
+               return;
+
+       mutex_lock(&text_mutex);
+       text_poke_bp_batch(tp_vec, tp_vec_nr);
+       mutex_unlock(&text_mutex);
+
+       tp_vec_nr = 0;
+}
+
 static enum {
        JL_STATE_START,
        JL_STATE_NO_UPDATE,
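
As a sketch of how a caller is expected to drive the queue/apply pair above (this mirrors the generic __jump_label_update() batching loop further down in this series; JUMP_LABEL_JMP is hard-coded and the helper name is made up for illustration only):

static void sketch_update_range(struct jump_entry *entry, struct jump_entry *stop)
{
        for (; entry < stop; entry++) {
                if (!arch_jump_label_transform_queue(entry, JUMP_LABEL_JMP)) {
                        /* vector full: flush the queue and retry this entry */
                        arch_jump_label_transform_apply();
                        arch_jump_label_transform_queue(entry, JUMP_LABEL_JMP);
                }
        }
        /* flush whatever is still queued */
        arch_jump_label_transform_apply();
}
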
index 5c4aa60..2de5e36 100644 (file)
@@ -856,7 +856,7 @@ static ssize_t nx842_##_name##_show(struct device *dev,             \
        rcu_read_lock();                                                \
        local_devdata = rcu_dereference(devdata);                       \
        if (local_devdata)                                              \
-               p = snprintf(buf, PAGE_SIZE, "%ld\n",                   \
+               p = snprintf(buf, PAGE_SIZE, "%lld\n",                  \
                       atomic64_read(&local_devdata->counters->_name)); \
        rcu_read_unlock();                                              \
        return p;                                                       \
@@ -909,7 +909,7 @@ static ssize_t nx842_timehist_show(struct device *dev,
        }
 
        for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
-               bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
+               bytes = snprintf(p, bytes_remain, "%u-%uus:\t%lld\n",
                               i ? (2<<(i-1)) : 0, (2<<i)-1,
                               atomic64_read(&times[i]));
                bytes_remain -= bytes;
@@ -917,7 +917,7 @@ static ssize_t nx842_timehist_show(struct device *dev,
        }
        /* The last bucket holds everything over
         * 2<<(NX842_HIST_SLOTS - 2) us */
-       bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
+       bytes = snprintf(p, bytes_remain, "%uus - :\t%lld\n",
                        2<<(NX842_HIST_SLOTS - 2),
                        atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
        p += bytes;
index 29f7b15..d020bb4 100644 (file)
@@ -457,7 +457,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name)
        int rc;
        int i;
 
-       lockdep_assert_held_exclusive(&devices_rwsem);
+       lockdep_assert_held_write(&devices_rwsem);
        ida_init(&inuse);
        xa_for_each (&devices, index, device) {
                char buf[IB_DEVICE_NAME_MAX];
index e38f104..fde8d40 100644 (file)
@@ -487,7 +487,7 @@ static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
 
 static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
 {
-       lockdep_assert_held_exclusive(&tty->ldisc_sem);
+       lockdep_assert_held_write(&tty->ldisc_sem);
        WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
        clear_bit(TTY_LDISC_OPEN, &tty->flags);
        if (ld->ops->close)
@@ -509,7 +509,7 @@ static int tty_ldisc_failto(struct tty_struct *tty, int ld)
        struct tty_ldisc *disc = tty_ldisc_get(tty, ld);
        int r;
 
-       lockdep_assert_held_exclusive(&tty->ldisc_sem);
+       lockdep_assert_held_write(&tty->ldisc_sem);
        if (IS_ERR(disc))
                return PTR_ERR(disc);
        tty->ldisc = disc;
@@ -633,7 +633,7 @@ EXPORT_SYMBOL_GPL(tty_set_ldisc);
  */
 static void tty_ldisc_kill(struct tty_struct *tty)
 {
-       lockdep_assert_held_exclusive(&tty->ldisc_sem);
+       lockdep_assert_held_write(&tty->ldisc_sem);
        if (!tty->ldisc)
                return;
        /*
@@ -681,7 +681,7 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
        struct tty_ldisc *ld;
        int retval;
 
-       lockdep_assert_held_exclusive(&tty->ldisc_sem);
+       lockdep_assert_held_write(&tty->ldisc_sem);
        ld = tty_ldisc_get(tty, disc);
        if (IS_ERR(ld)) {
                BUG_ON(disc == N_TTY);
index d2c90bf..fe5e338 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1187,7 +1187,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
        unsigned flags = 0;
 
        if (iov_iter_rw(iter) == WRITE) {
-               lockdep_assert_held_exclusive(&inode->i_rwsem);
+               lockdep_assert_held_write(&inode->i_rwsem);
                flags |= IOMAP_WRITE;
        } else {
                lockdep_assert_held(&inode->i_rwsem);
index d7a1509..370f01d 100644 (file)
 #include <linux/types.h>
 
 typedef struct {
-       long long counter;
+       s64 counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(i)       { (i) }
 
-extern long long atomic64_read(const atomic64_t *v);
-extern void     atomic64_set(atomic64_t *v, long long i);
+extern s64 atomic64_read(const atomic64_t *v);
+extern void atomic64_set(atomic64_t *v, s64 i);
 
 #define atomic64_set_release(v, i)     atomic64_set((v), (i))
 
 #define ATOMIC64_OP(op)                                                        \
-extern void     atomic64_##op(long long a, atomic64_t *v);
+extern void     atomic64_##op(s64 a, atomic64_t *v);
 
 #define ATOMIC64_OP_RETURN(op)                                         \
-extern long long atomic64_##op##_return(long long a, atomic64_t *v);
+extern s64 atomic64_##op##_return(s64 a, atomic64_t *v);
 
 #define ATOMIC64_FETCH_OP(op)                                          \
-extern long long atomic64_fetch_##op(long long a, atomic64_t *v);
+extern s64 atomic64_fetch_##op(s64 a, atomic64_t *v);
 
 #define ATOMIC64_OPS(op)       ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
 
@@ -46,11 +46,11 @@ ATOMIC64_OPS(xor)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-extern long long atomic64_dec_if_positive(atomic64_t *v);
+extern s64 atomic64_dec_if_positive(atomic64_t *v);
 #define atomic64_dec_if_positive atomic64_dec_if_positive
-extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
-extern long long atomic64_xchg(atomic64_t *v, long long new);
-extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
+extern s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
+extern s64 atomic64_xchg(atomic64_t *v, s64 new);
+extern s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
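
A small editorial sketch (not part of the patch) of what the conversion buys: every parameter and return value is now a fixed-width s64, so callers no longer depend on whether long or long long backs the counter on a given architecture. The helper name is made up; atomic64_fetch_add() is one of the generated ops above.

static s64 sketch_bump(atomic64_t *ctr)
{
        /* argument and return type are s64 on every architecture */
        return atomic64_fetch_add(1, ctr) + 1;
}
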
index 3e113a1..3526c0a 100644 (file)
@@ -215,6 +215,9 @@ extern void arch_jump_label_transform(struct jump_entry *entry,
                                      enum jump_label_type type);
 extern void arch_jump_label_transform_static(struct jump_entry *entry,
                                             enum jump_label_type type);
+extern bool arch_jump_label_transform_queue(struct jump_entry *entry,
+                                           enum jump_label_type type);
+extern void arch_jump_label_transform_apply(void);
 extern int jump_label_text_reserved(void *start, void *end);
 extern void static_key_slow_inc(struct static_key *key);
 extern void static_key_slow_dec(struct static_key *key);
index e8eef38..57baa27 100644 (file)
@@ -203,11 +203,17 @@ struct lock_list {
        struct lock_list                *parent;
 };
 
-/*
- * We record lock dependency chains, so that we can cache them:
+/**
+ * struct lock_chain - lock dependency chain record
+ *
+ * @irq_context: the same as irq_context in held_lock below
+ * @depth:       the number of held locks in this chain
+ * @base:        the index in chain_hlocks for this chain
+ * @entry:       the collided lock chains in lock_chain hash list
+ * @chain_key:   the hash key of this lock_chain
  */
 struct lock_chain {
-       /* see BUILD_BUG_ON()s in lookup_chain_cache() */
+       /* see BUILD_BUG_ON()s in add_chain_cache() */
        unsigned int                    irq_context :  2,
                                        depth       :  6,
                                        base        : 24;
@@ -217,12 +223,8 @@ struct lock_chain {
 };
 
 #define MAX_LOCKDEP_KEYS_BITS          13
-/*
- * Subtract one because we offset hlock->class_idx by 1 in order
- * to make 0 mean no class. This avoids overflowing the class_idx
- * bitfield and hitting the BUG in hlock_class().
- */
-#define MAX_LOCKDEP_KEYS               ((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
+#define MAX_LOCKDEP_KEYS               (1UL << MAX_LOCKDEP_KEYS_BITS)
+#define INITIAL_CHAIN_KEY              -1
 
 struct held_lock {
        /*
@@ -247,6 +249,11 @@ struct held_lock {
        u64                             waittime_stamp;
        u64                             holdtime_stamp;
 #endif
+       /*
+        * class_idx is zero-indexed; it points to the element in
+        * lock_classes this held lock instance belongs to. class_idx is in
+        * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive.
+        */
        unsigned int                    class_idx:MAX_LOCKDEP_KEYS_BITS;
        /*
         * The lock-stack is unified in that the lock chains of interrupt
@@ -281,6 +288,8 @@ extern void lockdep_free_key_range(void *start, unsigned long size);
 extern asmlinkage void lockdep_sys_exit(void);
 extern void lockdep_set_selftest_task(struct task_struct *task);
 
+extern void lockdep_init_task(struct task_struct *task);
+
 extern void lockdep_off(void);
 extern void lockdep_on(void);
 
@@ -385,7 +394,7 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
                WARN_ON(debug_locks && !lockdep_is_held(l));    \
        } while (0)
 
-#define lockdep_assert_held_exclusive(l)       do {                    \
+#define lockdep_assert_held_write(l)   do {                    \
                WARN_ON(debug_locks && !lockdep_is_held_type(l, 0));    \
        } while (0)
 
@@ -405,6 +414,10 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 
 #else /* !CONFIG_LOCKDEP */
 
+static inline void lockdep_init_task(struct task_struct *task)
+{
+}
+
 static inline void lockdep_off(void)
 {
 }
@@ -466,7 +479,7 @@ struct lockdep_map { };
 #define lockdep_is_held_type(l, r)             (1)
 
 #define lockdep_assert_held(l)                 do { (void)(l); } while (0)
-#define lockdep_assert_held_exclusive(l)       do { (void)(l); } while (0)
+#define lockdep_assert_held_write(l)   do { (void)(l); } while (0)
 #define lockdep_assert_held_read(l)            do { (void)(l); } while (0)
 #define lockdep_assert_held_once(l)            do { (void)(l); } while (0)
 
@@ -497,7 +510,6 @@ enum xhlock_context_t {
        { .name = (_name), .key = (void *)(_key), }
 
 static inline void lockdep_invariant_state(bool force) {}
-static inline void lockdep_init_task(struct task_struct *task) {}
 static inline void lockdep_free_task(struct task_struct *task) {}
 
 #ifdef CONFIG_LOCK_STAT
index 2809b44..3998cdf 100644 (file)
@@ -121,7 +121,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
        lock_release(&sem->rw_sem.dep_map, 1, ip);
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        if (!read)
-               sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN;
+               atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
 #endif
 }
 
@@ -131,7 +131,7 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
        lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        if (!read)
-               sem->rw_sem.owner = current;
+               atomic_long_set(&sem->rw_sem.owner, (long)current);
 #endif
 }
 
index 2ea18a3..e401358 100644 (file)
  */
 struct rw_semaphore {
        atomic_long_t count;
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        /*
-        * Write owner. Used as a speculative check to see
-        * if the owner is running on the cpu.
+        * Write owner or one of the read owners, as well as flags regarding
+        * the current state of the rwsem. Can be used as a speculative
+        * check to see if the write owner is running on the cpu.
         */
-       struct task_struct *owner;
+       atomic_long_t owner;
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        struct optimistic_spin_queue osq; /* spinner MCS lock */
 #endif
        raw_spinlock_t wait_lock;
@@ -50,10 +51,10 @@ struct rw_semaphore {
 };
 
 /*
- * Setting bit 1 of the owner field but not bit 0 will indicate
+ * Setting all bits of the owner field except bit 0 will indicate
  * that the rwsem is writer-owned with an unknown owner.
  */
-#define RWSEM_OWNER_UNKNOWN    ((struct task_struct *)-2L)
+#define RWSEM_OWNER_UNKNOWN    (-2L)
 
 /* In all implementations count != 0 means locked */
 static inline int rwsem_is_locked(struct rw_semaphore *sem)
@@ -73,13 +74,14 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 #endif
 
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED, .owner = NULL
+#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
 #else
 #define __RWSEM_OPT_INIT(lockname)
 #endif
 
 #define __RWSEM_INITIALIZER(name)                              \
        { __RWSEM_INIT_COUNT(name),                             \
+         .owner = ATOMIC_LONG_INIT(0),                         \
          .wait_list = LIST_HEAD_INIT((name).wait_list),        \
          .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
          __RWSEM_OPT_INIT(name)                                \
index ad826d2..26a2013 100644 (file)
@@ -51,6 +51,11 @@ static inline void wake_q_init(struct wake_q_head *head)
        head->lastp = &head->first;
 }
 
+static inline bool wake_q_empty(struct wake_q_head *head)
+{
+       return head->first == WAKE_Q_TAIL;
+}
+
 extern void wake_q_add(struct wake_q_head *head, struct task_struct *task);
 extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task);
 extern void wake_up_q(struct wake_q_head *head);
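
A brief editorial sketch of the new helper in context (the function name and flow are made up; only wake_q_add(), wake_q_empty() and wake_up_q() are real):

static void sketch_wake_one(struct wake_q_head *wake_q, struct task_struct *waiter)
{
        if (waiter)
                wake_q_add(wake_q, waiter);

        /* skip the wakeup pass entirely if nothing was queued */
        if (!wake_q_empty(wake_q))
                wake_up_q(wake_q);
}
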
index bb8b451..6fc856c 100644 (file)
@@ -180,29 +180,46 @@ static inline int get_boot_cpu_id(void)
 
 #endif /* !SMP */
 
-/*
- * smp_processor_id(): get the current CPU ID.
+/**
+ * raw_smp_processor_id() - get the current (unstable) CPU id
+ *
+ * For when you know what you are doing and need an unstable
+ * CPU id.
+ */
+
+/**
+ * smp_processor_id() - get the current (stable) CPU id
+ *
+ * This is the normal accessor to the CPU id and should be used
+ * whenever possible.
+ *
+ * The CPU id is stable when:
  *
- * if DEBUG_PREEMPT is enabled then we check whether it is
- * used in a preemption-safe way. (smp_processor_id() is safe
- * if it's used in a preemption-off critical section, or in
- * a thread that is bound to the current CPU.)
+ *  - IRQs are disabled;
+ *  - preemption is disabled;
+ *  - the task is CPU affine.
  *
- * NOTE: raw_smp_processor_id() is for internal use only
- * (smp_processor_id() is the preferred variant), but in rare
- * instances it might also be used to turn off false positives
- * (i.e. smp_processor_id() use that the debugging code reports but
- * which use for some reason is legal). Don't use this to hack around
- * the warning message, as your code might not work under PREEMPT.
+ * When CONFIG_DEBUG_PREEMPT is enabled, we verify these assumptions and WARN
+ * when smp_processor_id() is used while the CPU id is not stable.
  */
+
+/*
+ * Allow the architecture to differentiate between a stable and unstable read.
+ * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
+ * regular asm read for the stable.
+ */
+#ifndef __smp_processor_id
+#define __smp_processor_id(x) raw_smp_processor_id(x)
+#endif
+
 #ifdef CONFIG_DEBUG_PREEMPT
   extern unsigned int debug_smp_processor_id(void);
 # define smp_processor_id() debug_smp_processor_id()
 #else
-# define smp_processor_id() raw_smp_processor_id()
+# define smp_processor_id() __smp_processor_id()
 #endif
 
-#define get_cpu()              ({ preempt_disable(); smp_processor_id(); })
+#define get_cpu()              ({ preempt_disable(); __smp_processor_id(); })
 #define put_cpu()              preempt_enable()
 
 /*
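
A minimal editorial sketch of "stable" usage (assuming the usual <linux/smp.h> and <linux/printk.h> includes; the helper name is made up): as long as preemption is disabled the id cannot change underneath us, so CONFIG_DEBUG_PREEMPT stays quiet.

static void sketch_touch_this_cpu(void)
{
        int cpu = get_cpu();            /* preempt_disable() + stable CPU id */

        pr_info("running on CPU %d\n", cpu);    /* safe: preemption is off */

        put_cpu();                      /* preempt_enable(); the id may change now */
}
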
index 231114a..05030f6 100644 (file)
@@ -174,7 +174,7 @@ typedef struct {
 
 #ifdef CONFIG_64BIT
 typedef struct {
-       long counter;
+       s64 counter;
 } atomic64_t;
 #endif
 
index c70ef65..afa6ad7 100644 (file)
@@ -166,6 +166,8 @@ struct task_struct init_task
        .softirqs_enabled = 1,
 #endif
 #ifdef CONFIG_LOCKDEP
+       .lockdep_depth = 0, /* no locks held yet */
+       .curr_chain_key = INITIAL_CHAIN_KEY,
        .lockdep_recursion = 0,
 #endif
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
index a83ef72..d18e343 100644 (file)
@@ -1952,9 +1952,6 @@ static __latent_entropy struct task_struct *copy_process(
        p->pagefault_disabled = 0;
 
 #ifdef CONFIG_LOCKDEP
-       p->lockdep_depth = 0; /* no locks held yet */
-       p->curr_chain_key = 0;
-       p->lockdep_recursion = 0;
        lockdep_init_task(p);
 #endif
 
index 4b5b468..6d50728 100644 (file)
@@ -470,6 +470,37 @@ enum futex_access {
        FUTEX_WRITE
 };
 
+/**
+ * futex_setup_timer - set up the sleeping hrtimer.
+ * @time:      ptr to the given timeout value
+ * @timeout:   the hrtimer_sleeper structure to be set up
+ * @flags:     futex flags
+ * @range_ns:  optional range in ns
+ *
+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
+ *        value given
+ */
+static inline struct hrtimer_sleeper *
+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
+                 int flags, u64 range_ns)
+{
+       if (!time)
+               return NULL;
+
+       hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
+                             CLOCK_REALTIME : CLOCK_MONOTONIC,
+                             HRTIMER_MODE_ABS);
+       hrtimer_init_sleeper(timeout, current);
+
+       /*
+        * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
+        * effectively the same as calling hrtimer_set_expires().
+        */
+       hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
+
+       return timeout;
+}
+
 /**
  * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr:     virtual address of the futex
@@ -2679,7 +2710,7 @@ out:
 static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
                      ktime_t *abs_time, u32 bitset)
 {
-       struct hrtimer_sleeper timeout, *to = NULL;
+       struct hrtimer_sleeper timeout, *to;
        struct restart_block *restart;
        struct futex_hash_bucket *hb;
        struct futex_q q = futex_q_init;
@@ -2689,17 +2720,8 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
                return -EINVAL;
        q.bitset = bitset;
 
-       if (abs_time) {
-               to = &timeout;
-
-               hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
-                                     CLOCK_REALTIME : CLOCK_MONOTONIC,
-                                     HRTIMER_MODE_ABS);
-               hrtimer_init_sleeper(to, current);
-               hrtimer_set_expires_range_ns(&to->timer, *abs_time,
-                                            current->timer_slack_ns);
-       }
-
+       to = futex_setup_timer(abs_time, &timeout, flags,
+                              current->timer_slack_ns);
 retry:
        /*
         * Prepare to wait on uaddr. On success, holds hb lock and increments
@@ -2779,7 +2801,7 @@ static long futex_wait_restart(struct restart_block *restart)
 static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
                         ktime_t *time, int trylock)
 {
-       struct hrtimer_sleeper timeout, *to = NULL;
+       struct hrtimer_sleeper timeout, *to;
        struct futex_pi_state *pi_state = NULL;
        struct rt_mutex_waiter rt_waiter;
        struct futex_hash_bucket *hb;
@@ -2792,13 +2814,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
        if (refill_pi_state_cache())
                return -ENOMEM;
 
-       if (time) {
-               to = &timeout;
-               hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
-                                     HRTIMER_MODE_ABS);
-               hrtimer_init_sleeper(to, current);
-               hrtimer_set_expires(&to->timer, *time);
-       }
+       to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
 
 retry:
        ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
@@ -3195,7 +3211,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                                 u32 val, ktime_t *abs_time, u32 bitset,
                                 u32 __user *uaddr2)
 {
-       struct hrtimer_sleeper timeout, *to = NULL;
+       struct hrtimer_sleeper timeout, *to;
        struct futex_pi_state *pi_state = NULL;
        struct rt_mutex_waiter rt_waiter;
        struct futex_hash_bucket *hb;
@@ -3212,15 +3228,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
        if (!bitset)
                return -EINVAL;
 
-       if (abs_time) {
-               to = &timeout;
-               hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
-                                     CLOCK_REALTIME : CLOCK_MONOTONIC,
-                                     HRTIMER_MODE_ABS);
-               hrtimer_init_sleeper(to, current);
-               hrtimer_set_expires_range_ns(&to->timer, *abs_time,
-                                            current->timer_slack_ns);
-       }
+       to = futex_setup_timer(abs_time, &timeout, flags,
+                              current->timer_slack_ns);
 
        /*
         * The waiter is allocated on our stack, manipulated by the requeue
index 0bfa10f..df30084 100644 (file)
@@ -37,12 +37,26 @@ static int jump_label_cmp(const void *a, const void *b)
        const struct jump_entry *jea = a;
        const struct jump_entry *jeb = b;
 
+       /*
+        * Entries are sorted by key.
+        */
        if (jump_entry_key(jea) < jump_entry_key(jeb))
                return -1;
 
        if (jump_entry_key(jea) > jump_entry_key(jeb))
                return 1;
 
+       /*
+        * In batching mode, entries of the same key are also sorted by code
+        * address within the already key-sorted list, enabling a bsearch in
+        * the vector.
+        */
+       if (jump_entry_code(jea) < jump_entry_code(jeb))
+               return -1;
+
+       if (jump_entry_code(jea) > jump_entry_code(jeb))
+               return 1;
+
        return 0;
 }
 
@@ -384,25 +398,55 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry)
        return enabled ^ branch;
 }
 
+static bool jump_label_can_update(struct jump_entry *entry, bool init)
+{
+       /*
+        * Cannot update code that was in an init text area.
+        */
+       if (!init && jump_entry_is_init(entry))
+               return false;
+
+       if (!kernel_text_address(jump_entry_code(entry))) {
+               WARN_ONCE(1, "can't patch jump_label at %pS", (void *)jump_entry_code(entry));
+               return false;
+       }
+
+       return true;
+}
+
+#ifndef HAVE_JUMP_LABEL_BATCH
 static void __jump_label_update(struct static_key *key,
                                struct jump_entry *entry,
                                struct jump_entry *stop,
                                bool init)
 {
        for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
-               /*
-                * An entry->code of 0 indicates an entry which has been
-                * disabled because it was in an init text area.
-                */
-               if (init || !jump_entry_is_init(entry)) {
-                       if (kernel_text_address(jump_entry_code(entry)))
-                               arch_jump_label_transform(entry, jump_label_type(entry));
-                       else
-                               WARN_ONCE(1, "can't patch jump_label at %pS",
-                                         (void *)jump_entry_code(entry));
+               if (jump_label_can_update(entry, init))
+                       arch_jump_label_transform(entry, jump_label_type(entry));
+       }
+}
+#else
+static void __jump_label_update(struct static_key *key,
+                               struct jump_entry *entry,
+                               struct jump_entry *stop,
+                               bool init)
+{
+       for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
+
+               if (!jump_label_can_update(entry, init))
+                       continue;
+
+               if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) {
+                       /*
+                        * Queue is full: Apply the current queue and try again.
+                        */
+                       arch_jump_label_transform_apply();
+                       BUG_ON(!arch_jump_label_transform_queue(entry, jump_label_type(entry)));
                }
        }
+       arch_jump_label_transform_apply();
 }
+#endif
 
 void __init jump_label_init(void)
 {
index 6fe2f33..45452fa 100644 (file)
@@ -3,7 +3,7 @@
 # and is generally not a function of system call inputs.
 KCOV_INSTRUMENT                := n
 
-obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o
+obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
index 46b71af..8c7e7d2 100644 (file)
@@ -31,50 +31,13 @@ enum lock_events {
 DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);
 
 /*
- * The purpose of the lock event counting subsystem is to provide a low
- * overhead way to record the number of specific locking events by using
- * percpu counters. It is the percpu sum that matters, not specifically
- * how many of them happens in each cpu.
- *
- * It is possible that the same percpu counter may be modified in both
- * the process and interrupt contexts. For architectures that perform
- * percpu operation with multiple instructions, it is possible to lose
- * count if a process context percpu update is interrupted in the middle
- * and the same counter is updated in the interrupt context. Therefore,
- * the generated percpu sum may not be precise. The error, if any, should
- * be small and insignificant.
- *
- * For those architectures that do multi-instruction percpu operation,
- * preemption in the middle and moving the task to another cpu may cause
- * a larger error in the count. Again, this will be few and far between.
- * Given the imprecise nature of the count and the possibility of resetting
- * the count and doing the measurement again, this is not really a big
- * problem.
- *
- * To get a better picture of what is happening under the hood, it is
- * suggested that a few measurements should be taken with the counts
- * reset in between to stamp out outliner because of these possible
- * error conditions.
- *
- * To minimize overhead, we use __this_cpu_*() in all cases except when
- * CONFIG_DEBUG_PREEMPT is defined. In this particular case, this_cpu_*()
- * will be used to avoid the appearance of unwanted BUG messages.
- */
-#ifdef CONFIG_DEBUG_PREEMPT
-#define lockevent_percpu_inc(x)                this_cpu_inc(x)
-#define lockevent_percpu_add(x, v)     this_cpu_add(x, v)
-#else
-#define lockevent_percpu_inc(x)                __this_cpu_inc(x)
-#define lockevent_percpu_add(x, v)     __this_cpu_add(x, v)
-#endif
-
-/*
- * Increment the PV qspinlock statistical counters
+ * Increment the statistical counters. Use raw_cpu_inc() because of its lower
+ * overhead; we don't care if we lose the occasional update.
  */
 static inline void __lockevent_inc(enum lock_events event, bool cond)
 {
        if (cond)
-               lockevent_percpu_inc(lockevents[event]);
+               raw_cpu_inc(lockevents[event]);
 }
 
 #define lockevent_inc(ev)        __lockevent_inc(LOCKEVENT_ ##ev, true)
@@ -82,7 +45,7 @@ static inline void __lockevent_inc(enum lock_events event, bool cond)
 
 static inline void __lockevent_add(enum lock_events event, int inc)
 {
-       lockevent_percpu_add(lockevents[event], inc);
+       raw_cpu_add(lockevents[event], inc);
 }
 
 #define lockevent_add(ev, c)   __lockevent_add(LOCKEVENT_ ##ev, c)
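
For illustration (not part of the patch), a call site simply names an event from lock_events_list.h and the macro pastes it onto the LOCKEVENT_ prefix; rwsem_rlock is one of the real events listed further down, while the helper name is made up.

static void sketch_note_read_lock(void)
{
        lockevent_inc(rwsem_rlock);     /* bumps this CPU's LOCKEVENT_rwsem_rlock counter */
}
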
index ad7668c..239039d 100644 (file)
@@ -56,12 +56,16 @@ LOCK_EVENT(rwsem_sleep_reader)      /* # of reader sleeps                   */
 LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps                   */
 LOCK_EVENT(rwsem_wake_reader)  /* # of reader wakeups                  */
 LOCK_EVENT(rwsem_wake_writer)  /* # of writer wakeups                  */
-LOCK_EVENT(rwsem_opt_wlock)    /* # of write locks opt-spin acquired   */
-LOCK_EVENT(rwsem_opt_fail)     /* # of failed opt-spinnings            */
+LOCK_EVENT(rwsem_opt_rlock)    /* # of opt-acquired read locks         */
+LOCK_EVENT(rwsem_opt_wlock)    /* # of opt-acquired write locks        */
+LOCK_EVENT(rwsem_opt_fail)     /* # of failed optspins                 */
+LOCK_EVENT(rwsem_opt_nospin)   /* # of disabled optspins               */
+LOCK_EVENT(rwsem_opt_norspin)  /* # of disabled reader-only optspins   */
+LOCK_EVENT(rwsem_opt_rlock2)   /* # of opt-acquired 2ndary read locks  */
 LOCK_EVENT(rwsem_rlock)                /* # of read locks acquired             */
 LOCK_EVENT(rwsem_rlock_fast)   /* # of fast read locks acquired        */
 LOCK_EVENT(rwsem_rlock_fail)   /* # of failed read lock acquisitions   */
-LOCK_EVENT(rwsem_rtrylock)     /* # of read trylock calls              */
+LOCK_EVENT(rwsem_rlock_handoff)        /* # of read lock handoffs              */
 LOCK_EVENT(rwsem_wlock)                /* # of write locks acquired            */
 LOCK_EVENT(rwsem_wlock_fail)   /* # of failed write lock acquisitions  */
-LOCK_EVENT(rwsem_wtrylock)     /* # of write trylock calls             */
+LOCK_EVENT(rwsem_wlock_handoff)        /* # of write lock handoffs             */
index c47788f..341f521 100644 (file)
@@ -151,17 +151,28 @@ unsigned long nr_lock_classes;
 static
 #endif
 struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
 
 static inline struct lock_class *hlock_class(struct held_lock *hlock)
 {
-       if (!hlock->class_idx) {
+       unsigned int class_idx = hlock->class_idx;
+
+       /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfield */
+       barrier();
+
+       if (!test_bit(class_idx, lock_classes_in_use)) {
                /*
                 * Someone passed in garbage, we give up.
                 */
                DEBUG_LOCKS_WARN_ON(1);
                return NULL;
        }
-       return lock_classes + hlock->class_idx - 1;
+
+       /*
+        * At this point, if the passed hlock->class_idx is still garbage,
+        * we just have to live with it
+        */
+       return lock_classes + class_idx;
 }
 
 #ifdef CONFIG_LOCK_STAT
@@ -359,6 +370,13 @@ static inline u64 iterate_chain_key(u64 key, u32 idx)
        return k0 | (u64)k1 << 32;
 }
 
+void lockdep_init_task(struct task_struct *task)
+{
+       task->lockdep_depth = 0; /* no locks held yet */
+       task->curr_chain_key = INITIAL_CHAIN_KEY;
+       task->lockdep_recursion = 0;
+}
+
 void lockdep_off(void)
 {
        current->lockdep_recursion++;
@@ -419,13 +437,6 @@ static int verbose(struct lock_class *class)
        return 0;
 }
 
-/*
- * Stack-trace: tightly packed array of stack backtrace
- * addresses. Protected by the graph_lock.
- */
-unsigned long nr_stack_trace_entries;
-static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
-
 static void print_lockdep_off(const char *bug_msg)
 {
        printk(KERN_DEBUG "%s\n", bug_msg);
@@ -435,6 +446,15 @@ static void print_lockdep_off(const char *bug_msg)
 #endif
 }
 
+unsigned long nr_stack_trace_entries;
+
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+/*
+ * Stack-trace: tightly packed array of stack backtrace
+ * addresses. Protected by the graph_lock.
+ */
+static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
+
 static int save_trace(struct lock_trace *trace)
 {
        unsigned long *entries = stack_trace + nr_stack_trace_entries;
@@ -457,6 +477,7 @@ static int save_trace(struct lock_trace *trace)
 
        return 1;
 }
+#endif
 
 unsigned int nr_hardirq_chains;
 unsigned int nr_softirq_chains;
@@ -470,6 +491,7 @@ unsigned int max_lockdep_depth;
 DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
 #endif
 
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
 /*
  * Locking printouts:
  */
@@ -487,6 +509,7 @@ static const char *usage_str[] =
 #undef LOCKDEP_STATE
        [LOCK_USED] = "INITIAL USE",
 };
+#endif
 
 const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
 {
@@ -500,15 +523,26 @@ static inline unsigned long lock_flag(enum lock_usage_bit bit)
 
 static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
 {
+       /*
+        * The usage character defaults to '.' (i.e., irqs disabled and not in
+        * irq context), which is the safest usage category.
+        */
        char c = '.';
 
-       if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
+       /*
+        * The order of the following usage checks matters, which will
+        * result in the outcome character as follows:
+        *
+        * - '+': irq is enabled and not in irq context
+        * - '-': in irq context and irq is disabled
+        * - '?': in irq context and irq is enabled
+        */
+       if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) {
                c = '+';
-       if (class->usage_mask & lock_flag(bit)) {
-               c = '-';
-               if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
+               if (class->usage_mask & lock_flag(bit))
                        c = '?';
-       }
+       } else if (class->usage_mask & lock_flag(bit))
+               c = '-';
 
        return c;
 }
@@ -572,19 +606,22 @@ static void print_lock(struct held_lock *hlock)
        /*
         * We can be called locklessly through debug_show_all_locks() so be
         * extra careful, the hlock might have been released and cleared.
+        *
+        * If that indeed happens, keep printing the lock as long as its
+        * class_idx still points to a registered class. We make no attempt
+        * to detect this race: if it did just happen, class_idx cannot be
+        * trusted to tell us either way.
         */
-       unsigned int class_idx = hlock->class_idx;
-
-       /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfields: */
-       barrier();
+       struct lock_class *lock = hlock_class(hlock);
 
-       if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) {
+       if (!lock) {
                printk(KERN_CONT "<RELEASED>\n");
                return;
        }
 
        printk(KERN_CONT "%p", hlock->instance);
-       print_lock_name(lock_classes + class_idx - 1);
+       print_lock_name(lock);
        printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip);
 }
 
@@ -732,7 +769,8 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
                         * Huh! same key, different name? Did someone trample
                         * on some memory? We're most confused.
                         */
-                       WARN_ON_ONCE(class->name != lock->name);
+                       WARN_ON_ONCE(class->name != lock->name &&
+                                    lock->key != &__lockdep_no_validate__);
                        return class;
                }
        }
@@ -838,11 +876,11 @@ static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS];
 static bool check_lock_chain_key(struct lock_chain *chain)
 {
 #ifdef CONFIG_PROVE_LOCKING
-       u64 chain_key = 0;
+       u64 chain_key = INITIAL_CHAIN_KEY;
        int i;
 
        for (i = chain->base; i < chain->base + chain->depth; i++)
-               chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1);
+               chain_key = iterate_chain_key(chain_key, chain_hlocks[i]);
        /*
         * The 'unsigned long long' casts avoid that a compiler warning
         * is reported when building tools/lib/lockdep.
@@ -1117,6 +1155,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
                return NULL;
        }
        nr_lock_classes++;
+       __set_bit(class - lock_classes, lock_classes_in_use);
        debug_atomic_inc(nr_unused_locks);
        class->key = key;
        class->name = lock->name;
@@ -1228,13 +1267,17 @@ static int add_lock_to_list(struct lock_class *this,
 #define CQ_MASK                                (MAX_CIRCULAR_QUEUE_SIZE-1)
 
 /*
- * The circular_queue and helpers is used to implement the
- * breadth-first search(BFS)algorithem, by which we can build
- * the shortest path from the next lock to be acquired to the
- * previous held lock if there is a circular between them.
+ * The circular_queue and helpers are used to implement the graph
+ * breadth-first search (BFS) algorithm, by which we can determine
+ * whether there is a path from a lock to another. In deadlock checks,
+ * a path from the next lock to be acquired to a previous held lock
+ * indicates that adding the <prev> -> <next> lock dependency will
+ * produce a circle in the graph. Breadth-first search instead of
+ * depth-first search is used in order to find the shortest (circular)
+ * path.
  */
 struct circular_queue {
-       unsigned long element[MAX_CIRCULAR_QUEUE_SIZE];
+       struct lock_list *element[MAX_CIRCULAR_QUEUE_SIZE];
        unsigned int  front, rear;
 };
 
@@ -1260,7 +1303,7 @@ static inline int __cq_full(struct circular_queue *cq)
        return ((cq->rear + 1) & CQ_MASK) == cq->front;
 }
 
-static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem)
+static inline int __cq_enqueue(struct circular_queue *cq, struct lock_list *elem)
 {
        if (__cq_full(cq))
                return -1;
@@ -1270,14 +1313,21 @@ static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem)
        return 0;
 }
 
-static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem)
+/*
+ * Dequeue an element from the circular_queue, return a lock_list if
+ * the queue is not empty, or NULL otherwise.
+ */
+static inline struct lock_list * __cq_dequeue(struct circular_queue *cq)
 {
+       struct lock_list * lock;
+
        if (__cq_empty(cq))
-               return -1;
+               return NULL;
 
-       *elem = cq->element[cq->front];
+       lock = cq->element[cq->front];
        cq->front = (cq->front + 1) & CQ_MASK;
-       return 0;
+
+       return lock;
 }
 
 static inline unsigned int  __cq_get_elem_count(struct circular_queue *cq)
@@ -1322,13 +1372,32 @@ static inline int get_lock_depth(struct lock_list *child)
        return depth;
 }
 
+/*
+ * Return the forward or backward dependency list.
+ *
+ * @lock:   the lock_list to get its class's dependency list
+ * @offset: the offset to struct lock_class to determine whether it is
+ *          locks_after or locks_before
+ */
+static inline struct list_head *get_dep_list(struct lock_list *lock, int offset)
+{
+       void *lock_class = lock->class;
+
+       return lock_class + offset;
+}
+
+/*
+ * Forward- or backward-dependency search, used for both circular dependency
+ * checking and hardirq-unsafe/softirq-unsafe checking.
+ */
 static int __bfs(struct lock_list *source_entry,
                 void *data,
                 int (*match)(struct lock_list *entry, void *data),
                 struct lock_list **target_entry,
-                int forward)
+                int offset)
 {
        struct lock_list *entry;
+       struct lock_list *lock;
        struct list_head *head;
        struct circular_queue *cq = &lock_cq;
        int ret = 1;
@@ -1339,31 +1408,21 @@ static int __bfs(struct lock_list *source_entry,
                goto exit;
        }
 
-       if (forward)
-               head = &source_entry->class->locks_after;
-       else
-               head = &source_entry->class->locks_before;
-
+       head = get_dep_list(source_entry, offset);
        if (list_empty(head))
                goto exit;
 
        __cq_init(cq);
-       __cq_enqueue(cq, (unsigned long)source_entry);
+       __cq_enqueue(cq, source_entry);
 
-       while (!__cq_empty(cq)) {
-               struct lock_list *lock;
-
-               __cq_dequeue(cq, (unsigned long *)&lock);
+       while ((lock = __cq_dequeue(cq))) {
 
                if (!lock->class) {
                        ret = -2;
                        goto exit;
                }
 
-               if (forward)
-                       head = &lock->class->locks_after;
-               else
-                       head = &lock->class->locks_before;
+               head = get_dep_list(lock, offset);
 
                DEBUG_LOCKS_WARN_ON(!irqs_disabled());
 
@@ -1377,7 +1436,7 @@ static int __bfs(struct lock_list *source_entry,
                                        goto exit;
                                }
 
-                               if (__cq_enqueue(cq, (unsigned long)entry)) {
+                               if (__cq_enqueue(cq, entry)) {
                                        ret = -1;
                                        goto exit;
                                }
@@ -1396,7 +1455,8 @@ static inline int __bfs_forwards(struct lock_list *src_entry,
                        int (*match)(struct lock_list *entry, void *data),
                        struct lock_list **target_entry)
 {
-       return __bfs(src_entry, data, match, target_entry, 1);
+       return __bfs(src_entry, data, match, target_entry,
+                    offsetof(struct lock_class, locks_after));
 
 }
 
@@ -1405,16 +1465,11 @@ static inline int __bfs_backwards(struct lock_list *src_entry,
                        int (*match)(struct lock_list *entry, void *data),
                        struct lock_list **target_entry)
 {
-       return __bfs(src_entry, data, match, target_entry, 0);
+       return __bfs(src_entry, data, match, target_entry,
+                    offsetof(struct lock_class, locks_before));
 
 }
 
-/*
- * Recursive, forwards-direction lock-dependency checking, used for
- * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
- * checking.
- */
-
 static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
 {
        unsigned long *entries = stack_trace + trace->offset;
@@ -1426,16 +1481,15 @@ static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
  * Print a dependency chain entry (this is only done when a deadlock
  * has been detected):
  */
-static noinline int
+static noinline void
 print_circular_bug_entry(struct lock_list *target, int depth)
 {
        if (debug_locks_silent)
-               return 0;
+               return;
        printk("\n-> #%u", depth);
        print_lock_name(target->class);
        printk(KERN_CONT ":\n");
        print_lock_trace(&target->trace, 6);
-       return 0;
 }
 
 static void
@@ -1492,7 +1546,7 @@ print_circular_lock_scenario(struct held_lock *src,
  * When a circular dependency is detected, print the
  * header first:
  */
-static noinline int
+static noinline void
 print_circular_bug_header(struct lock_list *entry, unsigned int depth,
                        struct held_lock *check_src,
                        struct held_lock *check_tgt)
@@ -1500,7 +1554,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
        struct task_struct *curr = current;
 
        if (debug_locks_silent)
-               return 0;
+               return;
 
        pr_warn("\n");
        pr_warn("======================================================\n");
@@ -1518,8 +1572,6 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
        pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
 
        print_circular_bug_entry(entry, depth);
-
-       return 0;
 }
 
 static inline int class_equal(struct lock_list *entry, void *data)
@@ -1527,10 +1579,10 @@ static inline int class_equal(struct lock_list *entry, void *data)
        return entry->class == data;
 }
 
-static noinline int print_circular_bug(struct lock_list *this,
-                                      struct lock_list *target,
-                                      struct held_lock *check_src,
-                                      struct held_lock *check_tgt)
+static noinline void print_circular_bug(struct lock_list *this,
+                                       struct lock_list *target,
+                                       struct held_lock *check_src,
+                                       struct held_lock *check_tgt)
 {
        struct task_struct *curr = current;
        struct lock_list *parent;
@@ -1538,10 +1590,10 @@ static noinline int print_circular_bug(struct lock_list *this,
        int depth;
 
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
-               return 0;
+               return;
 
        if (!save_trace(&this->trace))
-               return 0;
+               return;
 
        depth = get_lock_depth(target);
 
@@ -1563,21 +1615,17 @@ static noinline int print_circular_bug(struct lock_list *this,
 
        printk("\nstack backtrace:\n");
        dump_stack();
-
-       return 0;
 }
 
-static noinline int print_bfs_bug(int ret)
+static noinline void print_bfs_bug(int ret)
 {
        if (!debug_locks_off_graph_unlock())
-               return 0;
+               return;
 
        /*
         * Breadth-first-search failed, graph got corrupted?
         */
        WARN(1, "lockdep bfs error:%d\n", ret);
-
-       return 0;
 }
 
 static int noop_count(struct lock_list *entry, void *data)
@@ -1640,36 +1688,95 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
 }
 
 /*
- * Prove that the dependency graph starting at <entry> can not
- * lead to <target>. Print an error and return 0 if it does.
+ * Check that the dependency graph starting at <src> can lead to
+ * <target> or not. Print an error and return 0 if it does.
  */
 static noinline int
-check_noncircular(struct lock_list *root, struct lock_class *target,
-               struct lock_list **target_entry)
+check_path(struct lock_class *target, struct lock_list *src_entry,
+          struct lock_list **target_entry)
 {
-       int result;
+       int ret;
+
+       ret = __bfs_forwards(src_entry, (void *)target, class_equal,
+                            target_entry);
+
+       if (unlikely(ret < 0))
+               print_bfs_bug(ret);
+
+       return ret;
+}
+
+/*
+ * Prove that the dependency graph starting at <src> can not
+ * lead to <target>. If it can, there is a circle when adding
+ * <target> -> <src> dependency.
+ *
+ * Print an error and return 0 if it does.
+ */
+static noinline int
+check_noncircular(struct held_lock *src, struct held_lock *target,
+                 struct lock_trace *trace)
+{
+       int ret;
+       struct lock_list *uninitialized_var(target_entry);
+       struct lock_list src_entry = {
+               .class = hlock_class(src),
+               .parent = NULL,
+       };
 
        debug_atomic_inc(nr_cyclic_checks);
 
-       result = __bfs_forwards(root, target, class_equal, target_entry);
+       ret = check_path(hlock_class(target), &src_entry, &target_entry);
 
-       return result;
+       if (unlikely(!ret)) {
+               if (!trace->nr_entries) {
+                       /*
+                        * If save_trace fails here, the printing might
+                        * trigger a WARN but because of the !nr_entries it
+                        * should not do bad things.
+                        */
+                       save_trace(trace);
+               }
+
+               print_circular_bug(&src_entry, target_entry, src, target);
+       }
+
+       return ret;
 }
 
+#ifdef CONFIG_LOCKDEP_SMALL
+/*
+ * Check that the dependency graph starting at <src> can lead to
+ * <target> or not. If it can, the <src> -> <target> dependency is already
+ * in the graph.
+ *
+ * Print an error and return 2 if it does or 1 if it does not.
+ */
 static noinline int
-check_redundant(struct lock_list *root, struct lock_class *target,
-               struct lock_list **target_entry)
+check_redundant(struct held_lock *src, struct held_lock *target)
 {
-       int result;
+       int ret;
+       struct lock_list *uninitialized_var(target_entry);
+       struct lock_list src_entry = {
+               .class = hlock_class(src),
+               .parent = NULL,
+       };
 
        debug_atomic_inc(nr_redundant_checks);
 
-       result = __bfs_forwards(root, target, class_equal, target_entry);
+       ret = check_path(hlock_class(target), &src_entry, &target_entry);
 
-       return result;
+       if (!ret) {
+               debug_atomic_inc(nr_redundant);
+               ret = 2;
+       } else if (ret < 0)
+               ret = 0;
+
+       return ret;
 }
+#endif
 
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+#ifdef CONFIG_TRACE_IRQFLAGS
 
 static inline int usage_accumulate(struct lock_list *entry, void *mask)
 {
@@ -1766,7 +1873,7 @@ static void print_lock_class_header(struct lock_class *class, int depth)
  */
 static void __used
 print_shortest_lock_dependencies(struct lock_list *leaf,
-                               struct lock_list *root)
+                                struct lock_list *root)
 {
        struct lock_list *entry = leaf;
        int depth;
@@ -1788,8 +1895,6 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
                entry = get_lock_parent(entry);
                depth--;
        } while (entry && (depth >= 0));
-
-       return;
 }
 
 static void
@@ -1848,7 +1953,7 @@ print_irq_lock_scenario(struct lock_list *safe_entry,
        printk("\n *** DEADLOCK ***\n\n");
 }
 
-static int
+static void
 print_bad_irq_dependency(struct task_struct *curr,
                         struct lock_list *prev_root,
                         struct lock_list *next_root,
@@ -1861,7 +1966,7 @@ print_bad_irq_dependency(struct task_struct *curr,
                         const char *irqclass)
 {
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
-               return 0;
+               return;
 
        pr_warn("\n");
        pr_warn("=====================================================\n");
@@ -1907,19 +2012,17 @@ print_bad_irq_dependency(struct task_struct *curr,
 
        pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
        if (!save_trace(&prev_root->trace))
-               return 0;
+               return;
        print_shortest_lock_dependencies(backwards_entry, prev_root);
 
        pr_warn("\nthe dependencies between the lock to be acquired");
        pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
        if (!save_trace(&next_root->trace))
-               return 0;
+               return;
        print_shortest_lock_dependencies(forwards_entry, next_root);
 
        pr_warn("\nstack backtrace:\n");
        dump_stack();
-
-       return 0;
 }
 
 static const char *state_names[] = {
@@ -2066,8 +2169,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
        this.class = hlock_class(prev);
 
        ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL);
-       if (ret < 0)
-               return print_bfs_bug(ret);
+       if (ret < 0) {
+               print_bfs_bug(ret);
+               return 0;
+       }
 
        usage_mask &= LOCKF_USED_IN_IRQ_ALL;
        if (!usage_mask)
@@ -2083,8 +2188,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
        that.class = hlock_class(next);
 
        ret = find_usage_forwards(&that, forward_mask, &target_entry1);
-       if (ret < 0)
-               return print_bfs_bug(ret);
+       if (ret < 0) {
+               print_bfs_bug(ret);
+               return 0;
+       }
        if (ret == 1)
                return ret;
 
@@ -2096,8 +2203,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
        backward_mask = original_mask(target_entry1->class->usage_mask);
 
        ret = find_usage_backwards(&this, backward_mask, &target_entry);
-       if (ret < 0)
-               return print_bfs_bug(ret);
+       if (ret < 0) {
+               print_bfs_bug(ret);
+               return 0;
+       }
        if (DEBUG_LOCKS_WARN_ON(ret == 1))
                return 1;
 
@@ -2111,11 +2220,13 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
        if (DEBUG_LOCKS_WARN_ON(ret == -1))
                return 1;
 
-       return print_bad_irq_dependency(curr, &this, &that,
-                       target_entry, target_entry1,
-                       prev, next,
-                       backward_bit, forward_bit,
-                       state_name(backward_bit));
+       print_bad_irq_dependency(curr, &this, &that,
+                                target_entry, target_entry1,
+                                prev, next,
+                                backward_bit, forward_bit,
+                                state_name(backward_bit));
+
+       return 0;
 }
 
 static void inc_chains(void)
@@ -2143,11 +2254,10 @@ static inline void inc_chains(void)
        nr_process_chains++;
 }
 
-#endif
+#endif /* CONFIG_TRACE_IRQFLAGS */
 
 static void
-print_deadlock_scenario(struct held_lock *nxt,
-                            struct held_lock *prv)
+print_deadlock_scenario(struct held_lock *nxt, struct held_lock *prv)
 {
        struct lock_class *next = hlock_class(nxt);
        struct lock_class *prev = hlock_class(prv);
@@ -2165,12 +2275,12 @@ print_deadlock_scenario(struct held_lock *nxt,
        printk(" May be due to missing lock nesting notation\n\n");
 }
 
-static int
+static void
 print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
                   struct held_lock *next)
 {
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
-               return 0;
+               return;
 
        pr_warn("\n");
        pr_warn("============================================\n");
@@ -2189,8 +2299,6 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
 
        pr_warn("\nstack backtrace:\n");
        dump_stack();
-
-       return 0;
 }
 
 /*
@@ -2202,8 +2310,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
  * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
  */
 static int
-check_deadlock(struct task_struct *curr, struct held_lock *next,
-              struct lockdep_map *next_instance, int read)
+check_deadlock(struct task_struct *curr, struct held_lock *next)
 {
        struct held_lock *prev;
        struct held_lock *nest = NULL;
@@ -2222,7 +2329,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
                 * Allow read-after-read recursion of the same
                 * lock class (i.e. read_lock(lock)+read_lock(lock)):
                 */
-               if ((read == 2) && prev->read)
+               if ((next->read == 2) && prev->read)
                        return 2;
 
                /*
@@ -2232,14 +2339,15 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
                if (nest)
                        return 2;
 
-               return print_deadlock_bug(curr, prev, next);
+               print_deadlock_bug(curr, prev, next);
+               return 0;
        }
        return 1;
 }
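
The 0/1/2 return convention documented above can be modelled in a few lines of user-space C. The struct and helper below are illustrative stand-ins that use only what the patch shows (the read encoding and the nest_lock exception), not the kernel's actual data structures:

/* Simplified model of check_deadlock() for two held locks of the same class:
 * 0 = deadlock reported, 1 = OK, 2 = allowed recursion.
 * 'read' follows lockdep's encoding: 0 = write, 1 = read, 2 = recursive read.
 */
struct toy_hlock { int class_idx; int read; int nest; };

static int toy_check_deadlock(const struct toy_hlock *prev,
			      const struct toy_hlock *next)
{
	if (prev->class_idx != next->class_idx)
		return 1;	/* different classes: nothing to do here */
	if (next->read == 2 && prev->read)
		return 2;	/* read_lock(A); read_lock(A); is fine */
	if (prev->nest)
		return 2;	/* nest_lock annotation permits re-acquisition */
	return 0;		/* same class re-acquired: report deadlock */
}

int main(void)
{
	struct toy_hlock prev = { .class_idx = 1, .read = 1, .nest = 0 };
	struct toy_hlock next = { .class_idx = 1, .read = 2, .nest = 0 };

	return toy_check_deadlock(&prev, &next) == 2 ? 0 : 1;	/* read-after-read allowed */
}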
 
 /*
  * There was a chain-cache miss, and we are about to add a new dependency
- * to a previous lock. We recursively validate the following rules:
+ * to a previous lock. We validate the following rules:
  *
  *  - would the adding of the <prev> -> <next> dependency create a
  *    circular dependency in the graph? [== circular deadlock]
@@ -2263,9 +2371,7 @@ static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
               struct held_lock *next, int distance, struct lock_trace *trace)
 {
-       struct lock_list *uninitialized_var(target_entry);
        struct lock_list *entry;
-       struct lock_list this;
        int ret;
 
        if (!hlock_class(prev)->key || !hlock_class(next)->key) {
@@ -2289,28 +2395,16 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
        /*
         * Prove that the new <prev> -> <next> dependency would not
         * create a circular dependency in the graph. (We do this by
-        * forward-recursing into the graph starting at <next>, and
-        * checking whether we can reach <prev>.)
+        * a breadth-first search into the graph starting at <next>,
+        * and checking whether we can reach <prev>.)
         *
-        * We are using global variables to control the recursion, to
-        * keep the stackframe size of the recursive functions low:
+        * The search is limited by the size of the circular queue (i.e.,
+        * MAX_CIRCULAR_QUEUE_SIZE), which holds the current breadth-first
+        * frontier: the nodes whose neighbours still have to be checked.
         */
-       this.class = hlock_class(next);
-       this.parent = NULL;
-       ret = check_noncircular(&this, hlock_class(prev), &target_entry);
-       if (unlikely(!ret)) {
-               if (!trace->nr_entries) {
-                       /*
-                        * If save_trace fails here, the printing might
-                        * trigger a WARN but because of the !nr_entries it
-                        * should not do bad things.
-                        */
-                       save_trace(trace);
-               }
-               return print_circular_bug(&this, target_entry, next, prev);
-       }
-       else if (unlikely(ret < 0))
-               return print_bfs_bug(ret);
+       ret = check_noncircular(next, prev, trace);
+       if (unlikely(ret <= 0))
+               return 0;
 
        if (!check_irq_usage(curr, prev, next))
                return 0;
@@ -2341,19 +2435,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
                }
        }
 
+#ifdef CONFIG_LOCKDEP_SMALL
        /*
         * Is the <prev> -> <next> link redundant?
         */
-       this.class = hlock_class(prev);
-       this.parent = NULL;
-       ret = check_redundant(&this, hlock_class(next), &target_entry);
-       if (!ret) {
-               debug_atomic_inc(nr_redundant);
-               return 2;
-       }
-       if (ret < 0)
-               return print_bfs_bug(ret);
-
+       ret = check_redundant(prev, next);
+       if (ret != 1)
+               return ret;
+#endif
 
        if (!trace->nr_entries && !save_trace(trace))
                return 0;
@@ -2505,12 +2594,13 @@ static void
 print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next)
 {
        struct held_lock *hlock;
-       u64 chain_key = 0;
+       u64 chain_key = INITIAL_CHAIN_KEY;
        int depth = curr->lockdep_depth;
-       int i;
+       int i = get_first_held_lock(curr, hlock_next);
 
-       printk("depth: %u\n", depth + 1);
-       for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) {
+       printk("depth: %u (irq_context %u)\n", depth - i + 1,
+               hlock_next->irq_context);
+       for (; i < depth; i++) {
                hlock = curr->held_locks + i;
                chain_key = print_chain_key_iteration(hlock->class_idx, chain_key);
 
@@ -2524,13 +2614,13 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne
 static void print_chain_keys_chain(struct lock_chain *chain)
 {
        int i;
-       u64 chain_key = 0;
+       u64 chain_key = INITIAL_CHAIN_KEY;
        int class_id;
 
        printk("depth: %u\n", chain->depth);
        for (i = 0; i < chain->depth; i++) {
                class_id = chain_hlocks[chain->base + i];
-               chain_key = print_chain_key_iteration(class_id + 1, chain_key);
+               chain_key = print_chain_key_iteration(class_id, chain_key);
 
                print_lock_name(lock_classes + class_id);
                printk("\n");
@@ -2581,7 +2671,7 @@ static int check_no_collision(struct task_struct *curr,
        }
 
        for (j = 0; j < chain->depth - 1; j++, i++) {
-               id = curr->held_locks[i].class_idx - 1;
+               id = curr->held_locks[i].class_idx;
 
                if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) {
                        print_collision(curr, hlock, chain);
@@ -2664,7 +2754,7 @@ static inline int add_chain_cache(struct task_struct *curr,
        if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
                chain->base = nr_chain_hlocks;
                for (j = 0; j < chain->depth - 1; j++, i++) {
-                       int lock_id = curr->held_locks[i].class_idx - 1;
+                       int lock_id = curr->held_locks[i].class_idx;
                        chain_hlocks[chain->base + j] = lock_id;
                }
                chain_hlocks[chain->base + j] = class - lock_classes;
@@ -2754,8 +2844,9 @@ cache_hit:
        return 1;
 }
 
-static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
-               struct held_lock *hlock, int chain_head, u64 chain_key)
+static int validate_chain(struct task_struct *curr,
+                         struct held_lock *hlock,
+                         int chain_head, u64 chain_key)
 {
        /*
         * Trylock needs to maintain the stack of held locks, but it
@@ -2776,12 +2867,18 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
                 * - is softirq-safe, if this lock is hardirq-unsafe
                 *
                 * And check whether the new lock's dependency graph
-                * could lead back to the previous lock.
+                * could lead back to the previous lock:
                 *
-                * any of these scenarios could lead to a deadlock. If
-                * All validations
+                * - within the current held-lock stack
+                * - across our accumulated lock dependency records
+                *
+                * any of these scenarios could lead to a deadlock.
                 */
-               int ret = check_deadlock(curr, hlock, lock, hlock->read);
+               /*
+                * The simple case: does the current task already hold
+                * the same lock?
+                */
+               int ret = check_deadlock(curr, hlock);
 
                if (!ret)
                        return 0;
@@ -2812,16 +2909,12 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 }
 #else
 static inline int validate_chain(struct task_struct *curr,
-               struct lockdep_map *lock, struct held_lock *hlock,
-               int chain_head, u64 chain_key)
+                                struct held_lock *hlock,
+                                int chain_head, u64 chain_key)
 {
        return 1;
 }
-
-static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
-{
-}
-#endif
+#endif /* CONFIG_PROVE_LOCKING */
 
 /*
  * We are building curr_chain_key incrementally, so double-check
@@ -2832,7 +2925,7 @@ static void check_chain_key(struct task_struct *curr)
 #ifdef CONFIG_DEBUG_LOCKDEP
        struct held_lock *hlock, *prev_hlock = NULL;
        unsigned int i;
-       u64 chain_key = 0;
+       u64 chain_key = INITIAL_CHAIN_KEY;
 
        for (i = 0; i < curr->lockdep_depth; i++) {
                hlock = curr->held_locks + i;
@@ -2848,15 +2941,17 @@ static void check_chain_key(struct task_struct *curr)
                                (unsigned long long)hlock->prev_chain_key);
                        return;
                }
+
                /*
-                * Whoops ran out of static storage again?
+                * hlock->class_idx can't go beyond MAX_LOCKDEP_KEYS, but is
+                * it a registered lock class index?
                 */
-               if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
+               if (DEBUG_LOCKS_WARN_ON(!test_bit(hlock->class_idx, lock_classes_in_use)))
                        return;
 
                if (prev_hlock && (prev_hlock->irq_context !=
                                                        hlock->irq_context))
-                       chain_key = 0;
+                       chain_key = INITIAL_CHAIN_KEY;
                chain_key = iterate_chain_key(chain_key, hlock->class_idx);
                prev_hlock = hlock;
        }
@@ -2874,14 +2969,11 @@ static void check_chain_key(struct task_struct *curr)
 #endif
 }
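
The chain-key bookkeeping touched throughout this patch (INITIAL_CHAIN_KEY, class_idx used as a direct index into lock_classes) follows the loop shape of check_chain_key() above. Here is a user-space sketch; the mixing function and the sentinel value are placeholders chosen for illustration, not the kernel's iterate_chain_key() or the real INITIAL_CHAIN_KEY:

/* Toy chain-key accumulation: start from an "empty" key, fold in each held
 * lock's class index, and restart the chain when the irq context changes.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_INITIAL_CHAIN_KEY	((uint64_t)-1)	/* assumed sentinel, not the kernel value */

static uint64_t toy_iterate_chain_key(uint64_t key, uint32_t class_idx)
{
	return (key * 1099511628211ULL) ^ class_idx;	/* FNV-style stand-in hash */
}

struct toy_held { uint32_t class_idx; unsigned int irq_context; };

static uint64_t toy_build_chain_key(const struct toy_held *locks, int depth)
{
	uint64_t chain_key = TOY_INITIAL_CHAIN_KEY;
	unsigned int prev_ctx = 0;

	for (int i = 0; i < depth; i++) {
		if (i && locks[i].irq_context != prev_ctx)
			chain_key = TOY_INITIAL_CHAIN_KEY;	/* new irq context, new chain */
		chain_key = toy_iterate_chain_key(chain_key, locks[i].class_idx);
		prev_ctx = locks[i].irq_context;
	}
	return chain_key;
}

int main(void)
{
	struct toy_held held[] = {
		{ .class_idx = 3, .irq_context = 0 },
		{ .class_idx = 7, .irq_context = 0 },
		{ .class_idx = 7, .irq_context = 1 },	/* context change resets the chain */
	};

	printf("chain key: %016llx\n",
	       (unsigned long long)toy_build_chain_key(held, 3));
	return 0;
}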
 
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
 static int mark_lock(struct task_struct *curr, struct held_lock *this,
                     enum lock_usage_bit new_bit);
 
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
-
-
-static void
-print_usage_bug_scenario(struct held_lock *lock)
+static void print_usage_bug_scenario(struct held_lock *lock)
 {
        struct lock_class *class = hlock_class(lock);
 
@@ -2898,12 +2990,12 @@ print_usage_bug_scenario(struct held_lock *lock)
        printk("\n *** DEADLOCK ***\n\n");
 }
 
-static int
+static void
 print_usage_bug(struct task_struct *curr, struct held_lock *this,
                enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
 {
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
-               return 0;
+               return;
 
        pr_warn("\n");
        pr_warn("================================\n");
@@ -2933,8 +3025,6 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 
        pr_warn("\nstack backtrace:\n");
        dump_stack();
-
-       return 0;
 }
 
 /*
@@ -2944,8 +3034,10 @@ static inline int
 valid_state(struct task_struct *curr, struct held_lock *this,
            enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
 {
-       if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit)))
-               return print_usage_bug(curr, this, bad_bit, new_bit);
+       if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) {
+               print_usage_bug(curr, this, bad_bit, new_bit);
+               return 0;
+       }
        return 1;
 }
 
@@ -2953,7 +3045,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
 /*
  * print irq inversion bug:
  */
-static int
+static void
 print_irq_inversion_bug(struct task_struct *curr,
                        struct lock_list *root, struct lock_list *other,
                        struct held_lock *this, int forwards,
@@ -2964,7 +3056,7 @@ print_irq_inversion_bug(struct task_struct *curr,
        int depth;
 
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
-               return 0;
+               return;
 
        pr_warn("\n");
        pr_warn("========================================================\n");
@@ -3005,13 +3097,11 @@ print_irq_inversion_bug(struct task_struct *curr,
 
        pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
        if (!save_trace(&root->trace))
-               return 0;
+               return;
        print_shortest_lock_dependencies(other, root);
 
        pr_warn("\nstack backtrace:\n");
        dump_stack();
-
-       return 0;
 }
 
 /*
@@ -3029,13 +3119,16 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
        root.parent = NULL;
        root.class = hlock_class(this);
        ret = find_usage_forwards(&root, lock_flag(bit), &target_entry);
-       if (ret < 0)
-               return print_bfs_bug(ret);
+       if (ret < 0) {
+               print_bfs_bug(ret);
+               return 0;
+       }
        if (ret == 1)
                return ret;
 
-       return print_irq_inversion_bug(curr, &root, target_entry,
-                                       this, 1, irqclass);
+       print_irq_inversion_bug(curr, &root, target_entry,
+                               this, 1, irqclass);
+       return 0;
 }
 
 /*
@@ -3053,13 +3146,16 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
        root.parent = NULL;
        root.class = hlock_class(this);
        ret = find_usage_backwards(&root, lock_flag(bit), &target_entry);
-       if (ret < 0)
-               return print_bfs_bug(ret);
+       if (ret < 0) {
+               print_bfs_bug(ret);
+               return 0;
+       }
        if (ret == 1)
                return ret;
 
-       return print_irq_inversion_bug(curr, &root, target_entry,
-                                       this, 0, irqclass);
+       print_irq_inversion_bug(curr, &root, target_entry,
+                               this, 0, irqclass);
+       return 0;
 }
 
 void print_irqtrace_events(struct task_struct *curr)
@@ -3142,7 +3238,7 @@ mark_lock_irq(struct task_struct *curr, struct held_lock *this,
         * Validate that the lock dependencies don't have conflicting usage
         * states.
         */
-       if ((!read || !dir || STRICT_READ_CHECKS) &&
+       if ((!read || STRICT_READ_CHECKS) &&
                        !usage(curr, this, excl_bit, state_name(new_bit & ~LOCK_USAGE_READ_MASK)))
                return 0;
 
@@ -3367,8 +3463,12 @@ void trace_softirqs_off(unsigned long ip)
                debug_atomic_inc(redundant_softirqs_off);
 }
 
-static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
+static int
+mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
 {
+       if (!check)
+               goto lock_used;
+
        /*
         * If non-trylock use in a hardirq or softirq context, then
         * mark the lock as used in these contexts:
@@ -3412,6 +3512,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
                }
        }
 
+lock_used:
+       /* mark it as used: */
+       if (!mark_lock(curr, hlock, LOCK_USED))
+               return 0;
+
        return 1;
 }
 
@@ -3443,35 +3548,6 @@ static int separate_irq_context(struct task_struct *curr,
        return 0;
 }
 
-#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
-
-static inline
-int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
-               enum lock_usage_bit new_bit)
-{
-       WARN_ON(1); /* Impossible innit? when we don't have TRACE_IRQFLAG */
-       return 1;
-}
-
-static inline int mark_irqflags(struct task_struct *curr,
-               struct held_lock *hlock)
-{
-       return 1;
-}
-
-static inline unsigned int task_irq_context(struct task_struct *task)
-{
-       return 0;
-}
-
-static inline int separate_irq_context(struct task_struct *curr,
-               struct held_lock *hlock)
-{
-       return 0;
-}
-
-#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
-
 /*
  * Mark a lock with a usage bit, and validate the state transition:
  */
@@ -3480,6 +3556,11 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 {
        unsigned int new_mask = 1 << new_bit, ret = 1;
 
+       if (new_bit >= LOCK_USAGE_STATES) {
+               DEBUG_LOCKS_WARN_ON(1);
+               return 0;
+       }
+
        /*
         * If already set then do not dirty the cacheline,
         * nor do any checks:
@@ -3503,25 +3584,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
                return 0;
 
        switch (new_bit) {
-#define LOCKDEP_STATE(__STATE)                 \
-       case LOCK_USED_IN_##__STATE:            \
-       case LOCK_USED_IN_##__STATE##_READ:     \
-       case LOCK_ENABLED_##__STATE:            \
-       case LOCK_ENABLED_##__STATE##_READ:
-#include "lockdep_states.h"
-#undef LOCKDEP_STATE
-               ret = mark_lock_irq(curr, this, new_bit);
-               if (!ret)
-                       return 0;
-               break;
        case LOCK_USED:
                debug_atomic_dec(nr_unused_locks);
                break;
        default:
-               if (!debug_locks_off_graph_unlock())
+               ret = mark_lock_irq(curr, this, new_bit);
+               if (!ret)
                        return 0;
-               WARN_ON(1);
-               return 0;
        }
 
        graph_unlock();
@@ -3539,6 +3608,27 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
        return ret;
 }
 
+#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
+
+static inline int
+mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
+{
+       return 1;
+}
+
+static inline unsigned int task_irq_context(struct task_struct *task)
+{
+       return 0;
+}
+
+static inline int separate_irq_context(struct task_struct *curr,
+               struct held_lock *hlock)
+{
+       return 0;
+}
+
+#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
+
 /*
  * Initialize a lock instance's lock-class mapping info:
  */
@@ -3602,15 +3692,15 @@ EXPORT_SYMBOL_GPL(lockdep_init_map);
 struct lock_class_key __lockdep_no_validate__;
 EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
-static int
+static void
 print_lock_nested_lock_not_held(struct task_struct *curr,
                                struct held_lock *hlock,
                                unsigned long ip)
 {
        if (!debug_locks_off())
-               return 0;
+               return;
        if (debug_locks_silent)
-               return 0;
+               return;
 
        pr_warn("\n");
        pr_warn("==================================\n");
@@ -3632,8 +3722,6 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
 
        pr_warn("\nstack backtrace:\n");
        dump_stack();
-
-       return 0;
 }
 
 static int __lock_is_held(const struct lockdep_map *lock, int read);
@@ -3698,24 +3786,24 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
                return 0;
 
-       class_idx = class - lock_classes + 1;
+       class_idx = class - lock_classes;
 
        if (depth) {
                hlock = curr->held_locks + depth - 1;
                if (hlock->class_idx == class_idx && nest_lock) {
-                       if (hlock->references) {
-                               /*
-                                * Check: unsigned int references:12, overflow.
-                                */
-                               if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
-                                       return 0;
+                       if (!references)
+                               references++;
 
+                       if (!hlock->references)
                                hlock->references++;
-                       } else {
-                               hlock->references = 2;
-                       }
 
-                       return 1;
+                       hlock->references += references;
+
+                       /* Overflow */
+                       if (DEBUG_LOCKS_WARN_ON(hlock->references < references))
+                               return 0;
+
+                       return 2;
                }
        }
 
@@ -3742,11 +3830,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 #endif
        hlock->pin_count = pin_count;
 
-       if (check && !mark_irqflags(curr, hlock))
-               return 0;
-
-       /* mark it as used: */
-       if (!mark_lock(curr, hlock, LOCK_USED))
+       /* Initialize the lock usage bit */
+       if (!mark_usage(curr, hlock, check))
                return 0;
 
        /*
@@ -3760,9 +3845,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
         * the hash, not class->key.
         */
        /*
-        * Whoops, we did it again.. ran straight out of our static allocation.
+        * Whoops, we did it again.. class_idx is invalid.
         */
-       if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
+       if (DEBUG_LOCKS_WARN_ON(!test_bit(class_idx, lock_classes_in_use)))
                return 0;
 
        chain_key = curr->curr_chain_key;
@@ -3770,27 +3855,29 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
                /*
                 * How can we have a chain hash when we ain't got no keys?!
                 */
-               if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
+               if (DEBUG_LOCKS_WARN_ON(chain_key != INITIAL_CHAIN_KEY))
                        return 0;
                chain_head = 1;
        }
 
        hlock->prev_chain_key = chain_key;
        if (separate_irq_context(curr, hlock)) {
-               chain_key = 0;
+               chain_key = INITIAL_CHAIN_KEY;
                chain_head = 1;
        }
        chain_key = iterate_chain_key(chain_key, class_idx);
 
-       if (nest_lock && !__lock_is_held(nest_lock, -1))
-               return print_lock_nested_lock_not_held(curr, hlock, ip);
+       if (nest_lock && !__lock_is_held(nest_lock, -1)) {
+               print_lock_nested_lock_not_held(curr, hlock, ip);
+               return 0;
+       }
 
        if (!debug_locks_silent) {
                WARN_ON_ONCE(depth && !hlock_class(hlock - 1)->key);
                WARN_ON_ONCE(!hlock_class(hlock)->key);
        }
 
-       if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
+       if (!validate_chain(curr, hlock, chain_head, chain_key))
                return 0;
 
        curr->curr_chain_key = chain_key;
@@ -3819,14 +3906,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        return 1;
 }
 
-static int
-print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
-                          unsigned long ip)
+static void print_unlock_imbalance_bug(struct task_struct *curr,
+                                      struct lockdep_map *lock,
+                                      unsigned long ip)
 {
        if (!debug_locks_off())
-               return 0;
+               return;
        if (debug_locks_silent)
-               return 0;
+               return;
 
        pr_warn("\n");
        pr_warn("=====================================\n");
@@ -3844,8 +3931,6 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
 
        pr_warn("\nstack backtrace:\n");
        dump_stack();
-
-       return 0;
 }
 
 static int match_held_lock(const struct held_lock *hlock,
@@ -3877,7 +3962,7 @@ static int match_held_lock(const struct held_lock *hlock,
                if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
                        return 0;
 
-               if (hlock->class_idx == class - lock_classes + 1)
+               if (hlock->class_idx == class - lock_classes)
                        return 1;
        }
 
@@ -3921,22 +4006,33 @@ out:
 }
 
 static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
-                             int idx)
+                               int idx, unsigned int *merged)
 {
        struct held_lock *hlock;
+       int first_idx = idx;
 
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return 0;
 
        for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) {
-               if (!__lock_acquire(hlock->instance,
+               switch (__lock_acquire(hlock->instance,
                                    hlock_class(hlock)->subclass,
                                    hlock->trylock,
                                    hlock->read, hlock->check,
                                    hlock->hardirqs_off,
                                    hlock->nest_lock, hlock->acquire_ip,
-                                   hlock->references, hlock->pin_count))
+                                   hlock->references, hlock->pin_count)) {
+               case 0:
                        return 1;
+               case 1:
+                       break;
+               case 2:
+                       *merged += (idx == first_idx);
+                       break;
+               default:
+                       WARN_ON(1);
+                       return 0;
+               }
        }
        return 0;
 }
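
For reference, the three __lock_acquire() return values that reacquire_held_locks() now distinguishes can be summarized as follows; the enum and its names are illustrative, not kernel identifiers:

enum toy_acquire_result {
	TOY_ACQUIRE_FAILED = 0,	/* error already reported; stop re-acquiring      */
	TOY_ACQUIRE_NEW    = 1,	/* took a fresh held_lock slot                    */
	TOY_ACQUIRE_MERGED = 2,	/* folded into an existing nest_lock'ed entry, so */
				/* no new held_lock slot was consumed             */
};

A merge consumes no new held_lock slot, which is why the callers below now compare lockdep_depth against depth - merged rather than depth.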
@@ -3947,9 +4043,9 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
                 unsigned long ip)
 {
        struct task_struct *curr = current;
+       unsigned int depth, merged = 0;
        struct held_lock *hlock;
        struct lock_class *class;
-       unsigned int depth;
        int i;
 
        if (unlikely(!debug_locks))
@@ -3964,24 +4060,26 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
                return 0;
 
        hlock = find_held_lock(curr, lock, depth, &i);
-       if (!hlock)
-               return print_unlock_imbalance_bug(curr, lock, ip);
+       if (!hlock) {
+               print_unlock_imbalance_bug(curr, lock, ip);
+               return 0;
+       }
 
        lockdep_init_map(lock, name, key, 0);
        class = register_lock_class(lock, subclass, 0);
-       hlock->class_idx = class - lock_classes + 1;
+       hlock->class_idx = class - lock_classes;
 
        curr->lockdep_depth = i;
        curr->curr_chain_key = hlock->prev_chain_key;
 
-       if (reacquire_held_locks(curr, depth, i))
+       if (reacquire_held_locks(curr, depth, i, &merged))
                return 0;
 
        /*
         * I took it apart and put it back together again, except now I have
         * these 'spare' parts.. where shall I put them.
         */
-       if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
+       if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - merged))
                return 0;
        return 1;
 }
@@ -3989,8 +4087,8 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
 static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
 {
        struct task_struct *curr = current;
+       unsigned int depth, merged = 0;
        struct held_lock *hlock;
-       unsigned int depth;
        int i;
 
        if (unlikely(!debug_locks))
@@ -4005,8 +4103,10 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
                return 0;
 
        hlock = find_held_lock(curr, lock, depth, &i);
-       if (!hlock)
-               return print_unlock_imbalance_bug(curr, lock, ip);
+       if (!hlock) {
+               print_unlock_imbalance_bug(curr, lock, ip);
+               return 0;
+       }
 
        curr->lockdep_depth = i;
        curr->curr_chain_key = hlock->prev_chain_key;
@@ -4015,7 +4115,11 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
        hlock->read = 1;
        hlock->acquire_ip = ip;
 
-       if (reacquire_held_locks(curr, depth, i))
+       if (reacquire_held_locks(curr, depth, i, &merged))
+               return 0;
+
+       /* Merging can't happen with unchanged classes.. */
+       if (DEBUG_LOCKS_WARN_ON(merged))
                return 0;
 
        /*
@@ -4024,6 +4128,7 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
         */
        if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
                return 0;
+
        return 1;
 }
 
@@ -4035,11 +4140,11 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
  * @nested is an hysterical artifact, needs a tree wide cleanup.
  */
 static int
-__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
+__lock_release(struct lockdep_map *lock, unsigned long ip)
 {
        struct task_struct *curr = current;
+       unsigned int depth, merged = 1;
        struct held_lock *hlock;
-       unsigned int depth;
        int i;
 
        if (unlikely(!debug_locks))
@@ -4050,16 +4155,20 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
         * So we're all set to release this lock.. wait what lock? We don't
         * own any locks, you've been drinking again?
         */
-       if (DEBUG_LOCKS_WARN_ON(depth <= 0))
-                return print_unlock_imbalance_bug(curr, lock, ip);
+       if (depth <= 0) {
+               print_unlock_imbalance_bug(curr, lock, ip);
+               return 0;
+       }
 
        /*
         * Check whether the lock exists in the current stack
         * of held locks:
         */
        hlock = find_held_lock(curr, lock, depth, &i);
-       if (!hlock)
-               return print_unlock_imbalance_bug(curr, lock, ip);
+       if (!hlock) {
+               print_unlock_imbalance_bug(curr, lock, ip);
+               return 0;
+       }
 
        if (hlock->instance == lock)
                lock_release_holdtime(hlock);
@@ -4094,14 +4203,15 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
        if (i == depth-1)
                return 1;
 
-       if (reacquire_held_locks(curr, depth, i + 1))
+       if (reacquire_held_locks(curr, depth, i + 1, &merged))
                return 0;
 
        /*
         * We had N bottles of beer on the wall, we drank one, but now
         * there's not N-1 bottles of beer left on the wall...
+        * Pouring two of the bottles together is acceptable.
         */
-       DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth-1);
+       DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - merged);
 
        /*
         * Since reacquire_held_locks() would have called check_chain_key()
@@ -4319,7 +4429,7 @@ void lock_release(struct lockdep_map *lock, int nested,
        check_flags(flags);
        current->lockdep_recursion = 1;
        trace_lock_release(lock, ip);
-       if (__lock_release(lock, nested, ip))
+       if (__lock_release(lock, ip))
                check_chain_key(current);
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
@@ -4402,14 +4512,14 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
 EXPORT_SYMBOL_GPL(lock_unpin_lock);
 
 #ifdef CONFIG_LOCK_STAT
-static int
-print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
-                          unsigned long ip)
+static void print_lock_contention_bug(struct task_struct *curr,
+                                     struct lockdep_map *lock,
+                                     unsigned long ip)
 {
        if (!debug_locks_off())
-               return 0;
+               return;
        if (debug_locks_silent)
-               return 0;
+               return;
 
        pr_warn("\n");
        pr_warn("=================================\n");
@@ -4427,8 +4537,6 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
 
        pr_warn("\nstack backtrace:\n");
        dump_stack();
-
-       return 0;
 }
 
 static void
@@ -4573,9 +4681,7 @@ void lockdep_reset(void)
        int i;
 
        raw_local_irq_save(flags);
-       current->curr_chain_key = 0;
-       current->lockdep_depth = 0;
-       current->lockdep_recursion = 0;
+       lockdep_init_task(current);
        memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
        nr_hardirq_chains = 0;
        nr_softirq_chains = 0;
@@ -4615,9 +4721,9 @@ static void remove_class_from_lock_chain(struct pending_free *pf,
        return;
 
 recalc:
-       chain_key = 0;
+       chain_key = INITIAL_CHAIN_KEY;
        for (i = chain->base; i < chain->base + chain->depth; i++)
-               chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1);
+               chain_key = iterate_chain_key(chain_key, chain_hlocks[i]);
        if (chain->depth && chain->chain_key == chain_key)
                return;
        /* Overwrite the chain key for concurrent RCU readers. */
@@ -4691,6 +4797,7 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
                WRITE_ONCE(class->key, NULL);
                WRITE_ONCE(class->name, NULL);
                nr_lock_classes--;
+               __clear_bit(class - lock_classes, lock_classes_in_use);
        } else {
                WARN_ONCE(true, "%s() failed for class %s\n", __func__,
                          class->name);
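
Several hunks above replace the old "class_idx > MAX_LOCKDEP_KEYS" sanity checks with tests against the lock_classes_in_use bitmap, which zap_class() now clears. A plain-C sketch of that set/test/clear pattern (the toy_ names stand in for the kernel's __set_bit()/test_bit()/__clear_bit(), and the key count is an arbitrary illustration):

#include <limits.h>

#define TOY_MAX_KEYS	8192
#define TOY_BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

static unsigned long toy_classes_in_use[TOY_MAX_KEYS / TOY_BITS_PER_LONG];

static void toy_set_in_use(unsigned int idx)		/* on register_lock_class() */
{
	toy_classes_in_use[idx / TOY_BITS_PER_LONG] |= 1UL << (idx % TOY_BITS_PER_LONG);
}

static void toy_clear_in_use(unsigned int idx)		/* on zap_class() */
{
	toy_classes_in_use[idx / TOY_BITS_PER_LONG] &= ~(1UL << (idx % TOY_BITS_PER_LONG));
}

static int toy_test_in_use(unsigned int idx)		/* before trusting class_idx */
{
	return !!(toy_classes_in_use[idx / TOY_BITS_PER_LONG] & (1UL << (idx % TOY_BITS_PER_LONG)));
}

int main(void)
{
	toy_set_in_use(42);
	toy_clear_in_use(17);
	return toy_test_in_use(42) && !toy_test_in_use(17) ? 0 : 1;
}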
@@ -5036,6 +5143,7 @@ void __init lockdep_init(void)
 
        printk(" memory used by lock dependency info: %zu kB\n",
               (sizeof(lock_classes) +
+               sizeof(lock_classes_in_use) +
                sizeof(classhash_table) +
                sizeof(list_entries) +
                sizeof(list_entries_in_use) +
index 150ec3f..cc83568 100644 (file)
@@ -131,7 +131,6 @@ extern unsigned int nr_hardirq_chains;
 extern unsigned int nr_softirq_chains;
 extern unsigned int nr_process_chains;
 extern unsigned int max_lockdep_depth;
-extern unsigned int max_recursion_depth;
 
 extern unsigned int max_bfs_queue_depth;
 
@@ -160,25 +159,22 @@ lockdep_count_backward_deps(struct lock_class *class)
  * and we want to avoid too much cache bouncing.
  */
 struct lockdep_stats {
-       int     chain_lookup_hits;
-       int     chain_lookup_misses;
-       int     hardirqs_on_events;
-       int     hardirqs_off_events;
-       int     redundant_hardirqs_on;
-       int     redundant_hardirqs_off;
-       int     softirqs_on_events;
-       int     softirqs_off_events;
-       int     redundant_softirqs_on;
-       int     redundant_softirqs_off;
-       int     nr_unused_locks;
-       int     nr_redundant_checks;
-       int     nr_redundant;
-       int     nr_cyclic_checks;
-       int     nr_cyclic_check_recursions;
-       int     nr_find_usage_forwards_checks;
-       int     nr_find_usage_forwards_recursions;
-       int     nr_find_usage_backwards_checks;
-       int     nr_find_usage_backwards_recursions;
+       unsigned long  chain_lookup_hits;
+       unsigned int   chain_lookup_misses;
+       unsigned long  hardirqs_on_events;
+       unsigned long  hardirqs_off_events;
+       unsigned long  redundant_hardirqs_on;
+       unsigned long  redundant_hardirqs_off;
+       unsigned long  softirqs_on_events;
+       unsigned long  softirqs_off_events;
+       unsigned long  redundant_softirqs_on;
+       unsigned long  redundant_softirqs_off;
+       int            nr_unused_locks;
+       unsigned int   nr_redundant_checks;
+       unsigned int   nr_redundant;
+       unsigned int   nr_cyclic_checks;
+       unsigned int   nr_find_usage_forwards_checks;
+       unsigned int   nr_find_usage_backwards_checks;
 
        /*
         * Per lock class locking operation stat counts
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
deleted file mode 100644 (file)
index 0b1f779..0000000
+++ /dev/null
@@ -1,745 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* rwsem.c: R/W semaphores: contention handling functions
- *
- * Written by David Howells (dhowells@redhat.com).
- * Derived from arch/i386/kernel/semaphore.c
- *
- * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
- * and Michel Lespinasse <walken@google.com>
- *
- * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
- * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
- */
-#include <linux/rwsem.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/sched/signal.h>
-#include <linux/sched/rt.h>
-#include <linux/sched/wake_q.h>
-#include <linux/sched/debug.h>
-#include <linux/osq_lock.h>
-
-#include "rwsem.h"
-
-/*
- * Guide to the rw_semaphore's count field for common values.
- * (32-bit case illustrated, similar for 64-bit)
- *
- * 0x0000000X  (1) X readers active or attempting lock, no writer waiting
- *                 X = #active_readers + #readers attempting to lock
- *                 (X*ACTIVE_BIAS)
- *
- * 0x00000000  rwsem is unlocked, and no one is waiting for the lock or
- *             attempting to read lock or write lock.
- *
- * 0xffff000X  (1) X readers active or attempting lock, with waiters for lock
- *                 X = #active readers + # readers attempting lock
- *                 (X*ACTIVE_BIAS + WAITING_BIAS)
- *             (2) 1 writer attempting lock, no waiters for lock
- *                 X-1 = #active readers + #readers attempting lock
- *                 ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
- *             (3) 1 writer active, no waiters for lock
- *                 X-1 = #active readers + #readers attempting lock
- *                 ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
- *
- * 0xffff0001  (1) 1 reader active or attempting lock, waiters for lock
- *                 (WAITING_BIAS + ACTIVE_BIAS)
- *             (2) 1 writer active or attempting lock, no waiters for lock
- *                 (ACTIVE_WRITE_BIAS)
- *
- * 0xffff0000  (1) There are writers or readers queued but none active
- *                 or in the process of attempting lock.
- *                 (WAITING_BIAS)
- *             Note: writer can attempt to steal lock for this count by adding
- *             ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
- *
- * 0xfffe0001  (1) 1 writer active, or attempting lock. Waiters on queue.
- *                 (ACTIVE_WRITE_BIAS + WAITING_BIAS)
- *
- * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
- *      the count becomes more than 0 for successful lock acquisition,
- *      i.e. the case where there are only readers or nobody has lock.
- *      (1st and 2nd case above).
- *
- *      Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
- *      checking the count becomes ACTIVE_WRITE_BIAS for successful lock
- *      acquisition (i.e. nobody else has lock or attempts lock).  If
- *      unsuccessful, in rwsem_down_write_failed, we'll check to see if there
- *      are only waiters but none active (5th case above), and attempt to
- *      steal the lock.
- *
- */
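
One worked example makes the count encoding above concrete. The bias values are the 32-bit ones given in that comment (ACTIVE_BIAS = 1, WAITING_BIAS = 0xffff0000, ACTIVE_WRITE_BIAS = WAITING_BIAS + ACTIVE_BIAS); the program itself is an illustration only, not part of the removed file:

#include <stdio.h>

#define ACTIVE_BIAS		0x00000001u
#define WAITING_BIAS		0xffff0000u
#define ACTIVE_WRITE_BIAS	(WAITING_BIAS + ACTIVE_BIAS)

int main(void)
{
	unsigned int three_readers           = 3 * ACTIVE_BIAS;			/* 0x00000003 */
	unsigned int three_readers_waiters   = WAITING_BIAS + 3 * ACTIVE_BIAS;	/* 0xffff0003 */
	unsigned int one_writer_no_waiters   = ACTIVE_WRITE_BIAS;		/* 0xffff0001 */
	unsigned int one_writer_with_waiters = ACTIVE_WRITE_BIAS + WAITING_BIAS;/* 0xfffe0001 */

	printf("%08x %08x %08x %08x\n", three_readers, three_readers_waiters,
	       one_writer_no_waiters, one_writer_with_waiters);
	return 0;
}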
-
-/*
- * Initialize an rwsem:
- */
-void __init_rwsem(struct rw_semaphore *sem, const char *name,
-                 struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       /*
-        * Make sure we are not reinitializing a held semaphore:
-        */
-       debug_check_no_locks_freed((void *)sem, sizeof(*sem));
-       lockdep_init_map(&sem->dep_map, name, key, 0);
-#endif
-       atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
-       raw_spin_lock_init(&sem->wait_lock);
-       INIT_LIST_HEAD(&sem->wait_list);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-       sem->owner = NULL;
-       osq_lock_init(&sem->osq);
-#endif
-}
-
-EXPORT_SYMBOL(__init_rwsem);
-
-enum rwsem_waiter_type {
-       RWSEM_WAITING_FOR_WRITE,
-       RWSEM_WAITING_FOR_READ
-};
-
-struct rwsem_waiter {
-       struct list_head list;
-       struct task_struct *task;
-       enum rwsem_waiter_type type;
-};
-
-enum rwsem_wake_type {
-       RWSEM_WAKE_ANY,         /* Wake whatever's at head of wait list */
-       RWSEM_WAKE_READERS,     /* Wake readers only */
-       RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
-};
-
-/*
- * handle the lock release when processes blocked on it that can now run
- * - if we come here from up_xxxx(), then:
- *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
- *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
- * - there must be someone on the queue
- * - the wait_lock must be held by the caller
- * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
- *   to actually wakeup the blocked task(s) and drop the reference count,
- *   preferably when the wait_lock is released
- * - woken process blocks are discarded from the list after having task zeroed
- * - writers are only marked woken if downgrading is false
- */
-static void __rwsem_mark_wake(struct rw_semaphore *sem,
-                             enum rwsem_wake_type wake_type,
-                             struct wake_q_head *wake_q)
-{
-       struct rwsem_waiter *waiter, *tmp;
-       long oldcount, woken = 0, adjustment = 0;
-       struct list_head wlist;
-
-       /*
-        * Take a peek at the queue head waiter such that we can determine
-        * the wakeup(s) to perform.
-        */
-       waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
-
-       if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
-               if (wake_type == RWSEM_WAKE_ANY) {
-                       /*
-                        * Mark writer at the front of the queue for wakeup.
-                        * Until the task is actually later awoken later by
-                        * the caller, other writers are able to steal it.
-                        * Readers, on the other hand, will block as they
-                        * will notice the queued writer.
-                        */
-                       wake_q_add(wake_q, waiter->task);
-                       lockevent_inc(rwsem_wake_writer);
-               }
-
-               return;
-       }
-
-       /*
-        * Writers might steal the lock before we grant it to the next reader.
-        * We prefer to do the first reader grant before counting readers
-        * so we can bail out early if a writer stole the lock.
-        */
-       if (wake_type != RWSEM_WAKE_READ_OWNED) {
-               adjustment = RWSEM_ACTIVE_READ_BIAS;
- try_reader_grant:
-               oldcount = atomic_long_fetch_add(adjustment, &sem->count);
-               if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
-                       /*
-                        * If the count is still less than RWSEM_WAITING_BIAS
-                        * after removing the adjustment, it is assumed that
-                        * a writer has stolen the lock. We have to undo our
-                        * reader grant.
-                        */
-                       if (atomic_long_add_return(-adjustment, &sem->count) <
-                           RWSEM_WAITING_BIAS)
-                               return;
-
-                       /* Last active locker left. Retry waking readers. */
-                       goto try_reader_grant;
-               }
-               /*
-                * Set it to reader-owned to give spinners an early
-                * indication that readers now have the lock.
-                */
-               __rwsem_set_reader_owned(sem, waiter->task);
-       }
-
-       /*
-        * Grant an infinite number of read locks to the readers at the front
-        * of the queue. We know that woken will be at least 1 as we accounted
-        * for above. Note we increment the 'active part' of the count by the
-        * number of readers before waking any processes up.
-        *
-        * We have to do wakeup in 2 passes to prevent the possibility that
-        * the reader count may be decremented before it is incremented. It
-        * is because the to-be-woken waiter may not have slept yet. So it
-        * may see waiter->task got cleared, finish its critical section and
-        * do an unlock before the reader count increment.
-        *
-        * 1) Collect the read-waiters in a separate list, count them and
-        *    fully increment the reader count in rwsem.
-        * 2) For each waiters in the new list, clear waiter->task and
-        *    put them into wake_q to be woken up later.
-        */
-       list_for_each_entry(waiter, &sem->wait_list, list) {
-               if (waiter->type == RWSEM_WAITING_FOR_WRITE)
-                       break;
-
-               woken++;
-       }
-       list_cut_before(&wlist, &sem->wait_list, &waiter->list);
-
-       adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
-       lockevent_cond_inc(rwsem_wake_reader, woken);
-       if (list_empty(&sem->wait_list)) {
-               /* hit end of list above */
-               adjustment -= RWSEM_WAITING_BIAS;
-       }
-
-       if (adjustment)
-               atomic_long_add(adjustment, &sem->count);
-
-       /* 2nd pass */
-       list_for_each_entry_safe(waiter, tmp, &wlist, list) {
-               struct task_struct *tsk;
-
-               tsk = waiter->task;
-               get_task_struct(tsk);
-
-               /*
-                * Ensure calling get_task_struct() before setting the reader
-                * waiter to nil such that rwsem_down_read_failed() cannot
-                * race with do_exit() by always holding a reference count
-                * to the task to wakeup.
-                */
-               smp_store_release(&waiter->task, NULL);
-               /*
-                * Ensure issuing the wakeup (either by us or someone else)
-                * after setting the reader waiter to nil.
-                */
-               wake_q_add_safe(wake_q, tsk);
-       }
-}
-
-/*
- * This function must be called with the sem->wait_lock held to prevent
- * race conditions between checking the rwsem wait list and setting the
- * sem->count accordingly.
- */
-static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
-{
-       /*
-        * Avoid trying to acquire write lock if count isn't RWSEM_WAITING_BIAS.
-        */
-       if (count != RWSEM_WAITING_BIAS)
-               return false;
-
-       /*
-        * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there
-        * are other tasks on the wait list, we need to add on WAITING_BIAS.
-        */
-       count = list_is_singular(&sem->wait_list) ?
-                       RWSEM_ACTIVE_WRITE_BIAS :
-                       RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;
-
-       if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
-                                                       == RWSEM_WAITING_BIAS) {
-               rwsem_set_owner(sem);
-               return true;
-       }
-
-       return false;
-}
-
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-/*
- * Try to acquire write lock before the writer has been put on wait queue.
- */
-static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
-{
-       long count = atomic_long_read(&sem->count);
-
-       while (!count || count == RWSEM_WAITING_BIAS) {
-               if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
-                                       count + RWSEM_ACTIVE_WRITE_BIAS)) {
-                       rwsem_set_owner(sem);
-                       lockevent_inc(rwsem_opt_wlock);
-                       return true;
-               }
-       }
-       return false;
-}
-
-static inline bool owner_on_cpu(struct task_struct *owner)
-{
-       /*
-        * As lock holder preemption issue, we both skip spinning if
-        * task is not on cpu or its cpu is preempted
-        */
-       return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
-}
-
-static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
-{
-       struct task_struct *owner;
-       bool ret = true;
-
-       BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));
-
-       if (need_resched())
-               return false;
-
-       rcu_read_lock();
-       owner = READ_ONCE(sem->owner);
-       if (owner) {
-               ret = is_rwsem_owner_spinnable(owner) &&
-                     owner_on_cpu(owner);
-       }
-       rcu_read_unlock();
-       return ret;
-}
-
-/*
- * Return true only if we can still spin on the owner field of the rwsem.
- */
-static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
-{
-       struct task_struct *owner = READ_ONCE(sem->owner);
-
-       if (!is_rwsem_owner_spinnable(owner))
-               return false;
-
-       rcu_read_lock();
-       while (owner && (READ_ONCE(sem->owner) == owner)) {
-               /*
-                * Ensure we emit the owner->on_cpu, dereference _after_
-                * checking sem->owner still matches owner, if that fails,
-                * owner might point to free()d memory, if it still matches,
-                * the rcu_read_lock() ensures the memory stays valid.
-                */
-               barrier();
-
-               /*
-                * abort spinning when need_resched or owner is not running or
-                * owner's cpu is preempted.
-                */
-               if (need_resched() || !owner_on_cpu(owner)) {
-                       rcu_read_unlock();
-                       return false;
-               }
-
-               cpu_relax();
-       }
-       rcu_read_unlock();
-
-       /*
-        * If there is a new owner or the owner is not set, we continue
-        * spinning.
-        */
-       return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
-}
-
-static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
-{
-       bool taken = false;
-
-       preempt_disable();
-
-       /* sem->wait_lock should not be held when doing optimistic spinning */
-       if (!rwsem_can_spin_on_owner(sem))
-               goto done;
-
-       if (!osq_lock(&sem->osq))
-               goto done;
-
-       /*
-        * Optimistically spin on the owner field and attempt to acquire the
-        * lock whenever the owner changes. Spinning will be stopped when:
-        *  1) the owning writer isn't running; or
-        *  2) readers own the lock as we can't determine if they are
-        *     actively running or not.
-        */
-       while (rwsem_spin_on_owner(sem)) {
-               /*
-                * Try to acquire the lock
-                */
-               if (rwsem_try_write_lock_unqueued(sem)) {
-                       taken = true;
-                       break;
-               }
-
-               /*
-                * When there's no owner, we might have preempted between the
-                * owner acquiring the lock and setting the owner field. If
-                * we're an RT task that will live-lock because we won't let
-                * the owner complete.
-                */
-               if (!sem->owner && (need_resched() || rt_task(current)))
-                       break;
-
-               /*
-                * The cpu_relax() call is a compiler barrier which forces
-                * everything in this loop to be re-loaded. We don't need
-                * memory barriers as we'll eventually observe the right
-                * values at the cost of a few extra spins.
-                */
-               cpu_relax();
-       }
-       osq_unlock(&sem->osq);
-done:
-       preempt_enable();
-       lockevent_cond_inc(rwsem_opt_fail, !taken);
-       return taken;
-}
-
-/*
- * Return true if the rwsem has active spinner
- */
-static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
-{
-       return osq_is_locked(&sem->osq);
-}
-
-#else
-static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
-{
-       return false;
-}
-
-static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
-{
-       return false;
-}
-#endif
-
-/*
- * Wait for the read lock to be granted
- */
-static inline struct rw_semaphore __sched *
-__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
-{
-       long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
-       struct rwsem_waiter waiter;
-       DEFINE_WAKE_Q(wake_q);
-
-       waiter.task = current;
-       waiter.type = RWSEM_WAITING_FOR_READ;
-
-       raw_spin_lock_irq(&sem->wait_lock);
-       if (list_empty(&sem->wait_list)) {
-               /*
-                * In case the wait queue is empty and the lock isn't owned
-                * by a writer, this reader can exit the slowpath and return
-                * immediately as its RWSEM_ACTIVE_READ_BIAS has already
-                * been set in the count.
-                */
-               if (atomic_long_read(&sem->count) >= 0) {
-                       raw_spin_unlock_irq(&sem->wait_lock);
-                       rwsem_set_reader_owned(sem);
-                       lockevent_inc(rwsem_rlock_fast);
-                       return sem;
-               }
-               adjustment += RWSEM_WAITING_BIAS;
-       }
-       list_add_tail(&waiter.list, &sem->wait_list);
-
-       /* we're now waiting on the lock, but no longer actively locking */
-       count = atomic_long_add_return(adjustment, &sem->count);
-
-       /*
-        * If there are no active locks, wake the front queued process(es).
-        *
-        * If there are no writers and we are first in the queue,
-        * wake our own waiter to join the existing active readers !
-        */
-       if (count == RWSEM_WAITING_BIAS ||
-           (count > RWSEM_WAITING_BIAS &&
-            adjustment != -RWSEM_ACTIVE_READ_BIAS))
-               __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-
-       raw_spin_unlock_irq(&sem->wait_lock);
-       wake_up_q(&wake_q);
-
-       /* wait to be given the lock */
-       while (true) {
-               set_current_state(state);
-               if (!waiter.task)
-                       break;
-               if (signal_pending_state(state, current)) {
-                       raw_spin_lock_irq(&sem->wait_lock);
-                       if (waiter.task)
-                               goto out_nolock;
-                       raw_spin_unlock_irq(&sem->wait_lock);
-                       break;
-               }
-               schedule();
-               lockevent_inc(rwsem_sleep_reader);
-       }
-
-       __set_current_state(TASK_RUNNING);
-       lockevent_inc(rwsem_rlock);
-       return sem;
-out_nolock:
-       list_del(&waiter.list);
-       if (list_empty(&sem->wait_list))
-               atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
-       raw_spin_unlock_irq(&sem->wait_lock);
-       __set_current_state(TASK_RUNNING);
-       lockevent_inc(rwsem_rlock_fail);
-       return ERR_PTR(-EINTR);
-}
-
-__visible struct rw_semaphore * __sched
-rwsem_down_read_failed(struct rw_semaphore *sem)
-{
-       return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(rwsem_down_read_failed);
-
-__visible struct rw_semaphore * __sched
-rwsem_down_read_failed_killable(struct rw_semaphore *sem)
-{
-       return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
-}
-EXPORT_SYMBOL(rwsem_down_read_failed_killable);
-
-/*
- * Wait until we successfully acquire the write lock
- */
-static inline struct rw_semaphore *
-__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
-{
-       long count;
-       bool waiting = true; /* any queued threads before us */
-       struct rwsem_waiter waiter;
-       struct rw_semaphore *ret = sem;
-       DEFINE_WAKE_Q(wake_q);
-
-       /* undo write bias from down_write operation, stop active locking */
-       count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
-
-       /* do optimistic spinning and steal lock if possible */
-       if (rwsem_optimistic_spin(sem))
-               return sem;
-
-       /*
-        * Optimistic spinning failed, proceed to the slowpath
-        * and block until we can acquire the sem.
-        */
-       waiter.task = current;
-       waiter.type = RWSEM_WAITING_FOR_WRITE;
-
-       raw_spin_lock_irq(&sem->wait_lock);
-
-       /* account for this before adding a new element to the list */
-       if (list_empty(&sem->wait_list))
-               waiting = false;
-
-       list_add_tail(&waiter.list, &sem->wait_list);
-
-       /* we're now waiting on the lock, but no longer actively locking */
-       if (waiting) {
-               count = atomic_long_read(&sem->count);
-
-               /*
-                * If there were already threads queued before us and there are
-                * no active writers, the lock must be read owned; so we try to
-                * wake any read locks that were queued ahead of us.
-                */
-               if (count > RWSEM_WAITING_BIAS) {
-                       __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
-                       /*
-                        * The wakeup is normally called _after_ the wait_lock
-                        * is released, but given that we are proactively waking
-                        * readers we can deal with the wake_q overhead as it is
-                        * similar to releasing and taking the wait_lock again
-                        * for attempting rwsem_try_write_lock().
-                        */
-                       wake_up_q(&wake_q);
-
-                       /*
-                        * Reinitialize wake_q after use.
-                        */
-                       wake_q_init(&wake_q);
-               }
-
-       } else
-               count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);
-
-       /* wait until we successfully acquire the lock */
-       set_current_state(state);
-       while (true) {
-               if (rwsem_try_write_lock(count, sem))
-                       break;
-               raw_spin_unlock_irq(&sem->wait_lock);
-
-               /* Block until there are no active lockers. */
-               do {
-                       if (signal_pending_state(state, current))
-                               goto out_nolock;
-
-                       schedule();
-                       lockevent_inc(rwsem_sleep_writer);
-                       set_current_state(state);
-               } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
-
-               raw_spin_lock_irq(&sem->wait_lock);
-       }
-       __set_current_state(TASK_RUNNING);
-       list_del(&waiter.list);
-       raw_spin_unlock_irq(&sem->wait_lock);
-       lockevent_inc(rwsem_wlock);
-
-       return ret;
-
-out_nolock:
-       __set_current_state(TASK_RUNNING);
-       raw_spin_lock_irq(&sem->wait_lock);
-       list_del(&waiter.list);
-       if (list_empty(&sem->wait_list))
-               atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
-       else
-               __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-       raw_spin_unlock_irq(&sem->wait_lock);
-       wake_up_q(&wake_q);
-       lockevent_inc(rwsem_wlock_fail);
-
-       return ERR_PTR(-EINTR);
-}
-
-__visible struct rw_semaphore * __sched
-rwsem_down_write_failed(struct rw_semaphore *sem)
-{
-       return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(rwsem_down_write_failed);
-
-__visible struct rw_semaphore * __sched
-rwsem_down_write_failed_killable(struct rw_semaphore *sem)
-{
-       return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
-}
-EXPORT_SYMBOL(rwsem_down_write_failed_killable);
-
-/*
- * handle waking up a waiter on the semaphore
- * - up_read/up_write has decremented the active part of count if we come here
- */
-__visible
-struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
-{
-       unsigned long flags;
-       DEFINE_WAKE_Q(wake_q);
-
-       /*
-       * __rwsem_down_write_failed_common(sem)
-       *   rwsem_optimistic_spin(sem)
-       *     osq_unlock(sem->osq)
-       *   ...
-       *   atomic_long_add_return(&sem->count)
-       *
-       *      - VS -
-       *
-       *              __up_write()
-       *                if (atomic_long_sub_return_release(&sem->count) < 0)
-       *                  rwsem_wake(sem)
-       *                    osq_is_locked(&sem->osq)
-       *
-       * And __up_write() must observe !osq_is_locked() when it observes the
-       * atomic_long_add_return() in order to not miss a wakeup.
-       *
-       * This boils down to:
-       *
-       * [S.rel] X = 1                [RmW] r0 = (Y += 0)
-       *         MB                         RMB
-       * [RmW]   Y += 1               [L]   r1 = X
-       *
-       * exists (r0=1 /\ r1=0)
-       */
-       smp_rmb();
-
-       /*
-        * If a spinner is present, it is not necessary to do the wakeup.
-        * Try to do wakeup only if the trylock succeeds to minimize
-        * spinlock contention which may introduce too much delay in the
-        * unlock operation.
-        *
-        *    spinning writer           up_write/up_read caller
-        *    ---------------           -----------------------
-        * [S]   osq_unlock()           [L]   osq
-        *       MB                           RMB
-        * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
-        *
-        * Here, it is important to make sure that there won't be a missed
-        * wakeup while the rwsem is free and the only spinning writer goes
-        * to sleep without taking the rwsem. Even when the spinning writer
-        * is just going to break out of the waiting loop, it will still do
-        * a trylock in rwsem_down_write_failed() before sleeping. IOW, if
-        * rwsem_has_spinner() is true, it will guarantee at least one
-        * trylock attempt on the rwsem later on.
-        */
-       if (rwsem_has_spinner(sem)) {
-               /*
-                * The smp_rmb() here is to make sure that the spinner
-                * state is consulted before reading the wait_lock.
-                */
-               smp_rmb();
-               if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
-                       return sem;
-               goto locked;
-       }
-       raw_spin_lock_irqsave(&sem->wait_lock, flags);
-locked:
-
-       if (!list_empty(&sem->wait_list))
-               __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-
-       raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-       wake_up_q(&wake_q);
-
-       return sem;
-}
-EXPORT_SYMBOL(rwsem_wake);
-
-/*
- * downgrade a write lock into a read lock
- * - caller incremented waiting part of count and discovered it still negative
- * - just wake up any readers at the front of the queue
- */
-__visible
-struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
-{
-       unsigned long flags;
-       DEFINE_WAKE_Q(wake_q);
-
-       raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-       if (!list_empty(&sem->wait_list))
-               __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
-
-       raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-       wake_up_q(&wake_q);
-
-       return sem;
-}
-EXPORT_SYMBOL(rwsem_downgrade_wake);
index ccbf18f..37524a4 100644 (file)
  *
  * Written by David Howells (dhowells@redhat.com).
  * Derived from asm-i386/semaphore.h
+ *
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
+ * and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
+ *
+ * Rwsem count bit fields re-definition and rwsem rearchitecture by
+ * Waiman Long <longman@redhat.com> and
+ * Peter Zijlstra <peterz@infradead.org>.
  */
 
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/sched/task.h>
 #include <linux/sched/debug.h>
+#include <linux/sched/wake_q.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/clock.h>
 #include <linux/export.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
 
 #include "rwsem.h"
+#include "lock_events.h"
+
+/*
+ * The least significant 3 bits of the owner value has the following
+ * meanings when set.
+ *  - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
+ *  - Bit 1: RWSEM_RD_NONSPINNABLE - Readers cannot spin on this lock.
+ *  - Bit 2: RWSEM_WR_NONSPINNABLE - Writers cannot spin on this lock.
+ *
+ * When the rwsem is either owned by an anonymous writer, or it is
+ * reader-owned, but a spinning writer has timed out, both nonspinnable
+ * bits will be set to disable optimistic spinning by readers and writers.
+ * In the latter case, the last unlocking reader should then check the
+ * writer nonspinnable bit and clear it to give writers, but not readers,
+ * preference to acquire the lock via optimistic spinning. A similar
+ * action is also taken in the reader slowpath.
+ *
+ * When a writer acquires a rwsem, it puts its task_struct pointer
+ * into the owner field. It is cleared after an unlock.
+ *
+ * When a reader acquires a rwsem, it will also put its task_struct
+ * pointer into the owner field with the RWSEM_READER_OWNED bit set.
+ * On unlock, the owner field will largely be left untouched. So
+ * for a free or reader-owned rwsem, the owner value may contain
+ * information about the last reader that acquired the rwsem.
+ *
+ * That information may be helpful in debugging cases where the system
+ * seems to hang on a reader-owned rwsem, especially if only one reader
+ * is involved. Ideally we would like to track all the readers that own
+ * a rwsem, but the overhead is simply too big.
+ *
+ * Reader optimistic spinning is helpful when the reader critical section
+ * is short and there aren't that many readers around. It gives readers
+ * relative preference over writers. When a writer times out spinning
+ * on a reader-owned lock and sets the nonspinnable bits, there are two
+ * main reasons for that.
+ *
+ *  1) The reader critical section is long, perhaps the task sleeps after
+ *     acquiring the read lock.
+ *  2) There are just too many readers contending the lock causing it to
+ *     take a while to service all of them.
+ *
+ * In the former case, a long reader critical section will impede the
+ * progress of writers, which is usually more important for system
+ * performance. In the latter case, reader optimistic spinning tends to
+ * make the reader groups that acquire the lock together smaller,
+ * leading to more of them. That may hurt performance in some cases. In
+ * other words, the setting of nonspinnable bits indicates that reader
+ * optimistic spinning may not be helpful for those workloads that cause
+ * it.
+ *
+ * Therefore, any writers that had observed the setting of the writer
+ * nonspinnable bit for a given rwsem after they fail to acquire the lock
+ * via optimistic spinning will set the reader nonspinnable bit once they
+ * acquire the write lock. Similarly, readers that observe the reader
+ * nonspinnable bit at slowpath entry will set the reader nonspinnable
+ * bit when they acquire the read lock via the wakeup path.
+ *
+ * Once the reader nonspinnable bit is on, it will only be reset when
+ * a writer is able to acquire the rwsem in the fast path or somehow a
+ * reader or writer in the slowpath doesn't observe the nonspinnable bit.
+ *
+ * This is to discourage reader optimistic spinning on that particular
+ * rwsem and give writers more preference. This adaptive disabling of reader
+ * optimistic spinning will alleviate the negative side effect of this
+ * feature.
+ */
+#define RWSEM_READER_OWNED     (1UL << 0)
+#define RWSEM_RD_NONSPINNABLE  (1UL << 1)
+#define RWSEM_WR_NONSPINNABLE  (1UL << 2)
+#define RWSEM_NONSPINNABLE     (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
+#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
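To illustrate the owner-word encoding described above, here is a small standalone sketch (userspace C, 64-bit, flag values copied from the definitions above; the task address is made up) showing how the flag bits and the task_struct pointer share one word:

#include <stdio.h>

#define RWSEM_READER_OWNED     (1UL << 0)
#define RWSEM_RD_NONSPINNABLE  (1UL << 1)
#define RWSEM_WR_NONSPINNABLE  (1UL << 2)
#define RWSEM_NONSPINNABLE     (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

int main(void)
{
	/* Hypothetical task_struct address; its low 3 bits are free for flags. */
	unsigned long task  = 0xffff888012345600UL;
	unsigned long owner = task | RWSEM_READER_OWNED | RWSEM_RD_NONSPINNABLE;

	printf("task  = 0x%lx\n", owner & ~RWSEM_OWNER_FLAGS_MASK); /* the pointer */
	printf("flags = 0x%lx\n", owner & RWSEM_OWNER_FLAGS_MASK);  /* 0x3         */
	return 0;
}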
+
+#ifdef CONFIG_DEBUG_RWSEMS
+# define DEBUG_RWSEMS_WARN_ON(c, sem)  do {                    \
+       if (!debug_locks_silent &&                              \
+           WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
+               #c, atomic_long_read(&(sem)->count),            \
+               atomic_long_read(&(sem)->owner), (long)current, \
+               list_empty(&(sem)->wait_list) ? "" : "not "))   \
+                       debug_locks_off();                      \
+       } while (0)
+#else
+# define DEBUG_RWSEMS_WARN_ON(c, sem)
+#endif
+
+/*
+ * On 64-bit architectures, the bit definitions of the count are:
+ *
+ * Bit  0    - writer locked bit
+ * Bit  1    - waiters present bit
+ * Bit  2    - lock handoff bit
+ * Bits 3-7  - reserved
+ * Bits 8-62 - 55-bit reader count
+ * Bit  63   - read fail bit
+ *
+ * On 32-bit architectures, the bit definitions of the count are:
+ *
+ * Bit  0    - writer locked bit
+ * Bit  1    - waiters present bit
+ * Bit  2    - lock handoff bit
+ * Bits 3-7  - reserved
+ * Bits 8-30 - 23-bit reader count
+ * Bit  31   - read fail bit
+ *
+ * It is not likely that the most significant bit (read fail bit) will ever
+ * be set. This guard bit is still checked anyway in the down_read() fastpath
+ * just in case we need to use up more of the reader bits for other purposes
+ * in the future.
+ *
+ * atomic_long_fetch_add() is used to obtain the reader lock, whereas
+ * atomic_long_cmpxchg() will be used to obtain the writer lock.
+ *
+ * There are three places where the lock handoff bit may be set or cleared.
+ * 1) rwsem_mark_wake() for readers.
+ * 2) rwsem_try_write_lock() for writers.
+ * 3) Error path of rwsem_down_write_slowpath().
+ *
+ * For all the above cases, wait_lock will be held. A writer must also
+ * be the first one in the wait_list to be eligible for setting the handoff
+ * bit. So concurrent setting/clearing of handoff bit is not possible.
+ */
+#define RWSEM_WRITER_LOCKED    (1UL << 0)
+#define RWSEM_FLAG_WAITERS     (1UL << 1)
+#define RWSEM_FLAG_HANDOFF     (1UL << 2)
+#define RWSEM_FLAG_READFAIL    (1UL << (BITS_PER_LONG - 1))
+
+#define RWSEM_READER_SHIFT     8
+#define RWSEM_READER_BIAS      (1UL << RWSEM_READER_SHIFT)
+#define RWSEM_READER_MASK      (~(RWSEM_READER_BIAS - 1))
+#define RWSEM_WRITER_MASK      RWSEM_WRITER_LOCKED
+#define RWSEM_LOCK_MASK                (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
+#define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
+                                RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
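A small standalone sketch (64-bit layout assumed, bit values copied from the definitions above) of a few representative count values:

#include <stdio.h>

#define RWSEM_WRITER_LOCKED (1UL << 0)
#define RWSEM_FLAG_WAITERS  (1UL << 1)
#define RWSEM_FLAG_HANDOFF  (1UL << 2)
#define RWSEM_READER_SHIFT  8
#define RWSEM_READER_BIAS   (1UL << RWSEM_READER_SHIFT)

int main(void)
{
	printf("write-locked:                     0x%lx\n", RWSEM_WRITER_LOCKED);
	printf("three readers:                    0x%lx\n", 3 * RWSEM_READER_BIAS);
	printf("three readers + waiters:          0x%lx\n",
	       3 * RWSEM_READER_BIAS | RWSEM_FLAG_WAITERS);
	printf("write-locked + waiters + handoff: 0x%lx\n",
	       RWSEM_WRITER_LOCKED | RWSEM_FLAG_WAITERS | RWSEM_FLAG_HANDOFF);
	return 0;
}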
+
+/*
+ * All writes to owner are protected by WRITE_ONCE() to make sure that
+ * store tearing can't happen as optimistic spinners may read and use
+ * the owner value concurrently without lock. Reads from owner, however,
+ * may not need READ_ONCE() as long as the pointer value is only used
+ * for comparison and isn't being dereferenced.
+ */
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+       atomic_long_set(&sem->owner, (long)current);
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+       atomic_long_set(&sem->owner, 0);
+}
+
+/*
+ * Test the flags in the owner field.
+ */
+static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
+{
+       return atomic_long_read(&sem->owner) & flags;
+}
+
+/*
+ * The task_struct pointer of the last owning reader will be left in
+ * the owner field.
+ *
+ * Note that the owner value just indicates the task has owned the rwsem
+ * previously; it may not be the real owner or one of the real owners
+ * anymore when that field is examined, so take it with a grain of salt.
+ *
+ * The reader non-spinnable bit is preserved.
+ */
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+                                           struct task_struct *owner)
+{
+       unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
+               (atomic_long_read(&sem->owner) & RWSEM_RD_NONSPINNABLE);
+
+       atomic_long_set(&sem->owner, val);
+}
+
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
+{
+       __rwsem_set_reader_owned(sem, current);
+}
+
+/*
+ * Return true if the rwsem is owned by a reader.
+ */
+static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
+{
+#ifdef CONFIG_DEBUG_RWSEMS
+       /*
+        * Check the count to see if it is write-locked.
+        */
+       long count = atomic_long_read(&sem->count);
+
+       if (count & RWSEM_WRITER_MASK)
+               return false;
+#endif
+       return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
+}
+
+#ifdef CONFIG_DEBUG_RWSEMS
+/*
+ * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
+ * is a task pointer in the owner field of a reader-owned rwsem, it will be the
+ * real owner or one of the real owners. The only exception is when the
+ * unlock is done by up_read_non_owner().
+ */
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+       unsigned long val = atomic_long_read(&sem->owner);
+
+       while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
+               if (atomic_long_try_cmpxchg(&sem->owner, &val,
+                                           val & RWSEM_OWNER_FLAGS_MASK))
+                       return;
+       }
+}
+#else
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+}
+#endif
+
+/*
+ * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
+ * remains set. Otherwise, the operation will be aborted.
+ */
+static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
+{
+       unsigned long owner = atomic_long_read(&sem->owner);
+
+       do {
+               if (!(owner & RWSEM_READER_OWNED))
+                       break;
+               if (owner & RWSEM_NONSPINNABLE)
+                       break;
+       } while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
+                                         owner | RWSEM_NONSPINNABLE));
+}
+
+static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
+{
+       long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
+       if (WARN_ON_ONCE(cnt < 0))
+               rwsem_set_nonspinnable(sem);
+       return !(cnt & RWSEM_READ_FAILED_MASK);
+}
+
+/*
+ * Return just the real task structure pointer of the owner
+ */
+static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
+{
+       return (struct task_struct *)
+               (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
+}
+
+/*
+ * Return the real task structure pointer of the owner and the embedded
+ * flags in the owner. pflags must be non-NULL.
+ */
+static inline struct task_struct *
+rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
+{
+       unsigned long owner = atomic_long_read(&sem->owner);
+
+       *pflags = owner & RWSEM_OWNER_FLAGS_MASK;
+       return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
+}
+
+/*
+ * Guide to the rw_semaphore's count field.
+ *
+ * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
+ * by a writer.
+ *
+ * The lock is owned by readers when
+ * (1) the RWSEM_WRITER_LOCKED isn't set in count,
+ * (2) some of the reader bits are set in count, and
+ * (3) the owner field has the RWSEM_READER_OWNED bit set.
+ *
+ * Having some reader bits set is not enough to guarantee a reader-owned
+ * lock as the readers may be in the process of backing out from the count
+ * and a writer may have just released the lock. So another writer may steal
+ * the lock immediately after that.
+ */
+
+/*
+ * Initialize an rwsem:
+ */
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
+                 struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       /*
+        * Make sure we are not reinitializing a held semaphore:
+        */
+       debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+       lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
+       atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
+       raw_spin_lock_init(&sem->wait_lock);
+       INIT_LIST_HEAD(&sem->wait_list);
+       atomic_long_set(&sem->owner, 0L);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+       osq_lock_init(&sem->osq);
+#endif
+}
+EXPORT_SYMBOL(__init_rwsem);
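For reference, a minimal sketch of how the API implemented in this file is typically used from kernel code (the semaphore, data and function names here are hypothetical):

#include <linux/rwsem.h>

static DECLARE_RWSEM(example_sem);	/* statically initialized rwsem */
static int example_value;		/* hypothetical shared data     */

static int example_read(void)
{
	int v;

	down_read(&example_sem);	/* shared (reader) lock   */
	v = example_value;
	up_read(&example_sem);
	return v;
}

static void example_write(int v)
{
	down_write(&example_sem);	/* exclusive (writer) lock */
	example_value = v;
	up_write(&example_sem);
}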
+
+enum rwsem_waiter_type {
+       RWSEM_WAITING_FOR_WRITE,
+       RWSEM_WAITING_FOR_READ
+};
+
+struct rwsem_waiter {
+       struct list_head list;
+       struct task_struct *task;
+       enum rwsem_waiter_type type;
+       unsigned long timeout;
+       unsigned long last_rowner;
+};
+#define rwsem_first_waiter(sem) \
+       list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
+
+enum rwsem_wake_type {
+       RWSEM_WAKE_ANY,         /* Wake whatever's at head of wait list */
+       RWSEM_WAKE_READERS,     /* Wake readers only */
+       RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
+};
+
+enum writer_wait_state {
+       WRITER_NOT_FIRST,       /* Writer is not first in wait list */
+       WRITER_FIRST,           /* Writer is first in wait list     */
+       WRITER_HANDOFF          /* Writer is first & handoff needed */
+};
+
+/*
+ * The typical HZ value is either 250 or 1000. So set the minimum waiting
+ * time in the wait queue to at least 4ms, or 1 jiffy if that is longer,
+ * before initiating the handoff protocol.
+ */
+#define RWSEM_WAIT_TIMEOUT     DIV_ROUND_UP(HZ, 250)
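For example, DIV_ROUND_UP(HZ, 250) works out to:

	HZ = 1000:  4 jiffies  (4 ms)
	HZ =  250:  1 jiffy    (4 ms)
	HZ =  100:  1 jiffy    (10 ms, since a jiffy cannot be subdivided)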
+
+/*
+ * Magic number to batch-wakeup waiting readers, even when writers are
+ * also present in the queue. This both limits the amount of work the
+ * waking thread must do and also prevents any potential counter overflow,
+ * however unlikely.
+ */
+#define MAX_READERS_WAKEUP     0x100
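As a quick sanity check on the overflow claim: with RWSEM_READER_BIAS = (1UL << 8) and MAX_READERS_WAKEUP = 0x100, a single wakeup pass adds at most 0x100 * 0x100 = 0x10000 to the count, i.e. only 256 in the reader field, far below even the 23-bit reader count available on 32-bit architectures.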
+
+/*
+ * handle the lock release when processes blocked on it can now run
+ * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
+ *   have been set.
+ * - there must be someone on the queue
+ * - the wait_lock must be held by the caller
+ * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
+ *   to actually wakeup the blocked task(s) and drop the reference count,
+ *   preferably when the wait_lock is released
+ * - woken process blocks are discarded from the list after having their task zeroed
+ * - writers are only marked woken if downgrading is false
+ */
+static void rwsem_mark_wake(struct rw_semaphore *sem,
+                           enum rwsem_wake_type wake_type,
+                           struct wake_q_head *wake_q)
+{
+       struct rwsem_waiter *waiter, *tmp;
+       long oldcount, woken = 0, adjustment = 0;
+       struct list_head wlist;
+
+       lockdep_assert_held(&sem->wait_lock);
+
+       /*
+        * Take a peek at the queue head waiter such that we can determine
+        * the wakeup(s) to perform.
+        */
+       waiter = rwsem_first_waiter(sem);
+
+       if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
+               if (wake_type == RWSEM_WAKE_ANY) {
+                       /*
+                        * Mark writer at the front of the queue for wakeup.
+                        * Until the task is actually awoken later by
+                        * the caller, other writers are able to steal it.
+                        * Readers, on the other hand, will block as they
+                        * will notice the queued writer.
+                        */
+                       wake_q_add(wake_q, waiter->task);
+                       lockevent_inc(rwsem_wake_writer);
+               }
+
+               return;
+       }
+
+       /*
+        * No reader wakeup if there are too many of them already.
+        */
+       if (unlikely(atomic_long_read(&sem->count) < 0))
+               return;
+
+       /*
+        * Writers might steal the lock before we grant it to the next reader.
+        * We prefer to do the first reader grant before counting readers
+        * so we can bail out early if a writer stole the lock.
+        */
+       if (wake_type != RWSEM_WAKE_READ_OWNED) {
+               struct task_struct *owner;
+
+               adjustment = RWSEM_READER_BIAS;
+               oldcount = atomic_long_fetch_add(adjustment, &sem->count);
+               if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
+                       /*
+                        * When we've been waiting "too" long (for writers
+                        * to give up the lock), request a HANDOFF to
+                        * force the issue.
+                        */
+                       if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
+                           time_after(jiffies, waiter->timeout)) {
+                               adjustment -= RWSEM_FLAG_HANDOFF;
+                               lockevent_inc(rwsem_rlock_handoff);
+                       }
+
+                       atomic_long_add(-adjustment, &sem->count);
+                       return;
+               }
+               /*
+                * Set it to reader-owned to give spinners an early
+                * indication that readers now have the lock.
+                * The reader nonspinnable bit seen at slowpath entry of
+                * the reader is copied over.
+                */
+               owner = waiter->task;
+               if (waiter->last_rowner & RWSEM_RD_NONSPINNABLE) {
+                       owner = (void *)((unsigned long)owner | RWSEM_RD_NONSPINNABLE);
+                       lockevent_inc(rwsem_opt_norspin);
+               }
+               __rwsem_set_reader_owned(sem, owner);
+       }
+
+       /*
+        * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
+        * queue. We know that at least one reader will be woken, as we
+        * accounted for that above. Note we increment the 'active part' of
+        * the count by the number of readers before waking any processes up.
+        *
+        * This is an adaptation of the phase-fair R/W locks where at the
+        * reader phase (first waiter is a reader), all readers are eligible
+        * to acquire the lock at the same time irrespective of their order
+        * in the queue. The writers acquire the lock according to their
+        * order in the queue.
+        *
+        * We have to do wakeup in 2 passes to prevent the possibility that
+        * the reader count may be decremented before it is incremented. This
+        * is because a to-be-woken waiter may not have slept yet. So it
+        * may see waiter->task being cleared, finish its critical section and
+        * do an unlock before the reader count is incremented.
+        *
+        * 1) Collect the read-waiters in a separate list, count them and
+        *    fully increment the reader count in rwsem.
+        * 2) For each waiter in the new list, clear waiter->task and
+        *    put them into wake_q to be woken up later.
+        */
+       INIT_LIST_HEAD(&wlist);
+       list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+               if (waiter->type == RWSEM_WAITING_FOR_WRITE)
+                       continue;
+
+               woken++;
+               list_move_tail(&waiter->list, &wlist);
+
+               /*
+                * Limit # of readers that can be woken up per wakeup call.
+                */
+               if (woken >= MAX_READERS_WAKEUP)
+                       break;
+       }
+
+       adjustment = woken * RWSEM_READER_BIAS - adjustment;
+       lockevent_cond_inc(rwsem_wake_reader, woken);
+       if (list_empty(&sem->wait_list)) {
+               /* hit end of list above */
+               adjustment -= RWSEM_FLAG_WAITERS;
+       }
+
+       /*
+        * When we've woken a reader, we no longer need to force writers
+        * to give up the lock and we can clear HANDOFF.
+        */
+       if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
+               adjustment -= RWSEM_FLAG_HANDOFF;
+
+       if (adjustment)
+               atomic_long_add(adjustment, &sem->count);
+
+       /* 2nd pass */
+       list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+               struct task_struct *tsk;
+
+               tsk = waiter->task;
+               get_task_struct(tsk);
+
+               /*
+                * Ensure calling get_task_struct() before setting the reader
+                * waiter to nil such that rwsem_down_read_slowpath() cannot
+                * race with do_exit() by always holding a reference count
+                * to the task to wakeup.
+                */
+               smp_store_release(&waiter->task, NULL);
+               /*
+                * Ensure issuing the wakeup (either by us or someone else)
+                * after setting the reader waiter to nil.
+                */
+               wake_q_add_safe(wake_q, tsk);
+       }
+}
+
+/*
+ * This function must be called with the sem->wait_lock held to prevent
+ * race conditions between checking the rwsem wait list and setting the
+ * sem->count accordingly.
+ *
+ * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
+ * bit is set or the lock is acquired with handoff bit cleared.
+ */
+static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
+                                       enum writer_wait_state wstate)
+{
+       long count, new;
+
+       lockdep_assert_held(&sem->wait_lock);
+
+       count = atomic_long_read(&sem->count);
+       do {
+               bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
+
+               if (has_handoff && wstate == WRITER_NOT_FIRST)
+                       return false;
+
+               new = count;
+
+               if (count & RWSEM_LOCK_MASK) {
+                       if (has_handoff || (wstate != WRITER_HANDOFF))
+                               return false;
+
+                       new |= RWSEM_FLAG_HANDOFF;
+               } else {
+                       new |= RWSEM_WRITER_LOCKED;
+                       new &= ~RWSEM_FLAG_HANDOFF;
+
+                       if (list_is_singular(&sem->wait_list))
+                               new &= ~RWSEM_FLAG_WAITERS;
+               }
+       } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
+
+       /*
+        * We have either acquired the lock with handoff bit cleared or
+        * set the handoff bit.
+        */
+       if (new & RWSEM_FLAG_HANDOFF)
+               return false;
+
+       rwsem_set_owner(sem);
+       return true;
+}
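To make the handoff transition concrete under the bit definitions above: when the first waiter is a writer that has already set the handoff bit and all lockers have drained, the cmpxchg loop above turns

	count = RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS   (0x6)

into

	count = RWSEM_WRITER_LOCKED | RWSEM_FLAG_WAITERS  (0x3)

and additionally clears RWSEM_FLAG_WAITERS (leaving 0x1) when that writer was the only waiter left.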
+
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+/*
+ * Try to acquire the read lock before the reader is put on the wait queue.
+ * Lock acquisition isn't allowed if the rwsem is write-locked or a writer
+ * handoff is ongoing.
+ */
+static inline bool rwsem_try_read_lock_unqueued(struct rw_semaphore *sem)
+{
+       long count = atomic_long_read(&sem->count);
+
+       if (count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))
+               return false;
+
+       count = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);
+       if (!(count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
+               rwsem_set_reader_owned(sem);
+               lockevent_inc(rwsem_opt_rlock);
+               return true;
+       }
+
+       /* Back out the change */
+       atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
+       return false;
+}
+
+/*
+ * Try to acquire the write lock before the writer has been put on the wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+       long count = atomic_long_read(&sem->count);
+
+       while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
+               if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
+                                       count | RWSEM_WRITER_LOCKED)) {
+                       rwsem_set_owner(sem);
+                       lockevent_inc(rwsem_opt_wlock);
+                       return true;
+               }
+       }
+       return false;
+}
+
+static inline bool owner_on_cpu(struct task_struct *owner)
+{
+       /*
+        * Due to lock holder preemption, skip spinning if the task is not
+        * on a CPU or its CPU is preempted.
+        */
+       return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
+                                          unsigned long nonspinnable)
+{
+       struct task_struct *owner;
+       unsigned long flags;
+       bool ret = true;
+
+       BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
+
+       if (need_resched()) {
+               lockevent_inc(rwsem_opt_fail);
+               return false;
+       }
+
+       preempt_disable();
+       rcu_read_lock();
+       owner = rwsem_owner_flags(sem, &flags);
+       if ((flags & nonspinnable) || (owner && !owner_on_cpu(owner)))
+               ret = false;
+       rcu_read_unlock();
+       preempt_enable();
+
+       lockevent_cond_inc(rwsem_opt_fail, !ret);
+       return ret;
+}
+
+/*
+ * The rwsem_spin_on_owner() function returns the following 4 values
+ * depending on the lock owner state.
+ *   OWNER_NULL  : owner is currently NULL
+ *   OWNER_WRITER: when owner changes and is a writer
+ *   OWNER_READER: when owner changes and the new owner may be a reader.
+ *   OWNER_NONSPINNABLE:
+ *                when optimistic spinning has to stop because either the
+ *                owner stops running, is unknown, or its timeslice has
+ *                been used up.
+ */
+enum owner_state {
+       OWNER_NULL              = 1 << 0,
+       OWNER_WRITER            = 1 << 1,
+       OWNER_READER            = 1 << 2,
+       OWNER_NONSPINNABLE      = 1 << 3,
+};
+#define OWNER_SPINNABLE                (OWNER_NULL | OWNER_WRITER | OWNER_READER)
+
+static inline enum owner_state
+rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long nonspinnable)
+{
+       if (flags & nonspinnable)
+               return OWNER_NONSPINNABLE;
+
+       if (flags & RWSEM_READER_OWNED)
+               return OWNER_READER;
+
+       return owner ? OWNER_WRITER : OWNER_NULL;
+}
+
+static noinline enum owner_state
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
+{
+       struct task_struct *new, *owner;
+       unsigned long flags, new_flags;
+       enum owner_state state;
+
+       owner = rwsem_owner_flags(sem, &flags);
+       state = rwsem_owner_state(owner, flags, nonspinnable);
+       if (state != OWNER_WRITER)
+               return state;
+
+       rcu_read_lock();
+       for (;;) {
+               if (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF) {
+                       state = OWNER_NONSPINNABLE;
+                       break;
+               }
+
+               new = rwsem_owner_flags(sem, &new_flags);
+               if ((new != owner) || (new_flags != flags)) {
+                       state = rwsem_owner_state(new, new_flags, nonspinnable);
+                       break;
+               }
+
+               /*
+                * Ensure we emit the owner->on_cpu, dereference _after_
+                * checking sem->owner still matches owner, if that fails,
+                * owner might point to free()d memory, if it still matches,
+                * the rcu_read_lock() ensures the memory stays valid.
+                */
+               barrier();
+
+               if (need_resched() || !owner_on_cpu(owner)) {
+                       state = OWNER_NONSPINNABLE;
+                       break;
+               }
+
+               cpu_relax();
+       }
+       rcu_read_unlock();
+
+       return state;
+}
+
+/*
+ * Calculate reader-owned rwsem spinning threshold for writer
+ *
+ * The more readers own the rwsem, the longer it will take for them to
+ * wind down and free the rwsem. So the empirical formula used to
+ * determine the actual spinning time limit here is:
+ *
+ *   Spinning threshold = (10 + nr_readers/2)us
+ *
+ * The limit is capped to a maximum of 25us (30 readers). This is just
+ * a heuristic and is subject to change in the future.
+ */
+static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
+{
+       long count = atomic_long_read(&sem->count);
+       int readers = count >> RWSEM_READER_SHIFT;
+       u64 delta;
+
+       if (readers > 30)
+               readers = 30;
+       delta = (20 + readers) * NSEC_PER_USEC / 2;
+
+       return sched_clock() + delta;
+}
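Plugging numbers into the formula above (with NSEC_PER_USEC = 1000):

	 8 readers: delta = (20 +  8) * 1000 / 2 = 14,000 ns = 14 us  (10 + 8/2)
	30 readers: delta = (20 + 30) * 1000 / 2 = 25,000 ns = 25 us  (the cap)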
+
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
+{
+       bool taken = false;
+       int prev_owner_state = OWNER_NULL;
+       int loop = 0;
+       u64 rspin_threshold = 0;
+       unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE
+                                          : RWSEM_RD_NONSPINNABLE;
+
+       preempt_disable();
+
+       /* sem->wait_lock should not be held when doing optimistic spinning */
+       if (!osq_lock(&sem->osq))
+               goto done;
+
+       /*
+        * Optimistically spin on the owner field and attempt to acquire the
+        * lock whenever the owner changes. Spinning will be stopped when:
+        *  1) the owning writer isn't running; or
+        *  2) readers own the lock and spinning time has exceeded limit.
+        */
+       for (;;) {
+               enum owner_state owner_state;
+
+               owner_state = rwsem_spin_on_owner(sem, nonspinnable);
+               if (!(owner_state & OWNER_SPINNABLE))
+                       break;
+
+               /*
+                * Try to acquire the lock
+                */
+               taken = wlock ? rwsem_try_write_lock_unqueued(sem)
+                             : rwsem_try_read_lock_unqueued(sem);
+
+               if (taken)
+                       break;
+
+               /*
+                * Time-based reader-owned rwsem optimistic spinning
+                */
+               if (wlock && (owner_state == OWNER_READER)) {
+                       /*
+                        * Re-initialize rspin_threshold whenever
+                        * the owner state changes from non-reader to reader.
+                        * This allows a writer to steal the lock in between
+                        * 2 reader phases and have the threshold reset at
+                        * the beginning of the 2nd reader phase.
+                        */
+                       if (prev_owner_state != OWNER_READER) {
+                               if (rwsem_test_oflags(sem, nonspinnable))
+                                       break;
+                               rspin_threshold = rwsem_rspin_threshold(sem);
+                               loop = 0;
+                       }
+
+                       /*
+                        * Check time threshold once every 16 iterations to
+                        * avoid calling sched_clock() too frequently so
+                        * as to reduce the average latency between the times
+                        * when the lock becomes free and when the spinner
+                        * is ready to do a trylock.
+                        */
+                       else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
+                               rwsem_set_nonspinnable(sem);
+                               lockevent_inc(rwsem_opt_nospin);
+                               break;
+                       }
+               }
+
+               /*
+                * An RT task cannot do optimistic spinning if it cannot
+                * be sure the lock holder is running, or a live-lock may
+                * happen if the current task and the lock holder happen
+                * to run on the same CPU. However, aborting optimistic
+                * spinning whenever a NULL owner is detected may miss some
+                * opportunities where spinning can continue without causing
+                * a problem.
+                *
+                * There are 2 possible cases where an RT task may be able
+                * to continue spinning.
+                *
+                * 1) The lock owner is in the process of releasing the
+                *    lock, sem->owner is cleared but the lock has not
+                *    been released yet.
+                * 2) The lock was free and the owner cleared, but another
+                *    task just comes in and acquires the lock before
+                *    we try to get it. The new owner may be a spinnable
+                *    writer.
+                *
+                * To take advantage of the two scenarios listed above, the RT
+                * task is made to retry one more time to see if it can
+                * acquire the lock or continue spinning on the new owning
+                * writer. Of course, if the time lag is long enough or the
+                * new owner is not a writer or not spinnable, the RT task will
+                * quit spinning.
+                *
+                * If the owner is a writer, the need_resched() check is
+                * done inside rwsem_spin_on_owner(). If the owner is not
+                * a writer, need_resched() check needs to be done here.
+                */
+               if (owner_state != OWNER_WRITER) {
+                       if (need_resched())
+                               break;
+                       if (rt_task(current) &&
+                          (prev_owner_state != OWNER_WRITER))
+                               break;
+               }
+               prev_owner_state = owner_state;
+
+               /*
+                * The cpu_relax() call is a compiler barrier which forces
+                * everything in this loop to be re-loaded. We don't need
+                * memory barriers as we'll eventually observe the right
+                * values at the cost of a few extra spins.
+                */
+               cpu_relax();
+       }
+       osq_unlock(&sem->osq);
+done:
+       preempt_enable();
+       lockevent_cond_inc(rwsem_opt_fail, !taken);
+       return taken;
+}
+
+/*
+ * Clear the owner's RWSEM_WR_NONSPINNABLE bit if it is set. This should
+ * only be called when the reader count reaches 0.
+ *
+ * This gives writers a better chance to acquire the rwsem first, before
+ * readers, when the rwsem has been held by readers for a relatively long
+ * period of time. A race can happen where an optimistic spinner may have
+ * just stolen the rwsem and set the owner, but just clearing the
+ * RWSEM_WR_NONSPINNABLE bit will do no harm anyway.
+ */
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem)
+{
+       if (rwsem_test_oflags(sem, RWSEM_WR_NONSPINNABLE))
+               atomic_long_andnot(RWSEM_WR_NONSPINNABLE, &sem->owner);
+}
+
+/*
+ * This function is called when the reader fails to acquire the lock via
+ * optimistic spinning. In this case we will still attempt to do a trylock
+ * if comparing the current rwsem state with the state at slowpath entry
+ * indicates that the reader is still in a valid reader phase.
+ * This happens when the following conditions are true:
+ *
+ * 1) The lock is currently reader owned, and
+ * 2) The lock was previously not reader-owned or the last read owner has changed.
+ *
+ * In the former case, we have transitioned from a writer phase to a
+ * reader-phase while spinning. In the latter case, it means the reader
+ * phase hasn't ended when we entered the optimistic spinning loop. In
+ * both cases, the reader is eligible to acquire the lock. This is the
+ * secondary path where a read lock is acquired optimistically.
+ *
+ * The reader non-spinnable bit wasn't set at the time of entry or we would
+ * not be here at all.
+ */
+static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
+                                             unsigned long last_rowner)
+{
+       unsigned long owner = atomic_long_read(&sem->owner);
+
+       if (!(owner & RWSEM_READER_OWNED))
+               return false;
+
+       if (((owner ^ last_rowner) & ~RWSEM_OWNER_FLAGS_MASK) &&
+           rwsem_try_read_lock_unqueued(sem)) {
+               lockevent_inc(rwsem_opt_rlock2);
+               lockevent_add(rwsem_opt_fail, -1);
+               return true;
+       }
+       return false;
+}
+#else
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
+                                          unsigned long nonspinnable)
+{
+       return false;
+}
+
+static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
+{
+       return false;
+}
+
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem) { }
+
+static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
+                                             unsigned long last_rowner)
+{
+       return false;
+}
+#endif
+
+/*
+ * Wait for the read lock to be granted
+ */
+static struct rw_semaphore __sched *
+rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
+{
+       long count, adjustment = -RWSEM_READER_BIAS;
+       struct rwsem_waiter waiter;
+       DEFINE_WAKE_Q(wake_q);
+       bool wake = false;
+
+       /*
+        * Save the current read-owner of rwsem, if available, and the
+        * reader nonspinnable bit.
+        */
+       waiter.last_rowner = atomic_long_read(&sem->owner);
+       if (!(waiter.last_rowner & RWSEM_READER_OWNED))
+               waiter.last_rowner &= RWSEM_RD_NONSPINNABLE;
+
+       if (!rwsem_can_spin_on_owner(sem, RWSEM_RD_NONSPINNABLE))
+               goto queue;
+
+       /*
+        * Undo read bias from down_read() and do optimistic spinning.
+        */
+       atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
+       adjustment = 0;
+       if (rwsem_optimistic_spin(sem, false)) {
+               /*
+                * Wake up other readers in the wait list if the front
+                * waiter is a reader.
+                */
+               if ((atomic_long_read(&sem->count) & RWSEM_FLAG_WAITERS)) {
+                       raw_spin_lock_irq(&sem->wait_lock);
+                       if (!list_empty(&sem->wait_list))
+                               rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
+                                               &wake_q);
+                       raw_spin_unlock_irq(&sem->wait_lock);
+                       wake_up_q(&wake_q);
+               }
+               return sem;
+       } else if (rwsem_reader_phase_trylock(sem, waiter.last_rowner)) {
+               return sem;
+       }
+
+queue:
+       waiter.task = current;
+       waiter.type = RWSEM_WAITING_FOR_READ;
+       waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+
+       raw_spin_lock_irq(&sem->wait_lock);
+       if (list_empty(&sem->wait_list)) {
+               /*
+                * In case the wait queue is empty and the lock isn't owned
+                * by a writer and the handoff bit isn't set, this reader can
+                * exit the slowpath and return immediately as its
+                * RWSEM_READER_BIAS has already been set in the count.
+                */
+               if (adjustment && !(atomic_long_read(&sem->count) &
+                    (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
+                       raw_spin_unlock_irq(&sem->wait_lock);
+                       rwsem_set_reader_owned(sem);
+                       lockevent_inc(rwsem_rlock_fast);
+                       return sem;
+               }
+               adjustment += RWSEM_FLAG_WAITERS;
+       }
+       list_add_tail(&waiter.list, &sem->wait_list);
+
+       /* we're now waiting on the lock, but no longer actively locking */
+       if (adjustment)
+               count = atomic_long_add_return(adjustment, &sem->count);
+       else
+               count = atomic_long_read(&sem->count);
+
+       /*
+        * If there are no active locks, wake the front queued process(es).
+        *
+        * If there are no writers and we are first in the queue,
+        * wake our own waiter to join the existing active readers !
+        */
+       if (!(count & RWSEM_LOCK_MASK)) {
+               clear_wr_nonspinnable(sem);
+               wake = true;
+       }
+       if (wake || (!(count & RWSEM_WRITER_MASK) &&
+                   (adjustment & RWSEM_FLAG_WAITERS)))
+               rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+
+       raw_spin_unlock_irq(&sem->wait_lock);
+       wake_up_q(&wake_q);
+
+       /* wait to be given the lock */
+       while (true) {
+               set_current_state(state);
+               if (!waiter.task)
+                       break;
+               if (signal_pending_state(state, current)) {
+                       raw_spin_lock_irq(&sem->wait_lock);
+                       if (waiter.task)
+                               goto out_nolock;
+                       raw_spin_unlock_irq(&sem->wait_lock);
+                       break;
+               }
+               schedule();
+               lockevent_inc(rwsem_sleep_reader);
+       }
+
+       __set_current_state(TASK_RUNNING);
+       lockevent_inc(rwsem_rlock);
+       return sem;
+out_nolock:
+       list_del(&waiter.list);
+       if (list_empty(&sem->wait_list)) {
+               atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
+                                  &sem->count);
+       }
+       raw_spin_unlock_irq(&sem->wait_lock);
+       __set_current_state(TASK_RUNNING);
+       lockevent_inc(rwsem_rlock_fail);
+       return ERR_PTR(-EINTR);
+}
+
+/*
+ * This function is called by a write lock owner. So the owner value
+ * won't get changed by others.
+ */
+static inline void rwsem_disable_reader_optspin(struct rw_semaphore *sem,
+                                               bool disable)
+{
+       if (unlikely(disable)) {
+               atomic_long_or(RWSEM_RD_NONSPINNABLE, &sem->owner);
+               lockevent_inc(rwsem_opt_norspin);
+       }
+}
+
+/*
+ * Wait until we successfully acquire the write lock
+ */
+static struct rw_semaphore *
+rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+{
+       long count;
+       bool disable_rspin;
+       enum writer_wait_state wstate;
+       struct rwsem_waiter waiter;
+       struct rw_semaphore *ret = sem;
+       DEFINE_WAKE_Q(wake_q);
+
+       /* do optimistic spinning and steal lock if possible */
+       if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
+           rwsem_optimistic_spin(sem, true))
+               return sem;
+
+       /*
+        * Disable reader optimistic spinning for this rwsem after
+        * acquiring the write lock when the setting of the nonspinnable
+        * bits is observed.
+        */
+       disable_rspin = atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE;
+
+       /*
+        * Optimistic spinning failed, proceed to the slowpath
+        * and block until we can acquire the sem.
+        */
+       waiter.task = current;
+       waiter.type = RWSEM_WAITING_FOR_WRITE;
+       waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+
+       raw_spin_lock_irq(&sem->wait_lock);
+
+       /* account for this before adding a new element to the list */
+       wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
+
+       list_add_tail(&waiter.list, &sem->wait_list);
+
+       /* we're now waiting on the lock */
+       if (wstate == WRITER_NOT_FIRST) {
+               count = atomic_long_read(&sem->count);
+
+               /*
+                * If there were already threads queued before us and:
+                *  1) there are no active locks, wake the front
+                *     queued process(es) as the handoff bit might be set.
+                *  2) there are no active writers and some readers, the lock
+                *     must be read owned; so we try to wake any read lock
+                *     waiters that were queued ahead of us.
+                */
+               if (count & RWSEM_WRITER_MASK)
+                       goto wait;
+
+               rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
+                                       ? RWSEM_WAKE_READERS
+                                       : RWSEM_WAKE_ANY, &wake_q);
+
+               if (!wake_q_empty(&wake_q)) {
+                       /*
+                        * We want to minimize wait_lock hold time especially
+                        * when a large number of readers are to be woken up.
+                        */
+                       raw_spin_unlock_irq(&sem->wait_lock);
+                       wake_up_q(&wake_q);
+                       wake_q_init(&wake_q);   /* Used again, reinit */
+                       raw_spin_lock_irq(&sem->wait_lock);
+               }
+       } else {
+               atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
+       }
+
+wait:
+       /* wait until we successfully acquire the lock */
+       set_current_state(state);
+       while (true) {
+               if (rwsem_try_write_lock(sem, wstate))
+                       break;
+
+               raw_spin_unlock_irq(&sem->wait_lock);
+
+               /* Block until there are no active lockers. */
+               for (;;) {
+                       if (signal_pending_state(state, current))
+                               goto out_nolock;
+
+                       schedule();
+                       lockevent_inc(rwsem_sleep_writer);
+                       set_current_state(state);
+                       /*
+                        * If the HANDOFF bit is set, unconditionally do
+                        * a trylock.
+                        */
+                       if (wstate == WRITER_HANDOFF)
+                               break;
+
+                       if ((wstate == WRITER_NOT_FIRST) &&
+                           (rwsem_first_waiter(sem) == &waiter))
+                               wstate = WRITER_FIRST;
+
+                       count = atomic_long_read(&sem->count);
+                       if (!(count & RWSEM_LOCK_MASK))
+                               break;
+
+                       /*
+                        * The setting of the handoff bit is deferred
+                        * until rwsem_try_write_lock() is called.
+                        */
+                       if ((wstate == WRITER_FIRST) && (rt_task(current) ||
+                           time_after(jiffies, waiter.timeout))) {
+                               wstate = WRITER_HANDOFF;
+                               lockevent_inc(rwsem_wlock_handoff);
+                               break;
+                       }
+               }
+
+               raw_spin_lock_irq(&sem->wait_lock);
+       }
+       __set_current_state(TASK_RUNNING);
+       list_del(&waiter.list);
+       rwsem_disable_reader_optspin(sem, disable_rspin);
+       raw_spin_unlock_irq(&sem->wait_lock);
+       lockevent_inc(rwsem_wlock);
+
+       return ret;
+
+out_nolock:
+       __set_current_state(TASK_RUNNING);
+       raw_spin_lock_irq(&sem->wait_lock);
+       list_del(&waiter.list);
+
+       if (unlikely(wstate == WRITER_HANDOFF))
+               atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);
+
+       if (list_empty(&sem->wait_list))
+               atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
+       else
+               rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+       raw_spin_unlock_irq(&sem->wait_lock);
+       wake_up_q(&wake_q);
+       lockevent_inc(rwsem_wlock_fail);
+
+       return ERR_PTR(-EINTR);
+}
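
For reference, a minimal caller-side sketch (not part of this patch; "my_sem" and the helper name are hypothetical) of how the ERR_PTR(-EINTR) produced by this slowpath surfaces through the killable write API:

#include <linux/rwsem.h>
#include <linux/errno.h>

static DECLARE_RWSEM(my_sem);			/* hypothetical rwsem */

static int update_shared_state(void)
{
	int ret;

	ret = down_write_killable(&my_sem);	/* -EINTR if a fatal signal arrived while sleeping */
	if (ret)
		return ret;

	/* ... modify the state protected by my_sem ... */

	up_write(&my_sem);
	return 0;
}
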
+
+/*
+ * handle waking up a waiter on the semaphore
+ * - up_read/up_write have decremented the active part of the count if we get here
+ */
+static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
+{
+       unsigned long flags;
+       DEFINE_WAKE_Q(wake_q);
+
+       raw_spin_lock_irqsave(&sem->wait_lock, flags);
+
+       if (!list_empty(&sem->wait_list))
+               rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+
+       raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+       wake_up_q(&wake_q);
+
+       return sem;
+}
+
+/*
+ * downgrade a write lock into a read lock
+ * - caller has converted the write lock into a read lock and seen waiters queued
+ * - just wake up any readers at the front of the queue
+ */
+static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+{
+       unsigned long flags;
+       DEFINE_WAKE_Q(wake_q);
+
+       raw_spin_lock_irqsave(&sem->wait_lock, flags);
+
+       if (!list_empty(&sem->wait_list))
+               rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
+
+       raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+       wake_up_q(&wake_q);
+
+       return sem;
+}
+
+/*
+ * lock for reading
+ */
+inline void __down_read(struct rw_semaphore *sem)
+{
+       if (!rwsem_read_trylock(sem)) {
+               rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
+               DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+       } else {
+               rwsem_set_reader_owned(sem);
+       }
+}
+
+static inline int __down_read_killable(struct rw_semaphore *sem)
+{
+       if (!rwsem_read_trylock(sem)) {
+               if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
+                       return -EINTR;
+               DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+       } else {
+               rwsem_set_reader_owned(sem);
+       }
+       return 0;
+}
+
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+       /*
+        * Optimize for the case when the rwsem is not locked at all.
+        */
+       long tmp = RWSEM_UNLOCKED_VALUE;
+
+       do {
+               if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+                                       tmp + RWSEM_READER_BIAS)) {
+                       rwsem_set_reader_owned(sem);
+                       return 1;
+               }
+       } while (!(tmp & RWSEM_READ_FAILED_MASK));
+       return 0;
+}
+
+/*
+ * lock for writing
+ */
+static inline void __down_write(struct rw_semaphore *sem)
+{
+       long tmp = RWSEM_UNLOCKED_VALUE;
+
+       if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+                                                     RWSEM_WRITER_LOCKED)))
+               rwsem_down_write_slowpath(sem, TASK_UNINTERRUPTIBLE);
+       else
+               rwsem_set_owner(sem);
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+       long tmp = RWSEM_UNLOCKED_VALUE;
+
+       if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+                                                     RWSEM_WRITER_LOCKED))) {
+               if (IS_ERR(rwsem_down_write_slowpath(sem, TASK_KILLABLE)))
+                       return -EINTR;
+       } else {
+               rwsem_set_owner(sem);
+       }
+       return 0;
+}
+
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+       long tmp = RWSEM_UNLOCKED_VALUE;
+
+       if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+                                           RWSEM_WRITER_LOCKED)) {
+               rwsem_set_owner(sem);
+               return true;
+       }
+       return false;
+}
+
+/*
+ * unlock after reading
+ */
+inline void __up_read(struct rw_semaphore *sem)
+{
+       long tmp;
+
+       DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+       rwsem_clear_reader_owned(sem);
+       tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
+       DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
+       if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
+                     RWSEM_FLAG_WAITERS)) {
+               clear_wr_nonspinnable(sem);
+               rwsem_wake(sem, tmp);
+       }
+}
+
+/*
+ * unlock after writing
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+       long tmp;
+
+       /*
+        * sem->owner may differ from current if the ownership is transferred
+        * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
+        */
+       DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
+                           !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
+       rwsem_clear_owner(sem);
+       tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
+       if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+               rwsem_wake(sem, tmp);
+}
+
+/*
+ * downgrade write lock to read lock
+ */
+static inline void __downgrade_write(struct rw_semaphore *sem)
+{
+       long tmp;
+
+       /*
+        * When downgrading from exclusive to shared ownership,
+        * anything inside the write-locked region cannot leak
+        * into the read side. In contrast, anything in the
+        * read-locked region is ok to be re-ordered into the
+        * write side. As such, rely on RELEASE semantics.
+        */
+       DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
+       tmp = atomic_long_fetch_add_release(
+               -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
+       rwsem_set_reader_owned(sem);
+       if (tmp & RWSEM_FLAG_WAITERS)
+               rwsem_downgrade_wake(sem);
+}
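
A short illustrative sketch of the downgrade pattern the release-ordering comment above is protecting (not part of this patch; "cache_sem" and the function name are hypothetical): publish under the write lock, then continue reading alongside other readers.

#include <linux/rwsem.h>

static DECLARE_RWSEM(cache_sem);		/* hypothetical rwsem */

static void rebuild_then_read_cache(void)
{
	down_write(&cache_sem);			/* exclusive: rebuild the cache */
	/* ... populate ... */
	downgrade_write(&cache_sem);		/* shared from here; queued readers may now enter */
	/* ... consume what was just built, concurrently with other readers ... */
	up_read(&cache_sem);
}
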
 
 /*
  * lock for reading
@@ -25,7 +1446,6 @@ void __sched down_read(struct rw_semaphore *sem)
 
        LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
 }
-
 EXPORT_SYMBOL(down_read);
 
 int __sched down_read_killable(struct rw_semaphore *sem)
@@ -40,7 +1460,6 @@ int __sched down_read_killable(struct rw_semaphore *sem)
 
        return 0;
 }
-
 EXPORT_SYMBOL(down_read_killable);
 
 /*
@@ -54,7 +1473,6 @@ int down_read_trylock(struct rw_semaphore *sem)
                rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
        return ret;
 }
-
 EXPORT_SYMBOL(down_read_trylock);
 
 /*
@@ -64,10 +1482,8 @@ void __sched down_write(struct rw_semaphore *sem)
 {
        might_sleep();
        rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
-
        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
 }
-
 EXPORT_SYMBOL(down_write);
 
 /*
@@ -78,14 +1494,14 @@ int __sched down_write_killable(struct rw_semaphore *sem)
        might_sleep();
        rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
 
-       if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
+       if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
+                                 __down_write_killable)) {
                rwsem_release(&sem->dep_map, 1, _RET_IP_);
                return -EINTR;
        }
 
        return 0;
 }
-
 EXPORT_SYMBOL(down_write_killable);
 
 /*
@@ -100,7 +1516,6 @@ int down_write_trylock(struct rw_semaphore *sem)
 
        return ret;
 }
-
 EXPORT_SYMBOL(down_write_trylock);
 
 /*
@@ -109,10 +1524,8 @@ EXPORT_SYMBOL(down_write_trylock);
 void up_read(struct rw_semaphore *sem)
 {
        rwsem_release(&sem->dep_map, 1, _RET_IP_);
-
        __up_read(sem);
 }
-
 EXPORT_SYMBOL(up_read);
 
 /*
@@ -121,10 +1534,8 @@ EXPORT_SYMBOL(up_read);
 void up_write(struct rw_semaphore *sem)
 {
        rwsem_release(&sem->dep_map, 1, _RET_IP_);
-
        __up_write(sem);
 }
-
 EXPORT_SYMBOL(up_write);
 
 /*
@@ -133,10 +1544,8 @@ EXPORT_SYMBOL(up_write);
 void downgrade_write(struct rw_semaphore *sem)
 {
        lock_downgrade(&sem->dep_map, _RET_IP_);
-
        __downgrade_write(sem);
 }
-
 EXPORT_SYMBOL(downgrade_write);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -145,40 +1554,32 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
 {
        might_sleep();
        rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
-
        LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
 }
-
 EXPORT_SYMBOL(down_read_nested);
 
 void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
 {
        might_sleep();
        rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
-
        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
 }
-
 EXPORT_SYMBOL(_down_write_nest_lock);
 
 void down_read_non_owner(struct rw_semaphore *sem)
 {
        might_sleep();
-
        __down_read(sem);
        __rwsem_set_reader_owned(sem, NULL);
 }
-
 EXPORT_SYMBOL(down_read_non_owner);
 
 void down_write_nested(struct rw_semaphore *sem, int subclass)
 {
        might_sleep();
        rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
-
        LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
 }
-
 EXPORT_SYMBOL(down_write_nested);
 
 int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
@@ -186,23 +1587,21 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
        might_sleep();
        rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
 
-       if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
+       if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
+                                 __down_write_killable)) {
                rwsem_release(&sem->dep_map, 1, _RET_IP_);
                return -EINTR;
        }
 
        return 0;
 }
-
 EXPORT_SYMBOL(down_write_killable_nested);
 
 void up_read_non_owner(struct rw_semaphore *sem)
 {
-       DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
-                               sem);
+       DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
        __up_read(sem);
 }
-
 EXPORT_SYMBOL(up_read_non_owner);
 
 #endif
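
A hedged sketch of the _nested variants exported above (not part of this patch; the "node" structure and function name are hypothetical): when two rwsems of the same lock class are taken in a fixed parent-then-child order, the nested annotation keeps lockdep from reporting a false deadlock.

#include <linux/rwsem.h>
#include <linux/lockdep.h>

struct node {					/* hypothetical structure */
	struct rw_semaphore sem;
};

static void lock_parent_then_child(struct node *parent, struct node *child)
{
	down_read(&parent->sem);		/* subclass 0 */
	down_read_nested(&child->sem, SINGLE_DEPTH_NESTING);
	/* ... */
	up_read(&child->sem);
	up_read(&parent->sem);
}
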
index 64877f5..2534ce4 100644 (file)
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * The least significant 2 bits of the owner value has the following
- * meanings when set.
- *  - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers
- *  - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned,
- *    i.e. the owner(s) cannot be readily determined. It can be reader
- *    owned or the owning writer is indeterminate.
- *
- * When a writer acquires a rwsem, it puts its task_struct pointer
- * into the owner field. It is cleared after an unlock.
- *
- * When a reader acquires a rwsem, it will also puts its task_struct
- * pointer into the owner field with both the RWSEM_READER_OWNED and
- * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will
- * largely be left untouched. So for a free or reader-owned rwsem,
- * the owner value may contain information about the last reader that
- * acquires the rwsem. The anonymous bit is set because that particular
- * reader may or may not still own the lock.
- *
- * That information may be helpful in debugging cases where the system
- * seems to hang on a reader owned rwsem especially if only one reader
- * is involved. Ideally we would like to track all the readers that own
- * a rwsem, but the overhead is simply too big.
- */
-#include "lock_events.h"
 
-#define RWSEM_READER_OWNED     (1UL << 0)
-#define RWSEM_ANONYMOUSLY_OWNED        (1UL << 1)
+#ifndef __INTERNAL_RWSEM_H
+#define __INTERNAL_RWSEM_H
+#include <linux/rwsem.h>
 
-#ifdef CONFIG_DEBUG_RWSEMS
-# define DEBUG_RWSEMS_WARN_ON(c, sem)  do {                    \
-       if (!debug_locks_silent &&                              \
-           WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
-               #c, atomic_long_read(&(sem)->count),            \
-               (long)((sem)->owner), (long)current,            \
-               list_empty(&(sem)->wait_list) ? "" : "not "))   \
-                       debug_locks_off();                      \
-       } while (0)
-#else
-# define DEBUG_RWSEMS_WARN_ON(c, sem)
-#endif
+extern void __down_read(struct rw_semaphore *sem);
+extern void __up_read(struct rw_semaphore *sem);
 
-/*
- * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
- * Adapted largely from include/asm-i386/rwsem.h
- * by Paul Mackerras <paulus@samba.org>.
- */
-
-/*
- * the semaphore definition
- */
-#ifdef CONFIG_64BIT
-# define RWSEM_ACTIVE_MASK             0xffffffffL
-#else
-# define RWSEM_ACTIVE_MASK             0x0000ffffL
-#endif
-
-#define RWSEM_ACTIVE_BIAS              0x00000001L
-#define RWSEM_WAITING_BIAS             (-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS         RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS                (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-/*
- * All writes to owner are protected by WRITE_ONCE() to make sure that
- * store tearing can't happen as optimistic spinners may read and use
- * the owner value concurrently without lock. Read from owner, however,
- * may not need READ_ONCE() as long as the pointer value is only used
- * for comparison and isn't being dereferenced.
- */
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-       WRITE_ONCE(sem->owner, current);
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-       WRITE_ONCE(sem->owner, NULL);
-}
-
-/*
- * The task_struct pointer of the last owning reader will be left in
- * the owner field.
- *
- * Note that the owner value just indicates the task has owned the rwsem
- * previously, it may not be the real owner or one of the real owners
- * anymore when that field is examined, so take it with a grain of salt.
- */
-static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
-                                           struct task_struct *owner)
-{
-       unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED
-                                                | RWSEM_ANONYMOUSLY_OWNED;
-
-       WRITE_ONCE(sem->owner, (struct task_struct *)val);
-}
-
-static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
-{
-       __rwsem_set_reader_owned(sem, current);
-}
-
-/*
- * Return true if the a rwsem waiter can spin on the rwsem's owner
- * and steal the lock, i.e. the lock is not anonymously owned.
- * N.B. !owner is considered spinnable.
- */
-static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
-{
-       return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
-}
-
-/*
- * Return true if rwsem is owned by an anonymous writer or readers.
- */
-static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
-{
-       return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
-}
-
-#ifdef CONFIG_DEBUG_RWSEMS
-/*
- * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
- * is a task pointer in owner of a reader-owned rwsem, it will be the
- * real owner or one of the real owners. The only exception is when the
- * unlock is done by up_read_non_owner().
- */
-#define rwsem_clear_reader_owned rwsem_clear_reader_owned
-static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
-{
-       unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
-                                                  | RWSEM_ANONYMOUSLY_OWNED;
-       if (READ_ONCE(sem->owner) == (struct task_struct *)val)
-               cmpxchg_relaxed((unsigned long *)&sem->owner, val,
-                               RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED);
-}
-#endif
-
-#else
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
-                                          struct task_struct *owner)
-{
-}
-
-static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
-{
-}
-#endif
-
-#ifndef rwsem_clear_reader_owned
-static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
-{
-}
-#endif
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-       if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
-               rwsem_down_read_failed(sem);
-               DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
-                                       RWSEM_READER_OWNED), sem);
-       } else {
-               rwsem_set_reader_owned(sem);
-       }
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
-       if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
-               if (IS_ERR(rwsem_down_read_failed_killable(sem)))
-                       return -EINTR;
-               DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
-                                       RWSEM_READER_OWNED), sem);
-       } else {
-               rwsem_set_reader_owned(sem);
-       }
-       return 0;
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-       /*
-        * Optimize for the case when the rwsem is not locked at all.
-        */
-       long tmp = RWSEM_UNLOCKED_VALUE;
-
-       lockevent_inc(rwsem_rtrylock);
-       do {
-               if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
-                                       tmp + RWSEM_ACTIVE_READ_BIAS)) {
-                       rwsem_set_reader_owned(sem);
-                       return 1;
-               }
-       } while (tmp >= 0);
-       return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
-       long tmp;
-
-       tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
-                                            &sem->count);
-       if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
-               rwsem_down_write_failed(sem);
-       rwsem_set_owner(sem);
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
-       long tmp;
-
-       tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
-                                            &sem->count);
-       if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
-               if (IS_ERR(rwsem_down_write_failed_killable(sem)))
-                       return -EINTR;
-       rwsem_set_owner(sem);
-       return 0;
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-       long tmp;
-
-       lockevent_inc(rwsem_wtrylock);
-       tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
-                     RWSEM_ACTIVE_WRITE_BIAS);
-       if (tmp == RWSEM_UNLOCKED_VALUE) {
-               rwsem_set_owner(sem);
-               return true;
-       }
-       return false;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-       long tmp;
-
-       DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
-                               sem);
-       rwsem_clear_reader_owned(sem);
-       tmp = atomic_long_dec_return_release(&sem->count);
-       if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
-               rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-       DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
-       rwsem_clear_owner(sem);
-       if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
-                                                   &sem->count) < 0))
-               rwsem_wake(sem);
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-       long tmp;
-
-       /*
-        * When downgrading from exclusive to shared ownership,
-        * anything inside the write-locked region cannot leak
-        * into the read side. In contrast, anything in the
-        * read-locked region is ok to be re-ordered into the
-        * write side. As such, rely on RELEASE semantics.
-        */
-       DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
-       tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
-       rwsem_set_reader_owned(sem);
-       if (tmp < 0)
-               rwsem_downgrade_wake(sem);
-}
+#endif /* __INTERNAL_RWSEM_H */
index f35930f..8591529 100644 (file)
@@ -6189,6 +6189,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
        u64 time, cost;
        s64 delta;
        int cpu, nr = INT_MAX;
+       int this = smp_processor_id();
 
        this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
        if (!this_sd)
@@ -6212,7 +6213,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
                        nr = 4;
        }
 
-       time = local_clock();
+       time = cpu_clock(this);
 
        for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
                if (!--nr)
@@ -6223,7 +6224,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
                        break;
        }
 
-       time = local_clock() - time;
+       time = cpu_clock(this) - time;
        cost = this_sd->avg_scan_cost;
        delta = (s64)(time - cost) / 8;
        this_sd->avg_scan_cost += delta;
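
The hunk above switches the scan-cost measurement from local_clock() to cpu_clock(this), presumably so both timestamps are read from the same CPU's clock even if the scan is interrupted. A minimal sketch of that measurement pattern (not from this patch; the function name is hypothetical, and it assumes preemption is already disabled around smp_processor_id()):

#include <linux/sched/clock.h>
#include <linux/smp.h>

static u64 timed_scan(void)
{
	int this = smp_processor_id();		/* pin the clock source once */
	u64 t0 = cpu_clock(this);

	/* ... the work being timed ... */

	return cpu_clock(this) - t0;		/* both reads use the same per-CPU clock */
}
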
index 6629cab..06d9c9d 100644 (file)
@@ -1095,7 +1095,7 @@ config PROVE_LOCKING
        select DEBUG_SPINLOCK
        select DEBUG_MUTEXES
        select DEBUG_RT_MUTEXES if RT_MUTEXES
-       select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER
+       select DEBUG_RWSEMS
        select DEBUG_WW_MUTEX_SLOWPATH
        select DEBUG_LOCK_ALLOC
        select TRACE_IRQFLAGS
@@ -1199,10 +1199,10 @@ config DEBUG_WW_MUTEX_SLOWPATH
 
 config DEBUG_RWSEMS
        bool "RW Semaphore debugging: basic checks"
-       depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER
+       depends on DEBUG_KERNEL
        help
-         This debugging feature allows mismatched rw semaphore locks and unlocks
-         to be detected and reported.
+         This debugging feature allows mismatched rw semaphore locks
+         and unlocks to be detected and reported.
 
 config DEBUG_LOCK_ALLOC
        bool "Lock debugging: detect incorrect freeing of live locks"
index 7e69057..e98c85a 100644 (file)
@@ -42,11 +42,11 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
        return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
 }
 
-long long atomic64_read(const atomic64_t *v)
+s64 atomic64_read(const atomic64_t *v)
 {
        unsigned long flags;
        raw_spinlock_t *lock = lock_addr(v);
-       long long val;
+       s64 val;
 
        raw_spin_lock_irqsave(lock, flags);
        val = v->counter;
@@ -55,7 +55,7 @@ long long atomic64_read(const atomic64_t *v)
 }
 EXPORT_SYMBOL(atomic64_read);
 
-void atomic64_set(atomic64_t *v, long long i)
+void atomic64_set(atomic64_t *v, s64 i)
 {
        unsigned long flags;
        raw_spinlock_t *lock = lock_addr(v);
@@ -67,7 +67,7 @@ void atomic64_set(atomic64_t *v, long long i)
 EXPORT_SYMBOL(atomic64_set);
 
 #define ATOMIC64_OP(op, c_op)                                          \
-void atomic64_##op(long long a, atomic64_t *v)                         \
+void atomic64_##op(s64 a, atomic64_t *v)                               \
 {                                                                      \
        unsigned long flags;                                            \
        raw_spinlock_t *lock = lock_addr(v);                            \
@@ -79,11 +79,11 @@ void atomic64_##op(long long a, atomic64_t *v)                              \
 EXPORT_SYMBOL(atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(op, c_op)                                   \
-long long atomic64_##op##_return(long long a, atomic64_t *v)           \
+s64 atomic64_##op##_return(s64 a, atomic64_t *v)                       \
 {                                                                      \
        unsigned long flags;                                            \
        raw_spinlock_t *lock = lock_addr(v);                            \
-       long long val;                                                  \
+       s64 val;                                                        \
                                                                        \
        raw_spin_lock_irqsave(lock, flags);                             \
        val = (v->counter c_op a);                                      \
@@ -93,11 +93,11 @@ long long atomic64_##op##_return(long long a, atomic64_t *v)                \
 EXPORT_SYMBOL(atomic64_##op##_return);
 
 #define ATOMIC64_FETCH_OP(op, c_op)                                    \
-long long atomic64_fetch_##op(long long a, atomic64_t *v)              \
+s64 atomic64_fetch_##op(s64 a, atomic64_t *v)                          \
 {                                                                      \
        unsigned long flags;                                            \
        raw_spinlock_t *lock = lock_addr(v);                            \
-       long long val;                                                  \
+       s64 val;                                                        \
                                                                        \
        raw_spin_lock_irqsave(lock, flags);                             \
        val = v->counter;                                               \
@@ -130,11 +130,11 @@ ATOMIC64_OPS(xor, ^=)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-long long atomic64_dec_if_positive(atomic64_t *v)
+s64 atomic64_dec_if_positive(atomic64_t *v)
 {
        unsigned long flags;
        raw_spinlock_t *lock = lock_addr(v);
-       long long val;
+       s64 val;
 
        raw_spin_lock_irqsave(lock, flags);
        val = v->counter - 1;
@@ -145,11 +145,11 @@ long long atomic64_dec_if_positive(atomic64_t *v)
 }
 EXPORT_SYMBOL(atomic64_dec_if_positive);
 
-long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 {
        unsigned long flags;
        raw_spinlock_t *lock = lock_addr(v);
-       long long val;
+       s64 val;
 
        raw_spin_lock_irqsave(lock, flags);
        val = v->counter;
@@ -160,11 +160,11 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
 }
 EXPORT_SYMBOL(atomic64_cmpxchg);
 
-long long atomic64_xchg(atomic64_t *v, long long new)
+s64 atomic64_xchg(atomic64_t *v, s64 new)
 {
        unsigned long flags;
        raw_spinlock_t *lock = lock_addr(v);
-       long long val;
+       s64 val;
 
        raw_spin_lock_irqsave(lock, flags);
        val = v->counter;
@@ -174,11 +174,11 @@ long long atomic64_xchg(atomic64_t *v, long long new)
 }
 EXPORT_SYMBOL(atomic64_xchg);
 
-long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u)
+s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
        unsigned long flags;
        raw_spinlock_t *lock = lock_addr(v);
-       long long val;
+       s64 val;
 
        raw_spin_lock_irqsave(lock, flags);
        val = v->counter;
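
A minimal usage sketch of the converted generic atomic64 API (not part of this patch; "bytes_seen" and the helper are hypothetical), showing that arguments and return values are now s64 throughout:

#include <linux/atomic.h>
#include <linux/types.h>

static atomic64_t bytes_seen = ATOMIC64_INIT(0);

static s64 account_bytes(s64 n)
{
	/* both the increment and the returned total are s64 */
	return atomic64_add_return(n, &bytes_seen);
}
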
index cfa0c2f..8378c63 100755 (executable)
@@ -22,7 +22,7 @@ while read header; do
        OLDSUM="$(tail -n 1 ${LINUXDIR}/include/${header})"
        OLDSUM="${OLDSUM#// }"
 
-       NEWSUM="$(head -n -1 ${LINUXDIR}/include/${header} | sha1sum)"
+       NEWSUM="$(sed '$d' ${LINUXDIR}/include/${header} | sha1sum)"
        NEWSUM="${NEWSUM%% *}"
 
        if [ "${OLDSUM}" != "${NEWSUM}" ]; then
index 068e93c..59f1cc2 100644 (file)
@@ -76,7 +76,7 @@ void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new)
 
        AA_BUG(!orig);
        AA_BUG(!new);
-       lockdep_assert_held_exclusive(&labels_set(orig)->lock);
+       lockdep_assert_held_write(&labels_set(orig)->lock);
 
        tmp = rcu_dereference_protected(orig->proxy->label,
                                        &labels_ns(orig)->lock);
@@ -566,7 +566,7 @@ static bool __label_remove(struct aa_label *label, struct aa_label *new)
 
        AA_BUG(!ls);
        AA_BUG(!label);
-       lockdep_assert_held_exclusive(&ls->lock);
+       lockdep_assert_held_write(&ls->lock);
 
        if (new)
                __aa_proxy_redirect(label, new);
@@ -603,7 +603,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new)
        AA_BUG(!ls);
        AA_BUG(!old);
        AA_BUG(!new);
-       lockdep_assert_held_exclusive(&ls->lock);
+       lockdep_assert_held_write(&ls->lock);
        AA_BUG(new->flags & FLAG_IN_TREE);
 
        if (!label_is_stale(old))
@@ -640,7 +640,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls,
        AA_BUG(!ls);
        AA_BUG(!label);
        AA_BUG(labels_set(label) != ls);
-       lockdep_assert_held_exclusive(&ls->lock);
+       lockdep_assert_held_write(&ls->lock);
        AA_BUG(label->flags & FLAG_IN_TREE);
 
        /* Figure out where to put new node */