csky: atomic: Optimize cmpxchg with acquire & release
authorGuo Ren <guoren@linux.alibaba.com>
Wed, 6 Apr 2022 12:30:13 +0000 (20:30 +0800)
committerGuo Ren <guoren@linux.alibaba.com>
Mon, 25 Apr 2022 05:51:42 +0000 (13:51 +0800)
Optimize cmpxchg with acquire/release fence ASM instructions
instead of the previous generic-based full barriers. This also avoids
a fence when cmpxchg's first load != old (the failure path).

Comments by Rutland:

8e86f0b409a4 ("arm64: atomics: fix use of acquire + release for
full barrier semantics")

Comments by Boqun:

FWIW, you probably need to make sure that a barrier instruction inside
an lr/sc loop is a good thing. IIUC, the execution time of a barrier
instruction is determined by the status of store buffers and invalidate
queues (and probably other stuffs), so it may increase the execution
time of the lr/sc loop, and make it unlikely to succeed. But this really
depends on how the arch executes these instructions.

Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
arch/csky/include/asm/barrier.h
arch/csky/include/asm/cmpxchg.h

index f4045dd..15de58b 100644 (file)
  * bar.brar
  * bar.bwaw
  */
+#define FULL_FENCE             ".long 0x842fc000\n"
+#define ACQUIRE_FENCE          ".long 0x8427c000\n"
+#define RELEASE_FENCE          ".long 0x842ec000\n"
+
 #define __bar_brw()    asm volatile (".long 0x842cc000\n":::"memory")
 #define __bar_br()     asm volatile (".long 0x8424c000\n":::"memory")
 #define __bar_bw()     asm volatile (".long 0x8428c000\n":::"memory")
 #define __bar_arw()    asm volatile (".long 0x8423c000\n":::"memory")
 #define __bar_ar()     asm volatile (".long 0x8421c000\n":::"memory")
 #define __bar_aw()     asm volatile (".long 0x8422c000\n":::"memory")
-#define __bar_brwarw() asm volatile (".long 0x842fc000\n":::"memory")
-#define __bar_brarw()  asm volatile (".long 0x8427c000\n":::"memory")
+#define __bar_brwarw() asm volatile (FULL_FENCE:::"memory")
+#define __bar_brarw()  asm volatile (ACQUIRE_FENCE:::"memory")
 #define __bar_bwarw()  asm volatile (".long 0x842bc000\n":::"memory")
 #define __bar_brwar()  asm volatile (".long 0x842dc000\n":::"memory")
-#define __bar_brwaw()  asm volatile (".long 0x842ec000\n":::"memory")
+#define __bar_brwaw()  asm volatile (RELEASE_FENCE:::"memory")
 #define __bar_brar()   asm volatile (".long 0x8425c000\n":::"memory")
 #define __bar_brar()   asm volatile (".long 0x8425c000\n":::"memory")
 #define __bar_bwaw()   asm volatile (".long 0x842ac000\n":::"memory")
@@ -56,7 +60,6 @@
 #define __smp_rmb()    __bar_brar()
 #define __smp_wmb()    __bar_bwaw()
 
-#define ACQUIRE_FENCE          ".long 0x8427c000\n"
 #define __smp_acquire_fence()  __bar_brarw()
 #define __smp_release_fence()  __bar_brwaw()
 
index d1bef11..5b8facc 100644 (file)
@@ -64,15 +64,71 @@ extern void __bad_xchg(void);
 #define arch_cmpxchg_relaxed(ptr, o, n) \
        (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 
-#define arch_cmpxchg(ptr, o, n)                                \
+#define __cmpxchg_acquire(ptr, old, new, size)                 \
 ({                                                             \
+       __typeof__(ptr) __ptr = (ptr);                          \
+       __typeof__(new) __new = (new);                          \
+       __typeof__(new) __tmp;                                  \
+       __typeof__(old) __old = (old);                          \
+       __typeof__(*(ptr)) __ret;                               \
+       switch (size) {                                         \
+       case 4:                                                 \
+               asm volatile (                                  \
+               "1:     ldex.w          %0, (%3) \n"            \
+               "       cmpne           %0, %4   \n"            \
+               "       bt              2f       \n"            \
+               "       mov             %1, %2   \n"            \
+               "       stex.w          %1, (%3) \n"            \
+               "       bez             %1, 1b   \n"            \
+               ACQUIRE_FENCE                                   \
+               "2:                              \n"            \
+                       : "=&r" (__ret), "=&r" (__tmp)          \
+                       : "r" (__new), "r"(__ptr), "r"(__old)   \
+                       :);                                     \
+               break;                                          \
+       default:                                                \
+               __bad_xchg();                                   \
+       }                                                       \
+       __ret;                                                  \
+})
+
+#define arch_cmpxchg_acquire(ptr, o, n) \
+       (__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))
+
+#define __cmpxchg(ptr, old, new, size)                         \
+({                                                             \
+       __typeof__(ptr) __ptr = (ptr);                          \
+       __typeof__(new) __new = (new);                          \
+       __typeof__(new) __tmp;                                  \
+       __typeof__(old) __old = (old);                          \
        __typeof__(*(ptr)) __ret;                               \
-       __smp_release_fence();                                  \
-       __ret = arch_cmpxchg_relaxed(ptr, o, n);                \
-       __smp_acquire_fence();                                  \
+       switch (size) {                                         \
+       case 4:                                                 \
+               asm volatile (                                  \
+               RELEASE_FENCE                                   \
+               "1:     ldex.w          %0, (%3) \n"            \
+               "       cmpne           %0, %4   \n"            \
+               "       bt              2f       \n"            \
+               "       mov             %1, %2   \n"            \
+               "       stex.w          %1, (%3) \n"            \
+               "       bez             %1, 1b   \n"            \
+               FULL_FENCE                                      \
+               "2:                              \n"            \
+                       : "=&r" (__ret), "=&r" (__tmp)          \
+                       : "r" (__new), "r"(__ptr), "r"(__old)   \
+                       :);                                     \
+               break;                                          \
+       default:                                                \
+               __bad_xchg();                                   \
+       }                                                       \
        __ret;                                                  \
 })
 
+#define arch_cmpxchg(ptr, o, n)                                        \
+       (__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
+
+#define arch_cmpxchg_local(ptr, o, n)                          \
+       (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 #else
 #include <asm-generic/cmpxchg.h>
 #endif