riscv: Improve zacas fully-ordered cmpxchg()
author     Alexandre Ghiti <alexghiti@rivosinc.com>
           Sun, 3 Nov 2024 14:51:46 +0000 (15:51 +0100)
committer  Palmer Dabbelt <palmer@rivosinc.com>
           Mon, 11 Nov 2024 15:33:13 +0000 (07:33 -0800)
The current fully-ordered cmpxchgXX() implementation results in:

  amocas.X.rl     a5,a4,(s1)
  fence           rw,rw

This provides enough synchronization, but the amocas instruction can
carry the full ordering itself, so we can use the following better
mapping instead and drop the trailing fence:

  amocas.X.aqrl   a5,a4,(s1)
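
For reference, here is a minimal user-space sketch of the improved
mapping (the helper name is illustrative, not part of the patch; it
assumes a riscv64 toolchain and CPU with the Zacas extension, e.g.
-march=rv64gc_zacas):

  /* One amocas.w.aqrl replaces the old amocas.w.rl + fence rw,rw pair. */
  static inline unsigned int cas_w_fully_ordered(unsigned int *p,
                                                 unsigned int old,
                                                 unsigned int new)
  {
          unsigned int r = old;

          __asm__ __volatile__ (
                  "       amocas.w.aqrl %0, %z2, %1\n"
                  : "+&r" (r), "+A" (*p)
                  : "rJ" (new)
                  : "memory");

          return r;       /* equals 'old' iff the swap succeeded */
  }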

Suggested-by: Andrea Parri <andrea@rivosinc.com>
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-7-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/include/asm/cmpxchg.h

index 1f4cd12..052418a 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
  * store NEW in MEM.  Return the initial value in MEM.  Success is
  * indicated by comparing RETURN with OLD.
  */
-
-#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n)    \
+#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,                                 \
+                             sc_prepend, sc_append,                            \
+                             cas_prepend, cas_append,                          \
+                             r, p, o, n)                                       \
 ({                                                                             \
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&                               \
            IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&                               \
                r = o;                                                          \
                                                                                \
                __asm__ __volatile__ (                                          \
-                       prepend                                                 \
+                       cas_prepend                                             \
                        "       amocas" cas_sfx " %0, %z2, %1\n"                \
-                       append                                                  \
+                       cas_append                                              \
                        : "+&r" (r), "+A" (*(p))                                \
                        : "rJ" (n)                                              \
                        : "memory");                                            \
                ulong __rc;                                                     \
                                                                                \
                __asm__ __volatile__ (                                          \
-                       prepend                                                 \
+                       sc_prepend                                              \
                        "0:     lr.w %0, %2\n"                                  \
                        "       and  %1, %0, %z5\n"                             \
                        "       bne  %1, %z3, 1f\n"                             \
                        "       or   %1, %1, %z4\n"                             \
                        "       sc.w" sc_sfx " %1, %1, %2\n"                    \
                        "       bnez %1, 0b\n"                                  \
-                       append                                                  \
+                       sc_append                                               \
                        "1:\n"                                                  \
                        : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))      \
                        : "rJ" ((long)__oldx), "rJ" (__newx),                   \
        }                                                                       \
 })
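
The split sc_prepend/sc_append arguments decorate the LR/SC fallback
taken when Zabha/Zacas are not available, which emulates a byte or
halfword cmpxchg on the aligned 32-bit word containing the object. A
compilable sketch of that fallback for the byte case (hypothetical
helper; the barrier slots are shown as comments):

  static inline unsigned char cas_b_lrsc(unsigned char *p,
                                         unsigned char old,
                                         unsigned char new)
  {
          unsigned int *p32 = (unsigned int *)((unsigned long)p & ~0x3UL);
          unsigned long s = ((unsigned long)p & 0x3) * 8;   /* bit offset */
          unsigned long mask = 0xfful << s;
          unsigned long o32 = (unsigned long)old << s;
          unsigned long n32 = (unsigned long)new << s;
          unsigned long ret, rc;

          __asm__ __volatile__ (
                  /* sc_prepend slot: a release barrier would go here */
                  "0:     lr.w %0, %2\n"
                  "       and  %1, %0, %z5\n"     /* isolate the target byte */
                  "       bne  %1, %z3, 1f\n"     /* mismatch: bail out      */
                  "       and  %1, %0, %z6\n"     /* keep the other bytes    */
                  "       or   %1, %1, %z4\n"     /* splice in the new byte  */
                  "       sc.w %1, %1, %2\n"
                  "       bnez %1, 0b\n"          /* store failed: retry     */
                  /* sc_append slot: an acquire/full barrier would go here */
                  "1:\n"
                  : "=&r" (ret), "=&r" (rc), "+A" (*p32)
                  : "rJ" (o32), "rJ" (n32), "rJ" (mask), "rJ" (~mask)
                  : "memory");

          return (unsigned char)((ret & mask) >> s);
  }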
 
-#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n)    \
+#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,                        \
+                      sc_prepend, sc_append,                           \
+                      cas_prepend, cas_append,                         \
+                      r, p, co, o, n)                                  \
 ({                                                                     \
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&                       \
            riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {        \
                r = o;                                                  \
                                                                        \
                __asm__ __volatile__ (                                  \
-                       prepend                                         \
-                       "       amocas" sc_cas_sfx " %0, %z2, %1\n"     \
-                       append                                          \
+                       cas_prepend                                     \
+                       "       amocas" cas_sfx " %0, %z2, %1\n"        \
+                       cas_append                                      \
                        : "+&r" (r), "+A" (*(p))                        \
                        : "rJ" (n)                                      \
                        : "memory");                                    \
                register unsigned int __rc;                             \
                                                                        \
                __asm__ __volatile__ (                                  \
-                       prepend                                         \
+                       sc_prepend                                      \
                        "0:     lr" lr_sfx " %0, %2\n"                  \
                        "       bne  %0, %z3, 1f\n"                     \
-                       "       sc" sc_cas_sfx " %1, %z4, %2\n"         \
+                       "       sc" sc_sfx " %1, %z4, %2\n"             \
                        "       bnez %1, 0b\n"                          \
-                       append                                          \
+                       sc_append                                       \
                        "1:\n"                                          \
                        : "=&r" (r), "=&r" (__rc), "+A" (*(p))          \
                        : "rJ" (co o), "rJ" (n)                         \
        }                                                               \
 })
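
On hardware without Zacas, the fully-ordered case keeps the proven
sc.w.rl + fence rw,rw sequence, now passed in through the sc_* arguments
(the suffix is concatenated onto the mnemonic, e.g. ".w" sc_sfx gives
"sc.w.rl"). A sketch of what that path expands to for a 32-bit object
(hypothetical helper):

  static inline unsigned int cas_w_lrsc_fully_ordered(unsigned int *p,
                                                      unsigned int old,
                                                      unsigned int new)
  {
          unsigned int r, rc;

          __asm__ __volatile__ (
                  /* SC_PREPEND(""): nothing emitted before the loop */
                  "0:     lr.w %0, %2\n"
                  "       bne  %0, %z3, 1f\n"
                  "       sc.w.rl %1, %z4, %2\n"  /* SC_SFX(".rl") */
                  "       bnez %1, 0b\n"
                  "       fence rw, rw\n"         /* SC_APPEND(RISCV_FULL_BARRIER) */
                  "1:\n"
                  : "=&r" (r), "=&r" (rc), "+A" (*p)
                  : "rJ" ((long)(int)old), "rJ" (new)   /* sign-extend like lr.w */
                  : "memory");

          return r;
  }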
 
-#define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append)      \
+#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,                  \
+                     sc_prepend, sc_append,                            \
+                     cas_prepend, cas_append)                          \
 ({                                                                     \
        __typeof__(ptr) __ptr = (ptr);                                  \
        __typeof__(*(__ptr)) __old = (old);                             \
                                                                        \
        switch (sizeof(*__ptr)) {                                       \
        case 1:                                                         \
-               __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx,      \
-                                       prepend, append,                \
-                                       __ret, __ptr, __old, __new);    \
+               __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,             \
+                                     sc_prepend, sc_append,            \
+                                     cas_prepend, cas_append,          \
+                                     __ret, __ptr, __old, __new);      \
                break;                                                  \
        case 2:                                                         \
-               __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx,      \
-                                       prepend, append,                \
-                                       __ret, __ptr, __old, __new);    \
+               __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,             \
+                                     sc_prepend, sc_append,            \
+                                     cas_prepend, cas_append,          \
+                                     __ret, __ptr, __old, __new);      \
                break;                                                  \
        case 4:                                                         \
-               __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append,  \
-                               __ret, __ptr, (long), __old, __new);    \
+               __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,         \
+                              sc_prepend, sc_append,                   \
+                              cas_prepend, cas_append,                 \
+                              __ret, __ptr, (long), __old, __new);     \
                break;                                                  \
        case 8:                                                         \
-               __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append,  \
-                               __ret, __ptr, /**/, __old, __new);      \
+               __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,         \
+                              sc_prepend, sc_append,                   \
+                              cas_prepend, cas_append,                 \
+                              __ret, __ptr, /**/, __old, __new);       \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \
        (__typeof__(*(__ptr)))__ret;                                    \
 })
 
+/*
+ * These macros are here to improve the readability of the arch_cmpxchg_XXX()
+ * macros.
+ */
+#define SC_SFX(x)      x
+#define CAS_SFX(x)     x
+#define SC_PREPEND(x)  x
+#define SC_APPEND(x)   x
+#define CAS_PREPEND(x) x
+#define CAS_APPEND(x)  x
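
These wrappers expand to their argument unchanged; their only job is to
name the positional parameter at each call site. The same identity-macro
trick in a self-contained (and entirely made-up) example:

  #include <stdio.h>

  /* Zero cost after preprocessing, but the call site reads as if it had
   * named arguments, just like SC_SFX()/CAS_APPEND() below. */
  #define WIDTH(x)  x
  #define HEIGHT(x) x

  static int area(int w, int h) { return w * h; }

  int main(void)
  {
          printf("%d\n", area(WIDTH(3), HEIGHT(4)));  /* expands to area(3, 4) */
          return 0;
  }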
+
 #define arch_cmpxchg_relaxed(ptr, o, n)                                        \
-       _arch_cmpxchg((ptr), (o), (n), "", "", "")
+       _arch_cmpxchg((ptr), (o), (n),                                  \
+                     SC_SFX(""), CAS_SFX(""),                          \
+                     SC_PREPEND(""), SC_APPEND(""),                    \
+                     CAS_PREPEND(""), CAS_APPEND(""))
 
 #define arch_cmpxchg_acquire(ptr, o, n)                                        \
-       _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
+       _arch_cmpxchg((ptr), (o), (n),                                  \
+                     SC_SFX(""), CAS_SFX(""),                          \
+                     SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \
+                     CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))
 
 #define arch_cmpxchg_release(ptr, o, n)                                        \
-       _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
+       _arch_cmpxchg((ptr), (o), (n),                                  \
+                     SC_SFX(""), CAS_SFX(""),                          \
+                     SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \
+                     CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))
 
 #define arch_cmpxchg(ptr, o, n)                                                \
-       _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
+       _arch_cmpxchg((ptr), (o), (n),                                  \
+                     SC_SFX(".rl"), CAS_SFX(".aqrl"),                  \
+                     SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),    \
+                     CAS_PREPEND(""), CAS_APPEND(""))
 
 #define arch_cmpxchg_local(ptr, o, n)                                  \
        arch_cmpxchg_relaxed((ptr), (o), (n))
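
To close, a hypothetical caller showing how the variants line up with
the mappings above (the lock protocol and helper names are made up for
illustration):

  /* 0 = unlocked, 1 = locked */
  static int try_take(int *lock)
  {
          /* success implies acquire: plain CAS + RISCV_ACQUIRE_BARRIER */
          return arch_cmpxchg_acquire(lock, 0, 1) == 0;
  }

  static void drop(int *lock)
  {
          /* release: RISCV_RELEASE_BARRIER emitted before the plain CAS */
          (void)arch_cmpxchg_release(lock, 1, 0);
  }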