powerpc/string: Implement optimized memset variants
author: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Mon, 27 Mar 2017 19:37:40 +0000 (01:07 +0530)
committer: Michael Ellerman <mpe@ellerman.id.au>
Thu, 17 Aug 2017 13:04:35 +0000 (23:04 +1000)
Based on Matthew Wilcox's patches for other architectures.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/string.h
arch/powerpc/lib/mem_64.S

index da3cdff..b0e8246 100644 (file)
@@ -23,6 +23,30 @@ extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 
+#ifdef CONFIG_PPC64
+#define __HAVE_ARCH_MEMSET16
+#define __HAVE_ARCH_MEMSET32
+#define __HAVE_ARCH_MEMSET64
+
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
+
+/*
+ * memset16() - fill a region with a repeated 16-bit value.
+ * @p: destination (must be 2-byte aligned); @v: value to store; @n: count
+ * of 16-bit elements (NOT bytes).  The n * 2 converts the element count to
+ * the byte length expected by the assembly helper __memset16().
+ */
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+       return __memset16(p, v, n * 2);
+}
+
+/*
+ * memset32() - fill a region with a repeated 32-bit value.
+ * @p: destination (must be 4-byte aligned); @v: value to store; @n: count
+ * of 32-bit elements (NOT bytes).  The n * 4 converts the element count to
+ * the byte length expected by the assembly helper __memset32().
+ */
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+       return __memset32(p, v, n * 4);
+}
+
+/*
+ * memset64() - fill a region with a repeated 64-bit value.
+ * @p: destination (must be 8-byte aligned); @v: value to store; @n: count
+ * of 64-bit elements (NOT bytes).  The n * 8 converts the element count to
+ * the byte length expected by the assembly helper __memset64().
+ */
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+       return __memset64(p, v, n * 8);
+}
+#endif
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_POWERPC_STRING_H */
index 85fa986..ec531de 100644 (file)
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
 
+/*
+ * Sized memset entry points.  Calling convention: r3 = dest, r4 = value,
+ * r5 = length in BYTES (the C inline wrappers above do the count-to-bytes
+ * conversion).  Each entry widens the fill pattern in r4 and falls through
+ * to the next, so that __memset64 ends up with r4 holding the value
+ * replicated across all 64 bits; it then joins the common memset() tail
+ * at the .Lms label below.
+ */
+_GLOBAL(__memset16)
+       /* duplicate the low halfword into bits 32-47 of the low word,
+        * turning the 16-bit value into a 32-bit pattern */
+       rlwimi  r4,r4,16,0,15
+       /* fall through */
+
+_GLOBAL(__memset32)
+       /* copy the low 32 bits of r4 into the high 32 bits: r4 now holds
+        * the value replicated across the full doubleword */
+       rldimi  r4,r4,32,0
+       /* fall through */
+
+_GLOBAL(__memset64)
+       /* r0 = number of bytes needed to bring dest (r3) up to 8-byte
+        * alignment (low 3 bits of -r3) */
+       neg     r0,r3
+       andi.   r0,r0,7
+       /* cr1: does the byte length (r5) even reach the alignment point? */
+       cmplw   cr1,r5,r0
+       /* join the shared store loop inside memset() at .Lms; r4 is already
+        * fully replicated so the skipped pattern-building code is not needed */
+       b       .Lms
+EXPORT_SYMBOL(__memset16)
+EXPORT_SYMBOL(__memset32)
+EXPORT_SYMBOL(__memset64)
+
+
 _GLOBAL(memset)
        neg     r0,r3
        rlwimi  r4,r4,8,16,23
@@ -20,7 +37,7 @@ _GLOBAL(memset)
        rlwimi  r4,r4,16,0,15
        cmplw   cr1,r5,r0               /* do we get that far? */
        rldimi  r4,r4,32,0
-       PPC_MTOCRF(1,r0)
+.Lms:  PPC_MTOCRF(1,r0)
        mr      r6,r3
        blt     cr1,8f
        beq+    3f                      /* if already 8-byte aligned */