ARC: Checksum/byteorder/swab routines
author Vineet Gupta <vgupta@synopsys.com>
Fri, 18 Jan 2013 09:42:17 +0000 (15:12 +0530)
committer Vineet Gupta <vgupta@synopsys.com>
Mon, 11 Feb 2013 14:30:34 +0000 (20:00 +0530)
TBD: do_csum still needs to be written in asm

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
arch/arc/include/asm/byteorder.h [new file with mode: 0644]
arch/arc/include/asm/checksum.h [new file with mode: 0644]
arch/arc/include/asm/swab.h [new file with mode: 0644]

diff --git a/arch/arc/include/asm/byteorder.h b/arch/arc/include/asm/byteorder.h
new file mode 100644 (file)
index 0000000..9da71d4
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARC_BYTEORDER_H
+#define __ASM_ARC_BYTEORDER_H
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#include <linux/byteorder/big_endian.h>
+#else
+#include <linux/byteorder/little_endian.h>
+#endif
+
+#endif /* __ASM_ARC_BYTEORDER_H */
diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
new file mode 100644 (file)
index 0000000..1095729
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Joern Rennecke  <joern.rennecke@embecosm.com>: Jan 2012
+ *  -Insn Scheduling improvements to csum core routines.
+ *      = csum_fold( ) largely derived from ARM version.
+ *      = ip_fast_csum( ) to have modulo scheduling
+ *  -gcc 4.4.x broke networking. Alias analysis needed to be primed.
+ *   worked around by adding memory clobber to ip_fast_csum( )
+ *
+ * vineetg: May 2010
+ *  -Rewrote ip_fast_csum( ) and csum_fold( ) with fast inline asm
+ */
+
+#ifndef _ASM_ARC_CHECKSUM_H
+#define _ASM_ARC_CHECKSUM_H
+
+/*
+ *     Fold a partial checksum
+ *
+ *  The two 16-bit halves of the 32-bit sum are added, any carry out of
+ *  bit 15 is added back in, and the final 16-bit result is inverted.
+ */
+static inline __sum16 csum_fold(__wsum s)
+{
+       unsigned r = s << 16 | s >> 16; /* ror */
+       s = ~s;
+       s -= r;
+       return s >> 16;
+}
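+
+/*
+ * For reference: a sketch of the straightforward fold the branchless
+ * sequence above is equivalent to (the '_ref' helper is illustrative
+ * only and not used anywhere).
+ */
+static inline __sum16 csum_fold_ref(__wsum s)
+{
+       u32 sum = (__force u32)s;
+
+       sum = (sum & 0xffff) + (sum >> 16);     /* add the two halves */
+       sum = (sum & 0xffff) + (sum >> 16);     /* fold any carry back in */
+       return (__force __sum16)~sum;           /* invert: one's complement */
+}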
+
+/*
+ *     This is a version of ip_compute_csum() optimized for IP headers,
+ *     which are always a whole number of 32-bit words, aligned on a
+ *     4-octet boundary.
+ */
+static inline __sum16
+ip_fast_csum(const void *iph, unsigned int ihl)
+{
+       const void *ptr = iph;
+       unsigned int tmp, tmp2, sum;
+
+       __asm__(
+       "       ld.ab  %0, [%3, 4]              \n"
+       "       ld.ab  %2, [%3, 4]              \n"
+       "       sub    %1, %4, 2                \n"
+       "       lsr.f  lp_count, %1, 1          \n"
+       "       bcc    0f                       \n"
+       "       add.f  %0, %0, %2               \n"
+       "       ld.ab  %2, [%3, 4]              \n"
+       "0:     lp     1f                       \n"
+       "       ld.ab  %1, [%3, 4]              \n"
+       "       adc.f  %0, %0, %2               \n"
+       "       ld.ab  %2, [%3, 4]              \n"
+       "       adc.f  %0, %0, %1               \n"
+       "1:     adc.f  %0, %0, %2               \n"
+       "       add.cs %0,%0,1                  \n"
+       : "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r" (ptr)
+       : "r"(ihl)
+       : "cc", "lp_count", "memory");
+
+       return csum_fold(sum);
+}
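+
+/*
+ * For reference: what the hand-scheduled loop above computes. It uses
+ * ld.ab post-increment loads and the lp/lp_count zero-overhead loop;
+ * the math is a 32-bit end-around-carry sum over the ihl words of the
+ * header, then folded (the '_ref' helper is illustrative only).
+ */
+static inline __sum16 ip_fast_csum_ref(const void *iph, unsigned int ihl)
+{
+       const u32 *p = iph;
+       u64 sum = 0;
+
+       while (ihl--)                           /* ihl = header len in words */
+               sum += *p++;
+       sum = (sum & 0xffffffff) + (sum >> 32); /* wrap carries around */
+       sum = (sum & 0xffffffff) + (sum >> 32);
+       return csum_fold((__force __wsum)sum);
+}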
+
+/*
+ * TCP/UDP pseudo header is 12 bytes:
+ * SA [4], DA [4], zeroes [1], Proto [1], TCP/UDP Seg (hdr + data) Len [2]
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+                  unsigned short proto, __wsum sum)
+{
+       __asm__ __volatile__(
+       "       add.f %0, %0, %1        \n"
+       "       adc.f %0, %0, %2        \n"
+       "       adc.f %0, %0, %3        \n"
+       "       adc.f %0, %0, %4        \n"
+       "       adc   %0, %0, 0         \n"
+       : "+&r"(sum)
+       : "r"(saddr), "r"(daddr),
+#ifdef CONFIG_CPU_BIG_ENDIAN
+         "r"(len),
+#else
+         "r"(len << 8),
+#endif
+         "r"(htons(proto))
+       : "cc");
+
+       return sum;
+}
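+
+/*
+ * For reference: the adc chain above as a portable 64-bit accumulate,
+ * with len and proto byte-order adjusted exactly as in the asm
+ * operands (the '_ref' helper is illustrative only).
+ */
+static inline __wsum
+csum_tcpudp_nofold_ref(__be32 saddr, __be32 daddr, unsigned short len,
+                      unsigned short proto, __wsum sum)
+{
+       u64 s = (__force u32)sum;
+
+       s += (__force u32)saddr;
+       s += (__force u32)daddr;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       s += len;
+#else
+       s += len << 8;
+#endif
+       s += (__force u32)htons(proto);
+       s = (s & 0xffffffff) + (s >> 32);       /* fold carries back in */
+       s = (s & 0xffffffff) + (s >> 32);
+       return (__force __wsum)s;
+}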
+
+#define csum_fold csum_fold
+#define ip_fast_csum ip_fast_csum
+#define csum_tcpudp_nofold csum_tcpudp_nofold
+
+#include <asm-generic/checksum.h>
+
+#endif /* _ASM_ARC_CHECKSUM_H */
diff --git a/arch/arc/include/asm/swab.h b/arch/arc/include/asm/swab.h
new file mode 100644 (file)
index 0000000..095599a
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ *  -Support single cycle endian-swap insn in ARC700 4.10
+ *
+ * vineetg: June 2009
+ *  -Better htonl implementation (5 instead of 9 ALU instructions)
+ *  -Hardware assisted single cycle bswap (Use Case of ARC custom instrn)
+ */
+
+#ifndef __ASM_ARC_SWAB_H
+#define __ASM_ARC_SWAB_H
+
+#include <linux/types.h>
+
+/* Native single cycle endian swap insn */
+#ifdef CONFIG_ARC_HAS_SWAPE
+
+#define __arch_swab32(x)               \
+({                                     \
+       unsigned int tmp = x;           \
+       __asm__(                        \
+       "       swape   %0, %1  \n"     \
+       : "=r" (tmp)                    \
+       : "r" (tmp));                   \
+       tmp;                            \
+})
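+
+/* e.g. swape applied to 0x11223344 yields 0x44332211, in one cycle */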
+
+#else
+
+/* Several ways of Endian-Swap Emulation for ARC
+ * 0: kernel generic
+ * 1: ARC optimised "C"
+ * 2: ARC Custom instruction
+ */
+#define ARC_BSWAP_TYPE 1
+
+#if (ARC_BSWAP_TYPE == 1)              /******* Software only ********/
+
+/* The kernel default implementation of htonl is
+ *             return  x<<24 | x>>24 |
+ *              (x & (__u32)0x0000ff00UL)<<8 | (x & (__u32)0x00ff0000UL)>>8;
+ *
+ * This generates 9 instructions on ARC (excluding the ld/st)
+ *
+ * 8051fd8c:   ld     r3,[r7,20]       ; Mem op : Get the value to be swapped
+ * 8051fd98:   asl    r5,r3,24         ; get  3rd Byte
+ * 8051fd9c:   lsr    r2,r3,24         ; get  0th Byte
+ * 8051fda0:   and    r4,r3,0xff00
+ * 8051fda8:   asl    r4,r4,8          ; get 1st Byte
+ * 8051fdac:   and    r3,r3,0x00ff0000
+ * 8051fdb4:   or     r2,r2,r5         ; combine 0th and 3rd Bytes
+ * 8051fdb8:   lsr    r3,r3,8          ; 2nd Byte at correct place in Dst Reg
+ * 8051fdbc:   or     r2,r2,r4         ; combine 0,3 Bytes with 1st Byte
+ * 8051fdc0:   or     r2,r2,r3         ; combine 0,3,1 Bytes with 2nd Byte
+ * 8051fdc4:   st     r2,[r1,20]       ; Mem op : save result back to mem
+ *
+ * Joern suggested a better "C" algorithm which is great since
+ * (1) It is portable to any architecture
+ * (2) At the same time it takes advantage of the ARC ISA (rotate instructions)
+ */
+
+#define __arch_swab32(x)                                       \
+({     unsigned long __in = (x), __tmp;                        \
+       __tmp = __in << 8 | __in >> 24; /* ror tmp,in,24 */     \
+       __in = __in << 24 | __in >> 8; /* ror in,in,8 */        \
+       __tmp ^= __in;                                          \
+       __tmp &= 0xff00ff;                                      \
+       __tmp ^ __in;                                           \
+})
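+
+/*
+ * Worked example of the 5-op sequence above, for x = 0x11223344:
+ *   __tmp = ror(x, 24) = 0x22334411
+ *   __in  = ror(x, 8)  = 0x44112233
+ *   __tmp ^= __in      -> 0x66226622
+ *   __tmp &= 0xff00ff  -> 0x00220022
+ *   __tmp ^ __in       -> 0x44332211 (bytes reversed)
+ */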
+
+#elif (ARC_BSWAP_TYPE == 2)    /* Custom single cycle bswap instruction */
+
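+/*
+ * .extInstruction declares the custom opcode to the assembler
+ * (major opcode 7, sub-opcode 0x00, no flag-setting suffix, 2-operand
+ * syntax) so that the 'bswap' mnemonic below assembles.
+ */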
+#define __arch_swab32(x)                                               \
+({                                                                     \
+       unsigned int tmp = x;                                           \
+       __asm__(                                                        \
+       "       .extInstruction bswap, 7, 0x00, SUFFIX_NONE, SYNTAX_2OP \n"\
+       "       bswap  %0, %1                                           \n"\
+       : "=r" (tmp)                                                    \
+       : "r" (tmp));                                                   \
+       tmp;                                                            \
+})
+
+#endif /* ARC_BSWAP_TYPE */
+
+#endif /* CONFIG_ARC_HAS_SWAPE */
+
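+/*
+ * No native 64-bit swap: let the generic byteorder code synthesize
+ * __swab64( ) from two 32-bit swaps.
+ */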
+#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+#define __SWAB_64_THRU_32__
+#endif
+
+#endif /* __ASM_ARC_SWAB_H */