powerpc: Force inlining of csum_add()
author Christophe Leroy <christophe.leroy@csgroup.eu>
Tue, 11 May 2021 06:08:06 +0000 (06:08 +0000)
committer Michael Ellerman <mpe@ellerman.id.au>
Tue, 15 Jun 2021 14:16:47 +0000 (00:16 +1000)
commit 4423eff71ca6b8f2c5e0fc4cea33d8cdfe3c3740
tree 4a52da4c785603ad7618a7f2e601ed1cf3754495
parent a4785e93aa364b2605ed2f4a6abea02761b3eaf7
powerpc: Force inlining of csum_add()

Commit 328e7e487a46 ("powerpc: force inlining of csum_partial() to
avoid multiple csum_partial() with GCC10") inlined csum_partial().

Now that csum_partial() is inlined, GCC outlines csum_add() when
called by csum_partial().

c064fb28 <csum_add>:
c064fb28: 7c 63 20 14  addc    r3,r3,r4
c064fb2c: 7c 63 01 94  addze   r3,r3
c064fb30: 4e 80 00 20  blr

c0665fb8 <csum_add>:
c0665fb8: 7c 63 20 14  addc    r3,r3,r4
c0665fbc: 7c 63 01 94  addze   r3,r3
c0665fc0: 4e 80 00 20  blr

c066719c: 7c 9a c0 2e  lwzx    r4,r26,r24
c06671a0: 38 60 00 00  li      r3,0
c06671a4: 7f 1a c2 14  add     r24,r26,r24
c06671a8: 4b ff ee 11  bl      c0665fb8 <csum_add>
c06671ac: 80 98 00 04  lwz     r4,4(r24)
c06671b0: 4b ff ee 09  bl      c0665fb8 <csum_add>
c06671b4: 80 98 00 08  lwz     r4,8(r24)
c06671b8: 4b ff ee 01  bl      c0665fb8 <csum_add>
c06671bc: a0 98 00 0c  lhz     r4,12(r24)
c06671c0: 4b ff ed f9  bl      c0665fb8 <csum_add>
c06671c4: 7c 63 18 f8  not     r3,r3
c06671c8: 81 3f 00 68  lwz     r9,104(r31)
c06671cc: 81 5f 00 a0  lwz     r10,160(r31)
c06671d0: 7d 29 18 14  addc    r9,r9,r3
c06671d4: 7d 29 01 94  addze   r9,r9
c06671d8: 91 3f 00 68  stw     r9,104(r31)
c06671dc: 7d 1a 50 50  subf    r8,r26,r10
c06671e0: 83 01 00 10  lwz     r24,16(r1)
c06671e4: 83 41 00 18  lwz     r26,24(r1)

The sum with 0 is useless and should have been skipped.
There is even one completely unused instance of csum_add().

In file included from ./include/net/checksum.h:22,
                 from ./include/linux/skbuff.h:28,
                 from ./include/linux/icmp.h:16,
                 from net/ipv6/ip6_tunnel.c:23:
./arch/powerpc/include/asm/checksum.h: In function '__ip6_tnl_rcv':
./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline]
   94 | static inline __wsum csum_add(__wsum csum, __wsum addend)
      |                      ^~~~~~~~
./arch/powerpc/include/asm/checksum.h:172:31: note: called from here
  172 |                         sum = csum_add(sum, (__force __wsum)*(const u32 *)buff);
      |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline]
   94 | static inline __wsum csum_add(__wsum csum, __wsum addend)
      |                      ^~~~~~~~
./arch/powerpc/include/asm/checksum.h:177:31: note: called from here
  177 |                         sum = csum_add(sum, (__force __wsum)
      |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  178 |                                             *(const u32 *)(buff + 4));
      |                                             ~~~~~~~~~~~~~~~~~~~~~~~~~
./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline]
   94 | static inline __wsum csum_add(__wsum csum, __wsum addend)
      |                      ^~~~~~~~
./arch/powerpc/include/asm/checksum.h:183:31: note: called from here
  183 |                         sum = csum_add(sum, (__force __wsum)
      |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  184 |                                             *(const u32 *)(buff + 8));
      |                                             ~~~~~~~~~~~~~~~~~~~~~~~~~
./arch/powerpc/include/asm/checksum.h:94:22: warning: inlining failed in call to 'csum_add': call is unlikely and code size would grow [-Winline]
   94 | static inline __wsum csum_add(__wsum csum, __wsum addend)
      |                      ^~~~~~~~
./arch/powerpc/include/asm/checksum.h:186:31: note: called from here
  186 |                         sum = csum_add(sum, (__force __wsum)
      |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  187 |                                             *(const u16 *)(buff + 12));
      |                                             ~~~~~~~~~~~~~~~~~~~~~~~~~~

Force inlining of csum_add().

     94c: 80 df 00 a0  lwz     r6,160(r31)
     950: 7d 28 50 2e  lwzx    r9,r8,r10
     954: 7d 48 52 14  add     r10,r8,r10
     958: 80 aa 00 04  lwz     r5,4(r10)
     95c: 80 ff 00 68  lwz     r7,104(r31)
     960: 7d 29 28 14  addc    r9,r9,r5
     964: 7d 29 01 94  addze   r9,r9
     968: 7d 08 30 50  subf    r8,r8,r6
     96c: 80 aa 00 08  lwz     r5,8(r10)
     970: a1 4a 00 0c  lhz     r10,12(r10)
     974: 7d 29 28 14  addc    r9,r9,r5
     978: 7d 29 01 94  addze   r9,r9
     97c: 7d 29 50 14  addc    r9,r9,r10
     980: 7d 29 01 94  addze   r9,r9
     984: 7d 29 48 f8  not     r9,r9
     988: 7c e7 48 14  addc    r7,r7,r9
     98c: 7c e7 01 94  addze   r7,r7
     990: 90 ff 00 68  stw     r7,104(r31)

In the non-inlined version, the first sum with 0 was performed.
Here it is skipped.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/f7f4d4e364de6e473da874468b903da6e5d97adc.1620713272.git.christophe.leroy@csgroup.eu
arch/powerpc/include/asm/checksum.h