arm64: bpf: optimize modulo operation
author Jerin Jacob <jerinj@marvell.com>
Mon, 2 Sep 2019 06:14:48 +0000 (11:44 +0530)
committer Daniel Borkmann <daniel@iogearbox.net>
Tue, 3 Sep 2019 13:44:40 +0000 (15:44 +0200)
Optimize modulo operation instruction generation by using a single
MSUB instruction instead of a MUL followed by a SUB instruction.

Signed-off-by: Jerin Jacob <jerinj@marvell.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
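
[Editor's note: a minimal user-space sketch, not part of the patch, of the
arithmetic the JIT emits for BPF_MOD. The quotient is computed with UDIV and
the remainder is then formed in one step, dst = dst - tmp * src, which MSUB
(msub dst, tmp, src, dst) performs in a single instruction instead of the
former MUL plus SUB pair. The helper name below is illustrative only.]

#include <assert.h>
#include <stdint.h>

/* Illustrative only: mirrors the emitted UDIV + MSUB sequence. */
static uint64_t mod_via_udiv_msub(uint64_t dst, uint64_t src)
{
	uint64_t tmp = dst / src;   /* A64_UDIV(is64, tmp, dst, src)      */
	return dst - tmp * src;     /* A64_MSUB(is64, dst, dst, tmp, src) */
}

int main(void)
{
	assert(mod_via_udiv_msub(17, 5) == 17 % 5);
	assert(mod_via_udiv_msub(1ULL << 40, 7) == (1ULL << 40) % 7);
	return 0;
}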
arch/arm64/net/bpf_jit.h
arch/arm64/net/bpf_jit_comp.c

diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index cb7ab50..eb73f9f 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
 /* Rd = Ra + Rn * Rm */
 #define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
        A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD)
+/* Rd = Ra - Rn * Rm */
+#define A64_MSUB(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
+       A64_VARIANT(sf), AARCH64_INSN_DATA3_MSUB)
 /* Rd = Rn * Rm */
 #define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm)
 
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f5b437f..cdc79de 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -409,8 +409,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
                        break;
                case BPF_MOD:
                        emit(A64_UDIV(is64, tmp, dst, src), ctx);
-                       emit(A64_MUL(is64, tmp, tmp, src), ctx);
-                       emit(A64_SUB(is64, dst, dst, tmp), ctx);
+                       emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
                        break;
                }
                break;
@@ -516,8 +515,7 @@ emit_bswap_uxt:
        case BPF_ALU64 | BPF_MOD | BPF_K:
                emit_a64_mov_i(is64, tmp2, imm, ctx);
                emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
-               emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
-               emit(A64_SUB(is64, dst, dst, tmp), ctx);
+               emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
                break;
        case BPF_ALU | BPF_LSH | BPF_K:
        case BPF_ALU64 | BPF_LSH | BPF_K: