/*
 * bpf_jit_comp.c: BPF JIT compiler
 *
 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>

#include <asm/set_memory.h>
#include <asm/nospec-branch.h>

/*
 * Assembly code in arch/x86/net/bpf_jit.S
 */
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
extern u8 sk_load_byte_positive_offset[];
extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
extern u8 sk_load_byte_negative_offset[];

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
	if (len == 1)
		*ptr = bytes;
	else if (len == 2)
		*(u16 *)ptr = bytes;
	else {
		*(u32 *)ptr = bytes;
		barrier();
	}
	return ptr + len;
}

#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

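/*
 * Example: the byte arguments pack little-endian into the immediate, so
 * on x86 emit_code() lays them down in argument order. For instance,
 * EMIT3(0x48, 0x89, 0xE5) appends the three bytes 48 89 E5, which decode
 * as 'mov rbp, rsp'.
 */
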
static bool is_imm8(int value)
{
	return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
	return value == (s64)(s32)value;
}

static bool is_uimm32(u64 value)
{
	return value == (u64)(u32)value;
}

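/*
 * Example: the emitters below use is_imm8() to pick between the short and
 * long instruction forms, e.g. 'add rax, 5' fits the imm8 form
 * (48 83 C0 05) while 'add rax, 500' needs the imm32 form
 * (48 81 C0 F4 01 00 00).
 */
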
#define EMIT_mov(DST, SRC)								 \
	do {										 \
		if (DST != SRC)								 \
			EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
	} while (0)

static int bpf_size_to_x86_bytes(int bpf_size)
{
	if (bpf_size == BPF_W)
		return 4;
	else if (bpf_size == BPF_H)
		return 2;
	else if (bpf_size == BPF_B)
		return 1;
	else if (bpf_size == BPF_DW)
		return 4; /* imm32 */
	else
		return 0;
}

/*
 * List of x86 cond jumps opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define X86_JB  0x72
#define X86_JAE 0x73
#define X86_JE  0x74
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA  0x77
#define X86_JL  0x7C
#define X86_JGE 0x7D
#define X86_JLE 0x7E
#define X86_JG  0x7F

#define CHOOSE_LOAD_FUNC(K, func) \
	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)

/*
 * The following table maps BPF registers to x86-64 registers.
 *
 * x86-64 register R12 is unused, since if used as base address
 * register in load/store instructions, it always needs an
 * extra byte of encoding and is callee saved.
 *
 *  R9 caches skb->len - skb->data_len
 * R10 caches skb->data, and used for blinding (if enabled)
 */
static const int reg2hex[] = {
	[BPF_REG_0] = 0,  /* RAX */
	[BPF_REG_1] = 7,  /* RDI */
	[BPF_REG_2] = 6,  /* RSI */
	[BPF_REG_3] = 2,  /* RDX */
	[BPF_REG_4] = 1,  /* RCX */
	[BPF_REG_5] = 0,  /* R8  */
	[BPF_REG_6] = 3,  /* RBX callee saved */
	[BPF_REG_7] = 5,  /* R13 callee saved */
	[BPF_REG_8] = 6,  /* R14 callee saved */
	[BPF_REG_9] = 7,  /* R15 callee saved */
	[BPF_REG_FP] = 5, /* RBP readonly */
	[BPF_REG_AX] = 2, /* R10 temp register */
	[AUX_REG] = 3,    /* R11 temp register */
};

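/*
 * Example: the mapping mirrors the x86-64 SysV calling convention --
 * BPF R1..R5 (helper arguments) land in RDI, RSI, RDX, RCX and R8, and
 * BPF R0 (return value) in RAX, so a helper call needs no register
 * shuffling beyond what the C ABI already provides.
 */
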
/*
 * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
 * which need extra byte of encoding.
 * rax,rcx,...,rbp have simpler encoding
 */
static bool is_ereg(u32 reg)
{
	return (1 << reg) & (BIT(BPF_REG_5) |
			     BIT(AUX_REG) |
			     BIT(BPF_REG_7) |
			     BIT(BPF_REG_8) |
			     BIT(BPF_REG_9) |
			     BIT(BPF_REG_AX));
}

static bool is_axreg(u32 reg)
{
	return reg == BPF_REG_0;
}

/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
static u8 add_1mod(u8 byte, u32 reg)
{
	if (is_ereg(reg))
		byte |= 1;
	return byte;
}

static u8 add_2mod(u8 byte, u32 r1, u32 r2)
{
	if (is_ereg(r1))
		byte |= 1;
	if (is_ereg(r2))
		byte |= 4;
	return byte;
}

/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
	return byte + reg2hex[dst_reg];
}

/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
}

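/*
 * Worked example: EMIT_mov(BPF_REG_6, BPF_REG_9) emits
 * add_2mod(0x48, R6, R9) = 0x4C (REX.W plus REX.R for r15), opcode 0x89,
 * and add_2reg(0xC0, R6, R9) = 0xC0 + 3 + (7 << 3) = 0xFB, i.e. the
 * bytes 4C 89 FB = 'mov rbx, r15'.
 */
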
static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with INT3 instructions */
	memset(area, 0xcc, size);
}

struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
	bool seen_ld_abs;
	bool seen_ax_reg;
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

#define AUX_STACK_SPACE \
	(32 /* Space for RBX, R13, R14, R15 */ + \
	 8 /* Space for skb_copy_bits() buffer */)

#define PROLOGUE_SIZE 37

/*
 * Emit x86-64 prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program
 */
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
	u8 *prog = *pprog;
	int cnt = 0;

	EMIT1(0x55);             /* push rbp */
	EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */

	/* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
	EMIT3_off32(0x48, 0x81, 0xEC,
		    round_up(stack_depth, 8) + AUX_STACK_SPACE);

	/* sub rbp, AUX_STACK_SPACE */
	EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);

	/* All classic BPF filters use R6(rbx) save it */

	/* mov qword ptr [rbp+0],rbx */
	EMIT4(0x48, 0x89, 0x5D, 0);

	/*
	 * bpf_convert_filter() maps classic BPF register X to R7 and uses R8
	 * as temporary, so all tcpdump filters need to spill/fill R7(R13) and
	 * R8(R14). R9(R15) spill could be made conditional, but there is only
	 * one 'bpf_error' return path out of helper functions inside bpf_jit.S
	 * The overhead of extra spill is negligible for any filter other
	 * than synthetic ones. Therefore not worth adding complexity.
	 */

	/* mov qword ptr [rbp+8],r13 */
	EMIT4(0x4C, 0x89, 0x6D, 8);
	/* mov qword ptr [rbp+16],r14 */
	EMIT4(0x4C, 0x89, 0x75, 16);
	/* mov qword ptr [rbp+24],r15 */
	EMIT4(0x4C, 0x89, 0x7D, 24);

	if (!ebpf_from_cbpf) {
		/*
		 * Clear the tail call counter (tail_call_cnt): for eBPF tail
		 * calls we need to reset the counter to 0. It's done in two
		 * instructions, resetting RAX register to 0, and moving it
		 * to the counter location.
		 */

		/* xor eax, eax */
		EMIT2(0x31, 0xc0);
		/* mov qword ptr [rbp+32], rax */
		EMIT4(0x48, 0x89, 0x45, 32);

		BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
	}

	*pprog = prog;
}

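/*
 * Example: for a program with stack_depth == 512 the prologue assembles to
 * 'push rbp; mov rbp,rsp; sub rsp,0x228; sub rbp,0x28; ...' -- 512 rounds
 * to itself and AUX_STACK_SPACE adds 40 (0x28) bytes, so rsp drops by
 * 0x228 while rbp is re-pointed at the register save area.
 */
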
/*
 * Generate the following code:
 *
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
static void emit_bpf_tail_call(u8 **pprog)
{
	u8 *prog = *pprog;
	int label1, label2, label3;
	int cnt = 0;

	/*
	 * rdi - pointer to ctx
	 * rsi - pointer to bpf_array
	 * rdx - index in bpf_array
	 */

	/*
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
	      offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
	label1 = cnt;

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 */
	EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JA, OFFSET2);                   /* ja out */
	label2 = cnt;
	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
	EMIT2_off32(0x89, 0x85, 36);              /* mov dword ptr [rbp + 36], eax */

	/* prog = array->ptrs[index]; */
	EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
		    offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *	goto out;
	 */
	EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JE, OFFSET3);                   /* je out */
	label3 = cnt;

	/* goto *(prog->bpf_func + prologue_size); */
	EMIT4(0x48, 0x8B, 0x40,                   /* mov rax, qword ptr [rax + 32] */
	      offsetof(struct bpf_prog, bpf_func));
	EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE);   /* add rax, prologue_size */

	/*
	 * Now we're ready to jump into next BPF program
	 * rdi == ctx (1st arg)
	 * rax == prog->bpf_func + prologue_size
	 */
	RETPOLINE_RAX_BPF_JIT();

	/* out: */
	BUILD_BUG_ON(cnt - label1 != OFFSET1);
	BUILD_BUG_ON(cnt - label2 != OFFSET2);
	BUILD_BUG_ON(cnt - label3 != OFFSET3);

	*pprog = prog;
}

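/*
 * The jump targets above are hardcoded byte counts rather than relocated
 * labels: OFFSET1/2/3 state how far 'out' is from each conditional jump,
 * and the BUILD_BUG_ON()s verify at compile time that the emitted code
 * still matches those distances (including the retpoline thunk size).
 */
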
static void emit_load_skb_data_hlen(u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/*
	 * r9d = skb->len - skb->data_len (headlen)
	 * r10 = skb->data
	 */
	/* mov %r9d, off32(%rdi) */
	EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));

	/* sub %r9d, off32(%rdi) */
	EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));

	/* mov %r10, off32(%rdi) */
	EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));

	*pprog = prog;
}

static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
			   u32 dst_reg, const u32 imm32)
{
	u8 *prog = *pprog;
	u8 b1, b2, b3;
	int cnt = 0;

	/*
	 * Optimization: if imm32 is positive, use 'mov %eax, imm32'
	 * (which zero-extends imm32) to save 2 bytes.
	 */
	if (sign_propagate && (s32)imm32 < 0) {
		/* 'mov %rax, imm32' sign extends imm32 */
		b1 = add_1mod(0x48, dst_reg);
		b2 = 0xC7;
		b3 = 0xC0;
		EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
		goto done;
	}

	/*
	 * Optimization: if imm32 is zero, use 'xor %eax, %eax'
	 * to save 3 bytes.
	 */
	if (imm32 == 0) {
		if (is_ereg(dst_reg))
			EMIT1(add_2mod(0x40, dst_reg, dst_reg));
		b2 = 0x31; /* xor */
		b3 = 0xC0;
		EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
		goto done;
	}

	/* mov %eax, imm32 */
	if (is_ereg(dst_reg))
		EMIT1(add_1mod(0x40, dst_reg));
	EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
done:
	*pprog = prog;
}

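/*
 * Example: emit_mov_imm32(&prog, false, BPF_REG_0, 1) emits the 5-byte
 * 'mov eax, 1' (B8 01 00 00 00), while with sign_propagate == true and
 * imm32 == -1 it emits the 7-byte 'mov rax, -1' (48 C7 C0 FF FF FF FF),
 * and imm32 == 0 shrinks to the 2-byte 'xor eax, eax' (31 C0).
 */
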
static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
			   const u32 imm32_hi, const u32 imm32_lo)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
		/*
		 * For emitting plain u32, where sign bit must not be
		 * propagated LLVM tends to load imm64 over mov32
		 * directly, so save couple of bytes by just doing
		 * 'mov %eax, imm32' instead.
		 */
		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
	} else {
		/* movabsq %rax, imm64 */
		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
		EMIT(imm32_lo, 4);
		EMIT(imm32_hi, 4);
	}

	*pprog = prog;
}

static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is64) {
		/* mov dst, src */
		EMIT_mov(dst_reg, src_reg);
	} else {
		/* mov32 dst, src */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT1(add_2mod(0x40, dst_reg, src_reg));
		EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
	}

	*pprog = prog;
}

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
		  int oldproglen, struct jit_context *ctx)
{
	struct bpf_insn *insn = bpf_prog->insnsi;
	int insn_cnt = bpf_prog->len;
	bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
	bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
	bool seen_exit = false;
	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
	int i, cnt = 0;
	int proglen = 0;
	u8 *prog = temp;

	emit_prologue(&prog, bpf_prog->aux->stack_depth,
		      bpf_prog_was_classic(bpf_prog));

	if (seen_ld_abs)
		emit_load_skb_data_hlen(&prog);

	for (i = 0; i < insn_cnt; i++, insn++) {
		const s32 imm32 = insn->imm;
		u32 dst_reg = insn->dst_reg;
		u32 src_reg = insn->src_reg;
		u8 b2 = 0, b3 = 0;
		s64 jmp_offset;
		u8 jmp_cond;
		bool reload_skb_data;
		int ilen;
		u8 *func;

		if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
			ctx->seen_ax_reg = seen_ax_reg = true;

		switch (insn->code) {
			/* ALU */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU64 | BPF_ADD | BPF_X:
		case BPF_ALU64 | BPF_SUB | BPF_X:
		case BPF_ALU64 | BPF_AND | BPF_X:
		case BPF_ALU64 | BPF_OR | BPF_X:
		case BPF_ALU64 | BPF_XOR | BPF_X:
			switch (BPF_OP(insn->code)) {
			case BPF_ADD: b2 = 0x01; break;
			case BPF_SUB: b2 = 0x29; break;
			case BPF_AND: b2 = 0x21; break;
			case BPF_OR: b2 = 0x09; break;
			case BPF_XOR: b2 = 0x31; break;
			}
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_2mod(0x48, dst_reg, src_reg));
			else if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT1(add_2mod(0x40, dst_reg, src_reg));
			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
			break;
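
			/*
			 * Example: BPF_ALU64 | BPF_ADD | BPF_X with dst R1 and
			 * src R2 selects b2 = 0x01 and emits 48 01 F7
			 * ('add rdi, rsi'); the 32-bit BPF_ALU variant emits
			 * just 01 F7 ('add edi, esi') unless one of the
			 * registers needs a REX prefix for r8..r15.
			 */
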
		case BPF_ALU64 | BPF_MOV | BPF_X:
		case BPF_ALU | BPF_MOV | BPF_X:
			emit_mov_reg(&prog,
				     BPF_CLASS(insn->code) == BPF_ALU64,
				     dst_reg, src_reg);
			break;

			/* neg dst */
		case BPF_ALU | BPF_NEG:
		case BPF_ALU64 | BPF_NEG:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));
			EMIT2(0xF7, add_1reg(0xD8, dst_reg));
			break;
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU64 | BPF_ADD | BPF_K:
		case BPF_ALU64 | BPF_SUB | BPF_K:
		case BPF_ALU64 | BPF_AND | BPF_K:
		case BPF_ALU64 | BPF_OR | BPF_K:
		case BPF_ALU64 | BPF_XOR | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			/*
			 * b3 holds 'normal' opcode, b2 short form only valid
			 * in case dst is eax/rax.
			 */
			switch (BPF_OP(insn->code)) {
			case BPF_ADD: b3 = 0xC0; b2 = 0x05; break;
			case BPF_SUB: b3 = 0xE8; b2 = 0x2D; break;
			case BPF_AND: b3 = 0xE0; b2 = 0x25; break;
			case BPF_OR: b3 = 0xC8; b2 = 0x0D; break;
			case BPF_XOR: b3 = 0xF0; b2 = 0x35; break;
			}

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
			else if (is_axreg(dst_reg))
				EMIT1_off32(b2, imm32);
			else
				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
			break;
		case BPF_ALU64 | BPF_MOV | BPF_K:
		case BPF_ALU | BPF_MOV | BPF_K:
			emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
				       dst_reg, imm32);
			break;

		case BPF_LD | BPF_IMM | BPF_DW:
			emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
			insn++;
			i++;
			break;
			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU64 | BPF_MOD | BPF_X:
		case BPF_ALU64 | BPF_DIV | BPF_X:
		case BPF_ALU64 | BPF_MOD | BPF_K:
		case BPF_ALU64 | BPF_DIV | BPF_K:
			EMIT1(0x50); /* push rax */
			EMIT1(0x52); /* push rdx */

			if (BPF_SRC(insn->code) == BPF_X)
				/* mov r11, src_reg */
				EMIT_mov(AUX_REG, src_reg);
			else
				/* mov r11, imm32 */
				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);

			/* mov rax, dst_reg */
			EMIT_mov(BPF_REG_0, dst_reg);

			/*
			 * xor edx, edx
			 * equivalent to 'xor rdx, rdx', but one byte less
			 */
			EMIT2(0x31, 0xd2);

			if (BPF_CLASS(insn->code) == BPF_ALU64)
				/* div r11 */
				EMIT3(0x49, 0xF7, 0xF3);
			else
				/* div r11d */
				EMIT3(0x41, 0xF7, 0xF3);

			if (BPF_OP(insn->code) == BPF_MOD)
				/* mov r11, rdx */
				EMIT3(0x49, 0x89, 0xD3);
			else
				/* mov r11, rax */
				EMIT3(0x49, 0x89, 0xC3);

			EMIT1(0x5A); /* pop rdx */
			EMIT1(0x58); /* pop rax */

			/* mov dst_reg, r11 */
			EMIT_mov(dst_reg, AUX_REG);
			break;
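
			/*
			 * The shuffling above exists because x86 DIV has fixed
			 * operands: it divides rdx:rax by the given register
			 * and leaves the quotient in rax and the remainder in
			 * rdx. Both registers are therefore saved, zeroed or
			 * loaded as needed, and the result comes back via r11.
			 */
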
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU64 | BPF_MUL | BPF_K:
		case BPF_ALU64 | BPF_MUL | BPF_X:
		{
			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;

			if (dst_reg != BPF_REG_0)
				EMIT1(0x50); /* push rax */
			if (dst_reg != BPF_REG_3)
				EMIT1(0x52); /* push rdx */

			/* mov r11, dst_reg */
			EMIT_mov(AUX_REG, dst_reg);

			if (BPF_SRC(insn->code) == BPF_X)
				emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
			else
				emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);

			if (is64)
				EMIT1(add_1mod(0x48, AUX_REG));
			else if (is_ereg(AUX_REG))
				EMIT1(add_1mod(0x40, AUX_REG));
			/* mul(q) r11 */
			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

			if (dst_reg != BPF_REG_3)
				EMIT1(0x5A); /* pop rdx */
			if (dst_reg != BPF_REG_0) {
				/* mov dst_reg, rax */
				EMIT_mov(dst_reg, BPF_REG_0);
				EMIT1(0x58); /* pop rax */
			}
			break;
		}
			/* Shifts */
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_ARSH | BPF_K:
		case BPF_ALU64 | BPF_LSH | BPF_K:
		case BPF_ALU64 | BPF_RSH | BPF_K:
		case BPF_ALU64 | BPF_ARSH | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}

			if (imm32 == 1)
				EMIT2(0xD1, add_1reg(b3, dst_reg));
			else
				EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_ARSH | BPF_X:
		case BPF_ALU64 | BPF_LSH | BPF_X:
		case BPF_ALU64 | BPF_RSH | BPF_X:
		case BPF_ALU64 | BPF_ARSH | BPF_X:
			/* Check for bad case when dst_reg == rcx */
			if (dst_reg == BPF_REG_4) {
				/* mov r11, dst_reg */
				EMIT_mov(AUX_REG, dst_reg);
				dst_reg = AUX_REG;
			}

			if (src_reg != BPF_REG_4) { /* common case */
				EMIT1(0x51); /* push rcx */

				/* mov rcx, src_reg */
				EMIT_mov(BPF_REG_4, src_reg);
			}

			/* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}
			EMIT2(0xD3, add_1reg(b3, dst_reg));

			if (src_reg != BPF_REG_4)
				EMIT1(0x59); /* pop rcx */

			if (insn->dst_reg == BPF_REG_4)
				/* mov dst_reg, r11 */
				EMIT_mov(insn->dst_reg, AUX_REG);
			break;
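
			/*
			 * Variable shifts are awkward on x86 because the count
			 * must live in %cl, so the emitter spills %rcx when
			 * the BPF source register is not already R4 (%rcx),
			 * and parks dst in r11 when dst itself is %rcx.
			 */
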
		case BPF_ALU | BPF_END | BPF_FROM_BE:
			switch (imm32) {
			case 16:
				/* Emit 'ror %ax, 8' to swap lower 2 bytes */
				EMIT1(0x66);
				if (is_ereg(dst_reg))
					EMIT1(0x41);
				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);

				/* Emit 'movzwl eax, ax' */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'bswap eax' to swap lower 4 bytes */
				if (is_ereg(dst_reg))
					EMIT2(0x41, 0x0F);
				else
					EMIT1(0x0F);
				EMIT1(add_1reg(0xC8, dst_reg));
				break;
			case 64:
				/* Emit 'bswap rax' to swap 8 bytes */
				EMIT3(add_1mod(0x48, dst_reg), 0x0F,
				      add_1reg(0xC8, dst_reg));
				break;
			}
			break;
		case BPF_ALU | BPF_END | BPF_FROM_LE:
			switch (imm32) {
			case 16:
				/*
				 * Emit 'movzwl eax, ax' to zero extend 16-bit
				 * into 64 bit
				 */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'mov eax, eax' to clear upper 32-bits */
				if (is_ereg(dst_reg))
					EMIT1(0x45);
				EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 64:
				/* nop */
				break;
			}
			break;
			/* ST: *(u8*)(dst_reg + off) = imm */
		case BPF_ST | BPF_MEM | BPF_B:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC6);
			else
				EMIT1(0xC6);
			goto st;
		case BPF_ST | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg))
				EMIT3(0x66, 0x41, 0xC7);
			else
				EMIT2(0x66, 0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC7);
			else
				EMIT1(0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_DW:
			EMIT2(add_1mod(0x48, dst_reg), 0xC7);

st:			if (is_imm8(insn->off))
				EMIT2(add_1reg(0x40, dst_reg), insn->off);
			else
				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);

			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
			break;
			/* STX: *(u8*)(dst_reg + off) = src_reg */
		case BPF_STX | BPF_MEM | BPF_B:
			/* Emit 'mov byte ptr [rax + off], al' */
			if (is_ereg(dst_reg) || is_ereg(src_reg) ||
			    /* We have to add extra byte for x86 SIL, DIL regs */
			    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
			else
				EMIT1(0x88);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
			else
				EMIT2(0x66, 0x89);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
			else
				EMIT1(0x89);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_DW:
			EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
stx:			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
					    insn->off);
			break;
			/* LDX: dst_reg = *(u8*)(src_reg + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			/* Emit 'movzx rax, byte ptr [rax + off]' */
			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_H:
			/* Emit 'movzx rax, word ptr [rax + off]' */
			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_W:
			/* Emit 'mov eax, dword ptr [rax+0x14]' */
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
			else
				EMIT1(0x8B);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_DW:
			/* Emit 'mov rax, qword ptr [rax+0x14]' */
			EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx:			/*
			 * If insn->off == 0 we can save one extra byte, but
			 * special case of x86 R13 which always needs an offset
			 * is not worth the hassle
			 */
			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
					    insn->off);
			break;
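
			/*
			 * Example: BPF_LDX | BPF_MEM | BPF_W with dst R0,
			 * src R1 and off 0x14 emits 8B 47 14
			 * ('mov eax, dword ptr [rdi + 0x14]'): ModRM 0x47 is
			 * mod=01 (disp8), reg=000 (eax), rm=111 (rdi),
			 * followed by the one-byte offset.
			 */
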
			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Emit 'lock add dword ptr [rax + off], eax' */
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
			else
				EMIT2(0xF0, 0x01);
			goto xadd;
		case BPF_STX | BPF_XADD | BPF_DW:
			EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
xadd:			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
					    insn->off);
			break;
			/* call */
		case BPF_JMP | BPF_CALL:
			func = (u8 *) __bpf_call_base + imm32;
			jmp_offset = func - (image + addrs[i]);
			if (seen_ld_abs) {
				reload_skb_data = bpf_helper_changes_pkt_data(func);
				if (reload_skb_data) {
					EMIT1(0x57); /* push %rdi */
					jmp_offset += 22; /* pop, mov, sub, mov */
				} else {
					EMIT2(0x41, 0x52); /* push %r10 */
					EMIT2(0x41, 0x51); /* push %r9 */
					/*
					 * We need to adjust jmp offset, since
					 * pop %r9, pop %r10 take 4 bytes after call insn
					 */
					jmp_offset += 4;
				}
			}
			if (!imm32 || !is_simm32(jmp_offset)) {
				pr_err("unsupported BPF func %d addr %p image %p\n",
				       imm32, func, image);
				return -EINVAL;
			}
			EMIT1_off32(0xE8, jmp_offset);
			if (seen_ld_abs) {
				if (reload_skb_data) {
					EMIT1(0x5F); /* pop %rdi */
					emit_load_skb_data_hlen(&prog);
				} else {
					EMIT2(0x41, 0x59); /* pop %r9 */
					EMIT2(0x41, 0x5A); /* pop %r10 */
				}
			}
			break;
		case BPF_JMP | BPF_TAIL_CALL:
			emit_bpf_tail_call(&prog);
			break;
			/* cond jump */
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_X:
			/* cmp dst_reg, src_reg */
			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
			      add_2reg(0xC0, dst_reg, src_reg));
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JSET | BPF_X:
			/* test dst_reg, src_reg */
			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
			      add_2reg(0xC0, dst_reg, src_reg));
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JSET | BPF_K:
			/* test dst_reg, imm32 */
			EMIT1(add_1mod(0x48, dst_reg));
			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
			goto emit_cond_jmp;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_K:
			/* cmp dst_reg, imm8/32 */
			EMIT1(add_1mod(0x48, dst_reg));

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
			else
				EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);

emit_cond_jmp:		/* Convert BPF opcode to x86 */
			switch (BPF_OP(insn->code)) {
			case BPF_JEQ:
				jmp_cond = X86_JE;
				break;
			case BPF_JSET:
			case BPF_JNE:
				jmp_cond = X86_JNE;
				break;
			case BPF_JGT:
				/* GT is unsigned '>', JA in x86 */
				jmp_cond = X86_JA;
				break;
			case BPF_JLT:
				/* LT is unsigned '<', JB in x86 */
				jmp_cond = X86_JB;
				break;
			case BPF_JGE:
				/* GE is unsigned '>=', JAE in x86 */
				jmp_cond = X86_JAE;
				break;
			case BPF_JLE:
				/* LE is unsigned '<=', JBE in x86 */
				jmp_cond = X86_JBE;
				break;
			case BPF_JSGT:
				/* Signed '>', GT in x86 */
				jmp_cond = X86_JG;
				break;
			case BPF_JSLT:
				/* Signed '<', LT in x86 */
				jmp_cond = X86_JL;
				break;
			case BPF_JSGE:
				/* Signed '>=', GE in x86 */
				jmp_cond = X86_JGE;
				break;
			case BPF_JSLE:
				/* Signed '<=', LE in x86 */
				jmp_cond = X86_JLE;
				break;
			default: /* to silence GCC warning */
				return -EFAULT;
			}

			jmp_offset = addrs[i + insn->off] - addrs[i];
			if (is_imm8(jmp_offset)) {
				EMIT2(jmp_cond, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
			} else {
				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;
		case BPF_JMP | BPF_JA:
			jmp_offset = addrs[i + insn->off] - addrs[i];
			if (!jmp_offset)
				/* Optimize out nop jumps */
				break;
emit_jmp:
			if (is_imm8(jmp_offset)) {
				EMIT2(0xEB, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT1_off32(0xE9, jmp_offset);
			} else {
				pr_err("jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;
		case BPF_LD | BPF_IND | BPF_W:
			func = sk_load_word;
			goto common_load;
		case BPF_LD | BPF_ABS | BPF_W:
			func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
common_load:
			ctx->seen_ld_abs = seen_ld_abs = true;
			jmp_offset = func - (image + addrs[i]);
			if (!func || !is_simm32(jmp_offset)) {
				pr_err("unsupported BPF func %d addr %p image %p\n",
				       imm32, func, image);
				return -EINVAL;
			}
			if (BPF_MODE(insn->code) == BPF_ABS) {
				/* mov %esi, imm32 */
				EMIT1_off32(0xBE, imm32);
			} else {
				/* mov %rsi, src_reg */
				EMIT_mov(BPF_REG_2, src_reg);
				if (imm32) {
					if (is_imm8(imm32))
						/* add %esi, imm8 */
						EMIT3(0x83, 0xC6, imm32);
					else
						/* add %esi, imm32 */
						EMIT2_off32(0x81, 0xC6, imm32);
				}
			}
			/*
			 * skb pointer is in R6 (%rbx), it will be copied into
			 * %rdi if skb_copy_bits() call is necessary.
			 * sk_load_* helpers also use %r10 and %r9d.
			 * See bpf_jit.S
			 */
			if (seen_ax_reg)
				/* r10 = skb->data, mov %r10, off32(%rbx) */
				EMIT3_off32(0x4c, 0x8b, 0x93,
					    offsetof(struct sk_buff, data));
			EMIT1_off32(0xE8, jmp_offset); /* call */
			break;
		case BPF_LD | BPF_IND | BPF_H:
			func = sk_load_half;
			goto common_load;
		case BPF_LD | BPF_ABS | BPF_H:
			func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
			goto common_load;
		case BPF_LD | BPF_IND | BPF_B:
			func = sk_load_byte;
			goto common_load;
		case BPF_LD | BPF_ABS | BPF_B:
			func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
			goto common_load;
		case BPF_JMP | BPF_EXIT:
			if (seen_exit) {
				jmp_offset = ctx->cleanup_addr - addrs[i];
				goto emit_jmp;
			}
			seen_exit = true;
			/* Update cleanup_addr */
			ctx->cleanup_addr = proglen;
			/* mov rbx, qword ptr [rbp+0] */
			EMIT4(0x48, 0x8B, 0x5D, 0);
			/* mov r13, qword ptr [rbp+8] */
			EMIT4(0x4C, 0x8B, 0x6D, 8);
			/* mov r14, qword ptr [rbp+16] */
			EMIT4(0x4C, 0x8B, 0x75, 16);
			/* mov r15, qword ptr [rbp+24] */
			EMIT4(0x4C, 0x8B, 0x7D, 24);

			/* add rbp, AUX_STACK_SPACE */
			EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
			EMIT1(0xC9); /* leave */
			EMIT1(0xC3); /* ret */
			break;
		default:
			/*
			 * By design x86-64 JIT should support all BPF instructions.
			 * This error will be seen if new instruction was added
			 * to the interpreter, but not to the JIT, or if there is
			 * junk in bpf_prog.
			 */
			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}

		ilen = prog - temp;
		if (ilen > BPF_MAX_INSN_SIZE) {
			pr_err("bpf_jit: fatal insn size error\n");
			return -EFAULT;
		}

		if (image) {
			if (unlikely(proglen + ilen > oldproglen)) {
				pr_err("bpf_jit: fatal error\n");
				return -EFAULT;
			}
			memcpy(image + proglen, temp, ilen);
		}
		proglen += ilen;
		addrs[i] = proglen;
		prog = temp;
	}
	return proglen;
}

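/*
 * Note the buffering scheme above: every instruction is first emitted into
 * the on-stack temp[] buffer and only copied into the final image once its
 * length is known, so a single oversized emission can be caught against
 * BPF_MAX_INSN_SIZE before it ever overruns the image.
 */
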
struct x64_jit_data {
	struct bpf_binary_header *header;
	int *addrs;
	u8 *image;
	int proglen;
	struct jit_context ctx;
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header = NULL;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct x64_jit_data *jit_data;
	int proglen, oldproglen = 0;
	struct jit_context ctx = {};
	bool tmp_blinded = false;
	bool extra_pass = false;
	u8 *image = NULL;
	int *addrs;
	int pass;
	int i;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	addrs = jit_data->addrs;
	if (addrs) {
		ctx = jit_data->ctx;
		oldproglen = jit_data->proglen;
		image = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		goto skip_init_addrs;
	}
	addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
	if (!addrs) {
		prog = orig_prog;
		goto out_addrs;
	}

	/*
	 * Before first pass, make a rough estimation of addrs[]
	 * each BPF instruction is translated to less than 64 bytes
	 */
	for (proglen = 0, i = 0; i < prog->len; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	ctx.cleanup_addr = proglen;
skip_init_addrs:
	/*
	 * JITed image shrinks with every pass and the loop iterates
	 * until the image stops shrinking. Very large BPF programs
	 * may converge on the last pass. In such case do one more
	 * pass to emit the final image.
	 */
	for (pass = 0; pass < 20 || image; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
		if (proglen <= 0) {
			image = NULL;
			if (header)
				bpf_jit_binary_free(header);
			prog = orig_prog;
			goto out_addrs;
		}
		if (image) {
			if (proglen != oldproglen) {
				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
				       proglen, oldproglen);
				prog = orig_prog;
				goto out_addrs;
			}
			break;
		}
		if (proglen == oldproglen) {
			header = bpf_jit_binary_alloc(proglen, &image,
						      1, jit_fill_hole);
			if (!header) {
				prog = orig_prog;
				goto out_addrs;
			}
		}
		oldproglen = proglen;
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, proglen, pass + 1, image);
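
	/*
	 * Example of why the passes converge: a forward jump is first sized
	 * against the worst-case 64-bytes-per-insn estimate and may need the
	 * 5-byte rel32 form; once addrs[] tightens, the same jump can fit
	 * the 2-byte rel8 form, which shrinks the image and can in turn
	 * shrink other jumps on the next pass.
	 */
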
	if (image) {
		if (!prog->is_func || extra_pass) {
			bpf_jit_binary_lock_ro(header);
		} else {
			jit_data->addrs = addrs;
			jit_data->ctx = ctx;
			jit_data->proglen = proglen;
			jit_data->image = image;
			jit_data->header = header;
		}
		prog->bpf_func = (void *)image;
		prog->jited = 1;
		prog->jited_len = proglen;
	} else {
		prog = orig_prog;
	}

	if (!prog->is_func || extra_pass) {
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}