/*
 * bpf_jit_comp.c: BPF JIT compiler
 *
 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>

#include <asm/set_memory.h>
#include <asm/nospec-branch.h>

/*
 * Assembly code in arch/x86/net/bpf_jit.S
 */
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
extern u8 sk_load_byte_positive_offset[];
extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
extern u8 sk_load_byte_negative_offset[];

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
	if (len == 1)
		*ptr = bytes;
	else if (len == 2)
		*(u16 *)ptr = bytes;
	else {
		*(u32 *)ptr = bytes;
		barrier();
	}
	return ptr + len;
}

#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

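/*
 * Example: the byte arguments pack little-endian into the immediate, so
 * on x86 emit_code() lays them down in argument order. For instance,
 * EMIT3(0x48, 0x89, 0xE5) appends the three bytes 48 89 E5, which decode
 * as 'mov rbp, rsp'.
 */
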
static bool is_imm8(int value)
{
	return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
	return value == (s64)(s32)value;
}

static bool is_uimm32(u64 value)
{
	return value == (u64)(u32)value;
}

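/*
 * Example: the emitters below use is_imm8() to pick between the short and
 * long instruction forms, e.g. 'add rax, 5' fits the imm8 form
 * (48 83 C0 05) while 'add rax, 500' needs the imm32 form
 * (48 81 C0 F4 01 00 00).
 */
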
#define EMIT_mov(DST, SRC)								 \
	do {										 \
		if (DST != SRC)								 \
			EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
	} while (0)

static int bpf_size_to_x86_bytes(int bpf_size)
{
	if (bpf_size == BPF_W)
		return 4;
	else if (bpf_size == BPF_H)
		return 2;
	else if (bpf_size == BPF_B)
		return 1;
	else if (bpf_size == BPF_DW)
		return 4; /* imm32 */
	else
		return 0;
}

/*
 * List of x86 cond jumps opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define X86_JB  0x72
#define X86_JAE 0x73
#define X86_JE  0x74
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA  0x77
#define X86_JL  0x7C
#define X86_JGE 0x7D
#define X86_JLE 0x7E
#define X86_JG  0x7F

#define CHOOSE_LOAD_FUNC(K, func) \
	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)

/*
 * The following table maps BPF registers to x86-64 registers.
 *
 * x86-64 register R12 is unused, since if used as base address
 * register in load/store instructions, it always needs an
 * extra byte of encoding and is callee saved.
 *
 *  R9 caches skb->len - skb->data_len
 * R10 caches skb->data, and used for blinding (if enabled)
 */
static const int reg2hex[] = {
	[BPF_REG_0] = 0,  /* RAX */
	[BPF_REG_1] = 7,  /* RDI */
	[BPF_REG_2] = 6,  /* RSI */
	[BPF_REG_3] = 2,  /* RDX */
	[BPF_REG_4] = 1,  /* RCX */
	[BPF_REG_5] = 0,  /* R8  */
	[BPF_REG_6] = 3,  /* RBX callee saved */
	[BPF_REG_7] = 5,  /* R13 callee saved */
	[BPF_REG_8] = 6,  /* R14 callee saved */
	[BPF_REG_9] = 7,  /* R15 callee saved */
	[BPF_REG_FP] = 5, /* RBP readonly */
	[BPF_REG_AX] = 2, /* R10 temp register */
	[AUX_REG] = 3,    /* R11 temp register */
};

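/*
 * Example: the mapping mirrors the x86-64 SysV calling convention --
 * BPF R1..R5 (helper arguments) land in RDI, RSI, RDX, RCX and R8, and
 * BPF R0 (return value) in RAX, so a helper call needs no register
 * shuffling beyond what the C ABI already provides.
 */
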
/*
 * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
 * which need extra byte of encoding.
 * rax,rcx,...,rbp have simpler encoding
 */
static bool is_ereg(u32 reg)
{
	return (1 << reg) & (BIT(BPF_REG_5) |
			     BIT(AUX_REG) |
			     BIT(BPF_REG_7) |
			     BIT(BPF_REG_8) |
			     BIT(BPF_REG_9) |
			     BIT(BPF_REG_AX));
}

static bool is_axreg(u32 reg)
{
	return reg == BPF_REG_0;
}

/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
static u8 add_1mod(u8 byte, u32 reg)
{
	if (is_ereg(reg))
		byte |= 1;
	return byte;
}

static u8 add_2mod(u8 byte, u32 r1, u32 r2)
{
	if (is_ereg(r1))
		byte |= 1;
	if (is_ereg(r2))
		byte |= 4;
	return byte;
}

/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
	return byte + reg2hex[dst_reg];
}

/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
}

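/*
 * Worked example: EMIT_mov(BPF_REG_6, BPF_REG_9) emits
 * add_2mod(0x48, R6, R9) = 0x4C (REX.W plus REX.R for r15), opcode 0x89,
 * and add_2reg(0xC0, R6, R9) = 0xC0 + 3 + (7 << 3) = 0xFB, i.e. the
 * bytes 4C 89 FB = 'mov rbx, r15'.
 */
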
static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with INT3 instructions */
	memset(area, 0xcc, size);
}

struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
	bool seen_ld_abs;
	bool seen_ax_reg;
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

#define AUX_STACK_SPACE \
	(32 /* Space for RBX, R13, R14, R15 */ + \
	 8 /* Space for skb_copy_bits() buffer */)

#define PROLOGUE_SIZE 37

/*
 * Emit x86-64 prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program
 */
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
	u8 *prog = *pprog;
	int cnt = 0;

	EMIT1(0x55);             /* push rbp */
	EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */

	/* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
	EMIT3_off32(0x48, 0x81, 0xEC,
		    round_up(stack_depth, 8) + AUX_STACK_SPACE);

	/* sub rbp, AUX_STACK_SPACE */
	EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);

	/* All classic BPF filters use R6(rbx) save it */

	/* mov qword ptr [rbp+0],rbx */
	EMIT4(0x48, 0x89, 0x5D, 0);

	/*
	 * bpf_convert_filter() maps classic BPF register X to R7 and uses R8
	 * as temporary, so all tcpdump filters need to spill/fill R7(R13) and
	 * R8(R14). R9(R15) spill could be made conditional, but there is only
	 * one 'bpf_error' return path out of helper functions inside bpf_jit.S
	 * The overhead of extra spill is negligible for any filter other
	 * than synthetic ones. Therefore not worth adding complexity.
	 */

	/* mov qword ptr [rbp+8],r13 */
	EMIT4(0x4C, 0x89, 0x6D, 8);
	/* mov qword ptr [rbp+16],r14 */
	EMIT4(0x4C, 0x89, 0x75, 16);
	/* mov qword ptr [rbp+24],r15 */
	EMIT4(0x4C, 0x89, 0x7D, 24);

	if (!ebpf_from_cbpf) {
		/*
		 * Clear the tail call counter (tail_call_cnt): for eBPF tail
		 * calls we need to reset the counter to 0. It's done in two
		 * instructions, resetting RAX register to 0, and moving it
		 * to the counter location.
		 */

		/* xor eax, eax */
		EMIT2(0x31, 0xc0);
		/* mov qword ptr [rbp+32], rax */
		EMIT4(0x48, 0x89, 0x45, 32);

		BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
	}

	*pprog = prog;
}

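/*
 * Example: for a program with stack_depth == 512 the prologue assembles to
 * 'push rbp; mov rbp,rsp; sub rsp,0x228; sub rbp,0x28; ...' -- 512 rounds
 * to itself and AUX_STACK_SPACE adds 40 (0x28) bytes, so rsp drops by
 * 0x228 while rbp is re-pointed at the register save area.
 */
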
/*
 * Generate the following code:
 *
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
static void emit_bpf_tail_call(u8 **pprog)
{
	u8 *prog = *pprog;
	int label1, label2, label3;
	int cnt = 0;

	/*
	 * rdi - pointer to ctx
	 * rsi - pointer to bpf_array
	 * rdx - index in bpf_array
	 */

	/*
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
	      offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
	label1 = cnt;

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 */
	EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JA, OFFSET2);                   /* ja out */
	label2 = cnt;
	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
	EMIT2_off32(0x89, 0x85, 36);              /* mov dword ptr [rbp + 36], eax */

	/* prog = array->ptrs[index]; */
	EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
		    offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *	goto out;
	 */
	EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JE, OFFSET3);                   /* je out */
	label3 = cnt;

	/* goto *(prog->bpf_func + prologue_size); */
	EMIT4(0x48, 0x8B, 0x40,                   /* mov rax, qword ptr [rax + 32] */
	      offsetof(struct bpf_prog, bpf_func));
	EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE);   /* add rax, prologue_size */

	/*
	 * Now we're ready to jump into next BPF program
	 * rdi == ctx (1st arg)
	 * rax == prog->bpf_func + prologue_size
	 */
	RETPOLINE_RAX_BPF_JIT();

	/* out: */
	BUILD_BUG_ON(cnt - label1 != OFFSET1);
	BUILD_BUG_ON(cnt - label2 != OFFSET2);
	BUILD_BUG_ON(cnt - label3 != OFFSET3);

	*pprog = prog;
}

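/*
 * The jump targets above are hardcoded byte counts rather than relocated
 * labels: OFFSET1/2/3 state how far 'out' is from each conditional jump,
 * and the BUILD_BUG_ON()s verify at compile time that the emitted code
 * still matches those distances (including the retpoline thunk size).
 */
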
static void emit_load_skb_data_hlen(u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/*
	 * r9d = skb->len - skb->data_len (headlen)
	 * r10 = skb->data
	 */
	/* mov %r9d, off32(%rdi) */
	EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));

	/* sub %r9d, off32(%rdi) */
	EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));

	/* mov %r10, off32(%rdi) */
	EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));

	*pprog = prog;
}

static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
			   u32 dst_reg, const u32 imm32)
{
	u8 *prog = *pprog;
	u8 b1, b2, b3;
	int cnt = 0;

	/*
	 * Optimization: if imm32 is positive, use 'mov %eax, imm32'
	 * (which zero-extends imm32) to save 2 bytes.
	 */
	if (sign_propagate && (s32)imm32 < 0) {
		/* 'mov %rax, imm32' sign extends imm32 */
		b1 = add_1mod(0x48, dst_reg);
		b2 = 0xC7;
		b3 = 0xC0;
		EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
		goto done;
	}

	/*
	 * Optimization: if imm32 is zero, use 'xor %eax, %eax'
	 * to save 3 bytes.
	 */
	if (imm32 == 0) {
		if (is_ereg(dst_reg))
			EMIT1(add_2mod(0x40, dst_reg, dst_reg));
		b2 = 0x31; /* xor */
		b3 = 0xC0;
		EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
		goto done;
	}

	/* mov %eax, imm32 */
	if (is_ereg(dst_reg))
		EMIT1(add_1mod(0x40, dst_reg));
	EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
done:
	*pprog = prog;
}

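/*
 * Example: emit_mov_imm32(&prog, false, BPF_REG_0, 1) emits the 5-byte
 * 'mov eax, 1' (B8 01 00 00 00), while with sign_propagate == true and
 * imm32 == -1 it emits the 7-byte 'mov rax, -1' (48 C7 C0 FF FF FF FF),
 * and imm32 == 0 shrinks to the 2-byte 'xor eax, eax' (31 C0).
 */
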
static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
			   const u32 imm32_hi, const u32 imm32_lo)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
		/*
		 * For emitting plain u32, where sign bit must not be
		 * propagated LLVM tends to load imm64 over mov32
		 * directly, so save couple of bytes by just doing
		 * 'mov %eax, imm32' instead.
		 */
		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
	} else {
		/* movabsq %rax, imm64 */
		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
		EMIT(imm32_lo, 4);
		EMIT(imm32_hi, 4);
	}

	*pprog = prog;
}

static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is64) {
		/* mov dst, src */
		EMIT_mov(dst_reg, src_reg);
	} else {
		/* mov32 dst, src */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT1(add_2mod(0x40, dst_reg, src_reg));
		EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
	}

	*pprog = prog;
}

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
		  int oldproglen, struct jit_context *ctx)
{
	struct bpf_insn *insn = bpf_prog->insnsi;
	int insn_cnt = bpf_prog->len;
	bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
	bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
	bool seen_exit = false;
	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
	int i, cnt = 0;
	int proglen = 0;
	u8 *prog = temp;

	emit_prologue(&prog, bpf_prog->aux->stack_depth,
		      bpf_prog_was_classic(bpf_prog));

	if (seen_ld_abs)
		emit_load_skb_data_hlen(&prog);

	for (i = 0; i < insn_cnt; i++, insn++) {
		const s32 imm32 = insn->imm;
		u32 dst_reg = insn->dst_reg;
		u32 src_reg = insn->src_reg;
		u8 b2 = 0, b3 = 0;
		s64 jmp_offset;
		u8 jmp_cond;
		bool reload_skb_data;
		int ilen;
		u8 *func;

		if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
			ctx->seen_ax_reg = seen_ax_reg = true;

		switch (insn->code) {
			/* ALU */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU64 | BPF_ADD | BPF_X:
		case BPF_ALU64 | BPF_SUB | BPF_X:
		case BPF_ALU64 | BPF_AND | BPF_X:
		case BPF_ALU64 | BPF_OR | BPF_X:
		case BPF_ALU64 | BPF_XOR | BPF_X:
			switch (BPF_OP(insn->code)) {
			case BPF_ADD: b2 = 0x01; break;
			case BPF_SUB: b2 = 0x29; break;
			case BPF_AND: b2 = 0x21; break;
			case BPF_OR: b2 = 0x09; break;
			case BPF_XOR: b2 = 0x31; break;
			}
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_2mod(0x48, dst_reg, src_reg));
			else if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT1(add_2mod(0x40, dst_reg, src_reg));
			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
			break;
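
			/*
			 * Example: BPF_ALU64 | BPF_ADD | BPF_X with dst R1 and
			 * src R2 selects b2 = 0x01 and emits 48 01 F7
			 * ('add rdi, rsi'); the 32-bit BPF_ALU variant emits
			 * just 01 F7 ('add edi, esi') unless one of the
			 * registers needs a REX prefix for r8..r15.
			 */
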
		case BPF_ALU64 | BPF_MOV | BPF_X:
		case BPF_ALU | BPF_MOV | BPF_X:
			emit_mov_reg(&prog,
				     BPF_CLASS(insn->code) == BPF_ALU64,
				     dst_reg, src_reg);
			break;

			/* neg dst */
		case BPF_ALU | BPF_NEG:
		case BPF_ALU64 | BPF_NEG:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));
			EMIT2(0xF7, add_1reg(0xD8, dst_reg));
			break;
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU64 | BPF_ADD | BPF_K:
		case BPF_ALU64 | BPF_SUB | BPF_K:
		case BPF_ALU64 | BPF_AND | BPF_K:
		case BPF_ALU64 | BPF_OR | BPF_K:
		case BPF_ALU64 | BPF_XOR | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			/*
			 * b3 holds 'normal' opcode, b2 short form only valid
			 * in case dst is eax/rax.
			 */
			switch (BPF_OP(insn->code)) {
			case BPF_ADD: b3 = 0xC0; b2 = 0x05; break;
			case BPF_SUB: b3 = 0xE8; b2 = 0x2D; break;
			case BPF_AND: b3 = 0xE0; b2 = 0x25; break;
			case BPF_OR: b3 = 0xC8; b2 = 0x0D; break;
			case BPF_XOR: b3 = 0xF0; b2 = 0x35; break;
			}

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
			else if (is_axreg(dst_reg))
				EMIT1_off32(b2, imm32);
			else
				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
			break;
		case BPF_ALU64 | BPF_MOV | BPF_K:
		case BPF_ALU | BPF_MOV | BPF_K:
			emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
				       dst_reg, imm32);
			break;

		case BPF_LD | BPF_IMM | BPF_DW:
			emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
			insn++;
			i++;
			break;
			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU64 | BPF_MOD | BPF_X:
		case BPF_ALU64 | BPF_DIV | BPF_X:
		case BPF_ALU64 | BPF_MOD | BPF_K:
		case BPF_ALU64 | BPF_DIV | BPF_K:
			EMIT1(0x50); /* push rax */
			EMIT1(0x52); /* push rdx */

			if (BPF_SRC(insn->code) == BPF_X)
				/* mov r11, src_reg */
				EMIT_mov(AUX_REG, src_reg);
			else
				/* mov r11, imm32 */
				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);

			/* mov rax, dst_reg */
			EMIT_mov(BPF_REG_0, dst_reg);

			/*
			 * xor edx, edx
			 * equivalent to 'xor rdx, rdx', but one byte less
			 */
			EMIT2(0x31, 0xd2);

			if (BPF_CLASS(insn->code) == BPF_ALU64)
				/* div r11 */
				EMIT3(0x49, 0xF7, 0xF3);
			else
				/* div r11d */
				EMIT3(0x41, 0xF7, 0xF3);

			if (BPF_OP(insn->code) == BPF_MOD)
				/* mov r11, rdx */
				EMIT3(0x49, 0x89, 0xD3);
			else
				/* mov r11, rax */
				EMIT3(0x49, 0x89, 0xC3);

			EMIT1(0x5A); /* pop rdx */
			EMIT1(0x58); /* pop rax */

			/* mov dst_reg, r11 */
			EMIT_mov(dst_reg, AUX_REG);
			break;
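
			/*
			 * The shuffling above exists because x86 DIV has fixed
			 * operands: it divides rdx:rax by the given register
			 * and leaves the quotient in rax and the remainder in
			 * rdx. Both registers are therefore saved, zeroed or
			 * loaded as needed, and the result comes back via r11.
			 */
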
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU64 | BPF_MUL | BPF_K:
		case BPF_ALU64 | BPF_MUL | BPF_X:
		{
			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;

			if (dst_reg != BPF_REG_0)
				EMIT1(0x50); /* push rax */
			if (dst_reg != BPF_REG_3)
				EMIT1(0x52); /* push rdx */

			/* mov r11, dst_reg */
			EMIT_mov(AUX_REG, dst_reg);

			if (BPF_SRC(insn->code) == BPF_X)
				emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
			else
				emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);

			if (is64)
				EMIT1(add_1mod(0x48, AUX_REG));
			else if (is_ereg(AUX_REG))
				EMIT1(add_1mod(0x40, AUX_REG));
			/* mul(q) r11 */
			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

			if (dst_reg != BPF_REG_3)
				EMIT1(0x5A); /* pop rdx */
			if (dst_reg != BPF_REG_0) {
				/* mov dst_reg, rax */
				EMIT_mov(dst_reg, BPF_REG_0);
				EMIT1(0x58); /* pop rax */
			}
			break;
		}
			/* Shifts */
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_ARSH | BPF_K:
		case BPF_ALU64 | BPF_LSH | BPF_K:
		case BPF_ALU64 | BPF_RSH | BPF_K:
		case BPF_ALU64 | BPF_ARSH | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}

			if (imm32 == 1)
				EMIT2(0xD1, add_1reg(b3, dst_reg));
			else
				EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_ARSH | BPF_X:
		case BPF_ALU64 | BPF_LSH | BPF_X:
		case BPF_ALU64 | BPF_RSH | BPF_X:
		case BPF_ALU64 | BPF_ARSH | BPF_X:
			/* Check for bad case when dst_reg == rcx */
			if (dst_reg == BPF_REG_4) {
				/* mov r11, dst_reg */
				EMIT_mov(AUX_REG, dst_reg);
				dst_reg = AUX_REG;
			}

			if (src_reg != BPF_REG_4) { /* common case */
				EMIT1(0x51); /* push rcx */

				/* mov rcx, src_reg */
				EMIT_mov(BPF_REG_4, src_reg);
			}

			/* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}
			EMIT2(0xD3, add_1reg(b3, dst_reg));

			if (src_reg != BPF_REG_4)
				EMIT1(0x59); /* pop rcx */

			if (insn->dst_reg == BPF_REG_4)
				/* mov dst_reg, r11 */
				EMIT_mov(insn->dst_reg, AUX_REG);
			break;
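
			/*
			 * Variable shifts are awkward on x86 because the count
			 * must live in %cl, so the emitter spills %rcx when
			 * the BPF source register is not already R4 (%rcx),
			 * and parks dst in r11 when dst itself is %rcx.
			 */
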
		case BPF_ALU | BPF_END | BPF_FROM_BE:
			switch (imm32) {
			case 16:
				/* Emit 'ror %ax, 8' to swap lower 2 bytes */
				EMIT1(0x66);
				if (is_ereg(dst_reg))
					EMIT1(0x41);
				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);

				/* Emit 'movzwl eax, ax' */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'bswap eax' to swap lower 4 bytes */
				if (is_ereg(dst_reg))
					EMIT2(0x41, 0x0F);
				else
					EMIT1(0x0F);
				EMIT1(add_1reg(0xC8, dst_reg));
				break;
			case 64:
				/* Emit 'bswap rax' to swap 8 bytes */
				EMIT3(add_1mod(0x48, dst_reg), 0x0F,
				      add_1reg(0xC8, dst_reg));
				break;
			}
			break;
		case BPF_ALU | BPF_END | BPF_FROM_LE:
			switch (imm32) {
			case 16:
				/*
				 * Emit 'movzwl eax, ax' to zero extend 16-bit
				 * into 64 bit
				 */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'mov eax, eax' to clear upper 32-bits */
				if (is_ereg(dst_reg))
					EMIT1(0x45);
				EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 64:
				/* nop */
				break;
			}
			break;
			/* ST: *(u8*)(dst_reg + off) = imm */
		case BPF_ST | BPF_MEM | BPF_B:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC6);
			else
				EMIT1(0xC6);
			goto st;
		case BPF_ST | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg))
				EMIT3(0x66, 0x41, 0xC7);
			else
				EMIT2(0x66, 0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC7);
			else
				EMIT1(0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_DW:
			EMIT2(add_1mod(0x48, dst_reg), 0xC7);

st:			if (is_imm8(insn->off))
				EMIT2(add_1reg(0x40, dst_reg), insn->off);
			else
				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);

			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
			break;
			/* STX: *(u8*)(dst_reg + off) = src_reg */
		case BPF_STX | BPF_MEM | BPF_B:
			/* Emit 'mov byte ptr [rax + off], al' */
			if (is_ereg(dst_reg) || is_ereg(src_reg) ||
			    /* We have to add extra byte for x86 SIL, DIL regs */
			    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
			else
				EMIT1(0x88);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
			else
				EMIT2(0x66, 0x89);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
			else
				EMIT1(0x89);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_DW:
			EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
stx:			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
					    insn->off);
			break;
			/* LDX: dst_reg = *(u8*)(src_reg + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			/* Emit 'movzx rax, byte ptr [rax + off]' */
			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_H:
			/* Emit 'movzx rax, word ptr [rax + off]' */
			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_W:
			/* Emit 'mov eax, dword ptr [rax+0x14]' */
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
			else
				EMIT1(0x8B);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_DW:
			/* Emit 'mov rax, qword ptr [rax+0x14]' */
			EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx:			/*
			 * If insn->off == 0 we can save one extra byte, but
			 * special case of x86 R13 which always needs an offset
			 * is not worth the hassle
			 */
			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
					    insn->off);
			break;
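
			/*
			 * Example: BPF_LDX | BPF_MEM | BPF_W with dst R0,
			 * src R1 and off 0x14 emits 8B 47 14
			 * ('mov eax, dword ptr [rdi + 0x14]'): ModRM 0x47 is
			 * mod=01 (disp8), reg=000 (eax), rm=111 (rdi),
			 * followed by the one-byte offset.
			 */
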
			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Emit 'lock add dword ptr [rax + off], eax' */
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
			else
				EMIT2(0xF0, 0x01);
			goto xadd;
		case BPF_STX | BPF_XADD | BPF_DW:
			EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
xadd:			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
					    insn->off);
			break;
			/* call */
		case BPF_JMP | BPF_CALL:
			func = (u8 *) __bpf_call_base + imm32;
			jmp_offset = func - (image + addrs[i]);
			if (seen_ld_abs) {
				reload_skb_data = bpf_helper_changes_pkt_data(func);
				if (reload_skb_data) {
					EMIT1(0x57); /* push %rdi */
					jmp_offset += 22; /* pop, mov, sub, mov */
				} else {
					EMIT2(0x41, 0x52); /* push %r10 */
					EMIT2(0x41, 0x51); /* push %r9 */
					/*
					 * We need to adjust jmp offset, since
					 * pop %r9, pop %r10 take 4 bytes after call insn
					 */
					jmp_offset += 4;
				}
			}
			if (!imm32 || !is_simm32(jmp_offset)) {
				pr_err("unsupported BPF func %d addr %p image %p\n",
				       imm32, func, image);
				return -EINVAL;
			}
			EMIT1_off32(0xE8, jmp_offset);
			if (seen_ld_abs) {
				if (reload_skb_data) {
					EMIT1(0x5F); /* pop %rdi */
					emit_load_skb_data_hlen(&prog);
				} else {
					EMIT2(0x41, 0x59); /* pop %r9 */
					EMIT2(0x41, 0x5A); /* pop %r10 */
				}
			}
			break;
		case BPF_JMP | BPF_TAIL_CALL:
			emit_bpf_tail_call(&prog);
			break;
			/* cond jump */
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_X:
			/* cmp dst_reg, src_reg */
			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
			      add_2reg(0xC0, dst_reg, src_reg));
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JSET | BPF_X:
			/* test dst_reg, src_reg */
			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
			      add_2reg(0xC0, dst_reg, src_reg));
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JSET | BPF_K:
			/* test dst_reg, imm32 */
			EMIT1(add_1mod(0x48, dst_reg));
			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
			goto emit_cond_jmp;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_K:
			/* cmp dst_reg, imm8/32 */
			EMIT1(add_1mod(0x48, dst_reg));

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
			else
				EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);

emit_cond_jmp:		/* Convert BPF opcode to x86 */
			switch (BPF_OP(insn->code)) {
			case BPF_JEQ:
				jmp_cond = X86_JE;
				break;
			case BPF_JSET:
			case BPF_JNE:
				jmp_cond = X86_JNE;
				break;
			case BPF_JGT:
				/* GT is unsigned '>', JA in x86 */
				jmp_cond = X86_JA;
				break;
			case BPF_JLT:
				/* LT is unsigned '<', JB in x86 */
				jmp_cond = X86_JB;
				break;
			case BPF_JGE:
				/* GE is unsigned '>=', JAE in x86 */
				jmp_cond = X86_JAE;
				break;
			case BPF_JLE:
				/* LE is unsigned '<=', JBE in x86 */
				jmp_cond = X86_JBE;
				break;
			case BPF_JSGT:
				/* Signed '>', GT in x86 */
				jmp_cond = X86_JG;
				break;
			case BPF_JSLT:
				/* Signed '<', LT in x86 */
				jmp_cond = X86_JL;
				break;
			case BPF_JSGE:
				/* Signed '>=', GE in x86 */
				jmp_cond = X86_JGE;
				break;
			case BPF_JSLE:
				/* Signed '<=', LE in x86 */
				jmp_cond = X86_JLE;
				break;
			default: /* to silence GCC warning */
				return -EFAULT;
			}

			jmp_offset = addrs[i + insn->off] - addrs[i];
			if (is_imm8(jmp_offset)) {
				EMIT2(jmp_cond, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
			} else {
				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;
		case BPF_JMP | BPF_JA:
			jmp_offset = addrs[i + insn->off] - addrs[i];
			if (!jmp_offset)
				/* Optimize out nop jumps */
				break;
emit_jmp:
			if (is_imm8(jmp_offset)) {
				EMIT2(0xEB, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT1_off32(0xE9, jmp_offset);
			} else {
				pr_err("jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;
		case BPF_LD | BPF_IND | BPF_W:
			func = sk_load_word;
			goto common_load;
		case BPF_LD | BPF_ABS | BPF_W:
			func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
common_load:
			ctx->seen_ld_abs = seen_ld_abs = true;
			jmp_offset = func - (image + addrs[i]);
			if (!func || !is_simm32(jmp_offset)) {
				pr_err("unsupported BPF func %d addr %p image %p\n",
				       imm32, func, image);
				return -EINVAL;
			}
			if (BPF_MODE(insn->code) == BPF_ABS) {
				/* mov %esi, imm32 */
				EMIT1_off32(0xBE, imm32);
			} else {
				/* mov %rsi, src_reg */
				EMIT_mov(BPF_REG_2, src_reg);
				if (imm32) {
					if (is_imm8(imm32))
						/* add %esi, imm8 */
						EMIT3(0x83, 0xC6, imm32);
					else
						/* add %esi, imm32 */
						EMIT2_off32(0x81, 0xC6, imm32);
				}
			}
			/*
			 * skb pointer is in R6 (%rbx), it will be copied into
			 * %rdi if skb_copy_bits() call is necessary.
			 * sk_load_* helpers also use %r10 and %r9d.
			 * See bpf_jit.S
			 */
			if (seen_ax_reg)
				/* r10 = skb->data, mov %r10, off32(%rbx) */
				EMIT3_off32(0x4c, 0x8b, 0x93,
					    offsetof(struct sk_buff, data));
			EMIT1_off32(0xE8, jmp_offset); /* call */
			break;
		case BPF_LD | BPF_IND | BPF_H:
			func = sk_load_half;
			goto common_load;
		case BPF_LD | BPF_ABS | BPF_H:
			func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
			goto common_load;
		case BPF_LD | BPF_IND | BPF_B:
			func = sk_load_byte;
			goto common_load;
		case BPF_LD | BPF_ABS | BPF_B:
			func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
			goto common_load;
		case BPF_JMP | BPF_EXIT:
			if (seen_exit) {
				jmp_offset = ctx->cleanup_addr - addrs[i];
				goto emit_jmp;
			}
			seen_exit = true;
			/* Update cleanup_addr */
			ctx->cleanup_addr = proglen;
			/* mov rbx, qword ptr [rbp+0] */
			EMIT4(0x48, 0x8B, 0x5D, 0);
			/* mov r13, qword ptr [rbp+8] */
			EMIT4(0x4C, 0x8B, 0x6D, 8);
			/* mov r14, qword ptr [rbp+16] */
			EMIT4(0x4C, 0x8B, 0x75, 16);
			/* mov r15, qword ptr [rbp+24] */
			EMIT4(0x4C, 0x8B, 0x7D, 24);

			/* add rbp, AUX_STACK_SPACE */
			EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
			EMIT1(0xC9); /* leave */
			EMIT1(0xC3); /* ret */
			break;
		default:
			/*
			 * By design x86-64 JIT should support all BPF instructions.
			 * This error will be seen if new instruction was added
			 * to the interpreter, but not to the JIT, or if there is
			 * junk in bpf_prog.
			 */
			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}

		ilen = prog - temp;
		if (ilen > BPF_MAX_INSN_SIZE) {
			pr_err("bpf_jit: fatal insn size error\n");
			return -EFAULT;
		}

		if (image) {
			if (unlikely(proglen + ilen > oldproglen)) {
				pr_err("bpf_jit: fatal error\n");
				return -EFAULT;
			}
			memcpy(image + proglen, temp, ilen);
		}
		proglen += ilen;
		addrs[i] = proglen;
		prog = temp;
	}
	return proglen;
}

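/*
 * Note the buffering scheme above: every instruction is first emitted into
 * the on-stack temp[] buffer and only copied into the final image once its
 * length is known, so a single oversized emission can be caught against
 * BPF_MAX_INSN_SIZE before it ever overruns the image.
 */
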
struct x64_jit_data {
	struct bpf_binary_header *header;
	int *addrs;
	u8 *image;
	int proglen;
	struct jit_context ctx;
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header = NULL;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct x64_jit_data *jit_data;
	int proglen, oldproglen = 0;
	struct jit_context ctx = {};
	bool tmp_blinded = false;
	bool extra_pass = false;
	u8 *image = NULL;
	int *addrs;
	int pass;
	int i;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	addrs = jit_data->addrs;
	if (addrs) {
		ctx = jit_data->ctx;
		oldproglen = jit_data->proglen;
		image = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		goto skip_init_addrs;
	}
	addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
	if (!addrs) {
		prog = orig_prog;
		goto out_addrs;
	}

	/*
	 * Before first pass, make a rough estimation of addrs[]
	 * each BPF instruction is translated to less than 64 bytes
	 */
	for (proglen = 0, i = 0; i < prog->len; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	ctx.cleanup_addr = proglen;
skip_init_addrs:
	/*
	 * JITed image shrinks with every pass and the loop iterates
	 * until the image stops shrinking. Very large BPF programs
	 * may converge on the last pass. In such case do one more
	 * pass to emit the final image.
	 */
	for (pass = 0; pass < 20 || image; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
		if (proglen <= 0) {
			image = NULL;
			if (header)
				bpf_jit_binary_free(header);
			prog = orig_prog;
			goto out_addrs;
		}
		if (image) {
			if (proglen != oldproglen) {
				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
				       proglen, oldproglen);
				prog = orig_prog;
				goto out_addrs;
			}
			break;
		}
		if (proglen == oldproglen) {
			header = bpf_jit_binary_alloc(proglen, &image,
						      1, jit_fill_hole);
			if (!header) {
				prog = orig_prog;
				goto out_addrs;
			}
		}
		oldproglen = proglen;
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, proglen, pass + 1, image);
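
	/*
	 * Example of why the passes converge: a forward jump is first sized
	 * against the worst-case 64-bytes-per-insn estimate and may need the
	 * 5-byte rel32 form; once addrs[] tightens, the same jump can fit
	 * the 2-byte rel8 form, which shrinks the image and can in turn
	 * shrink other jumps on the next pass.
	 */
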
	if (image) {
		if (!prog->is_func || extra_pass) {
			bpf_jit_binary_lock_ro(header);
		} else {
			jit_data->addrs = addrs;
			jit_data->ctx = ctx;
			jit_data->proglen = proglen;
			jit_data->image = image;
			jit_data->header = header;
		}
		prog->bpf_func = (void *)image;
		prog->jited = 1;
		prog->jited_len = proglen;
	} else {
		prog = orig_prog;
	}

	if (!prog->is_func || extra_pass) {
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}