Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author    David S. Miller <davem@davemloft.net>  Wed, 22 Jul 2020 19:34:55 +0000 (12:34 -0700)
committer David S. Miller <davem@davemloft.net>  Wed, 22 Jul 2020 19:35:33 +0000 (12:35 -0700)
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-07-21

The following pull-request contains BPF updates for your *net-next* tree.

We've added 46 non-merge commits during the last 6 day(s) which contain
a total of 68 files changed, 4929 insertions(+), 526 deletions(-).

The main changes are:

1) Run a BPF program on socket lookup, from Jakub (see the sketch below).

2) Introduce attaching and running XDP programs on cpumap entries, from Lorenzo.

3) s390 JIT fixes, from Ilya.

4) Teach the riscv JIT to emit compressed (RVC) instructions, from Luke.

5) Use build-time computed BTF IDs in bpf iter, from Yonghong.
====================

The merge resolves purely independent, overlapping changes in both filter.h and xdp.h.

Signed-off-by: David S. Miller <davem@davemloft.net>
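As an illustration of (1), a minimal sk_lookup program might look like the
sketch below. The map, port number, and function name are made up for the
example, and the SEC() prefix follows the libbpf convention added by the
series; the BPF_PROG_TYPE_SK_LOOKUP program type, the struct bpf_sk_lookup
context, and the bpf_sk_assign() helper for this context are what the
series introduces:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* One-slot sockmap holding the socket to steer new connections to
     * (map name and layout are illustrative). */
    struct {
            __uint(type, BPF_MAP_TYPE_SOCKMAP);
            __uint(max_entries, 1);
            __type(key, __u32);
            __type(value, __u64);
    } echo_sockmap SEC(".maps");

    SEC("sk_lookup/steer_echo")
    int steer_echo(struct bpf_sk_lookup *ctx)
    {
            const __u32 key = 0;
            struct bpf_sock *sk;
            long err;

            if (ctx->local_port != 7777)    /* illustrative port */
                    return SK_PASS;

            sk = bpf_map_lookup_elem(&echo_sockmap, &key);
            if (!sk)
                    return SK_DROP;

            err = bpf_sk_assign(ctx, sk, 0); /* select sk for this lookup */
            bpf_sk_release(sk);
            return err ? SK_DROP : SK_PASS;
    }

    char _license[] SEC("license") = "GPL";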
68 files changed:
arch/riscv/net/bpf_jit.h
arch/riscv/net/bpf_jit_comp32.c
arch/riscv/net/bpf_jit_comp64.c
arch/riscv/net/bpf_jit_core.c
arch/s390/net/bpf_jit_comp.c
include/linux/bpf-netns.h
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/btf_ids.h
include/linux/filter.h
include/net/xdp.h
include/trace/events/xdp.h
include/uapi/linux/bpf.h
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/map_iter.c
kernel/bpf/net_namespace.c
kernel/bpf/syscall.c
kernel/bpf/task_iter.c
kernel/bpf/verifier.c
lib/test_bpf.c
net/core/dev.c
net/core/filter.c
net/ipv4/inet_hashtables.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv6/inet6_hashtables.c
net/ipv6/route.c
net/ipv6/udp.c
net/netlink/af_netlink.c
samples/bpf/offwaketime_kern.c
samples/bpf/test_overhead_kprobe_kern.c
samples/bpf/tracex1_kern.c
samples/bpf/tracex5_kern.c
samples/bpf/xdp_redirect_cpu_kern.c
samples/bpf/xdp_redirect_cpu_user.c
scripts/bpf_helpers_doc.py
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/common.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/skeleton/pid_iter.bpf.c
tools/include/linux/btf_ids.h
tools/include/uapi/linux/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_probes.c
tools/testing/selftests/bpf/network_helpers.c
tools/testing/selftests/bpf/network_helpers.h
tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
tools/testing/selftests/bpf/prog_tests/sk_lookup.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
tools/testing/selftests/bpf/progs/test_sk_lookup.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_kmod.sh
tools/testing/selftests/bpf/test_lwt_seg6local.sh
tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c [new file with mode: 0644]

diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 20e235d..75c1e99 100644
 #include <linux/filter.h>
 #include <asm/cacheflush.h>
 
+static inline bool rvc_enabled(void)
+{
+       return IS_ENABLED(CONFIG_RISCV_ISA_C);
+}
+
 enum {
        RV_REG_ZERO =   0,      /* The constant value 0 */
        RV_REG_RA =     1,      /* Return address */
@@ -48,9 +53,21 @@ enum {
        RV_REG_T6 =     31,
 };
 
+static inline bool is_creg(u8 reg)
+{
+       return (1 << reg) & (BIT(RV_REG_FP) |
+                            BIT(RV_REG_S1) |
+                            BIT(RV_REG_A0) |
+                            BIT(RV_REG_A1) |
+                            BIT(RV_REG_A2) |
+                            BIT(RV_REG_A3) |
+                            BIT(RV_REG_A4) |
+                            BIT(RV_REG_A5));
+}
+
 struct rv_jit_context {
        struct bpf_prog *prog;
-       u32 *insns;             /* RV insns */
+       u16 *insns;             /* RV insns */
        int ninsns;
        int epilogue_offset;
        int *offset;            /* BPF to RV */
@@ -58,6 +75,12 @@ struct rv_jit_context {
        int stack_size;
 };
 
+/* Convert from ninsns to bytes. */
+static inline int ninsns_rvoff(int ninsns)
+{
+       return ninsns << 1;
+}
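+
+/* Illustration (not part of the patch): ninsns now counts 2-byte units,
+ * so emit() below advances it by 2 for a full 4-byte instruction and
+ * emitc() by 1 for a compressed one. Three compressed instructions
+ * followed by one full instruction thus give ninsns == 5, i.e.
+ * ninsns_rvoff(5) == 10 bytes of code. */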
+
 struct rv_jit_data {
        struct bpf_binary_header *header;
        u8 *image;
@@ -74,8 +97,22 @@ static inline void bpf_flush_icache(void *start, void *end)
        flush_icache_range((unsigned long)start, (unsigned long)end);
 }
 
+/* Emit a 4-byte RISC-V instruction. */
 static inline void emit(const u32 insn, struct rv_jit_context *ctx)
 {
+       if (ctx->insns) {
+               ctx->insns[ctx->ninsns] = insn;
+               ctx->insns[ctx->ninsns + 1] = (insn >> 16);
+       }
+
+       ctx->ninsns += 2;
+}
+
+/* Emit a 2-byte RISC-V compressed instruction. */
+static inline void emitc(const u16 insn, struct rv_jit_context *ctx)
+{
+       BUILD_BUG_ON(!rvc_enabled());
+
        if (ctx->insns)
                ctx->insns[ctx->ninsns] = insn;
 
@@ -86,7 +123,7 @@ static inline int epilogue_offset(struct rv_jit_context *ctx)
 {
        int to = ctx->epilogue_offset, from = ctx->ninsns;
 
-       return (to - from) << 2;
+       return ninsns_rvoff(to - from);
 }
 
 /* Return -1 or inverted cond. */
@@ -117,6 +154,36 @@ static inline int invert_bpf_cond(u8 cond)
        return -1;
 }
 
+static inline bool is_6b_int(long val)
+{
+       return -(1L << 5) <= val && val < (1L << 5);
+}
+
+static inline bool is_7b_uint(unsigned long val)
+{
+       return val < (1UL << 7);
+}
+
+static inline bool is_8b_uint(unsigned long val)
+{
+       return val < (1UL << 8);
+}
+
+static inline bool is_9b_uint(unsigned long val)
+{
+       return val < (1UL << 9);
+}
+
+static inline bool is_10b_int(long val)
+{
+       return -(1L << 9) <= val && val < (1L << 9);
+}
+
+static inline bool is_10b_uint(unsigned long val)
+{
+       return val < (1UL << 10);
+}
+
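+/* The checks below and above mirror RVC immediate widths (illustrative
+ * summary): is_6b_int() accepts [-32, 31], the signed 6-bit immediate
+ * of c.addi/c.li/c.andi; is_10b_int() matches the scaled c.addi16sp
+ * immediate; the unsigned variants cover zero-extended offsets such as
+ * c.lwsp (is_8b_uint) and c.addi4spn (is_10b_uint). */
+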
 static inline bool is_12b_int(long val)
 {
        return -(1L << 11) <= val && val < (1L << 11);
@@ -149,7 +216,7 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
        off++; /* BPF branch is from PC+1, RV is from PC */
        from = (insn > 0) ? ctx->offset[insn - 1] : 0;
        to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
-       return (to - from) << 2;
+       return ninsns_rvoff(to - from);
 }
 
 /* Instruction formats. */
@@ -207,6 +274,59 @@ static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
        return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
 }
 
+/* RISC-V compressed instruction formats. */
+
+static inline u16 rv_cr_insn(u8 funct4, u8 rd, u8 rs2, u8 op)
+{
+       return (funct4 << 12) | (rd << 7) | (rs2 << 2) | op;
+}
+
+static inline u16 rv_ci_insn(u8 funct3, u32 imm6, u8 rd, u8 op)
+{
+       u32 imm;
+
+       imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2);
+       return (funct3 << 13) | (rd << 7) | op | imm;
+}
+
+static inline u16 rv_css_insn(u8 funct3, u32 uimm, u8 rs2, u8 op)
+{
+       return (funct3 << 13) | (uimm << 7) | (rs2 << 2) | op;
+}
+
+static inline u16 rv_ciw_insn(u8 funct3, u32 uimm, u8 rd, u8 op)
+{
+       return (funct3 << 13) | (uimm << 5) | ((rd & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cl_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rd,
+                            u8 op)
+{
+       return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) |
+               (imm_lo << 5) | ((rd & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cs_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rs2,
+                            u8 op)
+{
+       return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) |
+               (imm_lo << 5) | ((rs2 & 0x7) << 2) | op;
+}
+
+static inline u16 rv_ca_insn(u8 funct6, u8 rd, u8 funct2, u8 rs2, u8 op)
+{
+       return (funct6 << 10) | ((rd & 0x7) << 7) | (funct2 << 5) |
+               ((rs2 & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cb_insn(u8 funct3, u32 imm6, u8 funct2, u8 rd, u8 op)
+{
+       u32 imm;
+
+       imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2);
+       return (funct3 << 13) | (funct2 << 10) | ((rd & 0x7) << 7) | op | imm;
+}
+
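+/* Concrete check of the CR format above (illustrative, not patch text):
+ * c.mv a0, a1 is rv_cr_insn(0x8, 10, 11, 0x2)
+ *   == (0x8 << 12) | (10 << 7) | (11 << 2) | 0x2 == 0x852e,
+ * the standard RVC encoding of "c.mv a0, a1". */
+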
 /* Instructions shared by both RV32 and RV64. */
 
 static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
@@ -414,6 +534,135 @@ static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
        return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
 }
 
+/* RVC instructions. */
+
+static inline u16 rvc_addi4spn(u8 rd, u32 imm10)
+{
+       u32 imm;
+
+       imm = ((imm10 & 0x30) << 2) | ((imm10 & 0x3c0) >> 4) |
+               ((imm10 & 0x4) >> 1) | ((imm10 & 0x8) >> 3);
+       return rv_ciw_insn(0x0, imm, rd, 0x0);
+}
+
+static inline u16 rvc_lw(u8 rd, u32 imm7, u8 rs1)
+{
+       u32 imm_hi, imm_lo;
+
+       imm_hi = (imm7 & 0x38) >> 3;
+       imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6);
+       return rv_cl_insn(0x2, imm_hi, rs1, imm_lo, rd, 0x0);
+}
+
+static inline u16 rvc_sw(u8 rs1, u32 imm7, u8 rs2)
+{
+       u32 imm_hi, imm_lo;
+
+       imm_hi = (imm7 & 0x38) >> 3;
+       imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6);
+       return rv_cs_insn(0x6, imm_hi, rs1, imm_lo, rs2, 0x0);
+}
+
+static inline u16 rvc_addi(u8 rd, u32 imm6)
+{
+       return rv_ci_insn(0, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_li(u8 rd, u32 imm6)
+{
+       return rv_ci_insn(0x2, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_addi16sp(u32 imm10)
+{
+       u32 imm;
+
+       imm = ((imm10 & 0x200) >> 4) | (imm10 & 0x10) | ((imm10 & 0x40) >> 3) |
+               ((imm10 & 0x180) >> 6) | ((imm10 & 0x20) >> 5);
+       return rv_ci_insn(0x3, imm, RV_REG_SP, 0x1);
+}
+
+static inline u16 rvc_lui(u8 rd, u32 imm6)
+{
+       return rv_ci_insn(0x3, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_srli(u8 rd, u32 imm6)
+{
+       return rv_cb_insn(0x4, imm6, 0, rd, 0x1);
+}
+
+static inline u16 rvc_srai(u8 rd, u32 imm6)
+{
+       return rv_cb_insn(0x4, imm6, 0x1, rd, 0x1);
+}
+
+static inline u16 rvc_andi(u8 rd, u32 imm6)
+{
+       return rv_cb_insn(0x4, imm6, 0x2, rd, 0x1);
+}
+
+static inline u16 rvc_sub(u8 rd, u8 rs)
+{
+       return rv_ca_insn(0x23, rd, 0, rs, 0x1);
+}
+
+static inline u16 rvc_xor(u8 rd, u8 rs)
+{
+       return rv_ca_insn(0x23, rd, 0x1, rs, 0x1);
+}
+
+static inline u16 rvc_or(u8 rd, u8 rs)
+{
+       return rv_ca_insn(0x23, rd, 0x2, rs, 0x1);
+}
+
+static inline u16 rvc_and(u8 rd, u8 rs)
+{
+       return rv_ca_insn(0x23, rd, 0x3, rs, 0x1);
+}
+
+static inline u16 rvc_slli(u8 rd, u32 imm6)
+{
+       return rv_ci_insn(0, imm6, rd, 0x2);
+}
+
+static inline u16 rvc_lwsp(u8 rd, u32 imm8)
+{
+       u32 imm;
+
+       imm = ((imm8 & 0xc0) >> 6) | (imm8 & 0x3c);
+       return rv_ci_insn(0x2, imm, rd, 0x2);
+}
+
+static inline u16 rvc_jr(u8 rs1)
+{
+       return rv_cr_insn(0x8, rs1, RV_REG_ZERO, 0x2);
+}
+
+static inline u16 rvc_mv(u8 rd, u8 rs)
+{
+       return rv_cr_insn(0x8, rd, rs, 0x2);
+}
+
+static inline u16 rvc_jalr(u8 rs1)
+{
+       return rv_cr_insn(0x9, rs1, RV_REG_ZERO, 0x2);
+}
+
+static inline u16 rvc_add(u8 rd, u8 rs)
+{
+       return rv_cr_insn(0x9, rd, rs, 0x2);
+}
+
+static inline u16 rvc_swsp(u32 imm8, u8 rs2)
+{
+       u32 imm;
+
+       imm = (imm8 & 0x3c) | ((imm8 & 0xc0) >> 6);
+       return rv_css_insn(0x6, imm, rs2, 0x2);
+}
+
 /*
  * RV64-only instructions.
  *
@@ -503,6 +752,234 @@ static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
        return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
 }
 
+/* RV64-only RVC instructions. */
+
+static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1)
+{
+       u32 imm_hi, imm_lo;
+
+       imm_hi = (imm8 & 0x38) >> 3;
+       imm_lo = (imm8 & 0xc0) >> 6;
+       return rv_cl_insn(0x3, imm_hi, rs1, imm_lo, rd, 0x0);
+}
+
+static inline u16 rvc_sd(u8 rs1, u32 imm8, u8 rs2)
+{
+       u32 imm_hi, imm_lo;
+
+       imm_hi = (imm8 & 0x38) >> 3;
+       imm_lo = (imm8 & 0xc0) >> 6;
+       return rv_cs_insn(0x7, imm_hi, rs1, imm_lo, rs2, 0x0);
+}
+
+static inline u16 rvc_subw(u8 rd, u8 rs)
+{
+       return rv_ca_insn(0x27, rd, 0, rs, 0x1);
+}
+
+static inline u16 rvc_addiw(u8 rd, u32 imm6)
+{
+       return rv_ci_insn(0x1, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_ldsp(u8 rd, u32 imm9)
+{
+       u32 imm;
+
+       imm = ((imm9 & 0x1c0) >> 6) | (imm9 & 0x38);
+       return rv_ci_insn(0x3, imm, rd, 0x2);
+}
+
+static inline u16 rvc_sdsp(u32 imm9, u8 rs2)
+{
+       u32 imm;
+
+       imm = (imm9 & 0x38) | ((imm9 & 0x1c0) >> 6);
+       return rv_css_insn(0x7, imm, rs2, 0x2);
+}
+
+#endif /* __riscv_xlen == 64 */
+
+/* Helper functions that emit RVC instructions when possible. */
+
+static inline void emit_jalr(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rd == RV_REG_RA && rs && !imm)
+               emitc(rvc_jalr(rs), ctx);
+       else if (rvc_enabled() && !rd && rs && !imm)
+               emitc(rvc_jr(rs), ctx);
+       else
+               emit(rv_jalr(rd, rs, imm), ctx);
+}
+
+static inline void emit_mv(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rd && rs)
+               emitc(rvc_mv(rd, rs), ctx);
+       else
+               emit(rv_addi(rd, rs, 0), ctx);
+}
+
+static inline void emit_add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rd && rd == rs1 && rs2)
+               emitc(rvc_add(rd, rs2), ctx);
+       else
+               emit(rv_add(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_addi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rd == RV_REG_SP && rd == rs && is_10b_int(imm) && imm && !(imm & 0xf))
+               emitc(rvc_addi16sp(imm), ctx);
+       else if (rvc_enabled() && is_creg(rd) && rs == RV_REG_SP && is_10b_uint(imm) &&
+                !(imm & 0x3) && imm)
+               emitc(rvc_addi4spn(rd, imm), ctx);
+       else if (rvc_enabled() && rd && rd == rs && imm && is_6b_int(imm))
+               emitc(rvc_addi(rd, imm), ctx);
+       else
+               emit(rv_addi(rd, rs, imm), ctx);
+}
+
+static inline void emit_li(u8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rd && is_6b_int(imm))
+               emitc(rvc_li(rd, imm), ctx);
+       else
+               emit(rv_addi(rd, RV_REG_ZERO, imm), ctx);
+}
+
+static inline void emit_lui(u8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rd && rd != RV_REG_SP && is_6b_int(imm) && imm)
+               emitc(rvc_lui(rd, imm), ctx);
+       else
+               emit(rv_lui(rd, imm), ctx);
+}
+
+static inline void emit_slli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rd && rd == rs && imm && (u32)imm < __riscv_xlen)
+               emitc(rvc_slli(rd, imm), ctx);
+       else
+               emit(rv_slli(rd, rs, imm), ctx);
+}
+
+static inline void emit_andi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && is_creg(rd) && rd == rs && is_6b_int(imm))
+               emitc(rvc_andi(rd, imm), ctx);
+       else
+               emit(rv_andi(rd, rs, imm), ctx);
+}
+
+static inline void emit_srli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen)
+               emitc(rvc_srli(rd, imm), ctx);
+       else
+               emit(rv_srli(rd, rs, imm), ctx);
+}
+
+static inline void emit_srai(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen)
+               emitc(rvc_srai(rd, imm), ctx);
+       else
+               emit(rv_srai(rd, rs, imm), ctx);
+}
+
+static inline void emit_sub(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+               emitc(rvc_sub(rd, rs2), ctx);
+       else
+               emit(rv_sub(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_or(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+               emitc(rvc_or(rd, rs2), ctx);
+       else
+               emit(rv_or(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_and(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+               emitc(rvc_and(rd, rs2), ctx);
+       else
+               emit(rv_and(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_xor(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+               emitc(rvc_xor(rd, rs2), ctx);
+       else
+               emit(rv_xor(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_lw(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_8b_uint(off) && !(off & 0x3))
+               emitc(rvc_lwsp(rd, off), ctx);
+       else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_7b_uint(off) && !(off & 0x3))
+               emitc(rvc_lw(rd, off, rs1), ctx);
+       else
+               emit(rv_lw(rd, off, rs1), ctx);
+}
+
+static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rs1 == RV_REG_SP && is_8b_uint(off) && !(off & 0x3))
+               emitc(rvc_swsp(off, rs2), ctx);
+       else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_7b_uint(off) && !(off & 0x3))
+               emitc(rvc_sw(rs1, off, rs2), ctx);
+       else
+               emit(rv_sw(rs1, off, rs2), ctx);
+}
+
+/* RV64-only helper functions. */
+#if __riscv_xlen == 64
+
+static inline void emit_addiw(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rd && rd == rs && is_6b_int(imm))
+               emitc(rvc_addiw(rd, imm), ctx);
+       else
+               emit(rv_addiw(rd, rs, imm), ctx);
+}
+
+static inline void emit_ld(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_9b_uint(off) && !(off & 0x7))
+               emitc(rvc_ldsp(rd, off), ctx);
+       else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_8b_uint(off) && !(off & 0x7))
+               emitc(rvc_ld(rd, off, rs1), ctx);
+       else
+               emit(rv_ld(rd, off, rs1), ctx);
+}
+
+static inline void emit_sd(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && rs1 == RV_REG_SP && is_9b_uint(off) && !(off & 0x7))
+               emitc(rvc_sdsp(off, rs2), ctx);
+       else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_8b_uint(off) && !(off & 0x7))
+               emitc(rvc_sd(rs1, off, rs2), ctx);
+       else
+               emit(rv_sd(rs1, off, rs2), ctx);
+}
+
+static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+       if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+               emitc(rvc_subw(rd, rs2), ctx);
+       else
+               emit(rv_subw(rd, rs1, rs2), ctx);
+}
+
 #endif /* __riscv_xlen == 64 */
 
 void bpf_jit_build_prologue(struct rv_jit_context *ctx);
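
/* Usage sketch for the emit_* helpers above (illustrative): during frame
 * setup, emit_addi(RV_REG_SP, RV_REG_SP, -16, ctx) matches the first
 * branch of emit_addi() (rd == rs == sp, non-zero 10-bit signed imm,
 * multiple of 16) and emits the 2-byte c.addi16sp, while
 * emit_addi(RV_REG_A0, RV_REG_SP, 16, ctx) matches the second branch and
 * emits c.addi4spn; anything with no RVC form falls back to the 4-byte
 * addi. */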
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index b198eaa..bc5f220 100644
@@ -644,7 +644,7 @@ static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 rvoff,
 
        e = ctx->ninsns;
        /* Adjust for extra insns. */
-       rvoff -= (e - s) << 2;
+       rvoff -= ninsns_rvoff(e - s);
        emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
        return 0;
 }
@@ -713,7 +713,7 @@ static int emit_bcc(u8 op, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx)
        if (far) {
                e = ctx->ninsns;
                /* Adjust for extra insns. */
-               rvoff -= (e - s) << 2;
+               rvoff -= ninsns_rvoff(e - s);
                emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
        }
        return 0;
@@ -731,7 +731,7 @@ static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 rvoff,
 
        e = ctx->ninsns;
        /* Adjust for extra insns. */
-       rvoff -= (e - s) << 2;
+       rvoff -= ninsns_rvoff(e - s);
 
        if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx))
                return -1;
@@ -795,7 +795,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
         * if (index >= max_entries)
         *   goto out;
         */
-       off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+       off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
        emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);
 
        /*
@@ -804,7 +804,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
         *   goto out;
         */
        emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
-       off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+       off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
        emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
 
        /*
@@ -818,7 +818,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
        if (is_12b_check(off, insn))
                return -1;
        emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
-       off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+       off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
        emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx);
 
        /*
@@ -1214,7 +1214,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
                        emit_imm32(tmp2, imm, ctx);
                        src = tmp2;
                        e = ctx->ninsns;
-                       rvoff -= (e - s) << 2;
+                       rvoff -= ninsns_rvoff(e - s);
                }
 
                if (is64)
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 6cfd164..8a56b52 100644
@@ -132,19 +132,23 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
         *
         * This also means that we need to process LSB to MSB.
         */
-       s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff;
+       s64 upper = (val + (1 << 11)) >> 12;
+       /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
+        * and addi are signed and RVC checks will perform signed comparisons.
+        */
+       s64 lower = ((val & 0xfff) << 52) >> 52;
        int shift;
 
        if (is_32b_int(val)) {
                if (upper)
-                       emit(rv_lui(rd, upper), ctx);
+                       emit_lui(rd, upper, ctx);
 
                if (!upper) {
-                       emit(rv_addi(rd, RV_REG_ZERO, lower), ctx);
+                       emit_li(rd, lower, ctx);
                        return;
                }
 
-               emit(rv_addiw(rd, rd, lower), ctx);
+               emit_addiw(rd, rd, lower, ctx);
                return;
        }
 
@@ -154,9 +158,9 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
 
        emit_imm(rd, upper, ctx);
 
-       emit(rv_slli(rd, rd, shift), ctx);
+       emit_slli(rd, rd, shift, ctx);
        if (lower)
-               emit(rv_addi(rd, rd, lower), ctx);
+               emit_addi(rd, rd, lower, ctx);
 }
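
/* Worked example of the sign extension above (illustrative): for
 * val = 0xfff, upper = 1 and lower = -1 (previously 0xfff). The JIT
 * emits lui rd, 1 followed by addiw rd, rd, -1, i.e. 0x1000 - 1 = 0xfff,
 * and since -1 passes is_6b_int(), emit_addiw() can pick the 2-byte
 * c.addiw instead of the 4-byte addiw. */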
 
 static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
@@ -164,43 +168,43 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
        int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
 
        if (seen_reg(RV_REG_RA, ctx)) {
-               emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx);
+               emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
                store_offset -= 8;
        }
-       emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx);
+       emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
        store_offset -= 8;
        if (seen_reg(RV_REG_S1, ctx)) {
-               emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx);
+               emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S2, ctx)) {
-               emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx);
+               emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S3, ctx)) {
-               emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx);
+               emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S4, ctx)) {
-               emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx);
+               emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S5, ctx)) {
-               emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx);
+               emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S6, ctx)) {
-               emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx);
+               emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
                store_offset -= 8;
        }
 
-       emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
+       emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
        /* Set return value. */
        if (!is_tail_call)
-               emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
-       emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
-                    is_tail_call ? 4 : 0), /* skip TCC init */
-            ctx);
+               emit_mv(RV_REG_A0, RV_REG_A5, ctx);
+       emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
+                 is_tail_call ? 4 : 0, /* skip TCC init */
+                 ctx);
 }
 
 static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
@@ -280,8 +284,8 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
 
 static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
 {
-       emit(rv_slli(reg, reg, 32), ctx);
-       emit(rv_srli(reg, reg, 32), ctx);
+       emit_slli(reg, reg, 32, ctx);
+       emit_srli(reg, reg, 32, ctx);
 }
 
 static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
@@ -304,35 +308,35 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
        if (is_12b_check(off, insn))
                return -1;
        emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
-       off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+       off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
        emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
 
        /* if (TCC-- < 0)
         *     goto out;
         */
-       emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
-       off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+       emit_addi(RV_REG_T1, tcc, -1, ctx);
+       off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
        emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
 
        /* prog = array->ptrs[index];
         * if (!prog)
         *     goto out;
         */
-       emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx);
-       emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx);
+       emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx);
+       emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx);
        off = offsetof(struct bpf_array, ptrs);
        if (is_12b_check(off, insn))
                return -1;
-       emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
-       off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+       emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
+       off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
        emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
 
        /* goto *(prog->bpf_func + 4); */
        off = offsetof(struct bpf_prog, bpf_func);
        if (is_12b_check(off, insn))
                return -1;
-       emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
-       emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
+       emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
+       emit_mv(RV_REG_TCC, RV_REG_T1, ctx);
        __build_epilogue(true, ctx);
        return 0;
 }
@@ -360,9 +364,9 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
 
 static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
 {
-       emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
+       emit_mv(RV_REG_T2, *rd, ctx);
        emit_zext_32(RV_REG_T2, ctx);
-       emit(rv_addi(RV_REG_T1, *rs, 0), ctx);
+       emit_mv(RV_REG_T1, *rs, ctx);
        emit_zext_32(RV_REG_T1, ctx);
        *rd = RV_REG_T2;
        *rs = RV_REG_T1;
@@ -370,15 +374,15 @@ static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
 
 static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
 {
-       emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
-       emit(rv_addiw(RV_REG_T1, *rs, 0), ctx);
+       emit_addiw(RV_REG_T2, *rd, 0, ctx);
+       emit_addiw(RV_REG_T1, *rs, 0, ctx);
        *rd = RV_REG_T2;
        *rs = RV_REG_T1;
 }
 
 static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
 {
-       emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
+       emit_mv(RV_REG_T2, *rd, ctx);
        emit_zext_32(RV_REG_T2, ctx);
        emit_zext_32(RV_REG_T1, ctx);
        *rd = RV_REG_T2;
@@ -386,7 +390,7 @@ static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
 
 static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
 {
-       emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
+       emit_addiw(RV_REG_T2, *rd, 0, ctx);
        *rd = RV_REG_T2;
 }
 
@@ -432,7 +436,7 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
        if (ret)
                return ret;
        rd = bpf_to_rv_reg(BPF_REG_0, ctx);
-       emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+       emit_mv(rd, RV_REG_A0, ctx);
        return 0;
 }
 
@@ -458,7 +462,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
                        emit_zext_32(rd, ctx);
                        break;
                }
-               emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx);
+               emit_mv(rd, rs, ctx);
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
@@ -466,31 +470,35 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
        /* dst = dst OP src */
        case BPF_ALU | BPF_ADD | BPF_X:
        case BPF_ALU64 | BPF_ADD | BPF_X:
-               emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx);
+               emit_add(rd, rd, rs, ctx);
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
        case BPF_ALU | BPF_SUB | BPF_X:
        case BPF_ALU64 | BPF_SUB | BPF_X:
-               emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx);
+               if (is64)
+                       emit_sub(rd, rd, rs, ctx);
+               else
+                       emit_subw(rd, rd, rs, ctx);
+
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
        case BPF_ALU | BPF_AND | BPF_X:
        case BPF_ALU64 | BPF_AND | BPF_X:
-               emit(rv_and(rd, rd, rs), ctx);
+               emit_and(rd, rd, rs, ctx);
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
        case BPF_ALU | BPF_OR | BPF_X:
        case BPF_ALU64 | BPF_OR | BPF_X:
-               emit(rv_or(rd, rd, rs), ctx);
+               emit_or(rd, rd, rs, ctx);
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
        case BPF_ALU | BPF_XOR | BPF_X:
        case BPF_ALU64 | BPF_XOR | BPF_X:
-               emit(rv_xor(rd, rd, rs), ctx);
+               emit_xor(rd, rd, rs, ctx);
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
@@ -534,8 +542,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
        /* dst = -dst */
        case BPF_ALU | BPF_NEG:
        case BPF_ALU64 | BPF_NEG:
-               emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) :
-                    rv_subw(rd, RV_REG_ZERO, rd), ctx);
+               emit_sub(rd, RV_REG_ZERO, rd, ctx);
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
@@ -544,8 +551,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
        case BPF_ALU | BPF_END | BPF_FROM_LE:
                switch (imm) {
                case 16:
-                       emit(rv_slli(rd, rd, 48), ctx);
-                       emit(rv_srli(rd, rd, 48), ctx);
+                       emit_slli(rd, rd, 48, ctx);
+                       emit_srli(rd, rd, 48, ctx);
                        break;
                case 32:
                        if (!aux->verifier_zext)
@@ -558,51 +565,51 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
                break;
 
        case BPF_ALU | BPF_END | BPF_FROM_BE:
-               emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx);
+               emit_li(RV_REG_T2, 0, ctx);
 
-               emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
-               emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
-               emit(rv_srli(rd, rd, 8), ctx);
+               emit_andi(RV_REG_T1, rd, 0xff, ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+               emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+               emit_srli(rd, rd, 8, ctx);
                if (imm == 16)
                        goto out_be;
 
-               emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
-               emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
-               emit(rv_srli(rd, rd, 8), ctx);
+               emit_andi(RV_REG_T1, rd, 0xff, ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+               emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+               emit_srli(rd, rd, 8, ctx);
 
-               emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
-               emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
-               emit(rv_srli(rd, rd, 8), ctx);
+               emit_andi(RV_REG_T1, rd, 0xff, ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+               emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+               emit_srli(rd, rd, 8, ctx);
                if (imm == 32)
                        goto out_be;
 
-               emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
-               emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
-               emit(rv_srli(rd, rd, 8), ctx);
-
-               emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
-               emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
-               emit(rv_srli(rd, rd, 8), ctx);
-
-               emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
-               emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
-               emit(rv_srli(rd, rd, 8), ctx);
-
-               emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
-               emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
-               emit(rv_srli(rd, rd, 8), ctx);
+               emit_andi(RV_REG_T1, rd, 0xff, ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+               emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+               emit_srli(rd, rd, 8, ctx);
+
+               emit_andi(RV_REG_T1, rd, 0xff, ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+               emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+               emit_srli(rd, rd, 8, ctx);
+
+               emit_andi(RV_REG_T1, rd, 0xff, ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+               emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+               emit_srli(rd, rd, 8, ctx);
+
+               emit_andi(RV_REG_T1, rd, 0xff, ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+               emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+               emit_srli(rd, rd, 8, ctx);
 out_be:
-               emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
+               emit_andi(RV_REG_T1, rd, 0xff, ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
 
-               emit(rv_addi(rd, RV_REG_T2, 0), ctx);
+               emit_mv(rd, RV_REG_T2, ctx);
                break;
 
        /* dst = imm */
@@ -617,12 +624,10 @@ out_be:
        case BPF_ALU | BPF_ADD | BPF_K:
        case BPF_ALU64 | BPF_ADD | BPF_K:
                if (is_12b_int(imm)) {
-                       emit(is64 ? rv_addi(rd, rd, imm) :
-                            rv_addiw(rd, rd, imm), ctx);
+                       emit_addi(rd, rd, imm, ctx);
                } else {
                        emit_imm(RV_REG_T1, imm, ctx);
-                       emit(is64 ? rv_add(rd, rd, RV_REG_T1) :
-                            rv_addw(rd, rd, RV_REG_T1), ctx);
+                       emit_add(rd, rd, RV_REG_T1, ctx);
                }
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
@@ -630,12 +635,10 @@ out_be:
        case BPF_ALU | BPF_SUB | BPF_K:
        case BPF_ALU64 | BPF_SUB | BPF_K:
                if (is_12b_int(-imm)) {
-                       emit(is64 ? rv_addi(rd, rd, -imm) :
-                            rv_addiw(rd, rd, -imm), ctx);
+                       emit_addi(rd, rd, -imm, ctx);
                } else {
                        emit_imm(RV_REG_T1, imm, ctx);
-                       emit(is64 ? rv_sub(rd, rd, RV_REG_T1) :
-                            rv_subw(rd, rd, RV_REG_T1), ctx);
+                       emit_sub(rd, rd, RV_REG_T1, ctx);
                }
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
@@ -643,10 +646,10 @@ out_be:
        case BPF_ALU | BPF_AND | BPF_K:
        case BPF_ALU64 | BPF_AND | BPF_K:
                if (is_12b_int(imm)) {
-                       emit(rv_andi(rd, rd, imm), ctx);
+                       emit_andi(rd, rd, imm, ctx);
                } else {
                        emit_imm(RV_REG_T1, imm, ctx);
-                       emit(rv_and(rd, rd, RV_REG_T1), ctx);
+                       emit_and(rd, rd, RV_REG_T1, ctx);
                }
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
@@ -657,7 +660,7 @@ out_be:
                        emit(rv_ori(rd, rd, imm), ctx);
                } else {
                        emit_imm(RV_REG_T1, imm, ctx);
-                       emit(rv_or(rd, rd, RV_REG_T1), ctx);
+                       emit_or(rd, rd, RV_REG_T1, ctx);
                }
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
@@ -668,7 +671,7 @@ out_be:
                        emit(rv_xori(rd, rd, imm), ctx);
                } else {
                        emit_imm(RV_REG_T1, imm, ctx);
-                       emit(rv_xor(rd, rd, RV_REG_T1), ctx);
+                       emit_xor(rd, rd, RV_REG_T1, ctx);
                }
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
@@ -699,19 +702,28 @@ out_be:
                break;
        case BPF_ALU | BPF_LSH | BPF_K:
        case BPF_ALU64 | BPF_LSH | BPF_K:
-               emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx);
+               emit_slli(rd, rd, imm, ctx);
+
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
        case BPF_ALU | BPF_RSH | BPF_K:
        case BPF_ALU64 | BPF_RSH | BPF_K:
-               emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx);
+               if (is64)
+                       emit_srli(rd, rd, imm, ctx);
+               else
+                       emit(rv_srliw(rd, rd, imm), ctx);
+
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
        case BPF_ALU | BPF_ARSH | BPF_K:
        case BPF_ALU64 | BPF_ARSH | BPF_K:
-               emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx);
+               if (is64)
+                       emit_srai(rd, rd, imm, ctx);
+               else
+                       emit(rv_sraiw(rd, rd, imm), ctx);
+
                if (!is64 && !aux->verifier_zext)
                        emit_zext_32(rd, ctx);
                break;
@@ -757,13 +769,13 @@ out_be:
                        e = ctx->ninsns;
 
                        /* Adjust for extra insns */
-                       rvoff -= (e - s) << 2;
+                       rvoff -= ninsns_rvoff(e - s);
                }
 
                if (BPF_OP(code) == BPF_JSET) {
                        /* Adjust for and */
                        rvoff -= 4;
-                       emit(rv_and(RV_REG_T1, rd, rs), ctx);
+                       emit_and(RV_REG_T1, rd, rs, ctx);
                        emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
                                    ctx);
                } else {
@@ -810,7 +822,7 @@ out_be:
                e = ctx->ninsns;
 
                /* Adjust for extra insns */
-               rvoff -= (e - s) << 2;
+               rvoff -= ninsns_rvoff(e - s);
                emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
                break;
 
@@ -819,19 +831,19 @@ out_be:
                rvoff = rv_offset(i, off, ctx);
                s = ctx->ninsns;
                if (is_12b_int(imm)) {
-                       emit(rv_andi(RV_REG_T1, rd, imm), ctx);
+                       emit_andi(RV_REG_T1, rd, imm, ctx);
                } else {
                        emit_imm(RV_REG_T1, imm, ctx);
-                       emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
+                       emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
                }
                /* For jset32, we should clear the upper 32 bits of t1, but
                 * sign-extension is sufficient here and saves one instruction,
                 * as t1 is used only in comparison against zero.
                 */
                if (!is64 && imm < 0)
-                       emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx);
+                       emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
                e = ctx->ninsns;
-               rvoff -= (e - s) << 2;
+               rvoff -= ninsns_rvoff(e - s);
                emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
                break;
 
@@ -887,7 +899,7 @@ out_be:
                }
 
                emit_imm(RV_REG_T1, off, ctx);
-               emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
+               emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
                emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
                if (insn_is_zext(&insn[1]))
                        return 1;
@@ -899,7 +911,7 @@ out_be:
                }
 
                emit_imm(RV_REG_T1, off, ctx);
-               emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
+               emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
                emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
                if (insn_is_zext(&insn[1]))
                        return 1;
@@ -911,20 +923,20 @@ out_be:
                }
 
                emit_imm(RV_REG_T1, off, ctx);
-               emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
+               emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
                emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
                if (insn_is_zext(&insn[1]))
                        return 1;
                break;
        case BPF_LDX | BPF_MEM | BPF_DW:
                if (is_12b_int(off)) {
-                       emit(rv_ld(rd, off, rs), ctx);
+                       emit_ld(rd, off, rs, ctx);
                        break;
                }
 
                emit_imm(RV_REG_T1, off, ctx);
-               emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
-               emit(rv_ld(rd, 0, RV_REG_T1), ctx);
+               emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+               emit_ld(rd, 0, RV_REG_T1, ctx);
                break;
 
        /* ST: *(size *)(dst + off) = imm */
@@ -936,7 +948,7 @@ out_be:
                }
 
                emit_imm(RV_REG_T2, off, ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
                emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
                break;
 
@@ -948,30 +960,30 @@ out_be:
                }
 
                emit_imm(RV_REG_T2, off, ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
                emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
                break;
        case BPF_ST | BPF_MEM | BPF_W:
                emit_imm(RV_REG_T1, imm, ctx);
                if (is_12b_int(off)) {
-                       emit(rv_sw(rd, off, RV_REG_T1), ctx);
+                       emit_sw(rd, off, RV_REG_T1, ctx);
                        break;
                }
 
                emit_imm(RV_REG_T2, off, ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
-               emit(rv_sw(RV_REG_T2, 0, RV_REG_T1), ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+               emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
                break;
        case BPF_ST | BPF_MEM | BPF_DW:
                emit_imm(RV_REG_T1, imm, ctx);
                if (is_12b_int(off)) {
-                       emit(rv_sd(rd, off, RV_REG_T1), ctx);
+                       emit_sd(rd, off, RV_REG_T1, ctx);
                        break;
                }
 
                emit_imm(RV_REG_T2, off, ctx);
-               emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
-               emit(rv_sd(RV_REG_T2, 0, RV_REG_T1), ctx);
+               emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+               emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
                break;
 
        /* STX: *(size *)(dst + off) = src */
@@ -982,7 +994,7 @@ out_be:
                }
 
                emit_imm(RV_REG_T1, off, ctx);
-               emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
+               emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
                emit(rv_sb(RV_REG_T1, 0, rs), ctx);
                break;
        case BPF_STX | BPF_MEM | BPF_H:
@@ -992,28 +1004,28 @@ out_be:
                }
 
                emit_imm(RV_REG_T1, off, ctx);
-               emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
+               emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
                emit(rv_sh(RV_REG_T1, 0, rs), ctx);
                break;
        case BPF_STX | BPF_MEM | BPF_W:
                if (is_12b_int(off)) {
-                       emit(rv_sw(rd, off, rs), ctx);
+                       emit_sw(rd, off, rs, ctx);
                        break;
                }
 
                emit_imm(RV_REG_T1, off, ctx);
-               emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
-               emit(rv_sw(RV_REG_T1, 0, rs), ctx);
+               emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+               emit_sw(RV_REG_T1, 0, rs, ctx);
                break;
        case BPF_STX | BPF_MEM | BPF_DW:
                if (is_12b_int(off)) {
-                       emit(rv_sd(rd, off, rs), ctx);
+                       emit_sd(rd, off, rs, ctx);
                        break;
                }
 
                emit_imm(RV_REG_T1, off, ctx);
-               emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
-               emit(rv_sd(RV_REG_T1, 0, rs), ctx);
+               emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+               emit_sd(RV_REG_T1, 0, rs, ctx);
                break;
        /* STX XADD: lock *(u32 *)(dst + off) += src */
        case BPF_STX | BPF_XADD | BPF_W:
@@ -1021,10 +1033,10 @@ out_be:
        case BPF_STX | BPF_XADD | BPF_DW:
                if (off) {
                        if (is_12b_int(off)) {
-                               emit(rv_addi(RV_REG_T1, rd, off), ctx);
+                               emit_addi(RV_REG_T1, rd, off, ctx);
                        } else {
                                emit_imm(RV_REG_T1, off, ctx);
-                               emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
+                               emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
                        }
 
                        rd = RV_REG_T1;
@@ -1073,52 +1085,53 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
 
        /* First instruction is always setting the tail-call-counter
         * (TCC) register. This instruction is skipped for tail calls.
+        * Force using a 4-byte (non-compressed) instruction.
         */
        emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);
 
-       emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx);
+       emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);
 
        if (seen_reg(RV_REG_RA, ctx)) {
-               emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx);
+               emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
                store_offset -= 8;
        }
-       emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx);
+       emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
        store_offset -= 8;
        if (seen_reg(RV_REG_S1, ctx)) {
-               emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx);
+               emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S2, ctx)) {
-               emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx);
+               emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S3, ctx)) {
-               emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx);
+               emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S4, ctx)) {
-               emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx);
+               emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S5, ctx)) {
-               emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx);
+               emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
                store_offset -= 8;
        }
        if (seen_reg(RV_REG_S6, ctx)) {
-               emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx);
+               emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
                store_offset -= 8;
        }
 
-       emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx);
+       emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);
 
        if (bpf_stack_adjust)
-               emit(rv_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust), ctx);
+               emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);
 
        /* Program contains calls and tail calls, so RV_REG_TCC needs
         * to be saved across calls.
         */
        if (seen_tail_call(ctx) && seen_call(ctx))
-               emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx);
+               emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);
 
        ctx->stack_size = stack_adjust;
 }
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 709b94e..3630d44 100644
@@ -73,7 +73,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
        if (ctx->offset) {
                extra_pass = true;
-               image_size = sizeof(u32) * ctx->ninsns;
+               image_size = sizeof(*ctx->insns) * ctx->ninsns;
                goto skip_init_ctx;
        }
 
@@ -103,7 +103,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                        if (jit_data->header)
                                break;
 
-                       image_size = sizeof(u32) * ctx->ninsns;
+                       image_size = sizeof(*ctx->insns) * ctx->ninsns;
                        jit_data->header =
                                bpf_jit_binary_alloc(image_size,
                                                     &jit_data->image,
@@ -114,7 +114,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                                goto out_offset;
                        }
 
-                       ctx->insns = (u32 *)jit_data->image;
+                       ctx->insns = (u16 *)jit_data->image;
                        /*
                         * Now, when the image is allocated, the image can
                         * potentially shrink more (auipc/jalr -> jal).
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index f4242b8..26f97a1 100644
@@ -489,6 +489,24 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
        } while (re <= last);
 }
 
+static void bpf_skip(struct bpf_jit *jit, int size)
+{
+       if (size >= 6 && !is_valid_rel(size)) {
+               /* brcl 0xf,size */
+               EMIT6_PCREL_RIL(0xc0f4000000, size);
+               size -= 6;
+       } else if (size >= 4 && is_valid_rel(size)) {
+               /* brc 0xf,size */
+               EMIT4_PCREL(0xa7f40000, size);
+               size -= 4;
+       }
+       while (size >= 2) {
+               /* bcr 0,%0 */
+               _EMIT2(0x0700);
+               size -= 2;
+       }
+}
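+
+/* Illustrative behaviour (not patch text): bpf_skip(jit, 6) on a gap
+ * within branch range emits "brc 0xf,6" (4 bytes) plus one "bcr 0,%r0"
+ * nop (2 bytes); the unconditional branch jumps straight past the gap,
+ * so the padding costs at most one taken branch at run time. */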
+
 /*
  * Emit function prologue
  *
@@ -501,10 +519,11 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
                /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
                _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
        } else {
-               /* j tail_call_start: NOP if no tail calls are used */
-               EMIT4_PCREL(0xa7f40000, 6);
-               /* bcr 0,%0 */
-               EMIT2(0x0700, 0, REG_0);
+               /*
+                * There are no tail calls. Insert nops in order to have
+                * tail_call_start at a predictable offset.
+                */
+               bpf_skip(jit, 6);
        }
        /* Tail calls have to skip above initialization */
        jit->tail_call_start = jit->prg;
@@ -1268,8 +1287,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
                last = (i == fp->len - 1) ? 1 : 0;
                if (last)
                        break;
-               /* j <exit> */
-               EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+               if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
+                       /* brc 0xf, <exit> */
+                       EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
+               else
+                       /* brcl 0xf, <exit> */
+                       EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
                break;
        /*
         * Branch relative (number of skipped instructions) to offset on
@@ -1417,21 +1440,10 @@ branch_ks:
                }
                break;
 branch_ku:
-               is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-               /* clfi or clgfi %dst,imm */
-               EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
-                         dst_reg, imm);
-               if (!is_first_pass(jit) &&
-                   can_use_rel(jit, addrs[i + off + 1])) {
-                       /* brc mask,off */
-                       EMIT4_PCREL_RIC(0xa7040000,
-                                       mask >> 12, addrs[i + off + 1]);
-               } else {
-                       /* brcl mask,off */
-                       EMIT6_PCREL_RILC(0xc0040000,
-                                        mask >> 12, addrs[i + off + 1]);
-               }
-               break;
+               /* lgfi %w1,imm (load sign-extended imm) */
+               src_reg = REG_1;
+               EMIT6_IMM(0xc0010000, src_reg, imm);
+               goto branch_xu;
 branch_xs:
                is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
                if (!is_first_pass(jit) &&
@@ -1510,7 +1522,14 @@ static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
  */
 static int bpf_set_addr(struct bpf_jit *jit, int i)
 {
-       if (!bpf_is_new_addr_sane(jit, i))
+       int delta;
+
+       if (is_codegen_pass(jit)) {
+               delta = jit->prg - jit->addrs[i];
+               if (delta < 0)
+                       bpf_skip(jit, -delta);
+       }
+       if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
                return -1;
        jit->addrs[i] = jit->prg;
        return 0;
index 47d5b0c..722f799 100644 (file)
@@ -8,6 +8,7 @@
 enum netns_bpf_attach_type {
        NETNS_BPF_INVALID = -1,
        NETNS_BPF_FLOW_DISSECTOR = 0,
+       NETNS_BPF_SK_LOOKUP,
        MAX_NETNS_BPF_ATTACH_TYPE
 };
 
@@ -17,6 +18,8 @@ to_netns_bpf_attach_type(enum bpf_attach_type attach_type)
        switch (attach_type) {
        case BPF_FLOW_DISSECTOR:
                return NETNS_BPF_FLOW_DISSECTOR;
+       case BPF_SK_LOOKUP:
+               return NETNS_BPF_SK_LOOKUP;
        default:
                return NETNS_BPF_INVALID;
        }
index c67c88a..bae557f 100644 (file)
@@ -249,6 +249,7 @@ enum bpf_arg_type {
        ARG_PTR_TO_INT,         /* pointer to int */
        ARG_PTR_TO_LONG,        /* pointer to long */
        ARG_PTR_TO_SOCKET,      /* pointer to bpf_sock (fullsock) */
+       ARG_PTR_TO_SOCKET_OR_NULL,      /* pointer to bpf_sock (fullsock) or NULL */
        ARG_PTR_TO_BTF_ID,      /* pointer to in-kernel struct */
        ARG_PTR_TO_ALLOC_MEM,   /* pointer to dynamically allocated memory */
        ARG_PTR_TO_ALLOC_MEM_OR_NULL,   /* pointer to dynamically allocated memory or NULL */
@@ -667,6 +668,7 @@ struct bpf_jit_poke_descriptor {
 struct bpf_ctx_arg_aux {
        u32 offset;
        enum bpf_reg_type reg_type;
+       u32 btf_id;
 };
 
 struct bpf_prog_aux {
@@ -928,6 +930,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
 
 void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
                                struct bpf_prog *old_prog);
+int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index);
+int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
+                            struct bpf_prog *prog);
 int bpf_prog_array_copy_info(struct bpf_prog_array *array,
                             u32 *prog_ids, u32 request_cnt,
                             u32 *prog_cnt);
@@ -1272,6 +1277,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
+bool cpu_map_prog_allowed(struct bpf_map *map);
 
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1432,6 +1438,11 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
        return 0;
 }
 
+static inline bool cpu_map_prog_allowed(struct bpf_map *map)
+{
+       return false;
+}
+
 static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
                                enum bpf_prog_type type)
 {
@@ -1531,7 +1542,6 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
 
 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
 void bpf_map_offload_map_free(struct bpf_map *map);
-void init_btf_sock_ids(struct btf *btf);
 #else
 static inline int bpf_prog_offload_init(struct bpf_prog *prog,
                                        union bpf_attr *attr)
@@ -1557,9 +1567,6 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 static inline void bpf_map_offload_map_free(struct bpf_map *map)
 {
 }
-static inline void init_btf_sock_ids(struct btf *btf)
-{
-}
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
 #if defined(CONFIG_BPF_STREAM_PARSER)
index a18ae82..a52a568 100644 (file)
@@ -64,6 +64,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2,
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport,
              struct sk_reuseport_md, struct sk_reuseport_kern)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_LOOKUP, sk_lookup,
+             struct bpf_sk_lookup, struct bpf_sk_lookup_kern)
 #endif
 #if defined(CONFIG_BPF_JIT)
 BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops,
index 1cdb569..4867d54 100644 (file)
@@ -57,17 +57,20 @@ asm(                                                        \
  * .zero 4
  *
  */
-#define __BTF_ID_LIST(name)                            \
+#define __BTF_ID_LIST(name, scope)                     \
 asm(                                                   \
 ".pushsection " BTF_IDS_SECTION ",\"a\";       \n"     \
-".local " #name ";                             \n"     \
+"." #scope " " #name ";                        \n"     \
 #name ":;                                      \n"     \
 ".popsection;                                  \n");   \
 
 #define BTF_ID_LIST(name)                              \
-__BTF_ID_LIST(name)                                    \
+__BTF_ID_LIST(name, local)                             \
 extern u32 name[];
 
+#define BTF_ID_LIST_GLOBAL(name)                       \
+__BTF_ID_LIST(name, globl)
+
 /*
  * The BTF_ID_UNUSED macro defines 4 zero bytes.
  * It's used when we want to define 'unused' entry
@@ -90,7 +93,38 @@ asm(                                                 \
 #define BTF_ID_LIST(name) static u32 name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
+#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
 
+#ifdef CONFIG_NET
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first member in their memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock)                    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock)    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock)        \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock)        \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock)                  \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock)                         \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common)           \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock)                      \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock)          \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)          \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)                    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)                      \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+extern u32 btf_sock_ids[];
+#endif
+
 #endif
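As a usage sketch, declaring a build-time ID list looks like this; resolve_btfids patches the u32 slots while linking vmlinux (the list and type names here are purely illustrative):

    #include <linux/btf_ids.h>

    BTF_ID_LIST(demo_btf_ids)           /* hypothetical list name */
    BTF_ID(struct, tcp_sock)            /* demo_btf_ids[0] */
    BTF_ID(struct, udp_sock)            /* demo_btf_ids[1] */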
index 4d049c8..1c6b6d9 100644 (file)
@@ -1278,4 +1278,151 @@ struct bpf_sockopt_kern {
 
 int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len);
 
+struct bpf_sk_lookup_kern {
+       u16             family;
+       u16             protocol;
+       struct {
+               __be32 saddr;
+               __be32 daddr;
+       } v4;
+       struct {
+               const struct in6_addr *saddr;
+               const struct in6_addr *daddr;
+       } v6;
+       __be16          sport;
+       u16             dport;
+       struct sock     *selected_sk;
+       bool            no_reuseport;
+};
+
+extern struct static_key_false bpf_sk_lookup_enabled;
+
+/* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup.
+ *
+ * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and
+ * SK_DROP. Their meaning is as follows:
+ *
+ *  SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result
+ *  SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup
+ *  SK_DROP                           : terminate lookup with -ECONNREFUSED
+ *
+ * This macro aggregates return values and selected sockets from
+ * multiple BPF programs according to the following rules, in order:
+ *
+ *  1. If any program returned SK_PASS and a non-NULL ctx.selected_sk,
+ *     the macro result is SK_PASS and the last such ctx.selected_sk is used.
+ *  2. If any program returned SK_DROP,
+ *     the macro result is SK_DROP.
+ *  3. Otherwise the result is SK_PASS and ctx.selected_sk is NULL.
+ *
+ * Caller must ensure that the prog array is non-NULL, and that the
+ * array as well as the programs it contains remain valid.
+ */
+#define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func)                 \
+       ({                                                              \
+               struct bpf_sk_lookup_kern *_ctx = &(ctx);               \
+               struct bpf_prog_array_item *_item;                      \
+               struct sock *_selected_sk = NULL;                       \
+               bool _no_reuseport = false;                             \
+               struct bpf_prog *_prog;                                 \
+               bool _all_pass = true;                                  \
+               u32 _ret;                                               \
+                                                                       \
+               migrate_disable();                                      \
+               _item = &(array)->items[0];                             \
+               while ((_prog = READ_ONCE(_item->prog))) {              \
+                       /* restore most recent selection */             \
+                       _ctx->selected_sk = _selected_sk;               \
+                       _ctx->no_reuseport = _no_reuseport;             \
+                                                                       \
+                       _ret = func(_prog, _ctx);                       \
+                       if (_ret == SK_PASS && _ctx->selected_sk) {     \
+                               /* remember last non-NULL socket */     \
+                               _selected_sk = _ctx->selected_sk;       \
+                               _no_reuseport = _ctx->no_reuseport;     \
+                       } else if (_ret == SK_DROP && _all_pass) {      \
+                               _all_pass = false;                      \
+                       }                                               \
+                       _item++;                                        \
+               }                                                       \
+               _ctx->selected_sk = _selected_sk;                       \
+               _ctx->no_reuseport = _no_reuseport;                     \
+               migrate_enable();                                       \
+               _all_pass || _selected_sk ? SK_PASS : SK_DROP;          \
+        })
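A plain C model of the three aggregation rules above, for clarity (userspace sketch, not kernel code; the SK_DROP/SK_PASS values match enum sk_action):

    #include <stddef.h>

    enum { SK_DROP = 0, SK_PASS = 1 };

    struct result { int verdict; void *sk; };

    static int aggregate(const struct result *r, int n, void **selected)
    {
            void *last_sk = NULL;
            int all_pass = 1, i;

            for (i = 0; i < n; i++) {
                    if (r[i].verdict == SK_PASS && r[i].sk)
                            last_sk = r[i].sk;      /* rule 1: last wins */
                    else if (r[i].verdict == SK_DROP)
                            all_pass = 0;           /* rule 2 */
            }
            *selected = last_sk;
            return (all_pass || last_sk) ? SK_PASS : SK_DROP;  /* rule 3 */
    }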
+
+static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
+                                       const __be32 saddr, const __be16 sport,
+                                       const __be32 daddr, const u16 dport,
+                                       struct sock **psk)
+{
+       struct bpf_prog_array *run_array;
+       struct sock *selected_sk = NULL;
+       bool no_reuseport = false;
+
+       rcu_read_lock();
+       run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
+       if (run_array) {
+               struct bpf_sk_lookup_kern ctx = {
+                       .family         = AF_INET,
+                       .protocol       = protocol,
+                       .v4.saddr       = saddr,
+                       .v4.daddr       = daddr,
+                       .sport          = sport,
+                       .dport          = dport,
+               };
+               u32 act;
+
+               act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+               if (act == SK_PASS) {
+                       selected_sk = ctx.selected_sk;
+                       no_reuseport = ctx.no_reuseport;
+               } else {
+                       selected_sk = ERR_PTR(-ECONNREFUSED);
+               }
+       }
+       rcu_read_unlock();
+       *psk = selected_sk;
+       return no_reuseport;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
+                                       const struct in6_addr *saddr,
+                                       const __be16 sport,
+                                       const struct in6_addr *daddr,
+                                       const u16 dport,
+                                       struct sock **psk)
+{
+       struct bpf_prog_array *run_array;
+       struct sock *selected_sk = NULL;
+       bool no_reuseport = false;
+
+       rcu_read_lock();
+       run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
+       if (run_array) {
+               struct bpf_sk_lookup_kern ctx = {
+                       .family         = AF_INET6,
+                       .protocol       = protocol,
+                       .v6.saddr       = saddr,
+                       .v6.daddr       = daddr,
+                       .sport          = sport,
+                       .dport          = dport,
+               };
+               u32 act;
+
+               act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+               if (act == SK_PASS) {
+                       selected_sk = ctx.selected_sk;
+                       no_reuseport = ctx.no_reuseport;
+               } else {
+                       selected_sk = ERR_PTR(-ECONNREFUSED);
+               }
+       }
+       rcu_read_unlock();
+       *psk = selected_sk;
+       return no_reuseport;
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
 #endif /* __LINUX_FILTER_H__ */
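A sketch of the expected call site in a protocol's lookup path, modeled on the UDP/TCP integration later in this series (the function and variable names are illustrative):

    /* run before the regular htable scan; IPv4/UDP variant */
    static struct sock *demo_lookup_run_bpf(struct net *net,
                                            __be32 saddr, __be16 sport,
                                            __be32 daddr, u16 hnum)
    {
            struct sock *sk = NULL;
            bool no_reuseport;

            if (!static_branch_unlikely(&bpf_sk_lookup_enabled))
                    return NULL;    /* no program attached in any netns */

            no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
                                                saddr, sport,
                                                daddr, hnum, &sk);
            if (no_reuseport || IS_ERR_OR_NULL(sk))
                    return sk;  /* socket, NULL, or ERR_PTR(-ECONNREFUSED) */

            /* otherwise run reuseport selection on sk before using it */
            return sk;
    }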
index d3005be..dbe9c60 100644 (file)
@@ -104,6 +104,7 @@ struct xdp_frame {
        struct net_device *dev_rx; /* used by cpumap */
 };
 
+
 static inline struct skb_shared_info *
 xdp_get_shared_info_from_frame(struct xdp_frame *frame)
 {
@@ -113,6 +114,12 @@ xdp_get_shared_info_from_frame(struct xdp_frame *frame)
                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
 }
 
+struct xdp_cpumap_stats {
+       unsigned int redirect;
+       unsigned int pass;
+       unsigned int drop;
+};
+
 /* Clear kernel pointers in xdp_frame */
 static inline void xdp_scrub_frame(struct xdp_frame *frame)
 {
@@ -136,39 +143,48 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
        xdp->frame_sz = frame->frame_sz;
 }
 
-/* Convert xdp_buff to xdp_frame */
 static inline
-struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
+int xdp_update_frame_from_buff(struct xdp_buff *xdp,
+                              struct xdp_frame *xdp_frame)
 {
-       struct xdp_frame *xdp_frame;
-       int metasize;
-       int headroom;
-
-       if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
-               return xdp_convert_zc_to_xdp_frame(xdp);
+       int metasize, headroom;
 
        /* Assure headroom is available for storing info */
        headroom = xdp->data - xdp->data_hard_start;
        metasize = xdp->data - xdp->data_meta;
        metasize = metasize > 0 ? metasize : 0;
        if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
-               return NULL;
+               return -ENOSPC;
 
        /* Catch if driver didn't reserve tailroom for skb_shared_info */
        if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
                XDP_WARN("Driver BUG: missing reserved tailroom");
-               return NULL;
+               return -ENOSPC;
        }
 
-       /* Store info in top of packet */
-       xdp_frame = xdp->data_hard_start;
-
        xdp_frame->data = xdp->data;
        xdp_frame->len  = xdp->data_end - xdp->data;
        xdp_frame->headroom = headroom - sizeof(*xdp_frame);
        xdp_frame->metasize = metasize;
        xdp_frame->frame_sz = xdp->frame_sz;
 
+       return 0;
+}
+
+/* Convert xdp_buff to xdp_frame */
+static inline
+struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
+{
+       struct xdp_frame *xdp_frame;
+
+       if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+               return xdp_convert_zc_to_xdp_frame(xdp);
+
+       /* Store info in top of packet */
+       xdp_frame = xdp->data_hard_start;
+       if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
+               return NULL;
+
        /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
        xdp_frame->mem = xdp->rxq->mem;
 
index b73d3e1..cd24e8a 100644 (file)
@@ -177,9 +177,9 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
 TRACE_EVENT(xdp_cpumap_kthread,
 
        TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
-                int sched),
+                int sched, struct xdp_cpumap_stats *xdp_stats),
 
-       TP_ARGS(map_id, processed, drops, sched),
+       TP_ARGS(map_id, processed, drops, sched, xdp_stats),
 
        TP_STRUCT__entry(
                __field(int, map_id)
@@ -188,6 +188,9 @@ TRACE_EVENT(xdp_cpumap_kthread,
                __field(unsigned int, drops)
                __field(unsigned int, processed)
                __field(int, sched)
+               __field(unsigned int, xdp_pass)
+               __field(unsigned int, xdp_drop)
+               __field(unsigned int, xdp_redirect)
        ),
 
        TP_fast_assign(
@@ -197,16 +200,21 @@ TRACE_EVENT(xdp_cpumap_kthread,
                __entry->drops          = drops;
                __entry->processed      = processed;
                __entry->sched  = sched;
+               __entry->xdp_pass       = xdp_stats->pass;
+               __entry->xdp_drop       = xdp_stats->drop;
+               __entry->xdp_redirect   = xdp_stats->redirect;
        ),
 
        TP_printk("kthread"
                  " cpu=%d map_id=%d action=%s"
                  " processed=%u drops=%u"
-                 " sched=%d",
+                 " sched=%d"
+                 " xdp_pass=%u xdp_drop=%u xdp_redirect=%u",
                  __entry->cpu, __entry->map_id,
                  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
                  __entry->processed, __entry->drops,
-                 __entry->sched)
+                 __entry->sched,
+                 __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect)
 );
 
 TRACE_EVENT(xdp_cpumap_enqueue,
index 5e38638..54d0c88 100644 (file)
@@ -189,6 +189,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_STRUCT_OPS,
        BPF_PROG_TYPE_EXT,
        BPF_PROG_TYPE_LSM,
+       BPF_PROG_TYPE_SK_LOOKUP,
 };
 
 enum bpf_attach_type {
@@ -227,6 +228,8 @@ enum bpf_attach_type {
        BPF_CGROUP_INET6_GETSOCKNAME,
        BPF_XDP_DEVMAP,
        BPF_CGROUP_INET_SOCK_RELEASE,
+       BPF_XDP_CPUMAP,
+       BPF_SK_LOOKUP,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -2419,7 +2422,7 @@ union bpf_attr {
  *                     Look for an IPv6 socket.
  *
  *             If the *netns* is a negative signed 32-bit integer, then the
- *             socket lookup table in the netns associated with the *ctx* will
+ *             socket lookup table in the netns associated with the *ctx*
  *             will be used. For the TC hooks, this is the netns of the device
  *             in the skb. For socket hooks, this is the netns of the socket.
  *             If *netns* is any other signed 32-bit value greater than or
@@ -2456,7 +2459,7 @@ union bpf_attr {
  *                     Look for an IPv6 socket.
  *
  *             If the *netns* is a negative signed 32-bit integer, then the
- *             socket lookup table in the netns associated with the *ctx* will
+ *             socket lookup table in the netns associated with the *ctx*
  *             will be used. For the TC hooks, this is the netns of the device
  *             in the skb. For socket hooks, this is the netns of the socket.
  *             If *netns* is any other signed 32-bit value greater than or
@@ -3068,6 +3071,10 @@ union bpf_attr {
  *
  * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
  *     Description
+ *             Helper is overloaded depending on BPF program type. This
+ *             description applies to **BPF_PROG_TYPE_SCHED_CLS** and
+ *             **BPF_PROG_TYPE_SCHED_ACT** programs.
+ *
  *             Assign the *sk* to the *skb*. When combined with appropriate
  *             routing configuration to receive the packet towards the socket,
  *             will cause *skb* to be delivered to the specified socket.
@@ -3093,6 +3100,56 @@ union bpf_attr {
  *             **-ESOCKTNOSUPPORT** if the socket type is not supported
  *             (reuseport).
  *
+ * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
+ *     Description
+ *             Helper is overloaded depending on BPF program type. This
+ *             description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
+ *
+ *             Select the *sk* as a result of a socket lookup.
+ *
+ *             For the operation to succeed, the passed socket must be
+ *             compatible with the packet description provided by the
+ *             *ctx* object.
+ *
+ *             The L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
+ *             be an exact match, while the IP family (**AF_INET** or
+ *             **AF_INET6**) must be compatible; that is, IPv6 sockets
+ *             that are not v6-only can be selected for IPv4 packets.
+ *
+ *             Only TCP listeners and unconnected UDP sockets can be
+ *             selected. *sk* can also be NULL to reset any previous
+ *             selection.
+ *
+ *             The *flags* argument can be a combination of the following
+ *             values:
+ *
+ *             * **BPF_SK_LOOKUP_F_REPLACE** to override the previous
+ *               socket selection, potentially done by a BPF program
+ *               that ran before us.
+ *
+ *             * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
+ *               load-balancing within reuseport group for the socket
+ *               being selected.
+ *
+ *             On success *ctx->sk* will point to the selected socket.
+ *
+ *     Return
+ *             0 on success, or a negative errno in case of failure.
+ *
+ *             * **-EAFNOSUPPORT** if socket family (*sk->family*) is
+ *               not compatible with packet family (*ctx->family*).
+ *
+ *             * **-EEXIST** if socket has been already selected,
+ *               potentially by another program, and
+ *               **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
+ *
+ *             * **-EINVAL** if unsupported flags were specified.
+ *
+ *             * **-EPROTOTYPE** if socket L4 protocol
+ *               (*sk->protocol*) doesn't match packet protocol
+ *               (*ctx->protocol*).
+ *
+ *             * **-ESOCKTNOSUPPORT** if socket is not in allowed
+ *               state (TCP listening or UDP unconnected).
+ *
  * u64 bpf_ktime_get_boot_ns(void)
  *     Description
  *             Return the time elapsed since system boot, in nanoseconds.
@@ -3606,6 +3663,12 @@ enum {
        BPF_RINGBUF_HDR_SZ              = 8,
 };
 
+/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
+enum {
+       BPF_SK_LOOKUP_F_REPLACE         = (1ULL << 0),
+       BPF_SK_LOOKUP_F_NO_REUSEPORT    = (1ULL << 1),
+};
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
        BPF_ADJ_ROOM_NET,
@@ -3849,6 +3912,19 @@ struct bpf_devmap_val {
        } bpf_prog;
 };
 
+/* CPUMAP map-value layout
+ *
+ * The struct data-layout of map-value is a configuration interface.
+ * New members can only be added to the end of this structure.
+ */
+struct bpf_cpumap_val {
+       __u32 qsize;    /* queue size to remote target CPU */
+       union {
+               int   fd;       /* prog fd on map write */
+               __u32 id;       /* prog id on map read */
+       } bpf_prog;
+};
+
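A userspace sketch of programming the extended map value (assumes libbpf's bpf_map_update_elem(); the fds, qsize, and CPU number are illustrative, and the program must have been loaded with expected_attach_type BPF_XDP_CPUMAP):

    #include <bpf/bpf.h>

    static int demo_set_cpu(int map_fd, __u32 cpu, int prog_fd)
    {
            struct bpf_cpumap_val val = {
                    .qsize = 2048,          /* ptr_ring slots on that CPU */
                    .bpf_prog.fd = prog_fd, /* fd <= 0 means: no program */
            };

            return bpf_map_update_elem(map_fd, &cpu, &val, 0);
    }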
 enum sk_action {
        SK_DROP = 0,
        SK_PASS,
@@ -3986,7 +4062,7 @@ struct bpf_link_info {
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
  * by user and intended to be used by socket (e.g. to bind to, depends on
- * attach attach type).
+ * attach type).
  */
 struct bpf_sock_addr {
        __u32 user_family;      /* Allows 4-byte read, but no write. */
@@ -4335,4 +4411,19 @@ struct bpf_pidns_info {
        __u32 pid;
        __u32 tgid;
 };
+
+/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+struct bpf_sk_lookup {
+       __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+
+       __u32 family;           /* Protocol family (AF_INET, AF_INET6) */
+       __u32 protocol;         /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+       __u32 remote_ip4;       /* Network byte order */
+       __u32 remote_ip6[4];    /* Network byte order */
+       __u32 remote_port;      /* Network byte order */
+       __u32 local_ip4;        /* Network byte order */
+       __u32 local_ip6[4];     /* Network byte order */
+       __u32 local_port;       /* Host byte order */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
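Tying the context, flags, and bpf_sk_assign() together, a minimal SK_LOOKUP program sketch modeled on the new selftests (the map, port number, and section name are illustrative; section naming depends on the libbpf version):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_SOCKMAP);
            __uint(max_entries, 1);
            __type(key, __u32);
            __type(value, __u64);
    } redir_map SEC(".maps");

    SEC("sk_lookup/demo")
    int select_sock(struct bpf_sk_lookup *ctx)
    {
            const __u32 zero = 0;
            struct bpf_sock *sk;
            int err;

            if (ctx->local_port != 7777)    /* illustrative service port */
                    return SK_PASS;         /* fall back to htable lookup */

            sk = bpf_map_lookup_elem(&redir_map, &zero);
            if (!sk)
                    return SK_PASS;

            err = bpf_sk_assign(ctx, sk, 0);
            bpf_sk_release(sk);
            return err ? SK_DROP : SK_PASS;
    }

    char _license[] SEC("license") = "GPL";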
index 03d6d43..ee36b7f 100644 (file)
@@ -3672,7 +3672,6 @@ struct btf *btf_parse_vmlinux(void)
                goto errout;
 
        bpf_struct_ops_init(btf, log);
-       init_btf_sock_ids(btf);
 
        btf_verifier_env_free(env);
        refcount_set(&btf->refcnt, 1);
@@ -3818,16 +3817,17 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                return true;
 
        /* this is a pointer to another type */
-       info->reg_type = PTR_TO_BTF_ID;
        for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
                const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
 
                if (ctx_arg_info->offset == off) {
                        info->reg_type = ctx_arg_info->reg_type;
-                       break;
+                       info->btf_id = ctx_arg_info->btf_id;
+                       return true;
                }
        }
 
+       info->reg_type = PTR_TO_BTF_ID;
        if (tgt_prog) {
                ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
                if (ret > 0) {
index 9df4cc9..7be02e5 100644 (file)
@@ -1958,6 +1958,61 @@ void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
                }
 }
 
+/**
+ * bpf_prog_array_delete_safe_at() - Replaces the program at the given
+ *                                   index into the program array with
+ *                                   a dummy no-op program.
+ * @array: a bpf_prog_array
+ * @index: the index of the program to replace
+ *
+ * Skips over dummy programs, by not counting them, when calculating
+ * the position of the program to replace.
+ *
+ * Return:
+ * * 0         - Success
+ * * -EINVAL   - Invalid index value. Must be a non-negative integer.
+ * * -ENOENT   - Index out of range
+ */
+int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
+{
+       return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
+}
+
+/**
+ * bpf_prog_array_update_at() - Updates the program at the given index
+ *                              into the program array.
+ * @array: a bpf_prog_array
+ * @index: the index of the program to update
+ * @prog: the program to insert into the array
+ *
+ * Skips over dummy programs, by not counting them, when calculating
+ * the position of the program to update.
+ *
+ * Return:
+ * * 0         - Success
+ * * -EINVAL   - Invalid index value. Must be a non-negative integer.
+ * * -ENOENT   - Index out of range
+ */
+int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
+                            struct bpf_prog *prog)
+{
+       struct bpf_prog_array_item *item;
+
+       if (unlikely(index < 0))
+               return -EINVAL;
+
+       for (item = array->items; item->prog; item++) {
+               if (item->prog == &dummy_bpf_prog.prog)
+                       continue;
+               if (!index) {
+                       WRITE_ONCE(item->prog, prog);
+                       return 0;
+               }
+               index--;
+       }
+       return -ENOENT;
+}
+
 int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        struct bpf_prog *exclude_prog,
                        struct bpf_prog *include_prog,
index bd86580..f1c4652 100644 (file)
@@ -52,7 +52,6 @@ struct xdp_bulk_queue {
 struct bpf_cpu_map_entry {
        u32 cpu;    /* kthread CPU and map index */
        int map_id; /* Back reference to map */
-       u32 qsize;  /* Queue size placeholder for map lookup */
 
        /* XDP can run multiple RX-ring queues, need __percpu enqueue store */
        struct xdp_bulk_queue __percpu *bulkq;
@@ -62,10 +61,14 @@ struct bpf_cpu_map_entry {
        /* Queue with potential multi-producers, and single-consumer kthread */
        struct ptr_ring *queue;
        struct task_struct *kthread;
-       struct work_struct kthread_stop_wq;
+
+       struct bpf_cpumap_val value;
+       struct bpf_prog *prog;
 
        atomic_t refcnt; /* Control when this struct can be free'ed */
        struct rcu_head rcu;
+
+       struct work_struct kthread_stop_wq;
 };
 
 struct bpf_cpu_map {
@@ -80,6 +83,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
 
 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 {
+       u32 value_size = attr->value_size;
        struct bpf_cpu_map *cmap;
        int err = -ENOMEM;
        u64 cost;
@@ -90,7 +94,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 
        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 4 ||
-           attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+           (value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
+            value_size != offsetofend(struct bpf_cpumap_val, bpf_prog.fd)) ||
+           attr->map_flags & ~BPF_F_NUMA_NODE)
                return ERR_PTR(-EINVAL);
 
        cmap = kzalloc(sizeof(*cmap), GFP_USER);
@@ -212,6 +218,8 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 {
        if (atomic_dec_and_test(&rcpu->refcnt)) {
+               if (rcpu->prog)
+                       bpf_prog_put(rcpu->prog);
                /* The queue should be empty at this point */
                __cpu_map_ring_cleanup(rcpu->queue);
                ptr_ring_cleanup(rcpu->queue, NULL);
@@ -220,6 +228,75 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
        }
 }
 
+static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
+                                   void **frames, int n,
+                                   struct xdp_cpumap_stats *stats)
+{
+       struct xdp_rxq_info rxq;
+       struct xdp_buff xdp;
+       int i, nframes = 0;
+
+       if (!rcpu->prog)
+               return n;
+
+       rcu_read_lock_bh();
+
+       xdp_set_return_frame_no_direct();
+       xdp.rxq = &rxq;
+
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+               u32 act;
+               int err;
+
+               rxq.dev = xdpf->dev_rx;
+               rxq.mem = xdpf->mem;
+               /* TODO: report queue_index to xdp_rxq_info */
+
+               xdp_convert_frame_to_buff(xdpf, &xdp);
+
+               act = bpf_prog_run_xdp(rcpu->prog, &xdp);
+               switch (act) {
+               case XDP_PASS:
+                       err = xdp_update_frame_from_buff(&xdp, xdpf);
+                       if (err < 0) {
+                               xdp_return_frame(xdpf);
+                               stats->drop++;
+                       } else {
+                               frames[nframes++] = xdpf;
+                               stats->pass++;
+                       }
+                       break;
+               case XDP_REDIRECT:
+                       err = xdp_do_redirect(xdpf->dev_rx, &xdp,
+                                             rcpu->prog);
+                       if (unlikely(err)) {
+                               xdp_return_frame(xdpf);
+                               stats->drop++;
+                       } else {
+                               stats->redirect++;
+                       }
+                       break;
+               default:
+                       bpf_warn_invalid_xdp_action(act);
+                       /* fallthrough */
+               case XDP_DROP:
+                       xdp_return_frame(xdpf);
+                       stats->drop++;
+                       break;
+               }
+       }
+
+       if (stats->redirect)
+               xdp_do_flush_map();
+
+       xdp_clear_return_frame_no_direct();
+
+       rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
+
+       return nframes;
+}
+
 #define CPUMAP_BATCH 8
 
 static int cpu_map_kthread_run(void *data)
@@ -234,11 +311,12 @@ static int cpu_map_kthread_run(void *data)
         * kthread_stop signal until queue is empty.
         */
        while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
+               struct xdp_cpumap_stats stats = {}; /* zero stats */
+               gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
                unsigned int drops = 0, sched = 0;
                void *frames[CPUMAP_BATCH];
                void *skbs[CPUMAP_BATCH];
-               gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
-               int i, n, m;
+               int i, n, m, nframes;
 
                /* Release CPU reschedule checks */
                if (__ptr_ring_empty(rcpu->queue)) {
@@ -259,8 +337,8 @@ static int cpu_map_kthread_run(void *data)
                 * kthread CPU pinned. Lockless access to ptr_ring
                 * consume side valid as no-resize allowed of queue.
                 */
-               n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
-
+               n = __ptr_ring_consume_batched(rcpu->queue, frames,
+                                              CPUMAP_BATCH);
                for (i = 0; i < n; i++) {
                        void *f = frames[i];
                        struct page *page = virt_to_page(f);
@@ -272,15 +350,19 @@ static int cpu_map_kthread_run(void *data)
                        prefetchw(page);
                }
 
-               m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
-               if (unlikely(m == 0)) {
-                       for (i = 0; i < n; i++)
-                               skbs[i] = NULL; /* effect: xdp_return_frame */
-                       drops = n;
+               /* Support running another XDP prog on this CPU */
+               nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
+               if (nframes) {
+                       m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
+                       if (unlikely(m == 0)) {
+                               for (i = 0; i < nframes; i++)
+                                       skbs[i] = NULL; /* effect: xdp_return_frame */
+                               drops += nframes;
+                       }
                }
 
                local_bh_disable();
-               for (i = 0; i < n; i++) {
+               for (i = 0; i < nframes; i++) {
                        struct xdp_frame *xdpf = frames[i];
                        struct sk_buff *skb = skbs[i];
                        int ret;
@@ -297,7 +379,7 @@ static int cpu_map_kthread_run(void *data)
                                drops++;
                }
                /* Feedback loop via tracepoint */
-               trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
+               trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
 
                local_bh_enable(); /* resched point, may call do_softirq() */
        }
@@ -307,13 +389,38 @@ static int cpu_map_kthread_run(void *data)
        return 0;
 }
 
-static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
-                                                      int map_id)
+bool cpu_map_prog_allowed(struct bpf_map *map)
 {
+       return map->map_type == BPF_MAP_TYPE_CPUMAP &&
+              map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
+}
+
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+{
+       struct bpf_prog *prog;
+
+       prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+
+       if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+               bpf_prog_put(prog);
+               return -EINVAL;
+       }
+
+       rcpu->value.bpf_prog.id = prog->aux->id;
+       rcpu->prog = prog;
+
+       return 0;
+}
+
+static struct bpf_cpu_map_entry *
+__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
+{
+       int numa, err, i, fd = value->bpf_prog.fd;
        gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
        struct bpf_cpu_map_entry *rcpu;
        struct xdp_bulk_queue *bq;
-       int numa, err, i;
 
        /* Have map->numa_node, but choose node of redirect target CPU */
        numa = cpu_to_node(cpu);
@@ -338,19 +445,22 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
        if (!rcpu->queue)
                goto free_bulkq;
 
-       err = ptr_ring_init(rcpu->queue, qsize, gfp);
+       err = ptr_ring_init(rcpu->queue, value->qsize, gfp);
        if (err)
                goto free_queue;
 
        rcpu->cpu    = cpu;
        rcpu->map_id = map_id;
-       rcpu->qsize  = qsize;
+       rcpu->value.qsize  = value->qsize;
+
+       if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+               goto free_ptr_ring;
 
        /* Setup kthread */
        rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
                                               "cpumap/%d/map:%d", cpu, map_id);
        if (IS_ERR(rcpu->kthread))
-               goto free_ptr_ring;
+               goto free_prog;
 
        get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
        get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
@@ -361,6 +471,9 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
 
        return rcpu;
 
+free_prog:
+       if (rcpu->prog)
+               bpf_prog_put(rcpu->prog);
 free_ptr_ring:
        ptr_ring_cleanup(rcpu->queue, NULL);
 free_queue:
@@ -437,12 +550,12 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
                               u64 map_flags)
 {
        struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+       struct bpf_cpumap_val cpumap_value = {};
        struct bpf_cpu_map_entry *rcpu;
-
        /* Array index key correspond to CPU number */
        u32 key_cpu = *(u32 *)key;
-       /* Value is the queue size */
-       u32 qsize = *(u32 *)value;
+
+       memcpy(&cpumap_value, value, map->value_size);
 
        if (unlikely(map_flags > BPF_EXIST))
                return -EINVAL;
@@ -450,18 +563,18 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
                return -E2BIG;
        if (unlikely(map_flags == BPF_NOEXIST))
                return -EEXIST;
-       if (unlikely(qsize > 16384)) /* sanity limit on qsize */
+       if (unlikely(cpumap_value.qsize > 16384)) /* sanity limit on qsize */
                return -EOVERFLOW;
 
        /* Make sure CPU is a valid possible cpu */
        if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
                return -ENODEV;
 
-       if (qsize == 0) {
+       if (cpumap_value.qsize == 0) {
                rcpu = NULL; /* Same as deleting */
        } else {
                /* Updating qsize cause re-allocation of bpf_cpu_map_entry */
-               rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
+               rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id);
                if (!rcpu)
                        return -ENOMEM;
                rcpu->cmap = cmap;
@@ -523,7 +636,7 @@ static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
        struct bpf_cpu_map_entry *rcpu =
                __cpu_map_lookup_elem(map, *(u32 *)key);
 
-       return rcpu ? &rcpu->qsize : NULL;
+       return rcpu ? &rcpu->value : NULL;
 }
 
 static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
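For reference, a cpumap-attached XDP program sketch in the style of the new selftest (the "xdp_cpumap/" section prefix follows the libbpf convention added alongside this series; the ifindex check is illustrative):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp_cpumap/dummy")
    int xdp_dummy_cm(struct xdp_md *ctx)
    {
            /* runs on the remote CPU's kthread, once per enqueued frame */
            if (ctx->ingress_ifindex == 1)  /* e.g. drop loopback frames */
                    return XDP_DROP;

            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";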
index c69071e..8a7af11 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/fs.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
+#include <linux/btf_ids.h>
 
 struct bpf_iter_seq_map_info {
        u32 mid;
@@ -81,7 +82,10 @@ static const struct seq_operations bpf_map_seq_ops = {
        .show   = bpf_map_seq_show,
 };
 
-static const struct bpf_iter_reg bpf_map_reg_info = {
+BTF_ID_LIST(btf_bpf_map_id)
+BTF_ID(struct, bpf_map)
+
+static struct bpf_iter_reg bpf_map_reg_info = {
        .target                 = "bpf_map",
        .seq_ops                = &bpf_map_seq_ops,
        .init_seq_private       = NULL,
@@ -96,6 +100,7 @@ static const struct bpf_iter_reg bpf_map_reg_info = {
 
 static int __init bpf_map_iter_init(void)
 {
+       bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id;
        return bpf_iter_reg_target(&bpf_map_reg_info);
 }
 
index 310241c..71405ed 100644 (file)
@@ -25,6 +25,32 @@ struct bpf_netns_link {
 /* Protects updates to netns_bpf */
 DEFINE_MUTEX(netns_bpf_mutex);
 
+static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
+{
+       switch (type) {
+#ifdef CONFIG_INET
+       case NETNS_BPF_SK_LOOKUP:
+               static_branch_dec(&bpf_sk_lookup_enabled);
+               break;
+#endif
+       default:
+               break;
+       }
+}
+
+static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
+{
+       switch (type) {
+#ifdef CONFIG_INET
+       case NETNS_BPF_SK_LOOKUP:
+               static_branch_inc(&bpf_sk_lookup_enabled);
+               break;
+#endif
+       default:
+               break;
+       }
+}
+
 /* Must be called with netns_bpf_mutex held. */
 static void netns_bpf_run_array_detach(struct net *net,
                                       enum netns_bpf_attach_type type)
@@ -36,12 +62,50 @@ static void netns_bpf_run_array_detach(struct net *net,
        bpf_prog_array_free(run_array);
 }
 
+static int link_index(struct net *net, enum netns_bpf_attach_type type,
+                     struct bpf_netns_link *link)
+{
+       struct bpf_netns_link *pos;
+       int i = 0;
+
+       list_for_each_entry(pos, &net->bpf.links[type], node) {
+               if (pos == link)
+                       return i;
+               i++;
+       }
+       return -ENOENT;
+}
+
+static int link_count(struct net *net, enum netns_bpf_attach_type type)
+{
+       struct list_head *pos;
+       int i = 0;
+
+       list_for_each(pos, &net->bpf.links[type])
+               i++;
+       return i;
+}
+
+static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
+                           struct bpf_prog_array *prog_array)
+{
+       struct bpf_netns_link *pos;
+       unsigned int i = 0;
+
+       list_for_each_entry(pos, &net->bpf.links[type], node) {
+               prog_array->items[i].prog = pos->link.prog;
+               i++;
+       }
+}
+
 static void bpf_netns_link_release(struct bpf_link *link)
 {
        struct bpf_netns_link *net_link =
                container_of(link, struct bpf_netns_link, link);
        enum netns_bpf_attach_type type = net_link->netns_type;
+       struct bpf_prog_array *old_array, *new_array;
        struct net *net;
+       int cnt, idx;
 
        mutex_lock(&netns_bpf_mutex);
 
@@ -53,9 +117,30 @@ static void bpf_netns_link_release(struct bpf_link *link)
        if (!net)
                goto out_unlock;
 
-       netns_bpf_run_array_detach(net, type);
+       /* Mark attach point as unused */
+       netns_bpf_attach_type_unneed(type);
+
+       /* Remember link position in case of safe delete */
+       idx = link_index(net, type, net_link);
        list_del(&net_link->node);
 
+       cnt = link_count(net, type);
+       if (!cnt) {
+               netns_bpf_run_array_detach(net, type);
+               goto out_unlock;
+       }
+
+       old_array = rcu_dereference_protected(net->bpf.run_array[type],
+                                             lockdep_is_held(&netns_bpf_mutex));
+       new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
+       if (!new_array) {
+               WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
+               goto out_unlock;
+       }
+       fill_prog_array(net, type, new_array);
+       rcu_assign_pointer(net->bpf.run_array[type], new_array);
+       bpf_prog_array_free(old_array);
+
 out_unlock:
        mutex_unlock(&netns_bpf_mutex);
 }
@@ -77,7 +162,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
        enum netns_bpf_attach_type type = net_link->netns_type;
        struct bpf_prog_array *run_array;
        struct net *net;
-       int ret = 0;
+       int idx, ret;
 
        if (old_prog && old_prog != link->prog)
                return -EPERM;
@@ -95,7 +180,10 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
 
        run_array = rcu_dereference_protected(net->bpf.run_array[type],
                                              lockdep_is_held(&netns_bpf_mutex));
-       WRITE_ONCE(run_array->items[0].prog, new_prog);
+       idx = link_index(net, type, net_link);
+       ret = bpf_prog_array_update_at(run_array, idx, new_prog);
+       if (ret)
+               goto out_unlock;
 
        old_prog = xchg(&link->prog, new_prog);
        bpf_prog_put(old_prog);
@@ -309,18 +397,30 @@ int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
        return ret;
 }
 
+static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
+{
+       switch (type) {
+       case NETNS_BPF_FLOW_DISSECTOR:
+               return 1;
+       case NETNS_BPF_SK_LOOKUP:
+               return 64;
+       default:
+               return 0;
+       }
+}
+
 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
                                 enum netns_bpf_attach_type type)
 {
        struct bpf_netns_link *net_link =
                container_of(link, struct bpf_netns_link, link);
        struct bpf_prog_array *run_array;
-       int err;
+       int cnt, err;
 
        mutex_lock(&netns_bpf_mutex);
 
-       /* Allow attaching only one prog or link for now */
-       if (!list_empty(&net->bpf.links[type])) {
+       cnt = link_count(net, type);
+       if (cnt >= netns_bpf_max_progs(type)) {
                err = -E2BIG;
                goto out_unlock;
        }
@@ -334,6 +434,9 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
        case NETNS_BPF_FLOW_DISSECTOR:
                err = flow_dissector_bpf_prog_attach_check(net, link->prog);
                break;
+       case NETNS_BPF_SK_LOOKUP:
+               err = 0; /* nothing to check */
+               break;
        default:
                err = -EINVAL;
                break;
@@ -341,16 +444,22 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
        if (err)
                goto out_unlock;
 
-       run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
+       run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
        if (!run_array) {
                err = -ENOMEM;
                goto out_unlock;
        }
-       run_array->items[0].prog = link->prog;
-       rcu_assign_pointer(net->bpf.run_array[type], run_array);
 
        list_add_tail(&net_link->node, &net->bpf.links[type]);
 
+       fill_prog_array(net, type, run_array);
+       run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
+                                       lockdep_is_held(&netns_bpf_mutex));
+       bpf_prog_array_free(run_array);
+
+       /* Mark attach point as used */
+       netns_bpf_attach_type_need(type);
+
 out_unlock:
        mutex_unlock(&netns_bpf_mutex);
        return err;
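From userspace, attaching one of the up to 64 SK_LOOKUP links per netns would look roughly like this (assumes libbpf's bpf_program__attach_netns(); the netns path is illustrative and error handling is minimal):

    #include <fcntl.h>
    #include <bpf/libbpf.h>

    static struct bpf_link *demo_attach(struct bpf_program *prog)
    {
            int netns_fd = open("/proc/self/ns/net", O_RDONLY);
            struct bpf_link *link;

            if (netns_fd < 0)
                    return NULL;
            link = bpf_program__attach_netns(prog, netns_fd);
            /* libbpf of this era returns ERR_PTR on failure; check the
             * result with libbpf_get_error(link) before using it */
            return link;
    }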
@@ -426,8 +535,10 @@ static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
        mutex_lock(&netns_bpf_mutex);
        for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
                netns_bpf_run_array_detach(net, type);
-               list_for_each_entry(net_link, &net->bpf.links[type], node)
+               list_for_each_entry(net_link, &net->bpf.links[type], node) {
                        net_link->net = NULL; /* auto-detach link */
+                       netns_bpf_attach_type_unneed(type);
+               }
                if (net->bpf.progs[type])
                        bpf_prog_put(net->bpf.progs[type]);
        }
index 7ea9dfb..d07417d 100644 (file)
@@ -2022,6 +2022,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
                default:
                        return -EINVAL;
                }
+       case BPF_PROG_TYPE_SK_LOOKUP:
+               if (expected_attach_type == BPF_SK_LOOKUP)
+                       return 0;
+               return -EINVAL;
        case BPF_PROG_TYPE_EXT:
                if (expected_attach_type)
                        return -EINVAL;
@@ -2756,6 +2760,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
        case BPF_PROG_TYPE_CGROUP_SOCK:
        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+       case BPF_PROG_TYPE_SK_LOOKUP:
                return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
        case BPF_PROG_TYPE_CGROUP_SKB:
                if (!capable(CAP_NET_ADMIN))
@@ -2817,6 +2822,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
                return BPF_PROG_TYPE_CGROUP_SOCKOPT;
        case BPF_TRACE_ITER:
                return BPF_PROG_TYPE_TRACING;
+       case BPF_SK_LOOKUP:
+               return BPF_PROG_TYPE_SK_LOOKUP;
        default:
                return BPF_PROG_TYPE_UNSPEC;
        }
@@ -2953,6 +2960,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
        case BPF_LIRC_MODE2:
                return lirc_prog_query(attr, uattr);
        case BPF_FLOW_DISSECTOR:
+       case BPF_SK_LOOKUP:
                return netns_bpf_prog_query(attr, uattr);
        default:
                return -EINVAL;
@@ -3891,6 +3899,7 @@ static int link_create(union bpf_attr *attr)
                ret = tracing_bpf_link_attach(attr, prog);
                break;
        case BPF_PROG_TYPE_FLOW_DISSECTOR:
+       case BPF_PROG_TYPE_SK_LOOKUP:
                ret = netns_bpf_link_create(attr, prog);
                break;
        default:
index 4dbf2b6..2feecf0 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/fs.h>
 #include <linux/fdtable.h>
 #include <linux/filter.h>
+#include <linux/btf_ids.h>
 
 struct bpf_iter_seq_task_common {
        struct pid_namespace *ns;
@@ -312,7 +313,11 @@ static const struct seq_operations task_file_seq_ops = {
        .show   = task_file_seq_show,
 };
 
-static const struct bpf_iter_reg task_reg_info = {
+BTF_ID_LIST(btf_task_file_ids)
+BTF_ID(struct, task_struct)
+BTF_ID(struct, file)
+
+static struct bpf_iter_reg task_reg_info = {
        .target                 = "task",
        .seq_ops                = &task_seq_ops,
        .init_seq_private       = init_seq_pidns,
@@ -325,7 +330,7 @@ static const struct bpf_iter_reg task_reg_info = {
        },
 };
 
-static const struct bpf_iter_reg task_file_reg_info = {
+static struct bpf_iter_reg task_file_reg_info = {
        .target                 = "task_file",
        .seq_ops                = &task_file_seq_ops,
        .init_seq_private       = init_seq_pidns,
@@ -344,10 +349,13 @@ static int __init task_iter_init(void)
 {
        int ret;
 
+       task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
        ret = bpf_iter_reg_target(&task_reg_info);
        if (ret)
                return ret;
 
+       task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
+       task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
        return bpf_iter_reg_target(&task_file_reg_info);
 }
 late_initcall(task_iter_init);
index 3c1efc9..9a6703b 100644 (file)
@@ -3878,10 +3878,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
                        }
                        meta->ref_obj_id = reg->ref_obj_id;
                }
-       } else if (arg_type == ARG_PTR_TO_SOCKET) {
+       } else if (arg_type == ARG_PTR_TO_SOCKET ||
+                  arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
                expected_type = PTR_TO_SOCKET;
-               if (type != expected_type)
-                       goto err_type;
+               if (!(register_is_null(reg) &&
+                     arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
+                       if (type != expected_type)
+                               goto err_type;
+               }
        } else if (arg_type == ARG_PTR_TO_BTF_ID) {
                expected_type = PTR_TO_BTF_ID;
                if (type != expected_type)
@@ -7354,6 +7358,9 @@ static int check_return_code(struct bpf_verifier_env *env)
                        return -ENOTSUPP;
                }
                break;
+       case BPF_PROG_TYPE_SK_LOOKUP:
+               range = tnum_range(SK_DROP, SK_PASS);
+               break;
        case BPF_PROG_TYPE_EXT:
                /* freplace program can return anything as its return value
                 * depends on the to-be-replaced kernel func or bpf program.
index a5fddf9..ca7d635 100644 (file)
@@ -5275,31 +5275,21 @@ static struct bpf_test tests[] = {
        {       /* Mainly checking JIT here. */
                "BPF_MAXINSNS: Ctx heavy transformations",
                { },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
-               CLASSIC | FLAG_EXPECTED_FAIL,
-#else
                CLASSIC,
-#endif
                { },
                {
                        {  1, SKB_VLAN_PRESENT },
                        { 10, SKB_VLAN_PRESENT }
                },
                .fill_helper = bpf_fill_maxinsns6,
-               .expected_errcode = -ENOTSUPP,
        },
        {       /* Mainly checking JIT here. */
                "BPF_MAXINSNS: Call heavy transformations",
                { },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
-               CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
-#else
                CLASSIC | FLAG_NO_DATA,
-#endif
                { },
                { { 1, 0 }, { 10, 0 } },
                .fill_helper = bpf_fill_maxinsns7,
-               .expected_errcode = -ENOTSUPP,
        },
        {       /* Mainly checking JIT here. */
                "BPF_MAXINSNS: Jump heavy test",
@@ -5350,28 +5340,18 @@ static struct bpf_test tests[] = {
        {
                "BPF_MAXINSNS: exec all MSH",
                { },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
-               CLASSIC | FLAG_EXPECTED_FAIL,
-#else
                CLASSIC,
-#endif
                { 0xfa, 0xfb, 0xfc, 0xfd, },
                { { 4, 0xababab83 } },
                .fill_helper = bpf_fill_maxinsns13,
-               .expected_errcode = -ENOTSUPP,
        },
        {
                "BPF_MAXINSNS: ld_abs+get_processor_id",
                { },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
-               CLASSIC | FLAG_EXPECTED_FAIL,
-#else
                CLASSIC,
-#endif
                { },
                { { 1, 0xbee } },
                .fill_helper = bpf_fill_ld_abs_get_processor_id,
-               .expected_errcode = -ENOTSUPP,
        },
        /*
         * LD_IND / LD_ABS on fragmented SKBs
index 19f1abc..316349f 100644 (file)
@@ -5449,6 +5449,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
                for (i = 0; i < new->aux->used_map_cnt; i++) {
                        if (dev_map_can_have_prog(new->aux->used_maps[i]))
                                return -EINVAL;
+                       if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+                               return -EINVAL;
                }
        }
 
@@ -8880,6 +8882,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                        return -EINVAL;
                }
 
+               if (prog->expected_attach_type == BPF_XDP_CPUMAP) {
+                       NL_SET_ERR_MSG(extack,
+                                      "BPF_XDP_CPUMAP programs can not be attached to a device");
+                       bpf_prog_put(prog);
+                       return -EINVAL;
+               }
+
                /* prog->aux->id may be 0 for orphaned device-bound progs */
                if (prog->aux->id && prog->aux->id == prog_id) {
                        bpf_prog_put(prog);
index 2bf6624..3fa16b8 100644 (file)
@@ -9252,61 +9252,205 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
 
 const struct bpf_prog_ops sk_reuseport_prog_ops = {
 };
-#endif /* CONFIG_INET */
 
-DEFINE_BPF_DISPATCHER(xdp)
+DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
+EXPORT_SYMBOL(bpf_sk_lookup_enabled);
 
-void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
+          struct sock *, sk, u64, flags)
 {
-       bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
+       if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
+                              BPF_SK_LOOKUP_F_NO_REUSEPORT)))
+               return -EINVAL;
+       if (unlikely(sk && sk_is_refcounted(sk)))
+               return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
+       if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
+               return -ESOCKTNOSUPPORT; /* reject connected sockets */
+
+       /* Check if socket is suitable for packet L3/L4 protocol */
+       if (sk && sk->sk_protocol != ctx->protocol)
+               return -EPROTOTYPE;
+       if (sk && sk->sk_family != ctx->family &&
+           (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
+               return -EAFNOSUPPORT;
+
+       if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
+               return -EEXIST;
+
+       /* Select socket as lookup result */
+       ctx->selected_sk = sk;
+       ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
+       return 0;
 }
 
-/* Define a list of socket types which can be the argument for
- * skc_to_*_sock() helpers. All these sockets should have
- * sock_common as the first argument in its memory layout.
- */
-#define BTF_SOCK_TYPE_xxx \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock")                  \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock")  \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock")      \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock")      \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock")                \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock")                       \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common")         \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock")                    \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock")        \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock")        \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock")                  \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock")                    \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock")
-
-enum {
-#define BTF_SOCK_TYPE(name, str) name,
-BTF_SOCK_TYPE_xxx
-#undef BTF_SOCK_TYPE
-MAX_BTF_SOCK_TYPE,
+static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
+       .func           = bpf_sk_lookup_assign,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_SOCKET_OR_NULL,
+       .arg3_type      = ARG_ANYTHING,
 };
 
-static int btf_sock_ids[MAX_BTF_SOCK_TYPE];
+static const struct bpf_func_proto *
+sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_perf_event_output:
+               return &bpf_event_output_data_proto;
+       case BPF_FUNC_sk_assign:
+               return &bpf_sk_lookup_assign_proto;
+       case BPF_FUNC_sk_release:
+               return &bpf_sk_release_proto;
+       default:
+               return bpf_base_func_proto(func_id);
+       }
+}
 
-#ifdef CONFIG_BPF_SYSCALL
-static const char *bpf_sock_types[] = {
-#define BTF_SOCK_TYPE(name, str) str,
-BTF_SOCK_TYPE_xxx
-#undef BTF_SOCK_TYPE
-};
+static bool sk_lookup_is_valid_access(int off, int size,
+                                     enum bpf_access_type type,
+                                     const struct bpf_prog *prog,
+                                     struct bpf_insn_access_aux *info)
+{
+       if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
+               return false;
+       if (off % size != 0)
+               return false;
+       if (type != BPF_READ)
+               return false;
+
+       switch (off) {
+       case offsetof(struct bpf_sk_lookup, sk):
+               info->reg_type = PTR_TO_SOCKET_OR_NULL;
+               return size == sizeof(__u64);
 
-void init_btf_sock_ids(struct btf *btf)
+       case bpf_ctx_range(struct bpf_sk_lookup, family):
+       case bpf_ctx_range(struct bpf_sk_lookup, protocol):
+       case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
+       case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
+       case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
+       case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
+       case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+       case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+               bpf_ctx_record_field_size(info, sizeof(__u32));
+               return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+
+       default:
+               return false;
+       }
+}
+
+static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
+                                       const struct bpf_insn *si,
+                                       struct bpf_insn *insn_buf,
+                                       struct bpf_prog *prog,
+                                       u32 *target_size)
 {
-       int i, btf_id;
+       struct bpf_insn *insn = insn_buf;
+
+       switch (si->off) {
+       case offsetof(struct bpf_sk_lookup, sk):
+               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sk_lookup_kern, selected_sk));
+               break;
 
-       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
-               btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
-                                              BTF_KIND_STRUCT);
-               if (btf_id > 0)
-                       btf_sock_ids[i] = btf_id;
+       case offsetof(struct bpf_sk_lookup, family):
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+                                     bpf_target_off(struct bpf_sk_lookup_kern,
+                                                    family, 2, target_size));
+               break;
+
+       case offsetof(struct bpf_sk_lookup, protocol):
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+                                     bpf_target_off(struct bpf_sk_lookup_kern,
+                                                    protocol, 2, target_size));
+               break;
+
+       case offsetof(struct bpf_sk_lookup, remote_ip4):
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+                                     bpf_target_off(struct bpf_sk_lookup_kern,
+                                                    v4.saddr, 4, target_size));
+               break;
+
+       case offsetof(struct bpf_sk_lookup, local_ip4):
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+                                     bpf_target_off(struct bpf_sk_lookup_kern,
+                                                    v4.daddr, 4, target_size));
+               break;
+
+       case bpf_ctx_range_till(struct bpf_sk_lookup,
+                               remote_ip6[0], remote_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+               int off = si->off;
+
+               off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
+               off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sk_lookup_kern, v6.saddr));
+               *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+               *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+               break;
+       }
+       case bpf_ctx_range_till(struct bpf_sk_lookup,
+                               local_ip6[0], local_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+               int off = si->off;
+
+               off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
+               off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sk_lookup_kern, v6.daddr));
+               *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+               *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+               break;
        }
+       case offsetof(struct bpf_sk_lookup, remote_port):
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+                                     bpf_target_off(struct bpf_sk_lookup_kern,
+                                                    sport, 2, target_size));
+               break;
+
+       case offsetof(struct bpf_sk_lookup, local_port):
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+                                     bpf_target_off(struct bpf_sk_lookup_kern,
+                                                    dport, 2, target_size));
+               break;
+       }
+
+       return insn - insn_buf;
 }
+
+const struct bpf_prog_ops sk_lookup_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_lookup_verifier_ops = {
+       .get_func_proto         = sk_lookup_func_proto,
+       .is_valid_access        = sk_lookup_is_valid_access,
+       .convert_ctx_access     = sk_lookup_convert_ctx_access,
+};
+
+#endif /* CONFIG_INET */
+
+DEFINE_BPF_DISPATCHER(xdp)
+
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+{
+       bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
+}
+
+#ifdef CONFIG_DEBUG_INFO_BTF
+BTF_ID_LIST_GLOBAL(btf_sock_ids)
+#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+#else
+u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
 #endif
 
 static bool check_arg_btf_id(u32 btf_id, u32 arg)
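Taken together, the filter.c additions define the sk_lookup program type end to end: the bpf_sk_assign() helper records a verdict in bpf_sk_lookup_kern, is_valid_access() restricts programs to read-only, properly sized context loads, and convert_ctx_access() rewrites those loads into fetches from the kernel-side structure. A minimal sketch of a consumer program, assuming the section-name convention and map shape used by the selftests in this series (port number and map name illustrative):

    #include <linux/bpf.h>
    #include <linux/in.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_SOCKMAP);
            __uint(max_entries, 1);
            __type(key, __u32);
            __type(value, __u64);
    } dest_sock SEC(".maps");

    SEC("sk_lookup/steer_tcp")
    int steer_tcp(struct bpf_sk_lookup *ctx)
    {
            const __u32 key = 0;
            struct bpf_sock *sk;
            int err;

            if (ctx->protocol != IPPROTO_TCP || ctx->local_port != 7007)
                    return SK_PASS;

            sk = bpf_map_lookup_elem(&dest_sock, &key);
            if (!sk)
                    return SK_PASS;

            err = bpf_sk_assign(ctx, sk, 0);
            bpf_sk_release(sk);
            return err ? SK_DROP : SK_PASS;
    }

    char _license[] SEC("license") = "GPL";

Failure modes map to the errno values in bpf_sk_lookup_assign() above; for example, assigning an established socket fails with -ESOCKTNOSUPPORT, and the sketch simply drops in that case.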
index 2bbaaf0..4eb4cd8 100644 (file)
@@ -246,6 +246,21 @@ static inline int compute_score(struct sock *sk, struct net *net,
        return score;
 }
 
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+                                           struct sk_buff *skb, int doff,
+                                           __be32 saddr, __be16 sport,
+                                           __be32 daddr, unsigned short hnum)
+{
+       struct sock *reuse_sk = NULL;
+       u32 phash;
+
+       if (sk->sk_reuseport) {
+               phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+               reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+       }
+       return reuse_sk;
+}
+
 /*
  * Here are some nice properties to exploit here. The BSD API
  * does not allow a listening sock to specify the remote port nor the
@@ -265,21 +280,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
        struct inet_connection_sock *icsk;
        struct sock *sk, *result = NULL;
        int score, hiscore = 0;
-       u32 phash = 0;
 
        inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
                sk = (struct sock *)icsk;
                score = compute_score(sk, net, hnum, daddr,
                                      dif, sdif, exact_dif);
                if (score > hiscore) {
-                       if (sk->sk_reuseport) {
-                               phash = inet_ehashfn(net, daddr, hnum,
-                                                    saddr, sport);
-                               result = reuseport_select_sock(sk, phash,
-                                                              skb, doff);
-                               if (result)
-                                       return result;
-                       }
+                       result = lookup_reuseport(net, sk, skb, doff,
+                                                 saddr, sport, daddr, hnum);
+                       if (result)
+                               return result;
+
                        result = sk;
                        hiscore = score;
                }
@@ -288,6 +299,29 @@ static struct sock *inet_lhash2_lookup(struct net *net,
        return result;
 }
 
+static inline struct sock *inet_lookup_run_bpf(struct net *net,
+                                              struct inet_hashinfo *hashinfo,
+                                              struct sk_buff *skb, int doff,
+                                              __be32 saddr, __be16 sport,
+                                              __be32 daddr, u16 hnum)
+{
+       struct sock *sk, *reuse_sk;
+       bool no_reuseport;
+
+       if (hashinfo != &tcp_hashinfo)
+               return NULL; /* only TCP is supported */
+
+       no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
+                                           saddr, sport, daddr, hnum, &sk);
+       if (no_reuseport || IS_ERR_OR_NULL(sk))
+               return sk;
+
+       reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+       if (reuse_sk)
+               sk = reuse_sk;
+       return sk;
+}
+
 struct sock *__inet_lookup_listener(struct net *net,
                                    struct inet_hashinfo *hashinfo,
                                    struct sk_buff *skb, int doff,
@@ -299,6 +333,14 @@ struct sock *__inet_lookup_listener(struct net *net,
        struct sock *result = NULL;
        unsigned int hash2;
 
+       /* Lookup redirect from BPF */
+       if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+               result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
+                                            saddr, sport, daddr, hnum);
+               if (result)
+                       goto done;
+       }
+
        hash2 = ipv4_portaddr_hash(net, daddr, hnum);
        ilb2 = inet_lhash2_bucket(hashinfo, hash2);
 
index cd81b6e..daa39d3 100644 (file)
@@ -76,6 +76,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/inetdevice.h>
+#include <linux/btf_ids.h>
 
 #include <crypto/hash.h>
 #include <linux/scatterlist.h>
@@ -2946,7 +2947,7 @@ static void bpf_iter_fini_tcp(void *priv_data)
        bpf_iter_fini_seq_net(priv_data);
 }
 
-static const struct bpf_iter_reg tcp_reg_info = {
+static struct bpf_iter_reg tcp_reg_info = {
        .target                 = "tcp",
        .seq_ops                = &bpf_iter_tcp_seq_ops,
        .init_seq_private       = bpf_iter_init_tcp,
@@ -2961,6 +2962,7 @@ static const struct bpf_iter_reg tcp_reg_info = {
 
 static void __init bpf_iter_register(void)
 {
+       tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON];
        if (bpf_iter_reg_target(&tcp_reg_info))
                pr_warn("Warning: could not register bpf iterator tcp\n");
 }
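tcp_reg_info loses its const qualifier so that bpf_iter_register() can patch in the BTF id of struct sock_common, now resolved at build time via btf_sock_ids rather than by a runtime name lookup. On the BPF side an iterator program then sees a properly typed context argument; a rough sketch, assuming CO-RE types from a generated vmlinux.h as the selftests do:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    SEC("iter/tcp")
    int dump_tcp(struct bpf_iter__tcp *ctx)
    {
            struct sock_common *sk_common = ctx->sk_common;
            struct seq_file *seq = ctx->meta->seq;
            static const char fmt[] = "family: %d\n";
            __u64 family;

            if (!sk_common)
                    return 0;
            family = sk_common->skc_family;
            bpf_seq_printf(seq, fmt, sizeof(fmt), &family, sizeof(family));
            return 0;
    }

    char _license[] SEC("license") = "GPL";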
index d4be447..bb95cdd 100644 (file)
 #include <net/xfrm.h>
 #include <trace/events/udp.h>
 #include <linux/static_key.h>
+#include <linux/btf_ids.h>
 #include <trace/events/skb.h>
 #include <net/busy_poll.h>
 #include "udp_impl.h"
@@ -408,6 +409,25 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
                              udp_ehash_secret + net_hash_mix(net));
 }
 
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+                                           struct sk_buff *skb,
+                                           __be32 saddr, __be16 sport,
+                                           __be32 daddr, unsigned short hnum)
+{
+       struct sock *reuse_sk = NULL;
+       u32 hash;
+
+       if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+               hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
+               reuse_sk = reuseport_select_sock(sk, hash, skb,
+                                                sizeof(struct udphdr));
+               /* Fall back to scoring if group has connections */
+               if (reuseport_has_conns(sk, false))
+                       return NULL;
+       }
+       return reuse_sk;
+}
+
 /* called with rcu_read_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
                                     __be32 saddr, __be16 sport,
@@ -418,7 +438,6 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 {
        struct sock *sk, *result;
        int score, badness;
-       u32 hash = 0;
 
        result = NULL;
        badness = 0;
@@ -426,15 +445,11 @@ static struct sock *udp4_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif);
                if (score > badness) {
-                       if (sk->sk_reuseport &&
-                           sk->sk_state != TCP_ESTABLISHED) {
-                               hash = udp_ehashfn(net, daddr, hnum,
-                                                  saddr, sport);
-                               result = reuseport_select_sock(sk, hash, skb,
-                                                       sizeof(struct udphdr));
-                               if (result && !reuseport_has_conns(sk, false))
-                                       return result;
-                       }
+                       result = lookup_reuseport(net, sk, skb,
+                                                 saddr, sport, daddr, hnum);
+                       if (result)
+                               return result;
+
                        badness = score;
                        result = sk;
                }
@@ -442,6 +457,29 @@ static struct sock *udp4_lib_lookup2(struct net *net,
        return result;
 }
 
+static inline struct sock *udp4_lookup_run_bpf(struct net *net,
+                                              struct udp_table *udptable,
+                                              struct sk_buff *skb,
+                                              __be32 saddr, __be16 sport,
+                                              __be32 daddr, u16 hnum)
+{
+       struct sock *sk, *reuse_sk;
+       bool no_reuseport;
+
+       if (udptable != &udp_table)
+               return NULL; /* only UDP is supported */
+
+       no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
+                                           saddr, sport, daddr, hnum, &sk);
+       if (no_reuseport || IS_ERR_OR_NULL(sk))
+               return sk;
+
+       reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
+       if (reuse_sk)
+               sk = reuse_sk;
+       return sk;
+}
+
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  * harder than this. -DaveM
  */
@@ -449,27 +487,45 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
                __be16 sport, __be32 daddr, __be16 dport, int dif,
                int sdif, struct udp_table *udptable, struct sk_buff *skb)
 {
-       struct sock *result;
        unsigned short hnum = ntohs(dport);
        unsigned int hash2, slot2;
        struct udp_hslot *hslot2;
+       struct sock *result, *sk;
 
        hash2 = ipv4_portaddr_hash(net, daddr, hnum);
        slot2 = hash2 & udptable->mask;
        hslot2 = &udptable->hash2[slot2];
 
+       /* Lookup connected or non-wildcard socket */
        result = udp4_lib_lookup2(net, saddr, sport,
                                  daddr, hnum, dif, sdif,
                                  hslot2, skb);
-       if (!result) {
-               hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
-               slot2 = hash2 & udptable->mask;
-               hslot2 = &udptable->hash2[slot2];
-
-               result = udp4_lib_lookup2(net, saddr, sport,
-                                         htonl(INADDR_ANY), hnum, dif, sdif,
-                                         hslot2, skb);
+       if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
+               goto done;
+
+       /* Lookup redirect from BPF */
+       if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+               sk = udp4_lookup_run_bpf(net, udptable, skb,
+                                        saddr, sport, daddr, hnum);
+               if (sk) {
+                       result = sk;
+                       goto done;
+               }
        }
+
+       /* Got non-wildcard socket or error on first lookup */
+       if (result)
+               goto done;
+
+       /* Lookup wildcard sockets */
+       hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+       slot2 = hash2 & udptable->mask;
+       hslot2 = &udptable->hash2[slot2];
+
+       result = udp4_lib_lookup2(net, saddr, sport,
+                                 htonl(INADDR_ANY), hnum, dif, sdif,
+                                 hslot2, skb);
+done:
        if (IS_ERR(result))
                return NULL;
        return result;
@@ -3153,7 +3209,7 @@ static void bpf_iter_fini_udp(void *priv_data)
        bpf_iter_fini_seq_net(priv_data);
 }
 
-static const struct bpf_iter_reg udp_reg_info = {
+static struct bpf_iter_reg udp_reg_info = {
        .target                 = "udp",
        .seq_ops                = &bpf_iter_udp_seq_ops,
        .init_seq_private       = bpf_iter_init_udp,
@@ -3168,6 +3224,7 @@ static const struct bpf_iter_reg udp_reg_info = {
 
 static void __init bpf_iter_register(void)
 {
+       udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP];
        if (bpf_iter_reg_target(&udp_reg_info))
                pr_warn("Warning: could not register bpf iterator udp\n");
 }
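The rework of __udp4_lib_lookup() makes the lookup stages explicit so the BPF verdict lands at the right precedence. In sketch form:

    /* Resulting precedence, per the hunk above:
     *
     *   1. connected socket matching the full 4-tuple (always wins)
     *   2. BPF sk_lookup selection (only if a program is attached)
     *   3. unconnected socket bound to the packet's local address
     *   4. unconnected socket bound to the wildcard address
     *
     * A reuseport group selected in step 2 still goes through
     * reuseport_select_sock(), so SO_REUSEPORT load balancing is kept
     * unless the program passed BPF_SK_LOOKUP_F_NO_REUSEPORT.
     */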
index fbe9d42..2d3add9 100644 (file)
@@ -21,6 +21,8 @@
 #include <net/ip.h>
 #include <net/sock_reuseport.h>
 
+extern struct inet_hashinfo tcp_hashinfo;
+
 u32 inet6_ehashfn(const struct net *net,
                  const struct in6_addr *laddr, const u16 lport,
                  const struct in6_addr *faddr, const __be16 fport)
@@ -111,6 +113,23 @@ static inline int compute_score(struct sock *sk, struct net *net,
        return score;
 }
 
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+                                           struct sk_buff *skb, int doff,
+                                           const struct in6_addr *saddr,
+                                           __be16 sport,
+                                           const struct in6_addr *daddr,
+                                           unsigned short hnum)
+{
+       struct sock *reuse_sk = NULL;
+       u32 phash;
+
+       if (sk->sk_reuseport) {
+               phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+               reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+       }
+       return reuse_sk;
+}
+
 /* called with rcu_read_lock() */
 static struct sock *inet6_lhash2_lookup(struct net *net,
                struct inet_listen_hashbucket *ilb2,
@@ -123,21 +142,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
        struct inet_connection_sock *icsk;
        struct sock *sk, *result = NULL;
        int score, hiscore = 0;
-       u32 phash = 0;
 
        inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
                sk = (struct sock *)icsk;
                score = compute_score(sk, net, hnum, daddr, dif, sdif,
                                      exact_dif);
                if (score > hiscore) {
-                       if (sk->sk_reuseport) {
-                               phash = inet6_ehashfn(net, daddr, hnum,
-                                                     saddr, sport);
-                               result = reuseport_select_sock(sk, phash,
-                                                              skb, doff);
-                               if (result)
-                                       return result;
-                       }
+                       result = lookup_reuseport(net, sk, skb, doff,
+                                                 saddr, sport, daddr, hnum);
+                       if (result)
+                               return result;
+
                        result = sk;
                        hiscore = score;
                }
@@ -146,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
        return result;
 }
 
+static inline struct sock *inet6_lookup_run_bpf(struct net *net,
+                                               struct inet_hashinfo *hashinfo,
+                                               struct sk_buff *skb, int doff,
+                                               const struct in6_addr *saddr,
+                                               const __be16 sport,
+                                               const struct in6_addr *daddr,
+                                               const u16 hnum)
+{
+       struct sock *sk, *reuse_sk;
+       bool no_reuseport;
+
+       if (hashinfo != &tcp_hashinfo)
+               return NULL; /* only TCP is supported */
+
+       no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
+                                           saddr, sport, daddr, hnum, &sk);
+       if (no_reuseport || IS_ERR_OR_NULL(sk))
+               return sk;
+
+       reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+       if (reuse_sk)
+               sk = reuse_sk;
+       return sk;
+}
+
 struct sock *inet6_lookup_listener(struct net *net,
                struct inet_hashinfo *hashinfo,
                struct sk_buff *skb, int doff,
@@ -157,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net,
        struct sock *result = NULL;
        unsigned int hash2;
 
+       /* Lookup redirect from BPF */
+       if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+               result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
+                                             saddr, sport, daddr, hnum);
+               if (result)
+                       goto done;
+       }
+
        hash2 = ipv6_portaddr_hash(net, daddr, hnum);
        ilb2 = inet_lhash2_bucket(hashinfo, hash2);
 
index 427b81c..33f5efb 100644 (file)
@@ -61,6 +61,7 @@
 #include <net/l3mdev.h>
 #include <net/ip.h>
 #include <linux/uaccess.h>
+#include <linux/btf_ids.h>
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
@@ -6423,7 +6424,10 @@ void __init ip6_route_init_special_entries(void)
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
 DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
 
-static const struct bpf_iter_reg ipv6_route_reg_info = {
+BTF_ID_LIST(btf_fib6_info_id)
+BTF_ID(struct, fib6_info)
+
+static struct bpf_iter_reg ipv6_route_reg_info = {
        .target                 = "ipv6_route",
        .seq_ops                = &ipv6_route_seq_ops,
        .init_seq_private       = bpf_iter_init_seq_net,
@@ -6438,6 +6442,7 @@ static const struct bpf_iter_reg ipv6_route_reg_info = {
 
 static int __init bpf_iter_register(void)
 {
+       ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
        return bpf_iter_reg_target(&ipv6_route_reg_info);
 }
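BTF_ID_LIST()/BTF_ID() come from the new <linux/btf_ids.h>. With CONFIG_DEBUG_INFO_BTF the macros emit placeholders into a dedicated ELF section that the resolve_btfids build step patches with the numeric BTF type ids, which is what lets bpf_iter_register() above avoid any runtime string lookup. Without BTF they degrade to a plain zeroed array; roughly (a sketch, see the header for the exact definitions):

    #ifndef CONFIG_DEBUG_INFO_BTF
    #define BTF_ID_LIST(name) static u32 name[5];
    #define BTF_ID(prefix, name)
    #endif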
 
index 5aff085..7c1143f 100644 (file)
@@ -141,6 +141,27 @@ static int compute_score(struct sock *sk, struct net *net,
        return score;
 }
 
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+                                           struct sk_buff *skb,
+                                           const struct in6_addr *saddr,
+                                           __be16 sport,
+                                           const struct in6_addr *daddr,
+                                           unsigned int hnum)
+{
+       struct sock *reuse_sk = NULL;
+       u32 hash;
+
+       if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+               hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+               reuse_sk = reuseport_select_sock(sk, hash, skb,
+                                                sizeof(struct udphdr));
+               /* Fall back to scoring if group has connections */
+               if (reuseport_has_conns(sk, false))
+                       return NULL;
+       }
+       return reuse_sk;
+}
+
 /* called with rcu_read_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
                const struct in6_addr *saddr, __be16 sport,
@@ -150,7 +171,6 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 {
        struct sock *sk, *result;
        int score, badness;
-       u32 hash = 0;
 
        result = NULL;
        badness = -1;
@@ -158,16 +178,11 @@ static struct sock *udp6_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif);
                if (score > badness) {
-                       if (sk->sk_reuseport &&
-                           sk->sk_state != TCP_ESTABLISHED) {
-                               hash = udp6_ehashfn(net, daddr, hnum,
-                                                   saddr, sport);
-
-                               result = reuseport_select_sock(sk, hash, skb,
-                                                       sizeof(struct udphdr));
-                               if (result && !reuseport_has_conns(sk, false))
-                                       return result;
-                       }
+                       result = lookup_reuseport(net, sk, skb,
+                                                 saddr, sport, daddr, hnum);
+                       if (result)
+                               return result;
+
                        result = sk;
                        badness = score;
                }
@@ -175,6 +190,31 @@ static struct sock *udp6_lib_lookup2(struct net *net,
        return result;
 }
 
+static inline struct sock *udp6_lookup_run_bpf(struct net *net,
+                                              struct udp_table *udptable,
+                                              struct sk_buff *skb,
+                                              const struct in6_addr *saddr,
+                                              __be16 sport,
+                                              const struct in6_addr *daddr,
+                                              u16 hnum)
+{
+       struct sock *sk, *reuse_sk;
+       bool no_reuseport;
+
+       if (udptable != &udp_table)
+               return NULL; /* only UDP is supported */
+
+       no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
+                                           saddr, sport, daddr, hnum, &sk);
+       if (no_reuseport || IS_ERR_OR_NULL(sk))
+               return sk;
+
+       reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
+       if (reuse_sk)
+               sk = reuse_sk;
+       return sk;
+}
+
 /* rcu_read_lock() must be held */
 struct sock *__udp6_lib_lookup(struct net *net,
                               const struct in6_addr *saddr, __be16 sport,
@@ -185,25 +225,42 @@ struct sock *__udp6_lib_lookup(struct net *net,
        unsigned short hnum = ntohs(dport);
        unsigned int hash2, slot2;
        struct udp_hslot *hslot2;
-       struct sock *result;
+       struct sock *result, *sk;
 
        hash2 = ipv6_portaddr_hash(net, daddr, hnum);
        slot2 = hash2 & udptable->mask;
        hslot2 = &udptable->hash2[slot2];
 
+       /* Lookup connected or non-wildcard sockets */
        result = udp6_lib_lookup2(net, saddr, sport,
                                  daddr, hnum, dif, sdif,
                                  hslot2, skb);
-       if (!result) {
-               hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
-               slot2 = hash2 & udptable->mask;
+       if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
+               goto done;
+
+       /* Lookup redirect from BPF */
+       if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+               sk = udp6_lookup_run_bpf(net, udptable, skb,
+                                        saddr, sport, daddr, hnum);
+               if (sk) {
+                       result = sk;
+                       goto done;
+               }
+       }
 
-               hslot2 = &udptable->hash2[slot2];
+       /* Got non-wildcard socket or error on first lookup */
+       if (result)
+               goto done;
 
-               result = udp6_lib_lookup2(net, saddr, sport,
-                                         &in6addr_any, hnum, dif, sdif,
-                                         hslot2, skb);
-       }
+       /* Lookup wildcard sockets */
+       hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+       slot2 = hash2 & udptable->mask;
+       hslot2 = &udptable->hash2[slot2];
+
+       result = udp6_lib_lookup2(net, saddr, sport,
+                                 &in6addr_any, hnum, dif, sdif,
+                                 hslot2, skb);
+done:
        if (IS_ERR(result))
                return NULL;
        return result;
index 4f2c3b1..3cd58f0 100644 (file)
@@ -60,6 +60,7 @@
 #include <linux/genetlink.h>
 #include <linux/net_namespace.h>
 #include <linux/nospec.h>
+#include <linux/btf_ids.h>
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -2803,7 +2804,10 @@ static const struct rhashtable_params netlink_rhashtable_params = {
 };
 
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
-static const struct bpf_iter_reg netlink_reg_info = {
+BTF_ID_LIST(btf_netlink_sock_id)
+BTF_ID(struct, netlink_sock)
+
+static struct bpf_iter_reg netlink_reg_info = {
        .target                 = "netlink",
        .seq_ops                = &netlink_seq_ops,
        .init_seq_private       = bpf_iter_init_seq_net,
@@ -2818,6 +2822,7 @@ static const struct bpf_iter_reg netlink_reg_info = {
 
 static int __init bpf_iter_register(void)
 {
+       netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id;
        return bpf_iter_reg_target(&netlink_reg_info);
 }
 #endif
index d459f73..e74ee1c 100644 (file)
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P)                                                                   \
+       ({                                                                     \
+               typeof(P) val;                                                 \
+               bpf_probe_read_kernel(&val, sizeof(val), &(P));                \
+               val;                                                           \
+       })
 
 #define MINBLOCK_US    1
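The samples move from bpf_probe_read(), which is ambiguous about the address space being read on architectures where kernel and user ranges overlap, to the explicit bpf_probe_read_kernel(); the _() macro also gains parentheses around P so that expressions expand safely. A hedged usage sketch (attach point illustrative):

    SEC("kprobe/do_exit")
    int sample_usage(struct pt_regs *ctx)
    {
            struct task_struct *task = (void *)bpf_get_current_task();
            int pid = _(task->pid);  /* expands to bpf_probe_read_kernel() */
            char fmt[] = "exiting pid=%d\n";

            bpf_trace_printk(fmt, sizeof(fmt), pid);
            return 0;
    }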
 
index 8b811c2..f6d593e 100644 (file)
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P)                                                                   \
+       ({                                                                     \
+               typeof(P) val = 0;                                             \
+               bpf_probe_read_kernel(&val, sizeof(val), &(P));                \
+               val;                                                           \
+       })
 
 SEC("kprobe/__set_task_comm")
 int prog(struct pt_regs *ctx)
@@ -25,8 +30,9 @@ int prog(struct pt_regs *ctx)
        tsk = (void *)PT_REGS_PARM1(ctx);
 
        pid = _(tsk->pid);
-       bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm);
-       bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx));
+       bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm);
+       bpf_probe_read_kernel(newcomm, sizeof(newcomm),
+                             (void *)PT_REGS_PARM2(ctx));
        signal = _(tsk->signal);
        oom_score_adj = _(signal->oom_score_adj);
        return 0;
index 8e2610e..3f4599c 100644 (file)
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P)                                                                   \
+       ({                                                                     \
+               typeof(P) val = 0;                                             \
+               bpf_probe_read_kernel(&val, sizeof(val), &(P));                \
+               val;                                                           \
+       })
 
 /* kprobe is NOT a stable ABI
  * kernel functions can be removed, renamed or completely change semantics.
@@ -34,7 +39,7 @@ int bpf_prog1(struct pt_regs *ctx)
        dev = _(skb->dev);
        len = _(skb->len);
 
-       bpf_probe_read(devname, sizeof(devname), dev->name);
+       bpf_probe_read_kernel(devname, sizeof(devname), dev->name);
 
        if (devname[0] == 'l' && devname[1] == 'o') {
                char fmt[] = "skb %p len %d\n";
index 32b49e8..64a1f75 100644 (file)
@@ -47,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx)
 {
        struct seccomp_data sd;
 
-       bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+       bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
        if (sd.args[2] == 512) {
                char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
                bpf_trace_printk(fmt, sizeof(fmt),
@@ -60,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
 {
        struct seccomp_data sd;
 
-       bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+       bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
        if (sd.args[2] > 128 && sd.args[2] <= 1024) {
                char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
                bpf_trace_printk(fmt, sizeof(fmt),
index 2baf8db..8255025 100644 (file)
@@ -21,7 +21,7 @@
 struct {
        __uint(type, BPF_MAP_TYPE_CPUMAP);
        __uint(key_size, sizeof(u32));
-       __uint(value_size, sizeof(u32));
+       __uint(value_size, sizeof(struct bpf_cpumap_val));
        __uint(max_entries, MAX_CPUS);
 } cpu_map SEC(".maps");
 
@@ -30,6 +30,9 @@ struct datarec {
        __u64 processed;
        __u64 dropped;
        __u64 issue;
+       __u64 xdp_pass;
+       __u64 xdp_drop;
+       __u64 xdp_redirect;
 };
 
 /* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
@@ -692,13 +695,16 @@ int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
  * Code in:         kernel/include/trace/events/xdp.h
  */
 struct cpumap_kthread_ctx {
-       u64 __pad;              // First 8 bytes are not accessible by bpf code
-       int map_id;             //      offset:8;  size:4; signed:1;
-       u32 act;                //      offset:12; size:4; signed:0;
-       int cpu;                //      offset:16; size:4; signed:1;
-       unsigned int drops;     //      offset:20; size:4; signed:0;
-       unsigned int processed; //      offset:24; size:4; signed:0;
-       int sched;              //      offset:28; size:4; signed:1;
+       u64 __pad;                      // First 8 bytes are not accessible
+       int map_id;                     //      offset:8;  size:4; signed:1;
+       u32 act;                        //      offset:12; size:4; signed:0;
+       int cpu;                        //      offset:16; size:4; signed:1;
+       unsigned int drops;             //      offset:20; size:4; signed:0;
+       unsigned int processed;         //      offset:24; size:4; signed:0;
+       int sched;                      //      offset:28; size:4; signed:1;
+       unsigned int xdp_pass;          //      offset:32; size:4; signed:0;
+       unsigned int xdp_drop;          //      offset:36; size:4; signed:0;
+       unsigned int xdp_redirect;      //      offset:40; size:4; signed:0;
 };
 
 SEC("tracepoint/xdp/xdp_cpumap_kthread")
@@ -712,6 +718,9 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
                return 0;
        rec->processed += ctx->processed;
        rec->dropped   += ctx->drops;
+       rec->xdp_pass  += ctx->xdp_pass;
+       rec->xdp_drop  += ctx->xdp_drop;
+       rec->xdp_redirect  += ctx->xdp_redirect;
 
        /* Count times kthread yielded CPU via schedule call */
        if (ctx->sched)
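The cpumap value grows from a bare queue size to struct bpf_cpumap_val, so each entry can also carry an XDP program to execute on the remote CPU; the new tracepoint fields report that program's per-verdict counts. The layout added to include/uapi/linux/bpf.h in this series:

    struct bpf_cpumap_val {
            __u32 qsize;    /* queue size to remote target CPU */
            union {
                    int   fd;       /* prog fd on map write */
                    __u32 id;       /* prog id on map read */
            } bpf_prog;
    };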
index f4e755e..004c062 100644 (file)
@@ -70,6 +70,11 @@ static const struct option long_options[] = {
        {"stress-mode", no_argument,            NULL, 'x' },
        {"no-separators", no_argument,          NULL, 'z' },
        {"force",       no_argument,            NULL, 'F' },
+       {"mprog-disable", no_argument,          NULL, 'n' },
+       {"mprog-name",  required_argument,      NULL, 'e' },
+       {"mprog-filename", required_argument,   NULL, 'f' },
+       {"redirect-device", required_argument,  NULL, 'r' },
+       {"redirect-map", required_argument,     NULL, 'm' },
        {0, 0, NULL,  0 }
 };
 
@@ -156,6 +161,9 @@ struct datarec {
        __u64 processed;
        __u64 dropped;
        __u64 issue;
+       __u64 xdp_pass;
+       __u64 xdp_drop;
+       __u64 xdp_redirect;
 };
 struct record {
        __u64 timestamp;
@@ -175,6 +183,9 @@ static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
        /* For percpu maps, userspace gets a value per possible CPU */
        unsigned int nr_cpus = bpf_num_possible_cpus();
        struct datarec values[nr_cpus];
+       __u64 sum_xdp_redirect = 0;
+       __u64 sum_xdp_pass = 0;
+       __u64 sum_xdp_drop = 0;
        __u64 sum_processed = 0;
        __u64 sum_dropped = 0;
        __u64 sum_issue = 0;
@@ -196,10 +207,19 @@ static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
                sum_dropped        += values[i].dropped;
                rec->cpu[i].issue = values[i].issue;
                sum_issue        += values[i].issue;
+               rec->cpu[i].xdp_pass = values[i].xdp_pass;
+               sum_xdp_pass += values[i].xdp_pass;
+               rec->cpu[i].xdp_drop = values[i].xdp_drop;
+               sum_xdp_drop += values[i].xdp_drop;
+               rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
+               sum_xdp_redirect += values[i].xdp_redirect;
        }
        rec->total.processed = sum_processed;
        rec->total.dropped   = sum_dropped;
        rec->total.issue     = sum_issue;
+       rec->total.xdp_pass  = sum_xdp_pass;
+       rec->total.xdp_drop  = sum_xdp_drop;
+       rec->total.xdp_redirect = sum_xdp_redirect;
        return true;
 }
 
@@ -300,17 +320,33 @@ static __u64 calc_errs_pps(struct datarec *r,
        return pps;
 }
 
+static void calc_xdp_pps(struct datarec *r, struct datarec *p,
+                        double *xdp_pass, double *xdp_drop,
+                        double *xdp_redirect, double period_)
+{
+       *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
+       if (period_ > 0) {
+               *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+               *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+               *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+       }
+}
+
 static void stats_print(struct stats_record *stats_rec,
                        struct stats_record *stats_prev,
-                       char *prog_name)
+                       char *prog_name, char *mprog_name, int mprog_fd)
 {
        unsigned int nr_cpus = bpf_num_possible_cpus();
        double pps = 0, drop = 0, err = 0;
+       bool mprog_enabled = false;
        struct record *rec, *prev;
        int to_cpu;
        double t;
        int i;
 
+       if (mprog_fd > 0)
+               mprog_enabled = true;
+
        /* Header */
        printf("Running XDP/eBPF prog_name:%s\n", prog_name);
        printf("%-15s %-7s %-14s %-11s %-9s\n",
@@ -455,6 +491,34 @@ static void stats_print(struct stats_record *stats_rec,
                printf(fm2_err, "xdp_exception", "total", pps, drop);
        }
 
+       /* CPUMAP attached XDP program that runs on remote/destination CPU */
+       if (mprog_enabled) {
+               char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
+               char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
+               double xdp_pass, xdp_drop, xdp_redirect;
+
+               printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
+               printf("%-15s %-7s %-14s %-11s %-9s\n",
+                      "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");
+
+               rec  = &stats_rec->kthread;
+               prev = &stats_prev->kthread;
+               t = calc_period(rec, prev);
+               for (i = 0; i < nr_cpus; i++) {
+                       struct datarec *r = &rec->cpu[i];
+                       struct datarec *p = &prev->cpu[i];
+
+                       calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
+                                    &xdp_redirect, t);
+                       if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
+                               printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
+                                      xdp_redirect);
+               }
+               calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+                            &xdp_redirect, t);
+               printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
+       }
+
        printf("\n");
        fflush(stdout);
 }
@@ -491,7 +555,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b)
        *b = tmp;
 }
 
-static int create_cpu_entry(__u32 cpu, __u32 queue_size,
+static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
                            __u32 avail_idx, bool new)
 {
        __u32 curr_cpus_count = 0;
@@ -501,7 +565,7 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
        /* Add a CPU entry to cpumap, as this allocate a cpu entry in
         * the kernel for the cpu.
         */
-       ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
+       ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0);
        if (ret) {
                fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
                exit(EXIT_FAIL_BPF);
@@ -532,9 +596,9 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
                }
        }
        /* map_fd[7] = cpus_iterator */
-       printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
+       printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
               new ? "Add-new":"Replace", cpu, avail_idx,
-              queue_size, curr_cpus_count);
+              value->qsize, value->bpf_prog.fd, curr_cpus_count);
 
        return 0;
 }
@@ -558,21 +622,26 @@ static void mark_cpus_unavailable(void)
 }
 
 /* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(void)
+static void stress_cpumap(struct bpf_cpumap_val *value)
 {
        /* Changing qsize will cause kernel to free and alloc a new
         * bpf_cpu_map_entry, with an associated/complicated tear-down
         * procedure.
         */
-       create_cpu_entry(1,  1024, 0, false);
-       create_cpu_entry(1,     8, 0, false);
-       create_cpu_entry(1, 16000, 0, false);
+       value->qsize = 1024;
+       create_cpu_entry(1, value, 0, false);
+       value->qsize = 8;
+       create_cpu_entry(1, value, 0, false);
+       value->qsize = 16000;
+       create_cpu_entry(1, value, 0, false);
 }
 
 static void stats_poll(int interval, bool use_separators, char *prog_name,
+                      char *mprog_name, struct bpf_cpumap_val *value,
                       bool stress_mode)
 {
        struct stats_record *record, *prev;
+       int mprog_fd;
 
        record = alloc_stats_record();
        prev   = alloc_stats_record();
@@ -584,11 +653,12 @@ static void stats_poll(int interval, bool use_separators, char *prog_name,
 
        while (1) {
                swap(&prev, &record);
+               mprog_fd = value->bpf_prog.fd;
                stats_collect(record);
-               stats_print(record, prev, prog_name);
+               stats_print(record, prev, prog_name, mprog_name, mprog_fd);
                sleep(interval);
                if (stress_mode)
-                       stress_cpumap();
+                       stress_cpumap(value);
        }
 
        free_stats_record(record);
@@ -661,15 +731,66 @@ static int init_map_fds(struct bpf_object *obj)
        return 0;
 }
 
+static int load_cpumap_prog(char *file_name, char *prog_name,
+                           char *redir_interface, char *redir_map)
+{
+       struct bpf_prog_load_attr prog_load_attr = {
+               .prog_type              = BPF_PROG_TYPE_XDP,
+               .expected_attach_type   = BPF_XDP_CPUMAP,
+               .file = file_name,
+       };
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       int fd;
+
+       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
+               return -1;
+
+       if (fd < 0) {
+               fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+                       strerror(errno));
+               return fd;
+       }
+
+       if (redir_interface && redir_map) {
+               int err, map_fd, ifindex_out, key = 0;
+
+               map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
+               if (map_fd < 0)
+                       return map_fd;
+
+               ifindex_out = if_nametoindex(redir_interface);
+               if (!ifindex_out)
+                       return -1;
+
+               err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
+               if (err < 0)
+                       return err;
+       }
+
+       prog = bpf_object__find_program_by_title(obj, prog_name);
+       if (!prog) {
+               fprintf(stderr, "bpf_object__find_program_by_title failed\n");
+               return EXIT_FAIL;
+       }
+
+       return bpf_program__fd(prog);
+}
+
 int main(int argc, char **argv)
 {
        struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
        char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
+       char *mprog_filename = "xdp_redirect_kern.o";
+       char *redir_interface = NULL, *redir_map = NULL;
+       char *mprog_name = "xdp_redirect_dummy";
+       bool mprog_disable = false;
        struct bpf_prog_load_attr prog_load_attr = {
                .prog_type      = BPF_PROG_TYPE_UNSPEC,
        };
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
+       struct bpf_cpumap_val value;
        bool use_separators = true;
        bool stress_mode = false;
        struct bpf_program *prog;
@@ -681,6 +802,7 @@ int main(int argc, char **argv)
        int add_cpu = -1;
        int opt, err;
        int prog_fd;
+       int *cpu, i;
        __u32 qsize;
 
        n_cpus = get_nprocs_conf();
@@ -716,8 +838,15 @@ int main(int argc, char **argv)
        }
        mark_cpus_unavailable();
 
+       cpu = malloc(n_cpus * sizeof(int));
+       if (!cpu) {
+               fprintf(stderr, "failed to allocate cpu array\n");
+               return EXIT_FAIL;
+       }
+       memset(cpu, 0, n_cpus * sizeof(int));
+
        /* Parse commands line args */
-       while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
+       while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
                                  long_options, &longindex)) != -1) {
                switch (opt) {
                case 'd':
@@ -751,6 +880,21 @@ int main(int argc, char **argv)
                        /* Selecting eBPF prog to load */
                        prog_name = optarg;
                        break;
+               case 'n':
+                       mprog_disable = true;
+                       break;
+               case 'f':
+                       mprog_filename = optarg;
+                       break;
+               case 'e':
+                       mprog_name = optarg;
+                       break;
+               case 'r':
+                       redir_interface = optarg;
+                       break;
+               case 'm':
+                       redir_map = optarg;
+                       break;
                case 'c':
                        /* Add multiple CPUs */
                        add_cpu = strtoul(optarg, NULL, 0);
@@ -760,8 +904,7 @@ int main(int argc, char **argv)
                                        errno, strerror(errno));
                                goto error;
                        }
-                       create_cpu_entry(add_cpu, qsize, added_cpus, true);
-                       added_cpus++;
+                       cpu[added_cpus++] = add_cpu;
                        break;
                case 'q':
                        qsize = atoi(optarg);
@@ -772,6 +915,7 @@ int main(int argc, char **argv)
                case 'h':
                error:
                default:
+                       free(cpu);
                        usage(argv, obj);
                        return EXIT_FAIL_OPTION;
                }
@@ -784,15 +928,30 @@ int main(int argc, char **argv)
        if (ifindex == -1) {
                fprintf(stderr, "ERR: required option --dev missing\n");
                usage(argv, obj);
-               return EXIT_FAIL_OPTION;
+               err = EXIT_FAIL_OPTION;
+               goto out;
        }
        /* Required option */
        if (add_cpu == -1) {
                fprintf(stderr, "ERR: required option --cpu missing\n");
                fprintf(stderr, " Specify multiple --cpu option to add more\n");
                usage(argv, obj);
-               return EXIT_FAIL_OPTION;
+               err = EXIT_FAIL_OPTION;
+               goto out;
+       }
+
+       value.bpf_prog.fd = 0;
+       if (!mprog_disable)
+               value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
+                                                    redir_interface, redir_map);
+       if (value.bpf_prog.fd < 0) {
+               err = value.bpf_prog.fd;
+               goto out;
        }
+       value.qsize = qsize;
+
+       for (i = 0; i < added_cpus; i++)
+               create_cpu_entry(cpu[i], &value, i, true);
 
        /* Remove XDP program when program is interrupted or killed */
        signal(SIGINT, int_exit);
@@ -801,27 +960,33 @@ int main(int argc, char **argv)
        prog = bpf_object__find_program_by_title(obj, prog_name);
        if (!prog) {
                fprintf(stderr, "bpf_object__find_program_by_title failed\n");
-               return EXIT_FAIL;
+               err = EXIT_FAIL;
+               goto out;
        }
 
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                fprintf(stderr, "bpf_program__fd failed\n");
-               return EXIT_FAIL;
+               err = EXIT_FAIL;
+               goto out;
        }
 
        if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
                fprintf(stderr, "link set xdp fd failed\n");
-               return EXIT_FAIL_XDP;
+               err = EXIT_FAIL_XDP;
+               goto out;
        }
 
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
        if (err) {
                printf("can't get prog info - %s\n", strerror(errno));
-               return err;
+               goto out;
        }
        prog_id = info.id;
 
-       stats_poll(interval, use_separators, prog_name, stress_mode);
-       return EXIT_OK;
+       stats_poll(interval, use_separators, prog_name, mprog_name,
+                  &value, stress_mode);
+out:
+       free(cpu);
+       return err;
 }
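With the new options the sample loads a second XDP program into every cpumap entry and can point its redirect map at an egress device. An illustrative invocation (device names hypothetical; the -f/-e defaults are the xdp_redirect_kern.o / xdp_redirect_dummy pair set in main(), and tx_port is the map that object is expected to expose):

    # ./xdp_redirect_cpu --dev eth0 --cpu 2 --qsize 512 \
            --mprog-filename xdp_redirect_kern.o \
            --mprog-name xdp_redirect_dummy \
            --redirect-device eth1 --redirect-map tx_port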
index 6843376..5bfa448 100755 (executable)
@@ -404,6 +404,7 @@ class PrinterHelpers(Printer):
 
     type_fwds = [
             'struct bpf_fib_lookup',
+            'struct bpf_sk_lookup',
             'struct bpf_perf_event_data',
             'struct bpf_perf_event_value',
             'struct bpf_pidns_info',
@@ -450,6 +451,7 @@ class PrinterHelpers(Printer):
             'struct bpf_perf_event_data',
             'struct bpf_perf_event_value',
             'struct bpf_pidns_info',
+            'struct bpf_sk_lookup',
             'struct bpf_sock',
             'struct bpf_sock_addr',
             'struct bpf_sock_ops',
@@ -487,6 +489,11 @@ class PrinterHelpers(Printer):
             'struct sk_msg_buff': 'struct sk_msg_md',
             'struct xdp_buff': 'struct xdp_md',
     }
+    # Helpers overloaded for different context types.
+    overloaded_helpers = [
+        'bpf_get_socket_cookie',
+        'bpf_sk_assign',
+    ]
 
     def print_header(self):
         header = '''\
@@ -543,7 +550,7 @@ class PrinterHelpers(Printer):
         for i, a in enumerate(proto['args']):
             t = a['type']
             n = a['name']
-            if proto['name'] == 'bpf_get_socket_cookie' and i == 0:
+            if proto['name'] in self.overloaded_helpers and i == 0:
                     t = 'void'
                     n = 'ctx'
             one_arg = '{}{}'.format(comma, self.map_type(t))
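Adding bpf_sk_assign to overloaded_helpers makes the generator treat it like bpf_get_socket_cookie: the helper exists for more than one context type (struct __sk_buff * for TC, struct bpf_sk_lookup * for the new program type), so the emitted bpf_helper_defs.h declares the first parameter as a bare void *. Roughly the generated line (helper number taken from the UAPI enum, shown for illustration only):

    static long (*bpf_sk_assign)(void *ctx, struct bpf_sock *sk, __u64 flags) = (void *) 124;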
index 412ea3d..82e356b 100644 (file)
@@ -45,7 +45,7 @@ PROG COMMANDS
 |               **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
 |              **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
 |              **cgroup/getsockopt** | **cgroup/setsockopt** |
-|              **struct_ops** | **fentry** | **fexit** | **freplace**
+|              **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
 |      }
 |       *ATTACH_TYPE* := {
 |              **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
index 25b25ac..7b13726 100644 (file)
@@ -479,7 +479,7 @@ _bpftool()
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
                                 cgroup/setsockopt struct_ops \
-                                fentry fexit freplace" -- \
+                                fentry fexit freplace sk_lookup" -- \
                                                    "$cur" ) )
                             return 0
                             ;;
index 29f4e76..6530366 100644 (file)
@@ -1,10 +1,11 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
+#define _GNU_SOURCE
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <fts.h>
+#include <ftw.h>
 #include <libgen.h>
 #include <mntent.h>
 #include <stdbool.h>
@@ -64,6 +65,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
        [BPF_TRACE_FEXIT]               = "fexit",
        [BPF_MODIFY_RETURN]             = "mod_ret",
        [BPF_LSM_MAC]                   = "lsm_mac",
+       [BPF_SK_LOOKUP]                 = "sk_lookup",
 };
 
 void p_err(const char *fmt, ...)
@@ -160,24 +162,35 @@ int mount_tracefs(const char *target)
        return err;
 }
 
-int open_obj_pinned(char *path, bool quiet)
+int open_obj_pinned(const char *path, bool quiet)
 {
-       int fd;
+       char *pname;
+       int fd = -1;
+
+       pname = strdup(path);
+       if (!pname) {
+               if (!quiet)
+                       p_err("mem alloc failed");
+               goto out_ret;
+       }
 
-       fd = bpf_obj_get(path);
+       fd = bpf_obj_get(pname);
        if (fd < 0) {
                if (!quiet)
-                       p_err("bpf obj get (%s): %s", path,
-                             errno == EACCES && !is_bpffs(dirname(path)) ?
+                       p_err("bpf obj get (%s): %s", pname,
+                             errno == EACCES && !is_bpffs(dirname(pname)) ?
                            "directory not in bpf file system (bpffs)" :
                            strerror(errno));
-               return -1;
+               goto out_free;
        }
 
+out_free:
+       free(pname);
+out_ret:
        return fd;
 }
 
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
 {
        enum bpf_obj_type type;
        int fd;
@@ -367,71 +380,82 @@ void print_hex_data_json(uint8_t *data, size_t len)
        jsonw_end_array(json_wtr);
 }
 
+/* extra params for nftw cb */
+static struct pinned_obj_table *build_fn_table;
+static enum bpf_obj_type build_fn_type;
+
+static int do_build_table_cb(const char *fpath, const struct stat *sb,
+                            int typeflag, struct FTW *ftwbuf)
+{
+       struct bpf_prog_info pinned_info;
+       __u32 len = sizeof(pinned_info);
+       struct pinned_obj *obj_node;
+       enum bpf_obj_type objtype;
+       int fd, err = 0;
+
+       if (typeflag != FTW_F)
+               goto out_ret;
+
+       fd = open_obj_pinned(fpath, true);
+       if (fd < 0)
+               goto out_ret;
+
+       objtype = get_fd_type(fd);
+       if (objtype != build_fn_type)
+               goto out_close;
+
+       memset(&pinned_info, 0, sizeof(pinned_info));
+       if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len))
+               goto out_close;
+
+       obj_node = calloc(1, sizeof(*obj_node));
+       if (!obj_node) {
+               err = -1;
+               goto out_close;
+       }
+
+       obj_node->id = pinned_info.id;
+       obj_node->path = strdup(fpath);
+       if (!obj_node->path) {
+               err = -1;
+               free(obj_node);
+               goto out_close;
+       }
+
+       hash_add(build_fn_table->table, &obj_node->hash, obj_node->id);
+out_close:
+       close(fd);
+out_ret:
+       return err;
+}
+
 int build_pinned_obj_table(struct pinned_obj_table *tab,
                           enum bpf_obj_type type)
 {
-       struct bpf_prog_info pinned_info = {};
-       struct pinned_obj *obj_node = NULL;
-       __u32 len = sizeof(pinned_info);
        struct mntent *mntent = NULL;
-       enum bpf_obj_type objtype;
        FILE *mntfile = NULL;
-       FTSENT *ftse = NULL;
-       FTS *fts = NULL;
-       int fd, err;
+       int flags = FTW_PHYS;
+       int nopenfd = 16;
+       int err = 0;
 
        mntfile = setmntent("/proc/mounts", "r");
        if (!mntfile)
                return -1;
 
+       build_fn_table = tab;
+       build_fn_type = type;
+
        while ((mntent = getmntent(mntfile))) {
-               char *path[] = { mntent->mnt_dir, NULL };
+               char *path = mntent->mnt_dir;
 
                if (strncmp(mntent->mnt_type, "bpf", 3) != 0)
                        continue;
-
-               fts = fts_open(path, 0, NULL);
-               if (!fts)
-                       continue;
-
-               while ((ftse = fts_read(fts))) {
-                       if (!(ftse->fts_info & FTS_F))
-                               continue;
-                       fd = open_obj_pinned(ftse->fts_path, true);
-                       if (fd < 0)
-                               continue;
-
-                       objtype = get_fd_type(fd);
-                       if (objtype != type) {
-                               close(fd);
-                               continue;
-                       }
-                       memset(&pinned_info, 0, sizeof(pinned_info));
-                       err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len);
-                       if (err) {
-                               close(fd);
-                               continue;
-                       }
-
-                       obj_node = malloc(sizeof(*obj_node));
-                       if (!obj_node) {
-                               close(fd);
-                               fts_close(fts);
-                               fclose(mntfile);
-                               return -1;
-                       }
-
-                       memset(obj_node, 0, sizeof(*obj_node));
-                       obj_node->id = pinned_info.id;
-                       obj_node->path = strdup(ftse->fts_path);
-                       hash_add(tab->table, &obj_node->hash, obj_node->id);
-
-                       close(fd);
-               }
-               fts_close(fts);
+               err = nftw(path, do_build_table_cb, nopenfd, flags);
+               if (err)
+                       break;
        }
        fclose(mntfile);
-       return 0;
+       return err;
 }
 
 void delete_pinned_obj_table(struct pinned_obj_table *tab)
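
Since nftw() takes no user-data argument, the rewrite above hands state to the
callback through file-scope variables (build_fn_table, build_fn_type) set before the
walk starts. A minimal, self-contained sketch of that pattern, with illustrative
names and path:

  #define _GNU_SOURCE
  #include <ftw.h>
  #include <stdio.h>
  #include <sys/stat.h>

  static int file_count;	/* stands in for build_fn_table/build_fn_type */

  static int count_cb(const char *fpath, const struct stat *sb,
  		    int typeflag, struct FTW *ftwbuf)
  {
  	if (typeflag == FTW_F)	/* regular files only, as above */
  		file_count++;
  	return 0;		/* non-zero would stop the walk, like err above */
  }

  int main(void)
  {
  	file_count = 0;
  	if (nftw("/sys/fs/bpf", count_cb, 16 /* nopenfd */, FTW_PHYS))
  		return 1;
  	printf("%d regular files\n", file_count);
  	return 0;
  }
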
index b59d26e..8a4c2b3 100644 (file)
@@ -302,8 +302,11 @@ static int do_skeleton(int argc, char **argv)
        opts.object_name = obj_name;
        obj = bpf_object__open_mem(obj_data, file_sz, &opts);
        if (IS_ERR(obj)) {
+               char err_buf[256];
+
+               libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf));
+               p_err("failed to open BPF object file: %s", err_buf);
                obj = NULL;
-               p_err("failed to open BPF object file: %ld", PTR_ERR(obj));
                goto out;
        }
 
index 78d34e8..e3a79b5 100644 (file)
@@ -152,8 +152,8 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(char *path, bool quiet);
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int open_obj_pinned(const char *path, bool quiet);
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
 int mount_bpffs_for_pin(const char *name);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
 int do_pin_fd(int fd, const char *name);
index 6863c57..3e6ecc6 100644 (file)
@@ -59,6 +59,7 @@ const char * const prog_type_name[] = {
        [BPF_PROG_TYPE_TRACING]                 = "tracing",
        [BPF_PROG_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_PROG_TYPE_EXT]                     = "ext",
+       [BPF_PROG_TYPE_SK_LOOKUP]               = "sk_lookup",
 };
 
 const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
@@ -1905,7 +1906,7 @@ static int do_help(int argc, char **argv)
                "                 cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
                "                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
                "                 cgroup/getsockopt | cgroup/setsockopt |\n"
-               "                 struct_ops | fentry | fexit | freplace }\n"
+               "                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
                "       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
                "                        flow_dissector }\n"
                "       METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
index 8468a60..d9b4209 100644 (file)
@@ -71,7 +71,8 @@ int iter(struct bpf_iter__task_file *ctx)
 
        e.pid = task->tgid;
        e.id = get_obj_id(file->private_data, obj_type);
-       bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm);
+       bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
+                             task->group_leader->comm);
        bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
 
        return 0;
index fe01977..4867d54 100644 (file)
@@ -3,6 +3,8 @@
 #ifndef _LINUX_BTF_IDS_H
 #define _LINUX_BTF_IDS_H
 
+#ifdef CONFIG_DEBUG_INFO_BTF
+
 #include <linux/compiler.h> /* for __PASTE */
 
 /*
@@ -21,7 +23,7 @@
 asm(                                                   \
 ".pushsection " BTF_IDS_SECTION ",\"a\";       \n"     \
 ".local " #symbol " ;                          \n"     \
-".type  " #symbol ", @object;                  \n"     \
+".type  " #symbol ", STT_OBJECT;               \n"     \
 ".size  " #symbol ", 4;                        \n"     \
 #symbol ":                                     \n"     \
 ".zero 4                                       \n"     \
@@ -55,17 +57,20 @@ asm(                                                        \
  * .zero 4
  *
  */
-#define __BTF_ID_LIST(name)                            \
+#define __BTF_ID_LIST(name, scope)                     \
 asm(                                                   \
 ".pushsection " BTF_IDS_SECTION ",\"a\";       \n"     \
-".local " #name ";                             \n"     \
+"." #scope " " #name ";                        \n"     \
 #name ":;                                      \n"     \
 ".popsection;                                  \n");   \
 
 #define BTF_ID_LIST(name)                              \
-__BTF_ID_LIST(name)                                    \
+__BTF_ID_LIST(name, local)                             \
 extern u32 name[];
 
+#define BTF_ID_LIST_GLOBAL(name)                       \
+__BTF_ID_LIST(name, globl)
+
 /*
  * The BTF_ID_UNUSED macro defines 4 zero bytes.
  * It's used when we want to define 'unused' entry
@@ -83,5 +88,43 @@ asm(                                                 \
 ".zero 4                                       \n"     \
 ".popsection;                                  \n");
 
+#else
+
+#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID(prefix, name)
+#define BTF_ID_UNUSED
+#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+
+#endif /* CONFIG_DEBUG_INFO_BTF */
+
+#ifdef CONFIG_NET
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these socket types must have
+ * sock_common as the first member of their memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock)                    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock)    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock)        \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock)        \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock)                  \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock)                         \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common)           \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock)                      \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock)          \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)          \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)                    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)                      \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+extern u32 btf_sock_ids[];
+#endif
 
 #endif
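
BTF_SOCK_TYPE_xxx above is an X-macro list: each expansion site defines
BTF_SOCK_TYPE() to extract the field it needs. As a sketch, the enum block above
expands to roughly the following (abbreviated); the same list can then be expanded
again with a BTF_ID()-emitting definition to populate btf_sock_ids:

  enum {
  	BTF_SOCK_TYPE_INET,		/* inet_sock */
  	BTF_SOCK_TYPE_INET_CONN,	/* inet_connection_sock */
  	/* ... one constant per BTF_SOCK_TYPE() entry ... */
  	BTF_SOCK_TYPE_UDP6,		/* udp6_sock */
  	MAX_BTF_SOCK_TYPE,
  };
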
index 5e38638..54d0c88 100644 (file)
@@ -189,6 +189,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_STRUCT_OPS,
        BPF_PROG_TYPE_EXT,
        BPF_PROG_TYPE_LSM,
+       BPF_PROG_TYPE_SK_LOOKUP,
 };
 
 enum bpf_attach_type {
@@ -227,6 +228,8 @@ enum bpf_attach_type {
        BPF_CGROUP_INET6_GETSOCKNAME,
        BPF_XDP_DEVMAP,
        BPF_CGROUP_INET_SOCK_RELEASE,
+       BPF_XDP_CPUMAP,
+       BPF_SK_LOOKUP,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -2419,7 +2422,7 @@ union bpf_attr {
  *                     Look for an IPv6 socket.
  *
  *             If the *netns* is a negative signed 32-bit integer, then the
- *             socket lookup table in the netns associated with the *ctx* will
+ *             socket lookup table in the netns associated with the *ctx*
  *             will be used. For the TC hooks, this is the netns of the device
  *             in the skb. For socket hooks, this is the netns of the socket.
  *             If *netns* is any other signed 32-bit value greater than or
@@ -2456,7 +2459,7 @@ union bpf_attr {
  *                     Look for an IPv6 socket.
  *
  *             If the *netns* is a negative signed 32-bit integer, then the
- *             socket lookup table in the netns associated with the *ctx* will
+ *             socket lookup table in the netns associated with the *ctx*
  *             will be used. For the TC hooks, this is the netns of the device
  *             in the skb. For socket hooks, this is the netns of the socket.
  *             If *netns* is any other signed 32-bit value greater than or
@@ -3068,6 +3071,10 @@ union bpf_attr {
  *
  * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
  *     Description
+ *             The helper is overloaded depending on the BPF program type.
+ *             This description applies to **BPF_PROG_TYPE_SCHED_CLS** and
+ *             **BPF_PROG_TYPE_SCHED_ACT** programs.
+ *
  *             Assign the *sk* to the *skb*. When combined with appropriate
  *             routing configuration to receive the packet towards the socket,
 *             this will cause *skb* to be delivered to the specified socket.
@@ -3093,6 +3100,56 @@ union bpf_attr {
  *             **-ESOCKTNOSUPPORT** if the socket type is not supported
  *             (reuseport).
  *
+ * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
+ *     Description
+ *             The helper is overloaded depending on the BPF program type.
+ *             This description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
+ *
+ *             Select the *sk* as a result of a socket lookup.
+ *
+ *             For the operation to succeed, the passed socket must be
+ *             compatible with the packet description provided by the
+ *             *ctx* object.
+ *
+ *             The L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
+ *             be an exact match, while the IP family (**AF_INET** or
+ *             **AF_INET6**) must be compatible; that is, IPv6 sockets
+ *             that are not v6-only can be selected for IPv4 packets.
+ *
+ *             Only TCP listeners and unconnected UDP sockets can be
+ *             selected. *sk* can also be NULL to reset any previous
+ *             selection.
+ *
+ *             The *flags* argument can be a combination of the following
+ *             values:
+ *
+ *             * **BPF_SK_LOOKUP_F_REPLACE** to override the previous
+ *               socket selection, potentially done by a BPF program
+ *               that ran before us.
+ *
+ *             * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
+ *               load-balancing within the reuseport group for the
+ *               socket being selected.
+ *
+ *             On success *ctx->sk* will point to the selected socket.
+ *
+ *     Return
+ *             0 on success, or a negative errno in case of failure.
+ *
+ *             * **-EAFNOSUPPORT** if socket family (*sk->family*) is
+ *               not compatible with packet family (*ctx->family*).
+ *
+ *             * **-EEXIST** if a socket has already been selected,
+ *               potentially by another program, and
+ *               **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
+ *
+ *             * **-EINVAL** if unsupported flags were specified.
+ *
+ *             * **-EPROTOTYPE** if socket L4 protocol
+ *               (*sk->protocol*) doesn't match packet protocol
+ *               (*ctx->protocol*).
+ *
+ *             * **-ESOCKTNOSUPPORT** if the socket is not in an allowed
+ *               state (TCP listening or UDP unconnected).
+ *
  * u64 bpf_ktime_get_boot_ns(void)
  *     Description
  *             Return the time elapsed since system boot, in nanoseconds.
@@ -3606,6 +3663,12 @@ enum {
        BPF_RINGBUF_HDR_SZ              = 8,
 };
 
+/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
+enum {
+       BPF_SK_LOOKUP_F_REPLACE         = (1ULL << 0),
+       BPF_SK_LOOKUP_F_NO_REUSEPORT    = (1ULL << 1),
+};
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
        BPF_ADJ_ROOM_NET,
@@ -3849,6 +3912,19 @@ struct bpf_devmap_val {
        } bpf_prog;
 };
 
+/* CPUMAP map-value layout
+ *
+ * The data layout of the map value is a configuration interface.
+ * New members can only be added to the end of this structure.
+ */
+struct bpf_cpumap_val {
+       __u32 qsize;    /* queue size to remote target CPU */
+       union {
+               int   fd;       /* prog fd on map write */
+               __u32 id;       /* prog id on map read */
+       } bpf_prog;
+};
+
 enum sk_action {
        SK_DROP = 0,
        SK_PASS,
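
A hedged user-space sketch of how struct bpf_cpumap_val is meant to be used: write
an entry carrying the queue size together with a program fd; reading the entry back
reports the program id instead. The map fd, CPU index, and qsize below are
illustrative, and the struct assumes UAPI headers from this series.

  #include <linux/bpf.h>
  #include <bpf/bpf.h>

  static int add_cpumap_entry(int cpumap_fd, __u32 cpu, int prog_fd)
  {
  	struct bpf_cpumap_val val = {
  		.qsize		= 2048,		/* ptr_ring size on remote CPU */
  		.bpf_prog.fd	= prog_fd,	/* fd on write, id on read */
  	};

  	return bpf_map_update_elem(cpumap_fd, &cpu, &val, 0);
  }
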
@@ -3986,7 +4062,7 @@ struct bpf_link_info {
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
  * by user and intended to be used by socket (e.g. to bind to, depends on
- * attach attach type).
+ * attach type).
  */
 struct bpf_sock_addr {
        __u32 user_family;      /* Allows 4-byte read, but no write. */
@@ -4335,4 +4411,19 @@ struct bpf_pidns_info {
        __u32 pid;
        __u32 tgid;
 };
+
+/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+struct bpf_sk_lookup {
+       __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+
+       __u32 family;           /* Protocol family (AF_INET, AF_INET6) */
+       __u32 protocol;         /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+       __u32 remote_ip4;       /* Network byte order */
+       __u32 remote_ip6[4];    /* Network byte order */
+       __u32 remote_port;      /* Network byte order */
+       __u32 local_ip4;        /* Network byte order */
+       __u32 local_ip6[4];     /* Network byte order */
+       __u32 local_port;       /* Host byte order */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
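
Tying the new UAPI pieces together, here is a minimal sketch (not taken from this
series) of a BPF_PROG_TYPE_SK_LOOKUP program that steers TCP packets arriving on one
port to a socket stored in a sockmap. The map name, key, and port are illustrative:

  #include <linux/bpf.h>
  #include <linux/in.h>
  #include <bpf/bpf_helpers.h>

  struct {
  	__uint(type, BPF_MAP_TYPE_SOCKMAP);
  	__uint(max_entries, 1);
  	__uint(key_size, sizeof(__u32));
  	__uint(value_size, sizeof(__u64));
  } redir_map SEC(".maps");

  SEC("sk_lookup/steer_tcp")
  int steer_tcp(struct bpf_sk_lookup *ctx)
  {
  	const __u32 zero = 0;
  	struct bpf_sock *sk;
  	long err;

  	/* local_port is in host byte order in this context */
  	if (ctx->protocol != IPPROTO_TCP || ctx->local_port != 7007)
  		return SK_PASS;	/* not ours, fall back to regular lookup */

  	sk = bpf_map_lookup_elem(&redir_map, &zero);
  	if (!sk)
  		return SK_PASS;

  	err = bpf_sk_assign(ctx, sk, 0 /* flags */);
  	bpf_sk_release(sk);
  	return err ? SK_DROP : SK_PASS;
  }

  char _license[] SEC("license") = "GPL";
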
index a510d8e..bc14db7 100644 (file)
@@ -40,7 +40,7 @@
  * Helper macro to manipulate data structures
  */
 #ifndef offsetof
-#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
+#define offsetof(TYPE, MEMBER)  __builtin_offsetof(TYPE, MEMBER)
 #endif
 #ifndef container_of
 #define container_of(ptr, type, member)                                \
index 4489f95..846164c 100644 (file)
@@ -6799,6 +6799,7 @@ BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
+BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
 
 enum bpf_attach_type
 bpf_program__get_expected_attach_type(struct bpf_program *prog)
@@ -6912,6 +6913,8 @@ static const struct bpf_sec_def section_defs[] = {
                .attach_fn = attach_iter),
        BPF_EAPROG_SEC("xdp_devmap/",           BPF_PROG_TYPE_XDP,
                                                BPF_XDP_DEVMAP),
+       BPF_EAPROG_SEC("xdp_cpumap/",           BPF_PROG_TYPE_XDP,
+                                               BPF_XDP_CPUMAP),
        BPF_PROG_SEC("xdp",                     BPF_PROG_TYPE_XDP),
        BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
        BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
@@ -6979,6 +6982,8 @@ static const struct bpf_sec_def section_defs[] = {
        BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
                                                BPF_CGROUP_SETSOCKOPT),
        BPF_PROG_SEC("struct_ops",              BPF_PROG_TYPE_STRUCT_OPS),
+       BPF_EAPROG_SEC("sk_lookup/",            BPF_PROG_TYPE_SK_LOOKUP,
+                                               BPF_SK_LOOKUP),
 };
 
 #undef BPF_PROG_SEC_IMPL
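
With these section definitions libbpf infers both the program type and the expected
attach type from the ELF section name alone. A hedged sketch of a cpumap
second-chance XDP program on the BPF side (names illustrative):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  /* The "xdp_cpumap/" prefix maps to BPF_PROG_TYPE_XDP with
   * expected_attach_type = BPF_XDP_CPUMAP, so this program can be
   * installed into a cpumap entry and run after the redirect.
   */
  SEC("xdp_cpumap/pass")
  int xdp_cpumap_pass(struct xdp_md *ctx)
  {
  	return XDP_PASS;
  }

  char _license[] SEC("license") = "GPL";
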
index 2335971..c227213 100644 (file)
@@ -350,6 +350,7 @@ LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
 
 LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@@ -377,6 +378,7 @@ LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
index c5d5c76..6f0856a 100644 (file)
@@ -287,6 +287,8 @@ LIBBPF_0.1.0 {
                bpf_map__type;
                bpf_map__value_size;
                bpf_program__autoload;
+               bpf_program__is_sk_lookup;
                bpf_program__set_autoload;
+               bpf_program__set_sk_lookup;
                btf__set_fd;
 } LIBBPF_0.0.9;
index 10cd8d1..5a3d3f0 100644 (file)
@@ -78,6 +78,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
                xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
                break;
+       case BPF_PROG_TYPE_SK_LOOKUP:
+               xattr.expected_attach_type = BPF_SK_LOOKUP;
+               break;
        case BPF_PROG_TYPE_KPROBE:
                xattr.kern_version = get_kernel_version();
                break;
index acd0871..f566556 100644 (file)
@@ -73,29 +73,8 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
        socklen_t len;
        int fd;
 
-       if (family == AF_INET) {
-               struct sockaddr_in *sin = (void *)&addr;
-
-               sin->sin_family = AF_INET;
-               sin->sin_port = htons(port);
-               if (addr_str &&
-                   inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
-                       log_err("inet_pton(AF_INET, %s)", addr_str);
-                       return -1;
-               }
-               len = sizeof(*sin);
-       } else {
-               struct sockaddr_in6 *sin6 = (void *)&addr;
-
-               sin6->sin6_family = AF_INET6;
-               sin6->sin6_port = htons(port);
-               if (addr_str &&
-                   inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
-                       log_err("inet_pton(AF_INET6, %s)", addr_str);
-                       return -1;
-               }
-               len = sizeof(*sin6);
-       }
+       if (make_sockaddr(family, addr_str, port, &addr, &len))
+               return -1;
 
        fd = socket(family, type, 0);
        if (fd < 0) {
@@ -194,3 +173,36 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
 
        return 0;
 }
+
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+                 struct sockaddr_storage *addr, socklen_t *len)
+{
+       if (family == AF_INET) {
+               struct sockaddr_in *sin = (void *)addr;
+
+               sin->sin_family = AF_INET;
+               sin->sin_port = htons(port);
+               if (addr_str &&
+                   inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+                       log_err("inet_pton(AF_INET, %s)", addr_str);
+                       return -1;
+               }
+               if (len)
+                       *len = sizeof(*sin);
+               return 0;
+       } else if (family == AF_INET6) {
+               struct sockaddr_in6 *sin6 = (void *)addr;
+
+               sin6->sin6_family = AF_INET6;
+               sin6->sin6_port = htons(port);
+               if (addr_str &&
+                   inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+                       log_err("inet_pton(AF_INET6, %s)", addr_str);
+                       return -1;
+               }
+               if (len)
+                       *len = sizeof(*sin6);
+               return 0;
+       }
+       return -1;
+}
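
A short usage sketch for the extracted helper, assuming callers follow the
start_server() pattern above; the address and port are illustrative:

  #include <sys/socket.h>
  #include <unistd.h>
  #include "network_helpers.h"

  static int bind_udp6_loopback(void)
  {
  	struct sockaddr_storage addr;
  	socklen_t len;
  	int fd;

  	if (make_sockaddr(AF_INET6, "::1", 7007, &addr, &len))
  		return -1;

  	fd = socket(AF_INET6, SOCK_DGRAM, 0);
  	if (fd < 0)
  		return -1;

  	if (bind(fd, (struct sockaddr *)&addr, len)) {
  		close(fd);
  		return -1;
  	}
  	return fd;
  }
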
index f580e82..c3728f6 100644 (file)
@@ -37,5 +37,7 @@ int start_server(int family, int type, const char *addr, __u16 port,
                 int timeout_ms);
 int connect_to_fd(int server_fd, int timeout_ms);
 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+                 struct sockaddr_storage *addr, socklen_t *len);
 
 #endif
index 403be6f..3b127ca 100644 (file)
@@ -6,6 +6,7 @@
 #include <bpf/libbpf.h>
 #include <linux/btf.h>
 #include <linux/kernel.h>
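+/* Define CONFIG_DEBUG_INFO_BTF before including btf_ids.h so the
+ * asm-based BTF_ID_LIST variants are emitted for resolve_btfids to fill.
+ */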
+#define CONFIG_DEBUG_INFO_BTF
 #include <linux/btf_ids.h>
 #include "test_progs.h"
 
@@ -27,7 +28,17 @@ struct symbol test_symbols[] = {
        { "func",    BTF_KIND_FUNC,    -1 },
 };
 
-BTF_ID_LIST(test_list)
+BTF_ID_LIST(test_list_local)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct,  S)
+BTF_ID(union,   U)
+BTF_ID(func,    func)
+
+extern __u32 test_list_global[];
+BTF_ID_LIST_GLOBAL(test_list_global)
 BTF_ID_UNUSED
 BTF_ID(typedef, S)
 BTF_ID(typedef, T)
@@ -93,18 +104,25 @@ static int resolve_symbols(void)
 
 int test_resolve_btfids(void)
 {
-       unsigned int i;
+       __u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
+       unsigned int i, j;
        int ret = 0;
 
        if (resolve_symbols())
                return -1;
 
-       /* Check BTF_ID_LIST(test_list) IDs */
-       for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
-               ret = CHECK(test_list[i] != test_symbols[i].id,
-                           "id_check",
-                           "wrong ID for %s (%d != %d)\n", test_symbols[i].name,
-                           test_list[i], test_symbols[i].id);
+       /* Check BTF_ID_LIST(test_list_local) and
+        * BTF_ID_LIST_GLOBAL(test_list_global) IDs
+        */
+       for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
+               test_list = test_lists[j];
+               for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
+                       ret = CHECK(test_list[i] != test_symbols[i].id,
+                                   "id_check",
+                                   "wrong ID for %s (%d != %d)\n",
+                                   test_symbols[i].name,
+                                   test_list[i], test_symbols[i].id);
+               }
        }
 
        return ret;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
new file mode 100644 (file)
index 0000000..f1784ae
--- /dev/null
@@ -0,0 +1,1282 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP).
+ *
+ * Tests exercise:
+ *  - attaching/detaching/querying programs to the BPF_SK_LOOKUP hook,
+ *  - redirecting a socket lookup to a socket selected by a BPF program,
+ *  - failing a socket lookup on a BPF program's request,
+ *  - error scenarios for selecting a socket from a BPF program,
+ *  - accessing the BPF program context,
+ *  - attaching and running multiple BPF programs.
+ *
+ * Tests run in a dedicated network namespace.
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_sk_lookup.skel.h"
+
+/* External (address, port) pairs the client sends packets to. */
+#define EXT_IP4                "127.0.0.1"
+#define EXT_IP6                "fd00::1"
+#define EXT_PORT       7007
+
+/* Internal (address, port) pairs the server listens/receives at. */
+#define INT_IP4                "127.0.0.2"
+#define INT_IP4_V6     "::ffff:127.0.0.2"
+#define INT_IP6                "fd00::2"
+#define INT_PORT       8008
+
+#define IO_TIMEOUT_SEC 3
+
+enum server {
+       SERVER_A = 0,
+       SERVER_B = 1,
+       MAX_SERVERS,
+};
+
+enum {
+       PROG1 = 0,
+       PROG2,
+};
+
+struct inet_addr {
+       const char *ip;
+       unsigned short port;
+};
+
+struct test {
+       const char *desc;
+       struct bpf_program *lookup_prog;
+       struct bpf_program *reuseport_prog;
+       struct bpf_map *sock_map;
+       int sotype;
+       struct inet_addr connect_to;
+       struct inet_addr listen_at;
+       enum server accept_on;
+};
+
+static __u32 duration;         /* for CHECK macro */
+
+static bool is_ipv6(const char *ip)
+{
+       return !!strchr(ip, ':');
+}
+
+static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
+{
+       int err, prog_fd;
+
+       prog_fd = bpf_program__fd(reuseport_prog);
+       if (prog_fd < 0) {
+               errno = -prog_fd;
+               return -1;
+       }
+
+       err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
+                        &prog_fd, sizeof(prog_fd));
+       if (err)
+               return -1;
+
+       return 0;
+}
+
+static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
+{
+       return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) :
+               addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0);
+}
+
+static int make_socket(int sotype, const char *ip, int port,
+                      struct sockaddr_storage *addr)
+{
+       struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
+       int err, family, fd;
+
+       family = is_ipv6(ip) ? AF_INET6 : AF_INET;
+       err = make_sockaddr(family, ip, port, addr, NULL);
+       if (CHECK(err, "make_address", "failed\n"))
+               return -1;
+
+       fd = socket(addr->ss_family, sotype, 0);
+       if (CHECK(fd < 0, "socket", "failed\n")) {
+               log_err("failed to make socket");
+               return -1;
+       }
+
+       err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+       if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
+               log_err("failed to set SNDTIMEO");
+               close(fd);
+               return -1;
+       }
+
+       err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+       if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
+               log_err("failed to set RCVTIMEO");
+               close(fd);
+               return -1;
+       }
+
+       return fd;
+}
+
+static int make_server(int sotype, const char *ip, int port,
+                      struct bpf_program *reuseport_prog)
+{
+       struct sockaddr_storage addr = {0};
+       const int one = 1;
+       int err, fd = -1;
+
+       fd = make_socket(sotype, ip, port, &addr);
+       if (fd < 0)
+               return -1;
+
+       /* Enabled also on UDPv6 sockets so that IPv4-mapped IPv6 works. */
+       if (sotype == SOCK_DGRAM) {
+               err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
+                                sizeof(one));
+               if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
+                       log_err("failed to enable IP_RECVORIGDSTADDR");
+                       goto fail;
+               }
+       }
+
+       if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
+               err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
+                                sizeof(one));
+               if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
+                       log_err("failed to enable IPV6_RECVORIGDSTADDR");
+                       goto fail;
+               }
+       }
+
+       if (sotype == SOCK_STREAM) {
+               err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
+                                sizeof(one));
+               if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
+                       log_err("failed to enable SO_REUSEADDR");
+                       goto fail;
+               }
+       }
+
+       if (reuseport_prog) {
+               err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
+                                sizeof(one));
+               if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
+                       log_err("failed to enable SO_REUSEPORT");
+                       goto fail;
+               }
+       }
+
+       err = bind(fd, (void *)&addr, inetaddr_len(&addr));
+       if (CHECK(err, "bind", "failed\n")) {
+               log_err("failed to bind listen socket");
+               goto fail;
+       }
+
+       if (sotype == SOCK_STREAM) {
+               err = listen(fd, SOMAXCONN);
+               if (CHECK(err, "make_server", "listen")) {
+                       log_err("failed to listen on port %d", port);
+                       goto fail;
+               }
+       }
+
+       /* Attach the reuseport prog late so there is a single init path */
+       if (reuseport_prog) {
+               err = attach_reuseport(fd, reuseport_prog);
+               if (CHECK(err, "attach_reuseport", "failed\n")) {
+                       log_err("failed to attach reuseport prog");
+                       goto fail;
+               }
+       }
+
+       return fd;
+fail:
+       close(fd);
+       return -1;
+}
+
+static int make_client(int sotype, const char *ip, int port)
+{
+       struct sockaddr_storage addr = {0};
+       int err, fd;
+
+       fd = make_socket(sotype, ip, port, &addr);
+       if (fd < 0)
+               return -1;
+
+       err = connect(fd, (void *)&addr, inetaddr_len(&addr));
+       if (CHECK(err, "make_client", "connect")) {
+               log_err("failed to connect client socket");
+               goto fail;
+       }
+
+       return fd;
+fail:
+       close(fd);
+       return -1;
+}
+
+static int send_byte(int fd)
+{
+       ssize_t n;
+
+       errno = 0;
+       n = send(fd, "a", 1, 0);
+       if (CHECK(n <= 0, "send_byte", "send")) {
+               log_err("failed/partial send");
+               return -1;
+       }
+       return 0;
+}
+
+static int recv_byte(int fd)
+{
+       char buf[1];
+       ssize_t n;
+
+       n = recv(fd, buf, sizeof(buf), 0);
+       if (CHECK(n <= 0, "recv_byte", "recv")) {
+               log_err("failed/partial recv");
+               return -1;
+       }
+       return 0;
+}
+
+static int tcp_recv_send(int server_fd)
+{
+       char buf[1];
+       int ret, fd;
+       ssize_t n;
+
+       fd = accept(server_fd, NULL, NULL);
+       if (CHECK(fd < 0, "accept", "failed\n")) {
+               log_err("failed to accept");
+               return -1;
+       }
+
+       n = recv(fd, buf, sizeof(buf), 0);
+       if (CHECK(n <= 0, "recv", "failed\n")) {
+               log_err("failed/partial recv");
+               ret = -1;
+               goto close;
+       }
+
+       n = send(fd, buf, n, 0);
+       if (CHECK(n <= 0, "send", "failed\n")) {
+               log_err("failed/partial send");
+               ret = -1;
+               goto close;
+       }
+
+       ret = 0;
+close:
+       close(fd);
+       return ret;
+}
+
+static void v4_to_v6(struct sockaddr_storage *ss)
+{
+       struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss;
+       struct sockaddr_in v4 = *(struct sockaddr_in *)ss;
+
+       v6->sin6_family = AF_INET6;
+       v6->sin6_port = v4.sin_port;
+       v6->sin6_addr.s6_addr[10] = 0xff;
+       v6->sin6_addr.s6_addr[11] = 0xff;
+       memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
+}
+
+static int udp_recv_send(int server_fd)
+{
+       char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))];
+       struct sockaddr_storage _src_addr = { 0 };
+       struct sockaddr_storage *src_addr = &_src_addr;
+       struct sockaddr_storage *dst_addr = NULL;
+       struct msghdr msg = { 0 };
+       struct iovec iov = { 0 };
+       struct cmsghdr *cm;
+       char buf[1];
+       int ret, fd;
+       ssize_t n;
+
+       iov.iov_base = buf;
+       iov.iov_len = sizeof(buf);
+
+       msg.msg_name = src_addr;
+       msg.msg_namelen = sizeof(*src_addr);
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+       msg.msg_control = cmsg_buf;
+       msg.msg_controllen = sizeof(cmsg_buf);
+
+       errno = 0;
+       n = recvmsg(server_fd, &msg, 0);
+       if (CHECK(n <= 0, "recvmsg", "failed\n")) {
+               log_err("failed to receive");
+               return -1;
+       }
+       if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n"))
+               return -1;
+
+       for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+               if ((cm->cmsg_level == SOL_IP &&
+                    cm->cmsg_type == IP_ORIGDSTADDR) ||
+                   (cm->cmsg_level == SOL_IPV6 &&
+                    cm->cmsg_type == IPV6_ORIGDSTADDR)) {
+                       dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm);
+                       break;
+               }
+               log_err("warning: ignored cmsg at level %d type %d",
+                       cm->cmsg_level, cm->cmsg_type);
+       }
+       if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n"))
+               return -1;
+
+       /* Server socket bound to IPv4-mapped IPv6 address */
+       if (src_addr->ss_family == AF_INET6 &&
+           dst_addr->ss_family == AF_INET) {
+               v4_to_v6(dst_addr);
+       }
+
+       /* Reply from original destination address. */
+       fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
+       if (CHECK(fd < 0, "socket", "failed\n")) {
+               log_err("failed to create tx socket");
+               return -1;
+       }
+
+       ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
+       if (CHECK(ret, "bind", "failed\n")) {
+               log_err("failed to bind tx socket");
+               goto out;
+       }
+
+       msg.msg_control = NULL;
+       msg.msg_controllen = 0;
+       n = sendmsg(fd, &msg, 0);
+       if (CHECK(n <= 0, "sendmsg", "failed\n")) {
+               log_err("failed to send echo reply");
+               ret = -1;
+               goto out;
+       }
+
+       ret = 0;
+out:
+       close(fd);
+       return ret;
+}
+
+static int tcp_echo_test(int client_fd, int server_fd)
+{
+       int err;
+
+       err = send_byte(client_fd);
+       if (err)
+               return -1;
+       err = tcp_recv_send(server_fd);
+       if (err)
+               return -1;
+       err = recv_byte(client_fd);
+       if (err)
+               return -1;
+
+       return 0;
+}
+
+static int udp_echo_test(int client_fd, int server_fd)
+{
+       int err;
+
+       err = send_byte(client_fd);
+       if (err)
+               return -1;
+       err = udp_recv_send(server_fd);
+       if (err)
+               return -1;
+       err = recv_byte(client_fd);
+       if (err)
+               return -1;
+
+       return 0;
+}
+
+static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
+{
+       struct bpf_link *link;
+       int net_fd;
+
+       net_fd = open("/proc/self/ns/net", O_RDONLY);
+       if (CHECK(net_fd < 0, "open", "failed\n")) {
+               log_err("failed to open /proc/self/ns/net");
+               return NULL;
+       }
+
+       link = bpf_program__attach_netns(prog, net_fd);
+       if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+               errno = -PTR_ERR(link);
+               log_err("failed to attach program '%s' to netns",
+                       bpf_program__name(prog));
+               link = NULL;
+       }
+
+       close(net_fd);
+       return link;
+}
+
+static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
+{
+       int err, map_fd;
+       uint64_t value;
+
+       map_fd = bpf_map__fd(map);
+       if (CHECK(map_fd < 0, "bpf_map__fd", "failed\n")) {
+               errno = -map_fd;
+               log_err("failed to get map FD");
+               return -1;
+       }
+
+       value = (uint64_t)sock_fd;
+       err = bpf_map_update_elem(map_fd, &index, &value, BPF_NOEXIST);
+       if (CHECK(err, "bpf_map_update_elem", "failed\n")) {
+               log_err("failed to update redir_map @ %d", index);
+               return -1;
+       }
+
+       return 0;
+}
+
+static __u32 link_info_prog_id(struct bpf_link *link)
+{
+       struct bpf_link_info info = {};
+       __u32 info_len = sizeof(info);
+       int link_fd, err;
+
+       link_fd = bpf_link__fd(link);
+       if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) {
+               errno = -link_fd;
+               log_err("bpf_link__fd failed");
+               return 0;
+       }
+
+       err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+       if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) {
+               log_err("bpf_obj_get_info_by_fd");
+               return 0;
+       }
+       if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd",
+                 "unexpected info len %u\n", info_len))
+               return 0;
+
+       return info.prog_id;
+}
+
+static void query_lookup_prog(struct test_sk_lookup *skel)
+{
+       struct bpf_link *link[3] = {};
+       __u32 attach_flags = 0;
+       __u32 prog_ids[3] = {};
+       __u32 prog_cnt = 3;
+       __u32 prog_id;
+       int net_fd;
+       int err;
+
+       net_fd = open("/proc/self/ns/net", O_RDONLY);
+       if (CHECK(net_fd < 0, "open", "failed\n")) {
+               log_err("failed to open /proc/self/ns/net");
+               return;
+       }
+
+       link[0] = attach_lookup_prog(skel->progs.lookup_pass);
+       if (!link[0])
+               goto close;
+       link[1] = attach_lookup_prog(skel->progs.lookup_pass);
+       if (!link[1])
+               goto detach;
+       link[2] = attach_lookup_prog(skel->progs.lookup_drop);
+       if (!link[2])
+               goto detach;
+
+       err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */,
+                            &attach_flags, prog_ids, &prog_cnt);
+       if (CHECK(err, "bpf_prog_query", "failed\n")) {
+               log_err("failed to query lookup prog");
+               goto detach;
+       }
+
+       errno = 0;
+       if (CHECK(attach_flags != 0, "bpf_prog_query",
+                 "wrong attach_flags on query: %u", attach_flags))
+               goto detach;
+       if (CHECK(prog_cnt != 3, "bpf_prog_query",
+                 "wrong program count on query: %u", prog_cnt))
+               goto detach;
+       prog_id = link_info_prog_id(link[0]);
+       CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+             "invalid program #0 id on query: %u != %u\n",
+             prog_ids[0], prog_id);
+       prog_id = link_info_prog_id(link[1]);
+       CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
+             "invalid program #1 id on query: %u != %u\n",
+             prog_ids[1], prog_id);
+       prog_id = link_info_prog_id(link[2]);
+       CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
+             "invalid program #2 id on query: %u != %u\n",
+             prog_ids[2], prog_id);
+
+detach:
+       if (link[2])
+               bpf_link__destroy(link[2]);
+       if (link[1])
+               bpf_link__destroy(link[1]);
+       if (link[0])
+               bpf_link__destroy(link[0]);
+close:
+       close(net_fd);
+}
+
+static void run_lookup_prog(const struct test *t)
+{
+       int client_fd, server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
+       struct bpf_link *lookup_link;
+       int i, err;
+
+       lookup_link = attach_lookup_prog(t->lookup_prog);
+       if (!lookup_link)
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+               server_fds[i] = make_server(t->sotype, t->listen_at.ip,
+                                           t->listen_at.port,
+                                           t->reuseport_prog);
+               if (server_fds[i] < 0)
+                       goto close;
+
+               err = update_lookup_map(t->sock_map, i, server_fds[i]);
+               if (err)
+                       goto close;
+
+               /* want just one server for non-reuseport test */
+               if (!t->reuseport_prog)
+                       break;
+       }
+
+       client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
+       if (client_fd < 0)
+               goto close;
+
+       if (t->sotype == SOCK_STREAM)
+               tcp_echo_test(client_fd, server_fds[t->accept_on]);
+       else
+               udp_echo_test(client_fd, server_fds[t->accept_on]);
+
+       close(client_fd);
+close:
+       for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+               if (server_fds[i] != -1)
+                       close(server_fds[i]);
+       }
+       bpf_link__destroy(lookup_link);
+}
+
+static void test_redirect_lookup(struct test_sk_lookup *skel)
+{
+       const struct test tests[] = {
+               {
+                       .desc           = "TCP IPv4 redir port",
+                       .lookup_prog    = skel->progs.redir_port,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { EXT_IP4, INT_PORT },
+               },
+               {
+                       .desc           = "TCP IPv4 redir addr",
+                       .lookup_prog    = skel->progs.redir_ip4,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4, EXT_PORT },
+               },
+               {
+                       .desc           = "TCP IPv4 redir with reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a,
+                       .reuseport_prog = skel->progs.select_sock_b,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4, INT_PORT },
+                       .accept_on      = SERVER_B,
+               },
+               {
+                       .desc           = "TCP IPv4 redir skip reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a_no_reuseport,
+                       .reuseport_prog = skel->progs.select_sock_b,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4, INT_PORT },
+                       .accept_on      = SERVER_A,
+               },
+               {
+                       .desc           = "TCP IPv6 redir port",
+                       .lookup_prog    = skel->progs.redir_port,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { EXT_IP6, INT_PORT },
+               },
+               {
+                       .desc           = "TCP IPv6 redir addr",
+                       .lookup_prog    = skel->progs.redir_ip6,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { INT_IP6, EXT_PORT },
+               },
+               {
+                       .desc           = "TCP IPv4->IPv6 redir port",
+                       .lookup_prog    = skel->progs.redir_port,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4_V6, INT_PORT },
+               },
+               {
+                       .desc           = "TCP IPv6 redir with reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a,
+                       .reuseport_prog = skel->progs.select_sock_b,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { INT_IP6, INT_PORT },
+                       .accept_on      = SERVER_B,
+               },
+               {
+                       .desc           = "TCP IPv6 redir skip reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a_no_reuseport,
+                       .reuseport_prog = skel->progs.select_sock_b,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { INT_IP6, INT_PORT },
+                       .accept_on      = SERVER_A,
+               },
+               {
+                       .desc           = "UDP IPv4 redir port",
+                       .lookup_prog    = skel->progs.redir_port,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { EXT_IP4, INT_PORT },
+               },
+               {
+                       .desc           = "UDP IPv4 redir addr",
+                       .lookup_prog    = skel->progs.redir_ip4,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4, EXT_PORT },
+               },
+               {
+                       .desc           = "UDP IPv4 redir with reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a,
+                       .reuseport_prog = skel->progs.select_sock_b,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4, INT_PORT },
+                       .accept_on      = SERVER_B,
+               },
+               {
+                       .desc           = "UDP IPv4 redir skip reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a_no_reuseport,
+                       .reuseport_prog = skel->progs.select_sock_b,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4, INT_PORT },
+                       .accept_on      = SERVER_A,
+               },
+               {
+                       .desc           = "UDP IPv6 redir port",
+                       .lookup_prog    = skel->progs.redir_port,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { EXT_IP6, INT_PORT },
+               },
+               {
+                       .desc           = "UDP IPv6 redir addr",
+                       .lookup_prog    = skel->progs.redir_ip6,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { INT_IP6, EXT_PORT },
+               },
+               {
+                       .desc           = "UDP IPv4->IPv6 redir port",
+                       .lookup_prog    = skel->progs.redir_port,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .listen_at      = { INT_IP4_V6, INT_PORT },
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+               },
+               {
+                       .desc           = "UDP IPv6 redir and reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a,
+                       .reuseport_prog = skel->progs.select_sock_b,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { INT_IP6, INT_PORT },
+                       .accept_on      = SERVER_B,
+               },
+               {
+                       .desc           = "UDP IPv6 redir skip reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a_no_reuseport,
+                       .reuseport_prog = skel->progs.select_sock_b,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { INT_IP6, INT_PORT },
+                       .accept_on      = SERVER_A,
+               },
+       };
+       const struct test *t;
+
+       for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+               if (test__start_subtest(t->desc))
+                       run_lookup_prog(t);
+       }
+}
+
+static void drop_on_lookup(const struct test *t)
+{
+       struct sockaddr_storage dst = {};
+       int client_fd, server_fd, err;
+       struct bpf_link *lookup_link;
+       ssize_t n;
+
+       lookup_link = attach_lookup_prog(t->lookup_prog);
+       if (!lookup_link)
+               return;
+
+       server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+                               t->reuseport_prog);
+       if (server_fd < 0)
+               goto detach;
+
+       client_fd = make_socket(t->sotype, t->connect_to.ip,
+                               t->connect_to.port, &dst);
+       if (client_fd < 0)
+               goto close_srv;
+
+       err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+       if (t->sotype == SOCK_DGRAM) {
+               err = send_byte(client_fd);
+               if (err)
+                       goto close_all;
+
+               /* Read out asynchronous error */
+               n = recv(client_fd, NULL, 0, 0);
+               err = n == -1;
+       }
+       if (CHECK(!err || errno != ECONNREFUSED, "connect",
+                 "unexpected success or error\n"))
+               log_err("expected ECONNREFUSED on connect");
+
+close_all:
+       close(client_fd);
+close_srv:
+       close(server_fd);
+detach:
+       bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_lookup(struct test_sk_lookup *skel)
+{
+       const struct test tests[] = {
+               {
+                       .desc           = "TCP IPv4 drop on lookup",
+                       .lookup_prog    = skel->progs.lookup_drop,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { EXT_IP4, EXT_PORT },
+               },
+               {
+                       .desc           = "TCP IPv6 drop on lookup",
+                       .lookup_prog    = skel->progs.lookup_drop,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { EXT_IP6, EXT_PORT },
+               },
+               {
+                       .desc           = "UDP IPv4 drop on lookup",
+                       .lookup_prog    = skel->progs.lookup_drop,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { EXT_IP4, EXT_PORT },
+               },
+               {
+                       .desc           = "UDP IPv6 drop on lookup",
+                       .lookup_prog    = skel->progs.lookup_drop,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { EXT_IP6, EXT_PORT },
+               },
+       };
+       const struct test *t;
+
+       for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+               if (test__start_subtest(t->desc))
+                       drop_on_lookup(t);
+       }
+}
+
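+/* Steer the connection to server 1 with the lookup program and let
+ * server 1's reuseport program drop it. Server 2 listens directly on
+ * the destination address, so a refused connection shows that the
+ * reuseport verdict was applied after the lookup redirect.
+ */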
+static void drop_on_reuseport(const struct test *t)
+{
+       struct sockaddr_storage dst = { 0 };
+       int client, server1, server2, err;
+       struct bpf_link *lookup_link;
+       ssize_t n;
+
+       lookup_link = attach_lookup_prog(t->lookup_prog);
+       if (!lookup_link)
+               return;
+
+       server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+                             t->reuseport_prog);
+       if (server1 < 0)
+               goto detach;
+
+       err = update_lookup_map(t->sock_map, SERVER_A, server1);
+       if (err)
+               goto close_srv1;
+
+       /* Second server on the destination address; it must never be reached */
+       server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
+                             NULL /* reuseport prog */);
+       if (server2 < 0)
+               goto close_srv1;
+
+       client = make_socket(t->sotype, t->connect_to.ip,
+                            t->connect_to.port, &dst);
+       if (client < 0)
+               goto close_srv2;
+
+       err = connect(client, (void *)&dst, inetaddr_len(&dst));
+       if (t->sotype == SOCK_DGRAM) {
+               err = send_byte(client);
+               if (err)
+                       goto close_all;
+
+               /* Read out asynchronous error */
+               n = recv(client, NULL, 0, 0);
+               err = n == -1;
+       }
+       if (CHECK(!err || errno != ECONNREFUSED, "connect",
+                 "unexpected success or error\n"))
+               log_err("expected ECONNREFUSED on connect");
+
+close_all:
+       close(client);
+close_srv2:
+       close(server2);
+close_srv1:
+       close(server1);
+detach:
+       bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_reuseport(struct test_sk_lookup *skel)
+{
+       const struct test tests[] = {
+               {
+                       .desc           = "TCP IPv4 drop on reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a,
+                       .reuseport_prog = skel->progs.reuseport_drop,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4, INT_PORT },
+               },
+               {
+                       .desc           = "TCP IPv6 drop on reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a,
+                       .reuseport_prog = skel->progs.reuseport_drop,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_STREAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { INT_IP6, INT_PORT },
+               },
+               {
+                       .desc           = "UDP IPv4 drop on reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a,
+                       .reuseport_prog = skel->progs.reuseport_drop,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP4, EXT_PORT },
+                       .listen_at      = { INT_IP4, INT_PORT },
+               },
+               {
+                       .desc           = "TCP IPv6 drop on reuseport",
+                       .lookup_prog    = skel->progs.select_sock_a,
+                       .reuseport_prog = skel->progs.reuseport_drop,
+                       .sock_map       = skel->maps.redir_map,
+                       .sotype         = SOCK_DGRAM,
+                       .connect_to     = { EXT_IP6, EXT_PORT },
+                       .listen_at      = { INT_IP6, INT_PORT },
+               },
+       };
+       const struct test *t;
+
+       for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+               if (test__start_subtest(t->desc))
+                       drop_on_reuseport(t);
+       }
+}
+
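+/* Start MAX_SERVERS listeners on an address the client never connects
+ * to and let the lookup program under test assign one of them. The
+ * connection is expected to land on SERVER_B.
+ */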
+static void run_sk_assign(struct test_sk_lookup *skel,
+                         struct bpf_program *lookup_prog,
+                         const char *listen_ip, const char *connect_ip)
+{
+       int client_fd, peer_fd, server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
+       struct bpf_link *lookup_link;
+       int i, err;
+
+       lookup_link = attach_lookup_prog(lookup_prog);
+       if (!lookup_link)
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+               server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+               if (server_fds[i] < 0)
+                       goto close_servers;
+
+               err = update_lookup_map(skel->maps.redir_map, i,
+                                       server_fds[i]);
+               if (err)
+                       goto close_servers;
+       }
+
+       client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
+       if (client_fd < 0)
+               goto close_servers;
+
+       peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
+       if (CHECK(peer_fd < 0, "accept", "failed\n"))
+               goto close_client;
+
+       close(peer_fd);
+close_client:
+       close(client_fd);
+close_servers:
+       for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+               if (server_fds[i] != -1)
+                       close(server_fds[i]);
+       }
+       bpf_link__destroy(lookup_link);
+}
+
+static void run_sk_assign_v4(struct test_sk_lookup *skel,
+                            struct bpf_program *lookup_prog)
+{
+       run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4);
+}
+
+static void run_sk_assign_v6(struct test_sk_lookup *skel,
+                            struct bpf_program *lookup_prog)
+{
+       run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6);
+}
+
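+/* bpf_sk_assign() must refuse an established (connected) socket. The
+ * sk_assign_esocknosupport program checks for -ESOCKTNOSUPPORT and
+ * passes the packet on to the regular lookup.
+ */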
+static void run_sk_assign_connected(struct test_sk_lookup *skel,
+                                   int sotype)
+{
+       int err, client_fd, connected_fd, server_fd;
+       struct bpf_link *lookup_link;
+
+       server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL);
+       if (server_fd < 0)
+               return;
+
+       connected_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+       if (connected_fd < 0)
+               goto out_close_server;
+
+       /* Put a connected socket in redirect map */
+       err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd);
+       if (err)
+               goto out_close_connected;
+
+       lookup_link = attach_lookup_prog(skel->progs.sk_assign_esocknosupport);
+       if (!lookup_link)
+               goto out_close_connected;
+
+       /* Try to redirect TCP SYN / UDP packet to a connected socket */
+       client_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+       if (client_fd < 0)
+               goto out_unlink_prog;
+       if (sotype == SOCK_DGRAM) {
+               send_byte(client_fd);
+               recv_byte(server_fd);
+       }
+
+       close(client_fd);
+out_unlink_prog:
+       bpf_link__destroy(lookup_link);
+out_close_connected:
+       close(connected_fd);
+out_close_server:
+       close(server_fd);
+}
+
+static void test_sk_assign_helper(struct test_sk_lookup *skel)
+{
+       if (test__start_subtest("sk_assign returns EEXIST"))
+               run_sk_assign_v4(skel, skel->progs.sk_assign_eexist);
+       if (test__start_subtest("sk_assign honors F_REPLACE"))
+               run_sk_assign_v4(skel, skel->progs.sk_assign_replace_flag);
+       if (test__start_subtest("sk_assign accepts NULL socket"))
+               run_sk_assign_v4(skel, skel->progs.sk_assign_null);
+       if (test__start_subtest("access ctx->sk"))
+               run_sk_assign_v4(skel, skel->progs.access_ctx_sk);
+       if (test__start_subtest("narrow access to ctx v4"))
+               run_sk_assign_v4(skel, skel->progs.ctx_narrow_access);
+       if (test__start_subtest("narrow access to ctx v6"))
+               run_sk_assign_v6(skel, skel->progs.ctx_narrow_access);
+       if (test__start_subtest("sk_assign rejects TCP established"))
+               run_sk_assign_connected(skel, SOCK_STREAM);
+       if (test__start_subtest("sk_assign rejects UDP connected"))
+               run_sk_assign_connected(skel, SOCK_DGRAM);
+}
+
+struct test_multi_prog {
+       const char *desc;
+       struct bpf_program *prog1;
+       struct bpf_program *prog2;
+       struct bpf_map *redir_map;
+       struct bpf_map *run_map;
+       int expect_errno;
+       struct inet_addr listen_at;
+};
+
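+/* Attach two lookup programs at once and verify from the PROG_DONE
+ * markers in run_map that both of them ran.
+ */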
+static void run_multi_prog_lookup(const struct test_multi_prog *t)
+{
+       struct sockaddr_storage dst = {};
+       int map_fd, server_fd, client_fd;
+       struct bpf_link *link1, *link2;
+       int prog_idx, done, err;
+
+       map_fd = bpf_map__fd(t->run_map);
+
+       done = 0;
+       prog_idx = PROG1;
+       err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+       if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+               return;
+       prog_idx = PROG2;
+       err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+       if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+               return;
+
+       link1 = attach_lookup_prog(t->prog1);
+       if (!link1)
+               return;
+       link2 = attach_lookup_prog(t->prog2);
+       if (!link2)
+               goto out_unlink1;
+
+       server_fd = make_server(SOCK_STREAM, t->listen_at.ip,
+                               t->listen_at.port, NULL);
+       if (server_fd < 0)
+               goto out_unlink2;
+
+       err = update_lookup_map(t->redir_map, SERVER_A, server_fd);
+       if (err)
+               goto out_close_server;
+
+       client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst);
+       if (client_fd < 0)
+               goto out_close_server;
+
+       err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+       if (CHECK(err && !t->expect_errno, "connect",
+                 "unexpected error %d\n", errno))
+               goto out_close_client;
+       if (CHECK(err && t->expect_errno && errno != t->expect_errno,
+                 "connect", "unexpected error %d\n", errno))
+               goto out_close_client;
+
+       done = 0;
+       prog_idx = PROG1;
+       err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+       CHECK(err, "bpf_map_lookup_elem", "failed\n");
+       CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n");
+
+       done = 0;
+       prog_idx = PROG2;
+       err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+       CHECK(err, "bpf_map_lookup_elem", "failed\n");
+       CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n");
+
+out_close_client:
+       close(client_fd);
+out_close_server:
+       close(server_fd);
+out_unlink2:
+       bpf_link__destroy(link2);
+out_unlink1:
+       bpf_link__destroy(link1);
+}
+
+static void test_multi_prog_lookup(struct test_sk_lookup *skel)
+{
+       struct test_multi_prog tests[] = {
+               {
+                       .desc           = "multi prog - pass, pass",
+                       .prog1          = skel->progs.multi_prog_pass1,
+                       .prog2          = skel->progs.multi_prog_pass2,
+                       .listen_at      = { EXT_IP4, EXT_PORT },
+               },
+               {
+                       .desc           = "multi prog - drop, drop",
+                       .prog1          = skel->progs.multi_prog_drop1,
+                       .prog2          = skel->progs.multi_prog_drop2,
+                       .listen_at      = { EXT_IP4, EXT_PORT },
+                       .expect_errno   = ECONNREFUSED,
+               },
+               {
+                       .desc           = "multi prog - pass, drop",
+                       .prog1          = skel->progs.multi_prog_pass1,
+                       .prog2          = skel->progs.multi_prog_drop2,
+                       .listen_at      = { EXT_IP4, EXT_PORT },
+                       .expect_errno   = ECONNREFUSED,
+               },
+               {
+                       .desc           = "multi prog - drop, pass",
+                       .prog1          = skel->progs.multi_prog_drop1,
+                       .prog2          = skel->progs.multi_prog_pass2,
+                       .listen_at      = { EXT_IP4, EXT_PORT },
+                       .expect_errno   = ECONNREFUSED,
+               },
+               {
+                       .desc           = "multi prog - pass, redir",
+                       .prog1          = skel->progs.multi_prog_pass1,
+                       .prog2          = skel->progs.multi_prog_redir2,
+                       .listen_at      = { INT_IP4, INT_PORT },
+               },
+               {
+                       .desc           = "multi prog - redir, pass",
+                       .prog1          = skel->progs.multi_prog_redir1,
+                       .prog2          = skel->progs.multi_prog_pass2,
+                       .listen_at      = { INT_IP4, INT_PORT },
+               },
+               {
+                       .desc           = "multi prog - drop, redir",
+                       .prog1          = skel->progs.multi_prog_drop1,
+                       .prog2          = skel->progs.multi_prog_redir2,
+                       .listen_at      = { INT_IP4, INT_PORT },
+               },
+               {
+                       .desc           = "multi prog - redir, drop",
+                       .prog1          = skel->progs.multi_prog_redir1,
+                       .prog2          = skel->progs.multi_prog_drop2,
+                       .listen_at      = { INT_IP4, INT_PORT },
+               },
+               {
+                       .desc           = "multi prog - redir, redir",
+                       .prog1          = skel->progs.multi_prog_redir1,
+                       .prog2          = skel->progs.multi_prog_redir2,
+                       .listen_at      = { INT_IP4, INT_PORT },
+               },
+       };
+       struct test_multi_prog *t;
+
+       for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+               t->redir_map = skel->maps.redir_map;
+               t->run_map = skel->maps.run_map;
+               if (test__start_subtest(t->desc))
+                       run_multi_prog_lookup(t);
+       }
+}
+
+static void run_tests(struct test_sk_lookup *skel)
+{
+       if (test__start_subtest("query lookup prog"))
+               query_lookup_prog(skel);
+       test_redirect_lookup(skel);
+       test_drop_on_lookup(skel);
+       test_drop_on_reuseport(skel);
+       test_sk_assign_helper(skel);
+       test_multi_prog_lookup(skel);
+}
+
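+/* Run everything in a throwaway network namespace with the external
+ * and internal IPv6 test addresses assigned to loopback.
+ */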
+static int switch_netns(void)
+{
+       static const char * const setup_script[] = {
+               "ip -6 addr add dev lo " EXT_IP6 "/128 nodad",
+               "ip -6 addr add dev lo " INT_IP6 "/128 nodad",
+               "ip link set dev lo up",
+               NULL,
+       };
+       const char * const *cmd;
+       int err;
+
+       err = unshare(CLONE_NEWNET);
+       if (CHECK(err, "unshare", "failed\n")) {
+               log_err("unshare(CLONE_NEWNET)");
+               return -1;
+       }
+
+       for (cmd = setup_script; *cmd; cmd++) {
+               err = system(*cmd);
+               if (CHECK(err, "system", "failed\n")) {
+                       log_err("system(%s)", *cmd);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+void test_sk_lookup(void)
+{
+       struct test_sk_lookup *skel;
+       int err;
+
+       err = switch_netns();
+       if (err)
+               return;
+
+       skel = test_sk_lookup__open_and_load();
+       if (CHECK(!skel, "skel open_and_load", "failed\n"))
+               return;
+
+       run_tests(skel);
+
+       test_sk_lookup__destroy(skel);
+}
index 8547ecb..ec281b0 100644 (file)
@@ -193,11 +193,10 @@ static void run_test(int cgroup_fd)
        if (CHECK_FAIL(server_fd < 0))
                goto close_bpf_object;
 
+       pthread_mutex_lock(&server_started_mtx);
        if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
                                      (void *)&server_fd)))
                goto close_server_fd;
-
-       pthread_mutex_lock(&server_started_mtx);
        pthread_cond_wait(&server_started, &server_started_mtx);
        pthread_mutex_unlock(&server_started_mtx);
 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
new file mode 100644 (file)
index 0000000..0176573
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_with_cpumap_helpers.skel.h"
+
+#define IFINDEX_LO     1
+
+void test_xdp_with_cpumap_helpers(void)
+{
+       struct test_xdp_with_cpumap_helpers *skel;
+       struct bpf_prog_info info = {};
+       struct bpf_cpumap_val val = {
+               .qsize = 192,
+       };
+       __u32 duration = 0, idx = 0;
+       __u32 len = sizeof(info);
+       int err, prog_fd, map_fd;
+
+       skel = test_xdp_with_cpumap_helpers__open_and_load();
+       if (CHECK_FAIL(!skel)) {
+               perror("test_xdp_with_cpumap_helpers__open_and_load");
+               return;
+       }
+
+       /* Programs that redirect into a cpumap whose entries carry
+        * programs can not be attached in generic XDP (SKB) mode
+        */
+       prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+       err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+       CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
+             "should have failed\n");
+
+       prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+       map_fd = bpf_map__fd(skel->maps.cpu_map);
+       err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+       if (CHECK_FAIL(err))
+               goto out_close;
+
+       val.bpf_prog.fd = prog_fd;
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
+             err, errno);
+
+       err = bpf_map_lookup_elem(map_fd, &idx, &val);
+       CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
+       CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
+             "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+       /* can not attach BPF_XDP_CPUMAP program to a device */
+       err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+       CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
+             "should have failed\n");
+
+       val.qsize = 192;
+       val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+       err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+       CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
+             "should have failed\n");
+
+out_close:
+       test_xdp_with_cpumap_helpers__destroy(skel);
+}
+
+void test_xdp_cpumap_attach(void)
+{
+       if (test__start_subtest("cpumap_with_progs"))
+               test_xdp_with_cpumap_helpers();
+}
index 7de98a6..95989f4 100644 (file)
@@ -36,10 +36,10 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
        if (!nlk->groups)  {
                group = 0;
        } else {
-               /* FIXME: temporary use bpf_probe_read here, needs
+               /* FIXME: temporary use bpf_probe_read_kernel here, needs
                 * verifier support to do direct access.
                 */
-               bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+               bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]);
        }
        BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
                       nlk->portid, (u32)group,
@@ -56,7 +56,7 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
                 * with current verifier.
                 */
                inode = SOCK_INODE(sk);
-               bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+               bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
        }
        BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
 
index 30fd587..54380c5 100644 (file)
@@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk)
                return 0;
 
        inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
-       bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
        return ino;
 }
 
index 10dec43..b4fbddf 100644 (file)
@@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk)
                return 0;
 
        inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
-       bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
        return ino;
 }
 
index 7053784..f258583 100644 (file)
@@ -18,7 +18,7 @@ static long sock_i_ino(const struct sock *sk)
                return 0;
 
        inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
-       bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
        return ino;
 }
 
index c1175a6..65f93bb 100644 (file)
@@ -25,7 +25,7 @@ static long sock_i_ino(const struct sock *sk)
                return 0;
 
        inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
-       bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
        return ino;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
new file mode 100644 (file)
index 0000000..bbf8296
--- /dev/null
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define IP4(a, b, c, d)                                        \
+       bpf_htonl((((__u32)(a) & 0xffU) << 24) |        \
+                 (((__u32)(b) & 0xffU) << 16) |        \
+                 (((__u32)(c) & 0xffU) <<  8) |        \
+                 (((__u32)(d) & 0xffU) <<  0))
+#define IP6(aaaa, bbbb, cccc, dddd)                    \
+       { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
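+/* For example, IP4(127, 0, 0, 1) expands to bpf_htonl(0x7f000001),
+ * the loopback address in network byte order.
+ */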
+
+#define MAX_SOCKS 32
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __uint(max_entries, MAX_SOCKS);
+       __type(key, __u32);
+       __type(value, __u64);
+} redir_map SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 2);
+       __type(key, int);
+       __type(value, int);
+} run_map SEC(".maps");
+
+enum {
+       PROG1 = 0,
+       PROG2,
+};
+
+enum {
+       SERVER_A = 0,
+       SERVER_B,
+};
+
+/* Addressable key/value constants for convenience */
+static const int KEY_PROG1 = PROG1;
+static const int KEY_PROG2 = PROG2;
+static const int PROG_DONE = 1;
+
+static const __u32 KEY_SERVER_A = SERVER_A;
+static const __u32 KEY_SERVER_B = SERVER_B;
+
+static const __u16 DST_PORT = 7007; /* Host byte order */
+static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
+static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
+
+SEC("sk_lookup/lookup_pass")
+int lookup_pass(struct bpf_sk_lookup *ctx)
+{
+       return SK_PASS;
+}
+
+SEC("sk_lookup/lookup_drop")
+int lookup_drop(struct bpf_sk_lookup *ctx)
+{
+       return SK_DROP;
+}
+
+SEC("sk_reuseport/reuse_pass")
+int reuseport_pass(struct sk_reuseport_md *ctx)
+{
+       return SK_PASS;
+}
+
+SEC("sk_reuseport/reuse_drop")
+int reuseport_drop(struct sk_reuseport_md *ctx)
+{
+       return SK_DROP;
+}
+
+/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
+SEC("sk_lookup/redir_port")
+int redir_port(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err;
+
+       if (ctx->local_port != DST_PORT)
+               return SK_PASS;
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               return SK_PASS;
+
+       err = bpf_sk_assign(ctx, sk, 0);
+       bpf_sk_release(sk);
+       return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip4")
+int redir_ip4(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err;
+
+       if (ctx->family != AF_INET)
+               return SK_PASS;
+       if (ctx->local_port != DST_PORT)
+               return SK_PASS;
+       if (ctx->local_ip4 != DST_IP4)
+               return SK_PASS;
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               return SK_PASS;
+
+       err = bpf_sk_assign(ctx, sk, 0);
+       bpf_sk_release(sk);
+       return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip6")
+int redir_ip6(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err;
+
+       if (ctx->family != AF_INET6)
+               return SK_PASS;
+       if (ctx->local_port != DST_PORT)
+               return SK_PASS;
+       if (ctx->local_ip6[0] != DST_IP6[0] ||
+           ctx->local_ip6[1] != DST_IP6[1] ||
+           ctx->local_ip6[2] != DST_IP6[2] ||
+           ctx->local_ip6[3] != DST_IP6[3])
+               return SK_PASS;
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               return SK_PASS;
+
+       err = bpf_sk_assign(ctx, sk, 0);
+       bpf_sk_release(sk);
+       return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a")
+int select_sock_a(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err;
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               return SK_PASS;
+
+       err = bpf_sk_assign(ctx, sk, 0);
+       bpf_sk_release(sk);
+       return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a_no_reuseport")
+int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err;
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               return SK_DROP;
+
+       err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT);
+       bpf_sk_release(sk);
+       return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_reuseport/select_sock_b")
+int select_sock_b(struct sk_reuseport_md *ctx)
+{
+       __u32 key = KEY_SERVER_B;
+       int err;
+
+       err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0);
+       return err ? SK_DROP : SK_PASS;
+}
+
+/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
+SEC("sk_lookup/sk_assign_eexist")
+int sk_assign_eexist(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err, ret;
+
+       ret = SK_DROP;
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+       if (!sk)
+               goto out;
+       err = bpf_sk_assign(ctx, sk, 0);
+       if (err)
+               goto out;
+       bpf_sk_release(sk);
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               goto out;
+       err = bpf_sk_assign(ctx, sk, 0);
+       if (err != -EEXIST) {
+               bpf_printk("sk_assign returned %d, expected %d\n",
+                          err, -EEXIST);
+               goto out;
+       }
+
+       ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+       if (sk)
+               bpf_sk_release(sk);
+       return ret;
+}
+
+/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
+SEC("sk_lookup/sk_assign_replace_flag")
+int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err, ret;
+
+       ret = SK_DROP;
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               goto out;
+       err = bpf_sk_assign(ctx, sk, 0);
+       if (err)
+               goto out;
+       bpf_sk_release(sk);
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+       if (!sk)
+               goto out;
+       err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+       if (err) {
+               bpf_printk("sk_assign returned %d, expected 0\n", err);
+               goto out;
+       }
+
+       ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+       if (sk)
+               bpf_sk_release(sk);
+       return ret;
+}
+
+/* Check that bpf_sk_assign(sk=NULL) is accepted. */
+SEC("sk_lookup/sk_assign_null")
+int sk_assign_null(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk = NULL;
+       int err, ret;
+
+       ret = SK_DROP;
+
+       err = bpf_sk_assign(ctx, NULL, 0);
+       if (err) {
+               bpf_printk("sk_assign returned %d, expected 0\n", err);
+               goto out;
+       }
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+       if (!sk)
+               goto out;
+       err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+       if (err) {
+               bpf_printk("sk_assign returned %d, expected 0\n", err);
+               goto out;
+       }
+
+       if (ctx->sk != sk)
+               goto out;
+       err = bpf_sk_assign(ctx, NULL, 0);
+       if (err != -EEXIST)
+               goto out;
+       err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+       if (err)
+               goto out;
+       err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+       if (err)
+               goto out;
+
+       ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+       if (sk)
+               bpf_sk_release(sk);
+       return ret;
+}
+
+/* Check that selected sk is accessible through context. */
+SEC("sk_lookup/access_ctx_sk")
+int access_ctx_sk(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk1 = NULL, *sk2 = NULL;
+       int err, ret;
+
+       ret = SK_DROP;
+
+       /* Try accessing unassigned (NULL) ctx->sk field */
+       if (ctx->sk && ctx->sk->family != AF_INET)
+               goto out;
+
+       /* Assign a value to ctx->sk */
+       sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk1)
+               goto out;
+       err = bpf_sk_assign(ctx, sk1, 0);
+       if (err)
+               goto out;
+       if (ctx->sk != sk1)
+               goto out;
+
+       /* Access ctx->sk fields */
+       if (ctx->sk->family != AF_INET ||
+           ctx->sk->type != SOCK_STREAM ||
+           ctx->sk->state != BPF_TCP_LISTEN)
+               goto out;
+
+       /* Reset selection */
+       err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+       if (err)
+               goto out;
+       if (ctx->sk)
+               goto out;
+
+       /* Assign another socket */
+       sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+       if (!sk2)
+               goto out;
+       err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE);
+       if (err)
+               goto out;
+       if (ctx->sk != sk2)
+               goto out;
+
+       /* Access reassigned ctx->sk fields */
+       if (ctx->sk->family != AF_INET ||
+           ctx->sk->type != SOCK_STREAM ||
+           ctx->sk->state != BPF_TCP_LISTEN)
+               goto out;
+
+       ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+       if (sk1)
+               bpf_sk_release(sk1);
+       if (sk2)
+               bpf_sk_release(sk2);
+       return ret;
+}
+
+/* Check narrow loads from ctx fields that support them.
+ *
+ * Narrow loads of size >= target field size from a non-zero offset
+ * are not covered because they give bogus results: the
+ * verifier ignores the offset.
+ */
+SEC("sk_lookup/ctx_narrow_access")
+int ctx_narrow_access(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err, family;
+       __u16 *half;
+       __u8 *byte;
+       bool v4;
+
+       v4 = (ctx->family == AF_INET);
+
+       /* Narrow loads from family field */
+       byte = (__u8 *)&ctx->family;
+       half = (__u16 *)&ctx->family;
+       if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
+           byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+               return SK_DROP;
+       if (half[0] != (v4 ? AF_INET : AF_INET6))
+               return SK_DROP;
+
+       byte = (__u8 *)&ctx->protocol;
+       if (byte[0] != IPPROTO_TCP ||
+           byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+               return SK_DROP;
+       half = (__u16 *)&ctx->protocol;
+       if (half[0] != IPPROTO_TCP)
+               return SK_DROP;
+
+       /* Narrow loads from remote_port field. Expect non-0 value. */
+       byte = (__u8 *)&ctx->remote_port;
+       if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
+               return SK_DROP;
+       half = (__u16 *)&ctx->remote_port;
+       if (half[0] == 0)
+               return SK_DROP;
+
+       /* Narrow loads from local_port field. Expect DST_PORT. */
+       byte = (__u8 *)&ctx->local_port;
+       if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
+           byte[1] != ((DST_PORT >> 8) & 0xff) ||
+           byte[2] != 0 || byte[3] != 0)
+               return SK_DROP;
+       half = (__u16 *)&ctx->local_port;
+       if (half[0] != DST_PORT)
+               return SK_DROP;
+
+       /* Narrow loads from IPv4 fields */
+       if (v4) {
+               /* Expect non-0.0.0.0 in remote_ip4 */
+               byte = (__u8 *)&ctx->remote_ip4;
+               if (byte[0] == 0 && byte[1] == 0 &&
+                   byte[2] == 0 && byte[3] == 0)
+                       return SK_DROP;
+               half = (__u16 *)&ctx->remote_ip4;
+               if (half[0] == 0 && half[1] == 0)
+                       return SK_DROP;
+
+               /* Expect DST_IP4 in local_ip4 */
+               byte = (__u8 *)&ctx->local_ip4;
+               if (byte[0] != ((DST_IP4 >>  0) & 0xff) ||
+                   byte[1] != ((DST_IP4 >>  8) & 0xff) ||
+                   byte[2] != ((DST_IP4 >> 16) & 0xff) ||
+                   byte[3] != ((DST_IP4 >> 24) & 0xff))
+                       return SK_DROP;
+               half = (__u16 *)&ctx->local_ip4;
+               if (half[0] != ((DST_IP4 >>  0) & 0xffff) ||
+                   half[1] != ((DST_IP4 >> 16) & 0xffff))
+                       return SK_DROP;
+       } else {
+               /* Expect 0.0.0.0 IPs when family != AF_INET */
+               byte = (__u8 *)&ctx->remote_ip4;
+               if (byte[0] != 0 || byte[1] != 0 &&
+                   byte[2] != 0 || byte[3] != 0)
+                       return SK_DROP;
+               half = (__u16 *)&ctx->remote_ip4;
+               if (half[0] != 0 || half[1] != 0)
+                       return SK_DROP;
+
+               byte = (__u8 *)&ctx->local_ip4;
+               if (byte[0] != 0 || byte[1] != 0 &&
+                   byte[2] != 0 || byte[3] != 0)
+                       return SK_DROP;
+               half = (__u16 *)&ctx->local_ip4;
+               if (half[0] != 0 || half[1] != 0)
+                       return SK_DROP;
+       }
+
+       /* Narrow loads from IPv6 fields */
+       if (!v4) {
+               /* Expect non-:: IP in remote_ip6 */
+               byte = (__u8 *)&ctx->remote_ip6;
+               if (byte[0] == 0 && byte[1] == 0 &&
+                   byte[2] == 0 && byte[3] == 0 &&
+                   byte[4] == 0 && byte[5] == 0 &&
+                   byte[6] == 0 && byte[7] == 0 &&
+                   byte[8] == 0 && byte[9] == 0 &&
+                   byte[10] == 0 && byte[11] == 0 &&
+                   byte[12] == 0 && byte[13] == 0 &&
+                   byte[14] == 0 && byte[15] == 0)
+                       return SK_DROP;
+               half = (__u16 *)&ctx->remote_ip6;
+               if (half[0] == 0 && half[1] == 0 &&
+                   half[2] == 0 && half[3] == 0 &&
+                   half[4] == 0 && half[5] == 0 &&
+                   half[6] == 0 && half[7] == 0)
+                       return SK_DROP;
+
+               /* Expect DST_IP6 in local_ip6 */
+               byte = (__u8 *)&ctx->local_ip6;
+               if (byte[0] != ((DST_IP6[0] >>  0) & 0xff) ||
+                   byte[1] != ((DST_IP6[0] >>  8) & 0xff) ||
+                   byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
+                   byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
+                   byte[4] != ((DST_IP6[1] >>  0) & 0xff) ||
+                   byte[5] != ((DST_IP6[1] >>  8) & 0xff) ||
+                   byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
+                   byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
+                   byte[8] != ((DST_IP6[2] >>  0) & 0xff) ||
+                   byte[9] != ((DST_IP6[2] >>  8) & 0xff) ||
+                   byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
+                   byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
+                   byte[12] != ((DST_IP6[3] >>  0) & 0xff) ||
+                   byte[13] != ((DST_IP6[3] >>  8) & 0xff) ||
+                   byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
+                   byte[15] != ((DST_IP6[3] >> 24) & 0xff))
+                       return SK_DROP;
+               half = (__u16 *)&ctx->local_ip6;
+               if (half[0] != ((DST_IP6[0] >>  0) & 0xffff) ||
+                   half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
+                   half[2] != ((DST_IP6[1] >>  0) & 0xffff) ||
+                   half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
+                   half[4] != ((DST_IP6[2] >>  0) & 0xffff) ||
+                   half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
+                   half[6] != ((DST_IP6[3] >>  0) & 0xffff) ||
+                   half[7] != ((DST_IP6[3] >> 16) & 0xffff))
+                       return SK_DROP;
+       } else {
+               /* Expect :: IPs when family != AF_INET6 */
+               byte = (__u8 *)&ctx->remote_ip6;
+               if (byte[0] != 0 || byte[1] != 0 ||
+                   byte[2] != 0 || byte[3] != 0 ||
+                   byte[4] != 0 || byte[5] != 0 ||
+                   byte[6] != 0 || byte[7] != 0 ||
+                   byte[8] != 0 || byte[9] != 0 ||
+                   byte[10] != 0 || byte[11] != 0 ||
+                   byte[12] != 0 || byte[13] != 0 ||
+                   byte[14] != 0 || byte[15] != 0)
+                       return SK_DROP;
+               half = (__u16 *)&ctx->remote_ip6;
+               if (half[0] != 0 || half[1] != 0 ||
+                   half[2] != 0 || half[3] != 0 ||
+                   half[4] != 0 || half[5] != 0 ||
+                   half[6] != 0 || half[7] != 0)
+                       return SK_DROP;
+
+               byte = (__u8 *)&ctx->local_ip6;
+               if (byte[0] != 0 || byte[1] != 0 ||
+                   byte[2] != 0 || byte[3] != 0 ||
+                   byte[4] != 0 || byte[5] != 0 ||
+                   byte[6] != 0 || byte[7] != 0 ||
+                   byte[8] != 0 || byte[9] != 0 ||
+                   byte[10] != 0 || byte[11] != 0 ||
+                   byte[12] != 0 || byte[13] != 0 ||
+                   byte[14] != 0 || byte[15] != 0)
+                       return SK_DROP;
+               half = (__u16 *)&ctx->local_ip6;
+               if (half[0] != 0 || half[1] != 0 ||
+                   half[2] != 0 || half[3] != 0 ||
+                   half[4] != 0 || half[5] != 0 ||
+                   half[6] != 0 || half[7] != 0)
+                       return SK_DROP;
+       }
+
+       /* Success, redirect to KEY_SERVER_B */
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+       if (sk) {
+               bpf_sk_assign(ctx, sk, 0);
+               bpf_sk_release(sk);
+       }
+       return SK_PASS;
+}
+
+/* Check that sk_assign rejects SERVER_A socket with -ESOCKTNOSUPPORT */
+SEC("sk_lookup/sk_assign_esocknosupport")
+int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err, ret;
+
+       ret = SK_DROP;
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               goto out;
+
+       err = bpf_sk_assign(ctx, sk, 0);
+       if (err != -ESOCKTNOSUPPORT) {
+               bpf_printk("sk_assign returned %d, expected %d\n",
+                          err, -ESOCKTNOSUPPORT);
+               goto out;
+       }
+
+       ret = SK_PASS; /* Success, pass to regular lookup */
+out:
+       if (sk)
+               bpf_sk_release(sk);
+       return ret;
+}
+
+SEC("sk_lookup/multi_prog_pass1")
+int multi_prog_pass1(struct bpf_sk_lookup *ctx)
+{
+       bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+       return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_pass2")
+int multi_prog_pass2(struct bpf_sk_lookup *ctx)
+{
+       bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+       return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_drop1")
+int multi_prog_drop1(struct bpf_sk_lookup *ctx)
+{
+       bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+       return SK_DROP;
+}
+
+SEC("sk_lookup/multi_prog_drop2")
+int multi_prog_drop2(struct bpf_sk_lookup *ctx)
+{
+       bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+       return SK_DROP;
+}
+
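+/* Helper shared by the multi_prog_redir* programs below: select the
+ * socket at SERVER_A and report whether the assignment succeeded.
+ */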
+static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
+{
+       struct bpf_sock *sk;
+       int err;
+
+       sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+       if (!sk)
+               return SK_DROP;
+
+       err = bpf_sk_assign(ctx, sk, 0);
+       bpf_sk_release(sk);
+       if (err)
+               return SK_DROP;
+
+       return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir1")
+int multi_prog_redir1(struct bpf_sk_lookup *ctx)
+{
+       int ret;
+
+       ret = select_server_a(ctx);
+       bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+       return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir2")
+int multi_prog_redir2(struct bpf_sk_lookup *ctx)
+{
+       int ret;
+
+       ret = select_server_a(ctx);
+       bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+       return SK_PASS;
+}
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
new file mode 100644 (file)
index 0000000..59ee4f1
--- /dev/null
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO     1
+
+struct {
+       __uint(type, BPF_MAP_TYPE_CPUMAP);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_cpumap_val));
+       __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+       return bpf_redirect_map(&cpu_map, 1, 0);
+}
+
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
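+/* Placed in the "xdp_cpumap/" section, so it is loaded with the
+ * BPF_XDP_CPUMAP expected attach type: valid as a cpu_map entry,
+ * but not attachable to a device.
+ */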
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+       if (ctx->ingress_ifindex == IFINDEX_LO)
+               return XDP_DROP;
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index 9df0d2a..4f6444b 100755 (executable)
@@ -10,7 +10,13 @@ if [ "$(id -u)" != "0" ]; then
        exit $ksft_skip
 fi
 
-SRC_TREE=../../../../
+if [ "$building_out_of_srctree" ]; then
+       # We are in linux-build/kselftest/bpf
+       OUTPUT=../../
+else
+       # We are in linux/tools/testing/selftests/bpf
+       OUTPUT=../../../../
+fi
 
 test_run()
 {
@@ -19,8 +25,8 @@ test_run()
 
        echo "[ JIT enabled:$1 hardened:$2 ]"
        dmesg -C
-       if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
-               insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+       if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
+               insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null
                if [ $? -ne 0 ]; then
                        rc=1
                fi
index 785eabf..5620919 100755 (executable)
@@ -140,7 +140,7 @@ ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
 ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
 ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
 sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -INT $!
+kill -TERM $!
 
 if [[ $(< $TMP_FILE) != "foobar" ]]; then
        exit 1
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
new file mode 100644 (file)
index 0000000..2ad5f97
--- /dev/null
@@ -0,0 +1,492 @@
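+/* Exercise the 1, 2 and 4-byte narrow loads the verifier accepts on
+ * struct bpf_sk_lookup context fields.
+ */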
+{
+       "valid 1,2,4,8-byte reads from bpf_sk_lookup",
+       .insns = {
+               /* 1-byte read from family field */
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, family)),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, family) + 1),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, family) + 2),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, family) + 3),
+               /* 2-byte read from family field */
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, family)),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, family) + 2),
+               /* 4-byte read from family field */
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, family)),
+
+               /* 1-byte read from protocol field */
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, protocol)),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, protocol) + 1),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, protocol) + 2),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, protocol) + 3),
+               /* 2-byte read from protocol field */
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, protocol)),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, protocol) + 2),
+               /* 4-byte read from protocol field */
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, protocol)),
+
+               /* 1-byte read from remote_ip4 field */
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip4)),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip4) + 1),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip4) + 3),
+               /* 2-byte read from remote_ip4 field */
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip4)),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+               /* 4-byte read from remote_ip4 field */
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip4)),
+
+               /* 1-byte read from remote_ip6 field */
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6)),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 1),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 3),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 5),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 7),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 9),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 11),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 13),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 15),
+               /* 2-byte read from remote_ip6 field */
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6)),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+               /* 4-byte read from remote_ip6 field */
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6)),
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+
+               /* 1-byte read from remote_port field */
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_port)),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_port) + 1),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_port) + 2),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_port) + 3),
+               /* 2-byte read from remote_port field */
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_port)),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_port) + 2),
+               /* 4-byte read from remote_port field */
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_port)),
+
+               /* 1-byte read from local_ip4 field */
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip4)),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip4) + 1),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip4) + 3),
+               /* 2-byte read from local_ip4 field */
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip4)),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+               /* 4-byte read from local_ip4 field */
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip4)),
+
+               /* 1-byte read from local_ip6 field */
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6)),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 1),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 3),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 5),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 7),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 9),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 11),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 13),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 15),
+               /* 2-byte read from local_ip6 field */
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6)),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+               /* 4-byte read from local_ip6 field */
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6)),
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+
+               /* 1-byte read from local_port field */
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_port)),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_port) + 1),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_port) + 2),
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_port) + 3),
+               /* 2-byte read from local_port field */
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_port)),
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_port) + 2),
+               /* 4-byte read from local_port field */
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_port)),
+
+               /* 8-byte read from sk field */
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, sk)),
+
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .result = ACCEPT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
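+/* The ACCEPT case above exercises every load this context permits:
+ * 1-byte reads at each offset of the 4-byte fields, aligned 2-byte
+ * reads, full 4-byte reads, and a full 8-byte read of the
+ * pointer-sized sk field. The REJECT cases below probe the converse
+ * of each of these rules.
+ */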
+/* invalid 8-byte reads from 4-byte fields in bpf_sk_lookup */
+{
+       "invalid 8-byte read from bpf_sk_lookup family field",
+       .insns = {
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, family)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 8-byte read from bpf_sk_lookup protocol field",
+       .insns = {
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, protocol)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
+       .insns = {
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip4)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 8-byte read from bpf_sk_lookup remote_ip6 field",
+       .insns = {
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_ip6)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 8-byte read from bpf_sk_lookup remote_port field",
+       .insns = {
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, remote_port)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 8-byte read from bpf_sk_lookup local_ip4 field",
+       .insns = {
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip4)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 8-byte read from bpf_sk_lookup local_ip6 field",
+       .insns = {
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_ip6)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 8-byte read from bpf_sk_lookup local_port field",
+       .insns = {
+               BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, local_port)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
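+/* Each BPF_DW load above is rejected because it is wider than the
+ * 4-byte field (or 4-byte array element) it targets.
+ */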
+/* invalid 1-, 2-, and 4-byte reads from the 8-byte sk field in bpf_sk_lookup */
+{
+       "invalid 4-byte read from bpf_sk_lookup sk field",
+       .insns = {
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, sk)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 2-byte read from bpf_sk_lookup sk field",
+       .insns = {
+               BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, sk)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 1-byte read from bpf_sk_lookup sk field",
+       .insns = {
+               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+                           offsetof(struct bpf_sk_lookup, sk)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
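+/* Conversely, the pointer-sized sk field may only be read with a
+ * full 8-byte (BPF_DW) load; the narrower reads above are rejected.
+ */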
+/* out-of-bounds and unaligned reads from bpf_sk_lookup */
+{
+       "invalid 4-byte read past end of bpf_sk_lookup",
+       .insns = {
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                           sizeof(struct bpf_sk_lookup)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 4-byte unaligned read from bpf_sk_lookup at odd offset",
+       .insns = {
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
+       .insns = {
+               BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
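+/* The two unaligned reads above fall inside the struct but do not
+ * start on a field boundary, so they are rejected just like the
+ * read past the end of the struct.
+ */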
+/* in-bounds and out-of-bounds writes to bpf_sk_lookup */
+{
+       "invalid 8-byte write to bpf_sk_lookup",
+       .insns = {
+               BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+               BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 4-byte write to bpf_sk_lookup",
+       .insns = {
+               BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+               BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 2-byte write to bpf_sk_lookup",
+       .insns = {
+               BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+               BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 1-byte write to bpf_sk_lookup",
+       .insns = {
+               BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+               BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+       "invalid 4-byte write past end of bpf_sk_lookup",
+       .insns = {
+               BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+               BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+                           sizeof(struct bpf_sk_lookup)),
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "invalid bpf_context access",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
+},
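+/* The write tests above confirm that bpf_sk_lookup is a read-only
+ * context: stores are rejected regardless of size, offset, or
+ * bounds.
+ */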