Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

author David S. Miller <davem@davemloft.net>

Sat, 16 Mar 2019 19:20:08 +0000 (12:20 -0700)

committer David S. Miller <davem@davemloft.net>

Sat, 16 Mar 2019 19:20:08 +0000 (12:20 -0700)
author David S. Miller <davem@davemloft.net>
Sat, 16 Mar 2019 19:20:08 +0000 (12:20 -0700)
committer David S. Miller <davem@davemloft.net>
Sat, 16 Mar 2019 19:20:08 +0000 (12:20 -0700)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h

index c5698a5..23f7ed7 100644 (file)
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -302,6 +302,7 @@
  /* Misc instructions for BPF compiler */
  #define PPC_INST_LBZ                   0x88000000
  #define PPC_INST_LD                    0xe8000000
+#define PPC_INST_LDX                   0x7c00002a
  #define PPC_INST_LHZ                   0xa0000000
  #define PPC_INST_LWZ                   0x80000000
  #define PPC_INST_LHBRX                 0x7c00062c
@@ -309,6 +310,7 @@
  #define PPC_INST_STB                   0x98000000
  #define PPC_INST_STH                   0xb0000000
  #define PPC_INST_STD                   0xf8000000
+#define PPC_INST_STDX                  0x7c00012a
  #define PPC_INST_STDU                  0xf8000001
  #define PPC_INST_STW                   0x90000000
  #define PPC_INST_STWU                  0x94000000
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h

index 549e949..dcac377 100644 (file)
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,6 +51,8 @@
  #define PPC_LIS(r, i)          PPC_ADDIS(r, 0, i)
  #define PPC_STD(r, base, i)    EMIT(PPC_INST_STD | ___PPC_RS(r) |            \
                                      ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_STDX(r, base, b)   EMIT(PPC_INST_STDX | ___PPC_RS(r) |           \
+                                    ___PPC_RA(base) | ___PPC_RB(b))
  #define PPC_STDU(r, base, i)   EMIT(PPC_INST_STDU | ___PPC_RS(r) |           \
                                      ___PPC_RA(base) | ((i) & 0xfffc))
  #define PPC_STW(r, base, i)    EMIT(PPC_INST_STW | ___PPC_RS(r) |            \
@@ -65,7 +67,9 @@
  #define PPC_LBZ(r, base, i)    EMIT(PPC_INST_LBZ | ___PPC_RT(r) |            \
                                      ___PPC_RA(base) | IMM_L(i))
  #define PPC_LD(r, base, i)     EMIT(PPC_INST_LD | ___PPC_RT(r) |             \
-                                    ___PPC_RA(base) | IMM_L(i))
+                                    ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_LDX(r, base, b)    EMIT(PPC_INST_LDX | ___PPC_RT(r) |            \
+                                    ___PPC_RA(base) | ___PPC_RB(b))
  #define PPC_LWZ(r, base, i)    EMIT(PPC_INST_LWZ | ___PPC_RT(r) |            \
                                      ___PPC_RA(base) | IMM_L(i))
  #define PPC_LHZ(r, base, i)    EMIT(PPC_INST_LHZ | ___PPC_RT(r) |            \
@@ -85,17 +89,6 @@
                                         ___PPC_RA(a) | ___PPC_RB(b))
  #define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) |          \
                                         ___PPC_RA(a) | ___PPC_RB(b))
-
-#ifdef CONFIG_PPC64
-#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0)
-#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
-#else
-#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
-#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
-#endif
-
  #define PPC_CMPWI(a, i)                EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
  #define PPC_CMPDI(a, i)                EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
  #define PPC_CMPW(a, b)         EMIT(PPC_INST_CMPW | ___PPC_RA(a) |           \
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h

index dc50a8d..21744d8 100644 (file)
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -122,6 +122,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
  #define PPC_NTOHS_OFFS(r, base, i)     PPC_LHZ_OFFS(r, base, i)
  #endif
  
+#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
+#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
+#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
+
  #define SEEN_DATAREF 0x10000 /* might call external helpers */
  #define SEEN_XREG    0x20000 /* X reg is used */
  #define SEEN_MEM     0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h

index 3609be4..47f441f 100644 (file)
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -68,6 +68,26 @@ static const int b2p[] = {
  /* PPC NVR range -- update this if we ever use NVRs below r27 */
  #define BPF_PPC_NVR_MIN                27
  
+/*
+ * WARNING: These can use TMP_REG_2 if the offset is not at word boundary,
+ * so ensure that it isn't in use already.
+ */
+#define PPC_BPF_LL(r, base, i) do {                                          \
+                               if ((i) % 4) {                                \
+                                       PPC_LI(b2p[TMP_REG_2], (i));          \
+                                       PPC_LDX(r, base, b2p[TMP_REG_2]);     \
+                               } else                                        \
+                                       PPC_LD(r, base, i);                   \
+                               } while(0)
+#define PPC_BPF_STL(r, base, i) do {                                         \
+                               if ((i) % 4) {                                \
+                                       PPC_LI(b2p[TMP_REG_2], (i));          \
+                                       PPC_STDX(r, base, b2p[TMP_REG_2]);    \
+                               } else                                        \
+                                       PPC_STD(r, base, i);                  \
+                               } while(0)
+#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
+
  #define SEEN_FUNC      0x1000 /* might call external helpers */
  #define SEEN_STACK     0x2000 /* uses BPF stack */
  #define SEEN_TAILCALL  0x4000 /* uses tail calls */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c

index 4194d3c..21a1dcd 100644 (file)
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -252,7 +252,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
          * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
          *   goto out;
          */
-       PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+       PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
         PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
         PPC_BCC(COND_GT, out);
  
@@ -265,7 +265,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
         /* prog = array->ptrs[index]; */
         PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
         PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
-       PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
+       PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
  
         /*
          * if (prog == NULL)
@@ -275,7 +275,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
         PPC_BCC(COND_EQ, out);
  
         /* goto *(prog->bpf_func + prologue_size); */
-       PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
+       PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
  #ifdef PPC64_ELF_ABI_v1
         /* skip past the function descriptor */
         PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
@@ -606,7 +606,7 @@ bpf_alu32_trunc:
                                  * the instructions generated will remain the
                                  * same across all passes
                                  */
-                               PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx));
+                               PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
                                 PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
                                 PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
                                 break;
@@ -662,7 +662,7 @@ emit_clear:
                                 PPC_LI32(b2p[TMP_REG_1], imm);
                                 src_reg = b2p[TMP_REG_1];
                         }
-                       PPC_STD(src_reg, dst_reg, off);
+                       PPC_BPF_STL(src_reg, dst_reg, off);
                         break;
  
                 /*
@@ -709,7 +709,7 @@ emit_clear:
                         break;
                 /* dst = *(u64 *)(ul) (src + off) */
                 case BPF_LDX | BPF_MEM | BPF_DW:
-                       PPC_LD(dst_reg, src_reg, off);
+                       PPC_BPF_LL(dst_reg, src_reg, off);
                         break;
  
                 /*
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index a2132e0..f02367f 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -193,7 +193,6 @@ enum bpf_arg_type {
  
         ARG_PTR_TO_CTX,         /* pointer to context */
         ARG_ANYTHING,           /* any (initialized) argument is ok */
-       ARG_PTR_TO_SOCKET,      /* pointer to bpf_sock */
         ARG_PTR_TO_SPIN_LOCK,   /* pointer to bpf_spin_lock */
         ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
  };
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h

index 69f7a34..7d8228d 100644 (file)
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -66,6 +66,46 @@ struct bpf_reg_state {
          * same reference to the socket, to determine proper reference freeing.
          */
         u32 id;
+       /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
+        * from a pointer-cast helper, bpf_sk_fullsock() and
+        * bpf_tcp_sock().
+        *
+        * Consider the following where "sk" is a reference counted
+        * pointer returned from "sk = bpf_sk_lookup_tcp();":
+        *
+        * 1: sk = bpf_sk_lookup_tcp();
+        * 2: if (!sk) { return 0; }
+        * 3: fullsock = bpf_sk_fullsock(sk);
+        * 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
+        * 5: tp = bpf_tcp_sock(fullsock);
+        * 6: if (!tp) { bpf_sk_release(sk); return 0; }
+        * 7: bpf_sk_release(sk);
+        * 8: snd_cwnd = tp->snd_cwnd;  // verifier will complain
+        *
+        * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
+        * "tp" ptr should be invalidated also.  In order to do that,
+        * the reg holding "fullsock" and "sk" need to remember
+        * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
+        * such that the verifier can reset all regs which have
+        * ref_obj_id matching the sk_reg->id.
+        *
+        * sk_reg->ref_obj_id is set to sk_reg->id at line 1.
+        * sk_reg->id will stay as NULL-marking purpose only.
+        * After NULL-marking is done, sk_reg->id can be reset to 0.
+        *
+        * After "fullsock = bpf_sk_fullsock(sk);" at line 3,
+        * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
+        *
+        * After "tp = bpf_tcp_sock(fullsock);" at line 5,
+        * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
+        * which is the same as sk_reg->ref_obj_id.
+        *
+        * From the verifier perspective, if sk, fullsock and tp
+        * are not NULL, they are the same ptr with different
+        * reg->type.  In particular, bpf_sk_release(tp) is also
+        * allowed and has the same effect as bpf_sk_release(sk).
+        */
+       u32 ref_obj_id;
         /* For scalar types (SCALAR_VALUE), this represents our knowledge of
          * the actual value.
          * For pointer types, this represents the variable part of the offset
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h

index 61cf7db..d074b6d 100644 (file)
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -36,7 +36,6 @@ struct xdp_umem {
         u32 headroom;
         u32 chunk_size_nohr;
         struct user_struct *user;
-       struct pid *pid;
         unsigned long address;
         refcount_t users;
         struct work_struct work;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 3c38ac9..929c8e5 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -502,16 +502,6 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
- *     Description
- *             Push an element *value* in *map*. *flags* is one of:
- *
- *             **BPF_EXIST**
- *             If the queue/stack is full, the oldest element is removed to
- *             make room for this.
- *     Return
- *             0 on success, or a negative error in case of failure.
- *
   * int bpf_probe_read(void *dst, u32 size, const void *src)
   *     Description
   *             For tracing programs, safely attempt to read *size* bytes from
@@ -1435,14 +1425,14 @@ union bpf_attr {
   * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
   *     Description
   *             Equivalent to bpf_get_socket_cookie() helper that accepts
- *             *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ *             *skb*, but gets socket from **struct bpf_sock_addr** context.
   *     Return
   *             A 8-byte long non-decreasing number.
   *
   * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
   *     Description
   *             Equivalent to bpf_get_socket_cookie() helper that accepts
- *             *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ *             *skb*, but gets socket from **struct bpf_sock_ops** context.
   *     Return
   *             A 8-byte long non-decreasing number.
   *
@@ -2098,52 +2088,52 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * int bpf_rc_repeat(void *ctx)
   *     Description
   *             This helper is used in programs implementing IR decoding, to
- *             report a successfully decoded key press with *scancode*,
- *             *toggle* value in the given *protocol*. The scancode will be
- *             translated to a keycode using the rc keymap, and reported as
- *             an input key down event. After a period a key up event is
- *             generated. This period can be extended by calling either
- *             **bpf_rc_keydown**\ () again with the same values, or calling
- *             **bpf_rc_repeat**\ ().
+ *             report a successfully decoded repeat key message. This delays
+ *             the generation of a key up event for previously generated
+ *             key down event.
   *
- *             Some protocols include a toggle bit, in case the button was
- *             released and pressed again between consecutive scancodes.
+ *             Some IR protocols like NEC have a special IR message for
+ *             repeating last button, for when a button is held down.
   *
   *             The *ctx* should point to the lirc sample as passed into
   *             the program.
   *
- *             The *protocol* is the decoded protocol number (see
- *             **enum rc_proto** for some predefined values).
- *
   *             This helper is only available is the kernel was compiled with
   *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
   *             "**y**".
   *     Return
   *             0
   *
- * int bpf_rc_repeat(void *ctx)
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
   *     Description
   *             This helper is used in programs implementing IR decoding, to
- *             report a successfully decoded repeat key message. This delays
- *             the generation of a key up event for previously generated
- *             key down event.
+ *             report a successfully decoded key press with *scancode*,
+ *             *toggle* value in the given *protocol*. The scancode will be
+ *             translated to a keycode using the rc keymap, and reported as
+ *             an input key down event. After a period a key up event is
+ *             generated. This period can be extended by calling either
+ *             **bpf_rc_keydown**\ () again with the same values, or calling
+ *             **bpf_rc_repeat**\ ().
   *
- *             Some IR protocols like NEC have a special IR message for
- *             repeating last button, for when a button is held down.
+ *             Some protocols include a toggle bit, in case the button was
+ *             released and pressed again between consecutive scancodes.
   *
   *             The *ctx* should point to the lirc sample as passed into
   *             the program.
   *
+ *             The *protocol* is the decoded protocol number (see
+ *             **enum rc_proto** for some predefined values).
+ *
   *             This helper is only available is the kernel was compiled with
   *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
   *             "**y**".
   *     Return
   *             0
   *
- * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
   *     Description
   *             Return the cgroup v2 id of the socket associated with the *skb*.
   *             This is roughly similar to the **bpf_get_cgroup_classid**\ ()
@@ -2159,30 +2149,12 @@ union bpf_attr {
   *     Return
   *             The id is returned or 0 in case the id could not be retrieved.
   *
- * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
- *     Description
- *             Return id of cgroup v2 that is ancestor of cgroup associated
- *             with the *skb* at the *ancestor_level*.  The root cgroup is at
- *             *ancestor_level* zero and each step down the hierarchy
- *             increments the level. If *ancestor_level* == level of cgroup
- *             associated with *skb*, then return value will be same as that
- *             of **bpf_skb_cgroup_id**\ ().
- *
- *             The helper is useful to implement policies based on cgroups
- *             that are upper in hierarchy than immediate cgroup associated
- *             with *skb*.
- *
- *             The format of returned id and helper limitations are same as in
- *             **bpf_skb_cgroup_id**\ ().
- *     Return
- *             The id is returned or 0 in case the id could not be retrieved.
- *
   * u64 bpf_get_current_cgroup_id(void)
   *     Return
   *             A 64-bit integer containing the current cgroup id based
   *             on the cgroup within which the current task is running.
   *
- * void* get_local_storage(void *map, u64 flags)
+ * void *bpf_get_local_storage(void *map, u64 flags)
   *     Description
   *             Get the pointer to the local storage area.
   *             The type and the size of the local storage is defined
@@ -2209,6 +2181,24 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *     Description
+ *             Return id of cgroup v2 that is ancestor of cgroup associated
+ *             with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *             *ancestor_level* zero and each step down the hierarchy
+ *             increments the level. If *ancestor_level* == level of cgroup
+ *             associated with *skb*, then return value will be same as that
+ *             of **bpf_skb_cgroup_id**\ ().
+ *
+ *             The helper is useful to implement policies based on cgroups
+ *             that are upper in hierarchy than immediate cgroup associated
+ *             with *skb*.
+ *
+ *             The format of returned id and helper limitations are same as in
+ *             **bpf_skb_cgroup_id**\ ().
+ *     Return
+ *             The id is returned or 0 in case the id could not be retrieved.
+ *
   * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
   *     Description
   *             Look for TCP socket matching *tuple*, optionally in a child
@@ -2289,6 +2279,16 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ *     Description
+ *             Push an element *value* in *map*. *flags* is one of:
+ *
+ *             **BPF_EXIST**
+ *                     If the queue/stack is full, the oldest element is
+ *                     removed to make room for this.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
   * int bpf_map_pop_elem(struct bpf_map *map, void *value)
   *     Description
   *             Pop an element from *map*.
@@ -2343,29 +2343,94 @@ union bpf_attr {
   *     Return
   *             0
   *
+ * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ *     Description
+ *             Acquire a spinlock represented by the pointer *lock*, which is
+ *             stored as part of a value of a map. Taking the lock allows to
+ *             safely update the rest of the fields in that value. The
+ *             spinlock can (and must) later be released with a call to
+ *             **bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ *             Spinlocks in BPF programs come with a number of restrictions
+ *             and constraints:
+ *
+ *             * **bpf_spin_lock** objects are only allowed inside maps of
+ *               types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ *               list could be extended in the future).
+ *             * BTF description of the map is mandatory.
+ *             * The BPF program can take ONE lock at a time, since taking two
+ *               or more could cause dead locks.
+ *             * Only one **struct bpf_spin_lock** is allowed per map element.
+ *             * When the lock is taken, calls (either BPF to BPF or helpers)
+ *               are not allowed.
+ *             * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ *               allowed inside a spinlock-ed region.
+ *             * The BPF program MUST call **bpf_spin_unlock**\ () to release
+ *               the lock, on all execution paths, before it returns.
+ *             * The BPF program can access **struct bpf_spin_lock** only via
+ *               the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ *               helpers. Loading or storing data into the **struct
+ *               bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ *             * To use the **bpf_spin_lock**\ () helper, the BTF description
+ *               of the map value must be a struct and have **struct
+ *               bpf_spin_lock** *anyname*\ **;** field at the top level.
+ *               Nested lock inside another struct is not allowed.
+ *             * The **struct bpf_spin_lock** *lock* field in a map value must
+ *               be aligned on a multiple of 4 bytes in that value.
+ *             * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ *               the **bpf_spin_lock** field to user space.
+ *             * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ *               a BPF program, do not update the **bpf_spin_lock** field.
+ *             * **bpf_spin_lock** cannot be on the stack or inside a
+ *               networking packet (it can only be inside of a map values).
+ *             * **bpf_spin_lock** is available to root only.
+ *             * Tracing programs and socket filter programs cannot use
+ *               **bpf_spin_lock**\ () due to insufficient preemption checks
+ *               (but this may change in the future).
+ *             * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ *     Return
+ *             0
+ *
+ * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ *     Description
+ *             Release the *lock* previously locked by a call to
+ *             **bpf_spin_lock**\ (\ *lock*\ ).
+ *     Return
+ *             0
+ *
   * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
   *     Description
   *             This helper gets a **struct bpf_sock** pointer such
- *             that all the fields in bpf_sock can be accessed.
+ *             that all the fields in this **bpf_sock** can be accessed.
   *     Return
- *             A **struct bpf_sock** pointer on success, or NULL in
+ *             A **struct bpf_sock** pointer on success, or **NULL** in
   *             case of failure.
   *
   * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
   *     Description
   *             This helper gets a **struct bpf_tcp_sock** pointer from a
   *             **struct bpf_sock** pointer.
- *
   *     Return
- *             A **struct bpf_tcp_sock** pointer on success, or NULL in
+ *             A **struct bpf_tcp_sock** pointer on success, or **NULL** in
   *             case of failure.
   *
   * int bpf_skb_ecn_set_ce(struct sk_buf *skb)
- *     Description
- *             Sets ECN of IP header to ce (congestion encountered) if
- *             current value is ect (ECN capable). Works with IPv6 and IPv4.
- *     Return
- *             1 if set, 0 if not set.
+ *     Description
+ *             Set ECN (Explicit Congestion Notification) field of IP header
+ *             to **CE** (Congestion Encountered) if current value is **ECT**
+ *             (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ *             and IPv4.
+ *     Return
+ *             1 if the **CE** flag is set (either by the current helper call
+ *             or because it was already present), 0 if it is not set.
+ *
+ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
+ *     Description
+ *             Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ *             **bpf_sk_release**\ () is unnecessary and not allowed.
+ *     Return
+ *             A **struct bpf_sock** pointer on success, or **NULL** in
+ *             case of failure.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -2465,7 +2530,8 @@ union bpf_attr {
         FN(spin_unlock),                \
         FN(sk_fullsock),                \
         FN(tcp_sock),                   \
-       FN(skb_ecn_set_ce),
+       FN(skb_ecn_set_ce),             \
+       FN(get_listener_sock),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index ce166a0..86f9cd5 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
         int access_size;
         s64 msize_smax_value;
         u64 msize_umax_value;
-       int ptr_id;
+       int ref_obj_id;
         int func_id;
  };
  
@@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
                type == PTR_TO_TCP_SOCK_OR_NULL;
  }
  
-static bool type_is_refcounted(enum bpf_reg_type type)
-{
-       return type == PTR_TO_SOCKET;
-}
-
-static bool type_is_refcounted_or_null(enum bpf_reg_type type)
-{
-       return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
-}
-
-static bool reg_is_refcounted(const struct bpf_reg_state *reg)
-{
-       return type_is_refcounted(reg->type);
-}
-
  static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
  {
         return reg->type == PTR_TO_MAP_VALUE &&
                 map_value_has_spin_lock(reg->map_ptr);
  }
  
-static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
  {
-       return type_is_refcounted_or_null(reg->type);
-}
-
-static bool arg_type_is_refcounted(enum bpf_arg_type type)
-{
-       return type == ARG_PTR_TO_SOCKET;
+       return type == ARG_PTR_TO_SOCK_COMMON;
  }
  
  /* Determine whether the function releases some resources allocated by another
@@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
                 func_id == BPF_FUNC_sk_lookup_udp;
  }
  
+static bool is_ptr_cast_function(enum bpf_func_id func_id)
+{
+       return func_id == BPF_FUNC_tcp_sock ||
+               func_id == BPF_FUNC_sk_fullsock;
+}
+
  /* string representation of 'enum bpf_reg_type' */
  static const char * const reg_type_str[] = {
         [NOT_INIT]              = "?",
@@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
                         if (t == PTR_TO_STACK)
                                 verbose(env, ",call_%d", func(env, reg)->callsite);
                 } else {
-                       verbose(env, "(id=%d", reg->id);
+                       verbose(env, "(id=%d ref_obj_id=%d", reg->id,
+                               reg->ref_obj_id);
                         if (t != SCALAR_VALUE)
                                 verbose(env, ",off=%d", reg->off);
                         if (type_is_pkt_pointer(t))
@@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
                 if (!type_is_sk_pointer(type))
                         goto err_type;
-       } else if (arg_type == ARG_PTR_TO_SOCKET) {
-               expected_type = PTR_TO_SOCKET;
-               if (type != expected_type)
-                       goto err_type;
-               if (meta->ptr_id || !reg->id) {
-                       verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
-                               meta->ptr_id, reg->id);
-                       return -EFAULT;
+               if (reg->ref_obj_id) {
+                       if (meta->ref_obj_id) {
+                               verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+                                       regno, reg->ref_obj_id,
+                                       meta->ref_obj_id);
+                               return -EFAULT;
+                       }
+                       meta->ref_obj_id = reg->ref_obj_id;
                 }
-               meta->ptr_id = reg->id;
         } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
                 if (meta->func_id == BPF_FUNC_spin_lock) {
                         if (process_spin_lock(env, regno, true))
@@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
         return true;
  }
  
-static bool check_refcount_ok(const struct bpf_func_proto *fn)
+static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
  {
         int count = 0;
  
-       if (arg_type_is_refcounted(fn->arg1_type))
+       if (arg_type_may_be_refcounted(fn->arg1_type))
                 count++;
-       if (arg_type_is_refcounted(fn->arg2_type))
+       if (arg_type_may_be_refcounted(fn->arg2_type))
                 count++;
-       if (arg_type_is_refcounted(fn->arg3_type))
+       if (arg_type_may_be_refcounted(fn->arg3_type))
                 count++;
-       if (arg_type_is_refcounted(fn->arg4_type))
+       if (arg_type_may_be_refcounted(fn->arg4_type))
                 count++;
-       if (arg_type_is_refcounted(fn->arg5_type))
+       if (arg_type_may_be_refcounted(fn->arg5_type))
                 count++;
  
+       /* A reference acquiring function cannot acquire
+        * another refcounted ptr.
+        */
+       if (is_acquire_function(func_id) && count)
+               return false;
+
         /* We only support one arg being unreferenced at the moment,
          * which is sufficient for the helper functions we have right now.
          */
         return count <= 1;
  }
  
-static int check_func_proto(const struct bpf_func_proto *fn)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
  {
         return check_raw_mode_ok(fn) &&
                check_arg_pair_ok(fn) &&
-              check_refcount_ok(fn) ? 0 : -EINVAL;
+              check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
  }
  
  /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
  }
  
  static void release_reg_references(struct bpf_verifier_env *env,
-                                  struct bpf_func_state *state, int id)
+                                  struct bpf_func_state *state,
+                                  int ref_obj_id)
  {
         struct bpf_reg_state *regs = state->regs, *reg;
         int i;
  
         for (i = 0; i < MAX_BPF_REG; i++)
-               if (regs[i].id == id)
+               if (regs[i].ref_obj_id == ref_obj_id)
                         mark_reg_unknown(env, regs, i);
  
         bpf_for_each_spilled_reg(i, state, reg) {
                 if (!reg)
                         continue;
-               if (reg_is_refcounted(reg) && reg->id == id)
+               if (reg->ref_obj_id == ref_obj_id)
                         __mark_reg_unknown(reg);
         }
  }
@@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
   * resources. Identify all copies of the same pointer and clear the reference.
   */
  static int release_reference(struct bpf_verifier_env *env,
-                            struct bpf_call_arg_meta *meta)
+                            int ref_obj_id)
  {
         struct bpf_verifier_state *vstate = env->cur_state;
+       int err;
         int i;
  
+       err = release_reference_state(cur_func(env), ref_obj_id);
+       if (err)
+               return err;
+
         for (i = 0; i <= vstate->curframe; i++)
-               release_reg_references(env, vstate->frame[i], meta->ptr_id);
+               release_reg_references(env, vstate->frame[i], ref_obj_id);
  
-       return release_reference_state(cur_func(env), meta->ptr_id);
+       return 0;
  }
  
  static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
         memset(&meta, 0, sizeof(meta));
         meta.pkt_access = fn->pkt_access;
  
-       err = check_func_proto(fn);
+       err = check_func_proto(fn, func_id);
         if (err) {
                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
                         func_id_name(func_id), func_id);
@@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                         return err;
                 }
         } else if (is_release_function(func_id)) {
-               err = release_reference(env, &meta);
+               err = release_reference(env, meta.ref_obj_id);
                 if (err) {
                         verbose(env, "func %s#%d reference has not been acquired before\n",
                                 func_id_name(func_id), func_id);
@@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
  
                         if (id < 0)
                                 return id;
-                       /* For release_reference() */
+                       /* For mark_ptr_or_null_reg() */
                         regs[BPF_REG_0].id = id;
+                       /* For release_reference() */
+                       regs[BPF_REG_0].ref_obj_id = id;
                 } else {
                         /* For mark_ptr_or_null_reg() */
                         regs[BPF_REG_0].id = ++env->id_gen;
@@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                 return -EINVAL;
         }
  
+       if (is_ptr_cast_function(func_id))
+               /* For release_reference() */
+               regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+
         do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
  
         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
                 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
                         reg->type = PTR_TO_TCP_SOCK;
                 }
-               if (is_null || !(reg_is_refcounted(reg) ||
-                                reg_may_point_to_spin_lock(reg))) {
-                       /* We don't need id from this point onwards anymore,
-                        * thus we should better reset it, so that state
-                        * pruning has chances to take effect.
+               if (is_null) {
+                       /* We don't need id and ref_obj_id from this point
+                        * onwards anymore, thus we should better reset it,
+                        * so that state pruning has chances to take effect.
+                        */
+                       reg->id = 0;
+                       reg->ref_obj_id = 0;
+               } else if (!reg_may_point_to_spin_lock(reg)) {
+                       /* For not-NULL ptr, reg->ref_obj_id will be reset
+                        * in release_reg_references().
+                        *
+                        * reg->id is still used by spin_lock ptr. Other
+                        * than spin_lock ptr type, reg->id can be reset.
                          */
                         reg->id = 0;
                 }
@@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
  {
         struct bpf_func_state *state = vstate->frame[vstate->curframe];
         struct bpf_reg_state *reg, *regs = state->regs;
+       u32 ref_obj_id = regs[regno].ref_obj_id;
         u32 id = regs[regno].id;
         int i, j;
  
-       if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
-               release_reference_state(state, id);
+       if (ref_obj_id && ref_obj_id == id && is_null)
+               /* regs[regno] is in the " == NULL" branch.
+                * No one could have freed the reference state before
+                * doing the NULL check.
+                */
+               WARN_ON_ONCE(release_reference_state(state, id));
  
         for (i = 0; i < MAX_BPF_REG; i++)
                 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
diff --git a/net/core/filter.c b/net/core/filter.c

index f274620..647c63a 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
  
  BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
  {
-       sk = sk_to_full_sk(sk);
-
         return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
  }
  
@@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
         .func           = bpf_sk_release,
         .gpl_only       = false,
         .ret_type       = RET_INTEGER,
-       .arg1_type      = ARG_PTR_TO_SOCKET,
+       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
  };
  
  BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
  
  BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
  {
-       sk = sk_to_full_sk(sk);
-
         if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
                 return (unsigned long)sk;
  
@@ -5422,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
         .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
  };
  
+BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
+{
+       sk = sk_to_full_sk(sk);
+
+       if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
+               return (unsigned long)sk;
+
+       return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_get_listener_sock_proto = {
+       .func           = bpf_get_listener_sock,
+       .gpl_only       = false,
+       .ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
+       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
+};
+
  BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
  {
         unsigned int iphdr_len;
@@ -5607,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
  #ifdef CONFIG_INET
         case BPF_FUNC_tcp_sock:
                 return &bpf_tcp_sock_proto;
+       case BPF_FUNC_get_listener_sock:
+               return &bpf_get_listener_sock_proto;
         case BPF_FUNC_skb_ecn_set_ce:
                 return &bpf_skb_ecn_set_ce_proto;
  #endif
@@ -5702,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_sk_release_proto;
         case BPF_FUNC_tcp_sock:
                 return &bpf_tcp_sock_proto;
+       case BPF_FUNC_get_listener_sock:
+               return &bpf_get_listener_sock_proto;
  #endif
         default:
                 return bpf_base_func_proto(func_id);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c

index 77520ea..989e523 100644 (file)
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -193,9 +193,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
  
  static void xdp_umem_release(struct xdp_umem *umem)
  {
-       struct task_struct *task;
-       struct mm_struct *mm;
-
         xdp_umem_clear_dev(umem);
  
         ida_simple_remove(&umem_ida, umem->id);
@@ -214,21 +211,10 @@ static void xdp_umem_release(struct xdp_umem *umem)
  
         xdp_umem_unpin_pages(umem);
  
-       task = get_pid_task(umem->pid, PIDTYPE_PID);
-       put_pid(umem->pid);
-       if (!task)
-               goto out;
-       mm = get_task_mm(task);
-       put_task_struct(task);
-       if (!mm)
-               goto out;
-
-       mmput(mm);
         kfree(umem->pages);
         umem->pages = NULL;
  
         xdp_umem_unaccount_pages(umem);
-out:
         kfree(umem);
  }
  
@@ -357,7 +343,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
         if (size_chk < 0)
                 return -EINVAL;
  
-       umem->pid = get_task_pid(current, PIDTYPE_PID);
         umem->address = (unsigned long)addr;
         umem->chunk_mask = ~((u64)chunk_size - 1);
         umem->size = size;
@@ -373,7 +358,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
  
         err = xdp_umem_account_pages(umem);
         if (err)
-               goto out;
+               return err;
  
         err = xdp_umem_pin_pages(umem);
         if (err)
@@ -392,8 +377,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
  
  out_account:
         xdp_umem_unaccount_pages(umem);
-out:
-       put_pid(umem->pid);
         return err;
  }
  
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index 3c38ac9..929c8e5 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -502,16 +502,6 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
- *     Description
- *             Push an element *value* in *map*. *flags* is one of:
- *
- *             **BPF_EXIST**
- *             If the queue/stack is full, the oldest element is removed to
- *             make room for this.
- *     Return
- *             0 on success, or a negative error in case of failure.
- *
   * int bpf_probe_read(void *dst, u32 size, const void *src)
   *     Description
   *             For tracing programs, safely attempt to read *size* bytes from
@@ -1435,14 +1425,14 @@ union bpf_attr {
   * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
   *     Description
   *             Equivalent to bpf_get_socket_cookie() helper that accepts
- *             *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ *             *skb*, but gets socket from **struct bpf_sock_addr** context.
   *     Return
   *             A 8-byte long non-decreasing number.
   *
   * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
   *     Description
   *             Equivalent to bpf_get_socket_cookie() helper that accepts
- *             *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ *             *skb*, but gets socket from **struct bpf_sock_ops** context.
   *     Return
   *             A 8-byte long non-decreasing number.
   *
@@ -2098,52 +2088,52 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * int bpf_rc_repeat(void *ctx)
   *     Description
   *             This helper is used in programs implementing IR decoding, to
- *             report a successfully decoded key press with *scancode*,
- *             *toggle* value in the given *protocol*. The scancode will be
- *             translated to a keycode using the rc keymap, and reported as
- *             an input key down event. After a period a key up event is
- *             generated. This period can be extended by calling either
- *             **bpf_rc_keydown**\ () again with the same values, or calling
- *             **bpf_rc_repeat**\ ().
+ *             report a successfully decoded repeat key message. This delays
+ *             the generation of a key up event for previously generated
+ *             key down event.
   *
- *             Some protocols include a toggle bit, in case the button was
- *             released and pressed again between consecutive scancodes.
+ *             Some IR protocols like NEC have a special IR message for
+ *             repeating last button, for when a button is held down.
   *
   *             The *ctx* should point to the lirc sample as passed into
   *             the program.
   *
- *             The *protocol* is the decoded protocol number (see
- *             **enum rc_proto** for some predefined values).
- *
   *             This helper is only available is the kernel was compiled with
   *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
   *             "**y**".
   *     Return
   *             0
   *
- * int bpf_rc_repeat(void *ctx)
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
   *     Description
   *             This helper is used in programs implementing IR decoding, to
- *             report a successfully decoded repeat key message. This delays
- *             the generation of a key up event for previously generated
- *             key down event.
+ *             report a successfully decoded key press with *scancode*,
+ *             *toggle* value in the given *protocol*. The scancode will be
+ *             translated to a keycode using the rc keymap, and reported as
+ *             an input key down event. After a period a key up event is
+ *             generated. This period can be extended by calling either
+ *             **bpf_rc_keydown**\ () again with the same values, or calling
+ *             **bpf_rc_repeat**\ ().
   *
- *             Some IR protocols like NEC have a special IR message for
- *             repeating last button, for when a button is held down.
+ *             Some protocols include a toggle bit, in case the button was
+ *             released and pressed again between consecutive scancodes.
   *
   *             The *ctx* should point to the lirc sample as passed into
   *             the program.
   *
+ *             The *protocol* is the decoded protocol number (see
+ *             **enum rc_proto** for some predefined values).
+ *
   *             This helper is only available is the kernel was compiled with
   *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
   *             "**y**".
   *     Return
   *             0
   *
- * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
   *     Description
   *             Return the cgroup v2 id of the socket associated with the *skb*.
   *             This is roughly similar to the **bpf_get_cgroup_classid**\ ()
@@ -2159,30 +2149,12 @@ union bpf_attr {
   *     Return
   *             The id is returned or 0 in case the id could not be retrieved.
   *
- * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
- *     Description
- *             Return id of cgroup v2 that is ancestor of cgroup associated
- *             with the *skb* at the *ancestor_level*.  The root cgroup is at
- *             *ancestor_level* zero and each step down the hierarchy
- *             increments the level. If *ancestor_level* == level of cgroup
- *             associated with *skb*, then return value will be same as that
- *             of **bpf_skb_cgroup_id**\ ().
- *
- *             The helper is useful to implement policies based on cgroups
- *             that are upper in hierarchy than immediate cgroup associated
- *             with *skb*.
- *
- *             The format of returned id and helper limitations are same as in
- *             **bpf_skb_cgroup_id**\ ().
- *     Return
- *             The id is returned or 0 in case the id could not be retrieved.
- *
   * u64 bpf_get_current_cgroup_id(void)
   *     Return
   *             A 64-bit integer containing the current cgroup id based
   *             on the cgroup within which the current task is running.
   *
- * void* get_local_storage(void *map, u64 flags)
+ * void *bpf_get_local_storage(void *map, u64 flags)
   *     Description
   *             Get the pointer to the local storage area.
   *             The type and the size of the local storage is defined
@@ -2209,6 +2181,24 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *     Description
+ *             Return id of cgroup v2 that is ancestor of cgroup associated
+ *             with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *             *ancestor_level* zero and each step down the hierarchy
+ *             increments the level. If *ancestor_level* == level of cgroup
+ *             associated with *skb*, then return value will be same as that
+ *             of **bpf_skb_cgroup_id**\ ().
+ *
+ *             The helper is useful to implement policies based on cgroups
+ *             that are upper in hierarchy than immediate cgroup associated
+ *             with *skb*.
+ *
+ *             The format of returned id and helper limitations are same as in
+ *             **bpf_skb_cgroup_id**\ ().
+ *     Return
+ *             The id is returned or 0 in case the id could not be retrieved.
+ *
   * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
   *     Description
   *             Look for TCP socket matching *tuple*, optionally in a child
@@ -2289,6 +2279,16 @@ union bpf_attr {
   *     Return
   *             0 on success, or a negative error in case of failure.
   *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ *     Description
+ *             Push an element *value* in *map*. *flags* is one of:
+ *
+ *             **BPF_EXIST**
+ *                     If the queue/stack is full, the oldest element is
+ *                     removed to make room for this.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
   * int bpf_map_pop_elem(struct bpf_map *map, void *value)
   *     Description
   *             Pop an element from *map*.
@@ -2343,29 +2343,94 @@ union bpf_attr {
   *     Return
   *             0
   *
+ * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ *     Description
+ *             Acquire a spinlock represented by the pointer *lock*, which is
+ *             stored as part of a value of a map. Taking the lock allows to
+ *             safely update the rest of the fields in that value. The
+ *             spinlock can (and must) later be released with a call to
+ *             **bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ *             Spinlocks in BPF programs come with a number of restrictions
+ *             and constraints:
+ *
+ *             * **bpf_spin_lock** objects are only allowed inside maps of
+ *               types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ *               list could be extended in the future).
+ *             * BTF description of the map is mandatory.
+ *             * The BPF program can take ONE lock at a time, since taking two
+ *               or more could cause dead locks.
+ *             * Only one **struct bpf_spin_lock** is allowed per map element.
+ *             * When the lock is taken, calls (either BPF to BPF or helpers)
+ *               are not allowed.
+ *             * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ *               allowed inside a spinlock-ed region.
+ *             * The BPF program MUST call **bpf_spin_unlock**\ () to release
+ *               the lock, on all execution paths, before it returns.
+ *             * The BPF program can access **struct bpf_spin_lock** only via
+ *               the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ *               helpers. Loading or storing data into the **struct
+ *               bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ *             * To use the **bpf_spin_lock**\ () helper, the BTF description
+ *               of the map value must be a struct and have **struct
+ *               bpf_spin_lock** *anyname*\ **;** field at the top level.
+ *               Nested lock inside another struct is not allowed.
+ *             * The **struct bpf_spin_lock** *lock* field in a map value must
+ *               be aligned on a multiple of 4 bytes in that value.
+ *             * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ *               the **bpf_spin_lock** field to user space.
+ *             * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ *               a BPF program, do not update the **bpf_spin_lock** field.
+ *             * **bpf_spin_lock** cannot be on the stack or inside a
+ *               networking packet (it can only be inside of a map values).
+ *             * **bpf_spin_lock** is available to root only.
+ *             * Tracing programs and socket filter programs cannot use
+ *               **bpf_spin_lock**\ () due to insufficient preemption checks
+ *               (but this may change in the future).
+ *             * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ *     Return
+ *             0
+ *
+ * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ *     Description
+ *             Release the *lock* previously locked by a call to
+ *             **bpf_spin_lock**\ (\ *lock*\ ).
+ *     Return
+ *             0
+ *
   * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
   *     Description
   *             This helper gets a **struct bpf_sock** pointer such
- *             that all the fields in bpf_sock can be accessed.
+ *             that all the fields in this **bpf_sock** can be accessed.
   *     Return
- *             A **struct bpf_sock** pointer on success, or NULL in
+ *             A **struct bpf_sock** pointer on success, or **NULL** in
   *             case of failure.
   *
   * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
   *     Description
   *             This helper gets a **struct bpf_tcp_sock** pointer from a
   *             **struct bpf_sock** pointer.
- *
   *     Return
- *             A **struct bpf_tcp_sock** pointer on success, or NULL in
+ *             A **struct bpf_tcp_sock** pointer on success, or **NULL** in
   *             case of failure.
   *
   * int bpf_skb_ecn_set_ce(struct sk_buf *skb)
- *     Description
- *             Sets ECN of IP header to ce (congestion encountered) if
- *             current value is ect (ECN capable). Works with IPv6 and IPv4.
- *     Return
- *             1 if set, 0 if not set.
+ *     Description
+ *             Set ECN (Explicit Congestion Notification) field of IP header
+ *             to **CE** (Congestion Encountered) if current value is **ECT**
+ *             (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ *             and IPv4.
+ *     Return
+ *             1 if the **CE** flag is set (either by the current helper call
+ *             or because it was already present), 0 if it is not set.
+ *
+ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
+ *     Description
+ *             Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ *             **bpf_sk_release**\ () is unnecessary and not allowed.
+ *     Return
+ *             A **struct bpf_sock** pointer on success, or **NULL** in
+ *             case of failure.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -2465,7 +2530,8 @@ union bpf_attr {
         FN(spin_unlock),                \
         FN(sk_fullsock),                \
         FN(tcp_sock),                   \
-       FN(skb_ecn_set_ce),
+       FN(skb_ecn_set_ce),             \
+       FN(get_listener_sock),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c

index 1b8d8cd..87e3020 100644 (file)
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1602,16 +1602,12 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
  /* Calculate type signature hash of ENUM. */
  static __u32 btf_hash_enum(struct btf_type *t)
  {
-       struct btf_enum *member = (struct btf_enum *)(t + 1);
-       __u32 vlen = BTF_INFO_VLEN(t->info);
-       __u32 h = btf_hash_common(t);
-       int i;
+       __u32 h;
  
-       for (i = 0; i < vlen; i++) {
-               h = hash_combine(h, member->name_off);
-               h = hash_combine(h, member->val);
-               member++;
-       }
+       /* don't hash vlen and enum members to support enum fwd resolving */
+       h = hash_combine(0, t->name_off);
+       h = hash_combine(h, t->info & ~0xffff);
+       h = hash_combine(h, t->size);
         return h;
  }
  
@@ -1637,6 +1633,22 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
         return true;
  }
  
+static inline bool btf_is_enum_fwd(struct btf_type *t)
+{
+       return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM &&
+              BTF_INFO_VLEN(t->info) == 0;
+}
+
+static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
+{
+       if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
+               return btf_equal_enum(t1, t2);
+       /* ignore vlen when comparing */
+       return t1->name_off == t2->name_off &&
+              (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
+              t1->size == t2->size;
+}
+
  /*
   * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
   * as referenced type IDs equivalence is established separately during type
@@ -1860,6 +1872,17 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
                                 new_id = cand_node->type_id;
                                 break;
                         }
+                       if (d->opts.dont_resolve_fwds)
+                               continue;
+                       if (btf_compat_enum(t, cand)) {
+                               if (btf_is_enum_fwd(t)) {
+                                       /* resolve fwd to full enum */
+                                       new_id = cand_node->type_id;
+                                       break;
+                               }
+                               /* resolve canonical enum fwd to full enum */
+                               d->map[cand_node->type_id] = type_id;
+                       }
                 }
                 break;
  
@@ -2084,15 +2107,15 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
                 return fwd_kind == real_kind;
         }
  
-       if (cand_type->info != canon_type->info)
-               return 0;
-
         switch (cand_kind) {
         case BTF_KIND_INT:
                 return btf_equal_int(cand_type, canon_type);
  
         case BTF_KIND_ENUM:
-               return btf_equal_enum(cand_type, canon_type);
+               if (d->opts.dont_resolve_fwds)
+                       return btf_equal_enum(cand_type, canon_type);
+               else
+                       return btf_compat_enum(cand_type, canon_type);
  
         case BTF_KIND_FWD:
                 return btf_equal_common(cand_type, canon_type);
@@ -2103,6 +2126,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
         case BTF_KIND_PTR:
         case BTF_KIND_TYPEDEF:
         case BTF_KIND_FUNC:
+               if (cand_type->info != canon_type->info)
+                       return 0;
                 return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
  
         case BTF_KIND_ARRAY: {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c

index d5b830d..5e977d2 100644 (file)
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -835,12 +835,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
                         obj->efile.maps_shndx = idx;
                 else if (strcmp(name, BTF_ELF_SEC) == 0) {
                         obj->btf = btf__new(data->d_buf, data->d_size);
-                       if (IS_ERR(obj->btf) || btf__load(obj->btf)) {
+                       if (IS_ERR(obj->btf)) {
                                 pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
                                            BTF_ELF_SEC, PTR_ERR(obj->btf));
-                               if (!IS_ERR(obj->btf))
-                                       btf__free(obj->btf);
                                 obj->btf = NULL;
+                               continue;
+                       }
+                       err = btf__load(obj->btf);
+                       if (err) {
+                               pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n",
+                                          BTF_ELF_SEC, err);
+                               btf__free(obj->btf);
+                               obj->btf = NULL;
+                               err = 0;
                         }
                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
                         btf_ext_data = data;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c

index f98ac82..8d0078b 100644 (file)
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -126,8 +126,8 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
         cfg->frame_headroom = usr_cfg->frame_headroom;
  }
  
-static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
-                                     const struct xsk_socket_config *usr_cfg)
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+                                    const struct xsk_socket_config *usr_cfg)
  {
         if (!usr_cfg) {
                 cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
@@ -135,14 +135,19 @@ static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
                 cfg->libbpf_flags = 0;
                 cfg->xdp_flags = 0;
                 cfg->bind_flags = 0;
-               return;
+               return 0;
         }
  
+       if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
+               return -EINVAL;
+
         cfg->rx_size = usr_cfg->rx_size;
         cfg->tx_size = usr_cfg->tx_size;
         cfg->libbpf_flags = usr_cfg->libbpf_flags;
         cfg->xdp_flags = usr_cfg->xdp_flags;
         cfg->bind_flags = usr_cfg->bind_flags;
+
+       return 0;
  }
  
  int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
@@ -557,7 +562,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
         }
         strncpy(xsk->ifname, ifname, IFNAMSIZ);
  
-       xsk_set_xdp_socket_config(&xsk->config, usr_config);
+       err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+       if (err)
+               goto out_socket;
  
         if (rx) {
                 err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h

index c9433a4..c81fc35 100644 (file)
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -180,6 +180,8 @@ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
         (void *) BPF_FUNC_sk_fullsock;
  static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
         (void *) BPF_FUNC_tcp_sock;
+static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
+       (void *) BPF_FUNC_get_listener_sock;
  static int (*bpf_skb_ecn_set_ce)(void *ctx) =
         (void *) BPF_FUNC_skb_ecn_set_ce;
  
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c

index 90f8a20..ee99368 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -37,7 +37,7 @@ void test_map_lock(void)
         const char *file = "./test_map_lock.o";
         int prog_fd, map_fd[2], vars[17] = {};
         pthread_t thread_id[6];
-       struct bpf_object *obj;
+       struct bpf_object *obj = NULL;
         int err = 0, key = 0, i;
         void *ret;
  
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c

index 9a573a9..114ebe6 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -5,7 +5,7 @@ void test_spinlock(void)
  {
         const char *file = "./test_spin_lock.o";
         pthread_t thread_id[4];
-       struct bpf_object *obj;
+       struct bpf_object *obj = NULL;
         int prog_fd;
         int err = 0, i;
         void *ret;
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c

index de1a43e..37328f1 100644 (file)
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -8,38 +8,51 @@
  #include "bpf_helpers.h"
  #include "bpf_endian.h"
  
-enum bpf_array_idx {
-       SRV_IDX,
-       CLI_IDX,
-       __NR_BPF_ARRAY_IDX,
+enum bpf_addr_array_idx {
+       ADDR_SRV_IDX,
+       ADDR_CLI_IDX,
+       __NR_BPF_ADDR_ARRAY_IDX,
+};
+
+enum bpf_result_array_idx {
+       EGRESS_SRV_IDX,
+       EGRESS_CLI_IDX,
+       INGRESS_LISTEN_IDX,
+       __NR_BPF_RESULT_ARRAY_IDX,
+};
+
+enum bpf_linum_array_idx {
+       EGRESS_LINUM_IDX,
+       INGRESS_LINUM_IDX,
+       __NR_BPF_LINUM_ARRAY_IDX,
  };
  
  struct bpf_map_def SEC("maps") addr_map = {
         .type = BPF_MAP_TYPE_ARRAY,
         .key_size = sizeof(__u32),
         .value_size = sizeof(struct sockaddr_in6),
-       .max_entries = __NR_BPF_ARRAY_IDX,
+       .max_entries = __NR_BPF_ADDR_ARRAY_IDX,
  };
  
  struct bpf_map_def SEC("maps") sock_result_map = {
         .type = BPF_MAP_TYPE_ARRAY,
         .key_size = sizeof(__u32),
         .value_size = sizeof(struct bpf_sock),
-       .max_entries = __NR_BPF_ARRAY_IDX,
+       .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
  };
  
  struct bpf_map_def SEC("maps") tcp_sock_result_map = {
         .type = BPF_MAP_TYPE_ARRAY,
         .key_size = sizeof(__u32),
         .value_size = sizeof(struct bpf_tcp_sock),
-       .max_entries = __NR_BPF_ARRAY_IDX,
+       .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
  };
  
  struct bpf_map_def SEC("maps") linum_map = {
         .type = BPF_MAP_TYPE_ARRAY,
         .key_size = sizeof(__u32),
         .value_size = sizeof(__u32),
-       .max_entries = 1,
+       .max_entries = __NR_BPF_LINUM_ARRAY_IDX,
  };
  
  static bool is_loopback6(__u32 *a6)
@@ -100,18 +113,20 @@ static void tpcpy(struct bpf_tcp_sock *dst,
  
  #define RETURN {                                               \
         linum = __LINE__;                                       \
-       bpf_map_update_elem(&linum_map, &idx0, &linum, 0);      \
+       bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
         return 1;                                               \
  }
  
  SEC("cgroup_skb/egress")
-int read_sock_fields(struct __sk_buff *skb)
+int egress_read_sock_fields(struct __sk_buff *skb)
  {
-       __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx;
+       __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
         struct sockaddr_in6 *srv_sa6, *cli_sa6;
         struct bpf_tcp_sock *tp, *tp_ret;
         struct bpf_sock *sk, *sk_ret;
-       __u32 linum, idx0 = 0;
+       __u32 linum, linum_idx;
+
+       linum_idx = EGRESS_LINUM_IDX;
  
         sk = skb->sk;
         if (!sk || sk->state == 10)
@@ -132,14 +147,55 @@ int read_sock_fields(struct __sk_buff *skb)
                 RETURN;
  
         if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
-               idx = srv_idx;
+               result_idx = EGRESS_SRV_IDX;
         else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
-               idx = cli_idx;
+               result_idx = EGRESS_CLI_IDX;
         else
                 RETURN;
  
-       sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx);
-       tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx);
+       sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
+       tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
+       if (!sk_ret || !tp_ret)
+               RETURN;
+
+       skcpy(sk_ret, sk);
+       tpcpy(tp_ret, tp);
+
+       RETURN;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress_read_sock_fields(struct __sk_buff *skb)
+{
+       __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
+       struct bpf_tcp_sock *tp, *tp_ret;
+       struct bpf_sock *sk, *sk_ret;
+       struct sockaddr_in6 *srv_sa6;
+       __u32 linum, linum_idx;
+
+       linum_idx = INGRESS_LINUM_IDX;
+
+       sk = skb->sk;
+       if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
+               RETURN;
+
+       srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
+       if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
+               RETURN;
+
+       if (sk->state != 10 && sk->state != 12)
+               RETURN;
+
+       sk = bpf_get_listener_sock(sk);
+       if (!sk)
+               RETURN;
+
+       tp = bpf_tcp_sock(sk);
+       if (!tp)
+               RETURN;
+
+       sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
+       tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
         if (!sk_ret || !tp_ret)
                 RETURN;
  
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c

index 38797aa..23e3b31 100644 (file)
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -5874,6 +5874,50 @@ const struct btf_dedup_test dedup_tests[] = {
                 .dont_resolve_fwds = false,
         },
  },
+{
+       .descr = "dedup: enum fwd resolution",
+       .input = {
+               .raw_types = {
+                       /* [1] fwd enum 'e1' before full enum */
+                       BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
+                       /* [2] full enum 'e1' after fwd */
+                       BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+                               BTF_ENUM_ENC(NAME_NTH(2), 123),
+                       /* [3] full enum 'e2' before fwd */
+                       BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+                               BTF_ENUM_ENC(NAME_NTH(4), 456),
+                       /* [4] fwd enum 'e2' after full enum */
+                       BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
+                       /* [5] incompatible fwd enum with different size */
+                       BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
+                       /* [6] incompatible full enum with different value */
+                       BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+                               BTF_ENUM_ENC(NAME_NTH(2), 321),
+                       BTF_END_RAW,
+               },
+               BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+       },
+       .expect = {
+               .raw_types = {
+                       /* [1] full enum 'e1' */
+                       BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+                               BTF_ENUM_ENC(NAME_NTH(2), 123),
+                       /* [2] full enum 'e2' */
+                       BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+                               BTF_ENUM_ENC(NAME_NTH(4), 456),
+                       /* [3] incompatible fwd enum with different size */
+                       BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
+                       /* [4] incompatible full enum with different value */
+                       BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+                               BTF_ENUM_ENC(NAME_NTH(2), 321),
+                       BTF_END_RAW,
+               },
+               BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+       },
+       .opts = {
+               .dont_resolve_fwds = false,
+       },
+},
  
  };
  
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c

index bc89439..dcae7f6 100644 (file)
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -16,10 +16,23 @@
  #include "cgroup_helpers.h"
  #include "bpf_rlimit.h"
  
-enum bpf_array_idx {
-       SRV_IDX,
-       CLI_IDX,
-       __NR_BPF_ARRAY_IDX,
+enum bpf_addr_array_idx {
+       ADDR_SRV_IDX,
+       ADDR_CLI_IDX,
+       __NR_BPF_ADDR_ARRAY_IDX,
+};
+
+enum bpf_result_array_idx {
+       EGRESS_SRV_IDX,
+       EGRESS_CLI_IDX,
+       INGRESS_LISTEN_IDX,
+       __NR_BPF_RESULT_ARRAY_IDX,
+};
+
+enum bpf_linum_array_idx {
+       EGRESS_LINUM_IDX,
+       INGRESS_LINUM_IDX,
+       __NR_BPF_LINUM_ARRAY_IDX,
  };
  
  #define CHECK(condition, tag, format...) ({                            \
@@ -41,8 +54,16 @@ static int linum_map_fd;
  static int addr_map_fd;
  static int tp_map_fd;
  static int sk_map_fd;
-static __u32 srv_idx = SRV_IDX;
-static __u32 cli_idx = CLI_IDX;
+
+static __u32 addr_srv_idx = ADDR_SRV_IDX;
+static __u32 addr_cli_idx = ADDR_CLI_IDX;
+
+static __u32 egress_srv_idx = EGRESS_SRV_IDX;
+static __u32 egress_cli_idx = EGRESS_CLI_IDX;
+static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
+
+static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
+static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
  
  static void init_loopback6(struct sockaddr_in6 *sa6)
  {
@@ -93,29 +114,46 @@ static void print_tp(const struct bpf_tcp_sock *tp)
  
  static void check_result(void)
  {
-       struct bpf_tcp_sock srv_tp, cli_tp;
-       struct bpf_sock srv_sk, cli_sk;
-       __u32 linum, idx0 = 0;
+       struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
+       struct bpf_sock srv_sk, cli_sk, listen_sk;
+       __u32 ingress_linum, egress_linum;
         int err;
  
-       err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum);
+       err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
+                                 &egress_linum);
         CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
               "err:%d errno:%d", err, errno);
  
-       err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk);
-       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)",
+       err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
+                                 &ingress_linum);
+       CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+             "err:%d errno:%d", err, errno);
+
+       err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
+       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
+             "err:%d errno:%d", err, errno);
+       err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
+       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
+             "err:%d errno:%d", err, errno);
+
+       err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
+       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
               "err:%d errno:%d", err, errno);
-       err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp);
-       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)",
+       err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
+       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
               "err:%d errno:%d", err, errno);
  
-       err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk);
-       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)",
+       err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
+       CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
               "err:%d errno:%d", err, errno);
-       err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp);
-       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)",
+       err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
+       CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
               "err:%d errno:%d", err, errno);
  
+       printf("listen_sk: ");
+       print_sk(&listen_sk);
+       printf("\n");
+
         printf("srv_sk: ");
         print_sk(&srv_sk);
         printf("\n");
@@ -124,6 +162,10 @@ static void check_result(void)
         print_sk(&cli_sk);
         printf("\n");
  
+       printf("listen_tp: ");
+       print_tp(&listen_tp);
+       printf("\n");
+
         printf("srv_tp: ");
         print_tp(&srv_tp);
         printf("\n");
@@ -132,6 +174,19 @@ static void check_result(void)
         print_tp(&cli_tp);
         printf("\n");
  
+       CHECK(listen_sk.state != 10 ||
+             listen_sk.family != AF_INET6 ||
+             listen_sk.protocol != IPPROTO_TCP ||
+             memcmp(listen_sk.src_ip6, &in6addr_loopback,
+                    sizeof(listen_sk.src_ip6)) ||
+             listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
+             listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
+             listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+             listen_sk.dst_port,
+             "Unexpected listen_sk",
+             "Check listen_sk output. ingress_linum:%u",
+             ingress_linum);
+
         CHECK(srv_sk.state == 10 ||
               !srv_sk.state ||
               srv_sk.family != AF_INET6 ||
@@ -142,7 +197,8 @@ static void check_result(void)
                      sizeof(srv_sk.dst_ip6)) ||
               srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
               srv_sk.dst_port != cli_sa6.sin6_port,
-             "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum);
+             "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
+             egress_linum);
  
         CHECK(cli_sk.state == 10 ||
               !cli_sk.state ||
@@ -154,21 +210,31 @@ static void check_result(void)
                      sizeof(cli_sk.dst_ip6)) ||
               cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
               cli_sk.dst_port != srv_sa6.sin6_port,
-             "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum);
+             "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
+             egress_linum);
+
+       CHECK(listen_tp.data_segs_out ||
+             listen_tp.data_segs_in ||
+             listen_tp.total_retrans ||
+             listen_tp.bytes_acked,
+             "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
+             ingress_linum);
  
         CHECK(srv_tp.data_segs_out != 1 ||
               srv_tp.data_segs_in ||
               srv_tp.snd_cwnd != 10 ||
               srv_tp.total_retrans ||
               srv_tp.bytes_acked != DATA_LEN,
-             "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum);
+             "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
+             egress_linum);
  
         CHECK(cli_tp.data_segs_out ||
               cli_tp.data_segs_in != 1 ||
               cli_tp.snd_cwnd != 10 ||
               cli_tp.total_retrans ||
               cli_tp.bytes_received != DATA_LEN,
-             "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum);
+             "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
+             egress_linum);
  }
  
  static void test(void)
@@ -211,10 +277,10 @@ static void test(void)
               err, errno);
  
         /* Update addr_map with srv_sa6 and cli_sa6 */
-       err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0);
+       err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
         CHECK(err, "map_update", "err:%d errno:%d", err, errno);
  
-       err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0);
+       err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
         CHECK(err, "map_update", "err:%d errno:%d", err, errno);
  
         /* Connect from cli_sa6 to srv_sa6 */
@@ -273,9 +339,9 @@ int main(int argc, char **argv)
         struct bpf_prog_load_attr attr = {
                 .file = "test_sock_fields_kern.o",
                 .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-               .expected_attach_type = BPF_CGROUP_INET_EGRESS,
         };
-       int cgroup_fd, prog_fd, err;
+       int cgroup_fd, egress_fd, ingress_fd, err;
+       struct bpf_program *ingress_prog;
         struct bpf_object *obj;
         struct bpf_map *map;
  
@@ -293,12 +359,24 @@ int main(int argc, char **argv)
         err = join_cgroup(TEST_CGROUP);
         CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
  
-       err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+       err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
         CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
  
-       err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
+       ingress_prog = bpf_object__find_program_by_title(obj,
+                                                        "cgroup_skb/ingress");
+       CHECK(!ingress_prog,
+             "bpf_object__find_program_by_title(cgroup_skb/ingress)",
+             "not found");
+       ingress_fd = bpf_program__fd(ingress_prog);
+
+       err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
         CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
               "err:%d errno%d", err, errno);
+
+       err = bpf_prog_attach(ingress_fd, cgroup_fd,
+                             BPF_CGROUP_INET_INGRESS, 0);
+       CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
+             "err:%d errno%d", err, errno);
         close(cgroup_fd);
  
         map = bpf_object__find_map_by_name(obj, "addr_map");
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c

index 3ed3593..923f211 100644 (file)
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -605,3 +605,171 @@
         .prog_type = BPF_PROG_TYPE_SCHED_CLS,
         .result = ACCEPT,
  },
+{
+       "reference tracking: use ptr from bpf_tcp_sock() after release",
+       .insns = {
+       BPF_SK_LOOKUP,
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "invalid mem access",
+},
+{
+       "reference tracking: use ptr from bpf_sk_fullsock() after release",
+       .insns = {
+       BPF_SK_LOOKUP,
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "invalid mem access",
+},
+{
+       "reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
+       .insns = {
+       BPF_SK_LOOKUP,
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "invalid mem access",
+},
+{
+       "reference tracking: use sk after bpf_sk_release(tp)",
+       .insns = {
+       BPF_SK_LOOKUP,
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "invalid mem access",
+},
+{
+       "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
+       .insns = {
+       BPF_SK_LOOKUP,
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = ACCEPT,
+},
+{
+       "reference tracking: bpf_sk_release(listen_sk)",
+       .insns = {
+       BPF_SK_LOOKUP,
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "reference has not been acquired before",
+},
+{
+       /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
+       "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
+       .insns = {
+       BPF_SK_LOOKUP,
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+       BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_sk_release),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "invalid mem access",
+},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c

index 0ddfdf7..4164362 100644 (file)
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -342,7 +342,7 @@
         },
         .prog_type = BPF_PROG_TYPE_SCHED_CLS,
         .result = REJECT,
-       .errstr = "type=sock_common expected=sock",
+       .errstr = "reference has not been acquired before",
  },
  {
         "bpf_sk_release(bpf_sk_fullsock(skb->sk))",
@@ -380,5 +380,5 @@
         },
         .prog_type = BPF_PROG_TYPE_SCHED_CLS,
         .result = REJECT,
-       .errstr = "type=tcp_sock expected=sock",
+       .errstr = "reference has not been acquired before",
  },
author	David S. Miller <davem@davemloft.net>
	Sat, 16 Mar 2019 19:20:08 +0000 (12:20 -0700)
committer	David S. Miller <davem@davemloft.net>
	Sat, 16 Mar 2019 19:20:08 +0000 (12:20 -0700)
arch/powerpc/include/asm/ppc-opcode.h		patch \| blob \| history
arch/powerpc/net/bpf_jit.h		patch \| blob \| history
arch/powerpc/net/bpf_jit32.h		patch \| blob \| history
arch/powerpc/net/bpf_jit64.h		patch \| blob \| history
arch/powerpc/net/bpf_jit_comp64.c		patch \| blob \| history
include/linux/bpf.h		patch \| blob \| history
include/linux/bpf_verifier.h		patch \| blob \| history
include/net/xdp_sock.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
kernel/bpf/verifier.c		patch \| blob \| history
net/core/filter.c		patch \| blob \| history
net/xdp/xdp_umem.c		patch \| blob \| history
tools/include/uapi/linux/bpf.h		patch \| blob \| history
tools/lib/bpf/btf.c		patch \| blob \| history
tools/lib/bpf/libbpf.c		patch \| blob \| history
tools/lib/bpf/xsk.c		patch \| blob \| history
tools/testing/selftests/bpf/bpf_helpers.h		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/map_lock.c		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/spinlock.c		patch \| blob \| history
tools/testing/selftests/bpf/progs/test_sock_fields_kern.c		patch \| blob \| history
tools/testing/selftests/bpf/test_btf.c		patch \| blob \| history
tools/testing/selftests/bpf/test_sock_fields.c		patch \| blob \| history
tools/testing/selftests/bpf/verifier/ref_tracking.c		patch \| blob \| history
tools/testing/selftests/bpf/verifier/sock.c		patch \| blob \| history