selftests, bpf: Extend test_tc_tunnel test with vxlan
author	Xuesen Huang <huangxuesen@kuaishou.com>
Fri, 5 Mar 2021 12:33:47 +0000 (20:33 +0800)
committer	Daniel Borkmann <daniel@iogearbox.net>
Fri, 5 Mar 2021 22:58:59 +0000 (23:58 +0100)
Add the BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests that
encapsulate an Ethernet header as the inner L2 header.

Also add vxlan encapsulation test cases for IPv4 and IPv6 outer headers.
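For reference, the new path builds a vxlan header in the outer-header scratch
buffer in front of the copied inner Ethernet header, and reserves room for both
via BPF_F_ADJ_ROOM_ENCAP_L2() combined with the new BPF_F_ADJ_ROOM_ENCAP_L2_ETH
flag. The following is a condensed sketch of that sequence, not the exact code
in the patch below: the helper name encap_vxlan_sketch is illustrative, and the
flag/VNI values are written as plain literals rather than the patch's
VXLAN_FLAGS/VXLAN_VNI_MASK expressions.

	/* Condensed sketch of the vxlan-in-UDP L2 encap path. */
	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <linux/pkt_cls.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_endian.h>

	struct vxlanhdr {
		__be32 vx_flags;
		__be32 vx_vni;
	} __attribute__((packed));

	/* h_outer already holds the outer IPv4 + UDP headers (olen bytes).
	 * Append vxlan header + inner Ethernet header, grow the MAC room,
	 * and tell the kernel the inner L2 header is Ethernet.
	 */
	static __always_inline int encap_vxlan_sketch(struct __sk_buff *skb,
						      __u8 *h_outer, int olen)
	{
		int l2_len = ETH_HLEN + sizeof(struct vxlanhdr);
		struct vxlanhdr *vxh = (struct vxlanhdr *)(h_outer + olen);
		__u64 flags;

		vxh->vx_flags = bpf_htonl(1 << 27);	/* "I" bit: VNI present */
		vxh->vx_vni = bpf_htonl(1 << 8);	/* VNI 1 in upper 24 bits */

		/* Inner Ethernet header goes right behind the vxlan header. */
		if (bpf_skb_load_bytes(skb, 0, (__u8 *)(vxh + 1), ETH_HLEN))
			return TC_ACT_SHOT;
		olen += l2_len;

		flags = BPF_F_ADJ_ROOM_FIXED_GSO |
			BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
			BPF_F_ADJ_ROOM_ENCAP_L4_UDP |
			BPF_F_ADJ_ROOM_ENCAP_L2(l2_len) |
			BPF_F_ADJ_ROOM_ENCAP_L2_ETH;	/* inner L2 is Ethernet */

		if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
			return TC_ACT_SHOT;

		/* Write outer IP + UDP + vxlan + inner Ethernet behind the
		 * original Ethernet header.
		 */
		if (bpf_skb_store_bytes(skb, ETH_HLEN, h_outer, olen,
					BPF_F_INVALIDATE_HASH))
			return TC_ACT_SHOT;

		return TC_ACT_OK;
	}

On the receive side the shell script relies on a regular vxlan device
(id 1, dstport 8472) to decapsulate, so both directions of the tunnel are
exercised.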

Signed-off-by: Xuesen Huang <huangxuesen@kuaishou.com>
Signed-off-by: Li Wang <wangli09@kuaishou.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/bpf/20210305123347.15311-1-hxseverything@gmail.com
tools/testing/selftests/bpf/progs/test_tc_tunnel.c
tools/testing/selftests/bpf/test_tc_tunnel.sh

index 37bce7a..84cd632 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -24,14 +24,29 @@ static const int cfg_port = 8000;
 
 static const int cfg_udp_src = 20000;
 
+#define        L2_PAD_SZ       (sizeof(struct vxlanhdr) + ETH_HLEN)
+
 #define        UDP_PORT                5555
 #define        MPLS_OVER_UDP_PORT      6635
 #define        ETH_OVER_UDP_PORT       7777
+#define        VXLAN_UDP_PORT          8472
+
+#define        EXTPROTO_VXLAN  0x1
+
+#define        VXLAN_N_VID     (1u << 24)
+#define        VXLAN_VNI_MASK  bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define        VXLAN_FLAGS     0x8
+#define        VXLAN_VNI       1
 
 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
                                                     MPLS_LS_S_MASK | 0xff);
 
+struct vxlanhdr {
+       __be32 vx_flags;
+       __be32 vx_vni;
+} __attribute__((packed));
+
 struct gre_hdr {
        __be16 flags;
        __be16 protocol;
@@ -45,13 +60,13 @@ union l4hdr {
 struct v4hdr {
        struct iphdr ip;
        union l4hdr l4hdr;
-       __u8 pad[16];                   /* enough space for L2 header */
+       __u8 pad[L2_PAD_SZ];            /* space for L2 header / vxlan header ... */
 } __attribute__((packed));
 
 struct v6hdr {
        struct ipv6hdr ip;
        union l4hdr l4hdr;
-       __u8 pad[16];                   /* enough space for L2 header */
+       __u8 pad[L2_PAD_SZ];            /* space for L2 header / vxlan header ... */
 } __attribute__((packed));
 
 static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
        iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
-                                     __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+                                       __u16 l2_proto, __u16 ext_proto)
 {
        __u16 udp_dst = UDP_PORT;
        struct iphdr iph_inner;
        struct v4hdr h_outer;
        struct tcphdr tcph;
        int olen, l2_len;
+       __u8 *l2_hdr = NULL;
        int tcp_off;
        __u64 flags;
 
@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
                break;
        case ETH_P_TEB:
                l2_len = ETH_HLEN;
-               udp_dst = ETH_OVER_UDP_PORT;
+               if (ext_proto & EXTPROTO_VXLAN) {
+                       udp_dst = VXLAN_UDP_PORT;
+                       l2_len += sizeof(struct vxlanhdr);
+               } else
+                       udp_dst = ETH_OVER_UDP_PORT;
                break;
        }
        flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
        }
 
        /* add L2 encap (if specified) */
+       l2_hdr = (__u8 *)&h_outer + olen;
        switch (l2_proto) {
        case ETH_P_MPLS_UC:
-               *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+               *(__u32 *)l2_hdr = mpls_label;
                break;
        case ETH_P_TEB:
-               if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
-                                      ETH_HLEN))
+               flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+               if (ext_proto & EXTPROTO_VXLAN) {
+                       struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+                       vxlan_hdr->vx_flags = VXLAN_FLAGS;
+                       vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+                       l2_hdr += sizeof(struct vxlanhdr);
+               }
+
+               if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
                        return TC_ACT_SHOT;
+
                break;
        }
        olen += l2_len;
@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
        return TC_ACT_OK;
 }
 
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
                                      __u16 l2_proto)
+{
+       return __encap_ipv4(skb, encap_proto, l2_proto, 0);
+}
+
+static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+                                       __u16 l2_proto, __u16 ext_proto)
 {
        __u16 udp_dst = UDP_PORT;
        struct ipv6hdr iph_inner;
        struct v6hdr h_outer;
        struct tcphdr tcph;
        int olen, l2_len;
+       __u8 *l2_hdr = NULL;
        __u16 tot_len;
        __u64 flags;
 
@@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
                break;
        case ETH_P_TEB:
                l2_len = ETH_HLEN;
-               udp_dst = ETH_OVER_UDP_PORT;
+               if (ext_proto & EXTPROTO_VXLAN) {
+                       udp_dst = VXLAN_UDP_PORT;
+                       l2_len += sizeof(struct vxlanhdr);
+               } else
+                       udp_dst = ETH_OVER_UDP_PORT;
                break;
        }
        flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
                h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
                h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
                tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
-                         sizeof(h_outer.l4hdr.udp);
+                         sizeof(h_outer.l4hdr.udp) + l2_len;
                h_outer.l4hdr.udp.check = 0;
                h_outer.l4hdr.udp.len = bpf_htons(tot_len);
                break;
@@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
        }
 
        /* add L2 encap (if specified) */
+       l2_hdr = (__u8 *)&h_outer + olen;
        switch (l2_proto) {
        case ETH_P_MPLS_UC:
-               *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+               *(__u32 *)l2_hdr = mpls_label;
                break;
        case ETH_P_TEB:
-               if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
-                                      ETH_HLEN))
+               flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+               if (ext_proto & EXTPROTO_VXLAN) {
+                       struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+                       vxlan_hdr->vx_flags = VXLAN_FLAGS;
+                       vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+                       l2_hdr += sizeof(struct vxlanhdr);
+               }
+
+               if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
                        return TC_ACT_SHOT;
                break;
        }
@@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
        return TC_ACT_OK;
 }
 
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+                                     __u16 l2_proto)
+{
+       return __encap_ipv6(skb, encap_proto, l2_proto, 0);
+}
+
 SEC("encap_ipip_none")
 int __encap_ipip_none(struct __sk_buff *skb)
 {
@@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb)
                return TC_ACT_OK;
 }
 
+SEC("encap_vxlan_eth")
+int __encap_vxlan_eth(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+               return __encap_ipv4(skb, IPPROTO_UDP,
+                                   ETH_P_TEB,
+                                   EXTPROTO_VXLAN);
+       else
+               return TC_ACT_OK;
+}
+
 SEC("encap_sit_none")
 int __encap_sit_none(struct __sk_buff *skb)
 {
@@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
                return TC_ACT_OK;
 }
 
+SEC("encap_ip6vxlan_eth")
+int __encap_ip6vxlan_eth(struct __sk_buff *skb)
+{
+       if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+               return __encap_ipv6(skb, IPPROTO_UDP,
+                                   ETH_P_TEB,
+                                   EXTPROTO_VXLAN);
+       else
+               return TC_ACT_OK;
+}
+
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
        char buf[sizeof(struct v6hdr)];
@@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
                case ETH_OVER_UDP_PORT:
                        olen += ETH_HLEN;
                        break;
+               case VXLAN_UDP_PORT:
+                       olen += ETH_HLEN + sizeof(struct vxlanhdr);
+                       break;
                }
                break;
        default:
index 7c76b84..c9dde9b 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -44,8 +44,8 @@ setup() {
        # clamp route to reserve room for tunnel headers
        ip -netns "${ns1}" -4 route flush table main
        ip -netns "${ns1}" -6 route flush table main
-       ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
-       ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
+       ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
+       ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
 
        sleep 1
 
@@ -105,6 +105,12 @@ if [[ "$#" -eq "0" ]]; then
        echo "sit"
        $0 ipv6 sit none 100
 
+       echo "ip4 vxlan"
+       $0 ipv4 vxlan eth 2000
+
+       echo "ip6 vxlan"
+       $0 ipv6 ip6vxlan eth 2000
+
        for mac in none mpls eth ; do
                echo "ip gre $mac"
                $0 ipv4 gre $mac 100
@@ -214,6 +220,9 @@ if [[ "$tuntype" =~ "udp" ]]; then
        targs="encap fou encap-sport auto encap-dport $dport"
 elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
        ttype=$gretaptype
+elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
+       ttype="vxlan"
+       targs="id 1 dstport 8472 udp6zerocsumrx"
 else
        ttype=$tuntype
        targs=""
@@ -242,7 +251,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
 elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
        # No support for TEB fou tunnel; expect failure.
        expect_tun_fail=1
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
        # Share ethernet address between tunnel/veth2 so L2 decap works.
        ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
                  awk '/ether/ { print $2 }')