/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");
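
/* Example (user space): a minimal sketch, assuming libbpf and a
 * generated skeleton handle named "skel" (illustrative, not part of
 * this file), of populating cpu_map; qsize and cpumap_prog_fd are
 * made-up values:
 *
 *	struct bpf_cpumap_val val = {
 *		.qsize = 2048,			// per-CPU frame queue size
 *		.bpf_prog.fd = cpumap_prog_fd,	// optional second-stage prog
 *	};
 *	__u32 cpu = 2;
 *
 *	bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map), &cpu, &val, 0);
 */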

/* Set of maps controlling available CPU, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
} cpus_available SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
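
/* Note: cpus_iterator is a PERCPU_ARRAY, so each RX CPU advances its
 * own round-robin position without atomics; the trade-off is that the
 * round-robin ordering is per-CPU rather than global.
 */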

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(struct bpf_devmap_val));
	__uint(max_entries, 1);
} tx_port SEC(".maps");
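
/* Example (user space): a sketch, assuming the same illustrative
 * skeleton, of pointing tx_port at an egress device so the cpumap
 * redirect program below can forward frames out again; egress_ifindex
 * and egress_prog_fd are hypothetical:
 *
 *	struct bpf_devmap_val val = {
 *		.ifindex = egress_ifindex,	// hypothetical egress device
 *		.bpf_prog.fd = egress_prog_fd,	// fd of xdp_redirect_egress_prog
 *	};
 *	__u32 key = 0;
 *
 *	bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
 */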

char tx_mac_addr[ETH_ALEN];

/* Helper parse functions */

/* Parse Ethernet; returns false on error or a non-supported Ethertype,
 * otherwise sets *eth_proto (host byte order) and *l3_offset.
 */
static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = bpf_ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = bpf_ntohs(udph->dest);
	return dport;
}
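
/* Note: this helper assumes an IPv4 header at nh_off.  When prognum4
 * below reaches it for UDP classified via IPv6, the iphdr cast does not
 * match the actual header and the iph->protocol check will normally
 * fail, so the port-9 filter effectively applies to IPv4 UDP only.
 */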

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
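
/* Example (user space): a minimal sketch, assuming libbpf v0.8+ and the
 * illustrative skeleton "skel", of attaching one of these programs in
 * native driver mode; ifindex is a made-up variable:
 *
 *	int prog_fd = bpf_program__fd(skel->progs.xdp_prognum0_no_touch);
 *
 *	bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_DRV_MODE, NULL);
 */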

SEC("xdp")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u16 eth_type;
	u32 key0 = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
		NO_TEAR_INC(rec->dropped);
		return XDP_DROP;
	}

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
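
/* The read-modify-write of *cpu_iterator above is not atomic, but it
 * does not need to be: the map is per-CPU, and XDP programs on a given
 * CPU run serially in NAPI context, so no other instance can race on
 * the same slot.
 */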

SEC("xdp")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;
	u16 dest_port;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			NO_TEAR_INC(rec->dropped);
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863
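
/* 15485863 is the 1,000,000th prime; the exact value is not critical,
 * it only seeds SuperFastHash below.
 */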

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hits
 * the same CPU.
 */
SEC("xdp")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key0 = 0;
	u32 *cpu_max;
	u32 cpu_hash;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
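
/* Example (user space): a minimal sketch, assuming libbpf, of filling
 * cpus_available and cpus_count before attaching; avail_fd, count_fd
 * and the CPU list are illustrative:
 *
 *	__u32 cpus[] = { 2, 3 };	// hypothetical redirect target CPUs
 *	__u32 n = 2, key0 = 0;
 *	__u32 i;
 *
 *	for (i = 0; i < n; i++)
 *		bpf_map_update_elem(avail_fd, &i, &cpus[i], 0);
 *	bpf_map_update_elem(count_fd, &key0, &n, 0);
 */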

SEC("xdp_cpumap/redirect")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	swap_src_dst_mac(data);
	return bpf_redirect_map(&tx_port, 0, 0);
}
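
/* Programs in the xdp_cpumap/ sections run after cpumap enqueue, on the
 * kthread of the CPU the frame was redirected to; the one above chains
 * a second redirect out through the tx_port devmap.
 */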

SEC("xdp_cpumap/pass")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
	return XDP_PASS;
}

SEC("xdp_cpumap/drop")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
	return XDP_DROP;
}

SEC("xdp_devmap/egress")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	__builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

	return XDP_PASS;
}
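
/* tx_mac_addr above is zero-initialized BPF global data; a sketch,
 * assuming the illustrative skeleton "skel", of user space setting it
 * to the egress device's MAC before traffic flows:
 *
 *	memcpy(skel->bss->tx_mac_addr, mac, ETH_ALEN);
 */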

char _license[] SEC("license") = "GPL";