Merge tag 'for-5.20/parisc-1' of git://git.kernel.org/pub/scm/linux/kernel/git/deller...
[linux-2.6-microblaze.git] / samples / bpf / hbm_kern.h
1 /* SPDX-License-Identifier: GPL-2.0
2  *
3  * Copyright (c) 2019 Facebook
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 2 of the GNU General Public
7  * License as published by the Free Software Foundation.
8  *
9  * Include file for sample Host Bandwidth Manager (HBM) BPF programs
10  */
11 #define KBUILD_MODNAME "foo"
12 #include <uapi/linux/bpf.h>
13 #include <uapi/linux/if_ether.h>
14 #include <uapi/linux/if_packet.h>
15 #include <uapi/linux/ip.h>
16 #include <uapi/linux/ipv6.h>
17 #include <uapi/linux/in.h>
18 #include <uapi/linux/tcp.h>
19 #include <uapi/linux/filter.h>
20 #include <uapi/linux/pkt_cls.h>
21 #include <net/ipv6.h>
22 #include <net/inet_ecn.h>
23 #include <bpf/bpf_endian.h>
24 #include <bpf/bpf_helpers.h>
25 #include "hbm.h"
26
/* Verdict and flag constants shared by the HBM programs */
#define DROP_PKT        0
#define ALLOW_PKT       1
#define TCP_ECN_OK      1
#define CWR             2

/*
 * Debug output is compiled out by default: unless HBM_DEBUG is defined,
 * bpf_printk() is redefined to expand to nothing.
 */
#ifndef HBM_DEBUG
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif

/*
 * Byte-based credit accounting. All thresholds are expressed as multiples
 * of MAX_BYTES_PER_PACKET.
 */
#define INITIAL_CREDIT_PACKETS  100
#define MAX_BYTES_PER_PACKET    1500
#define MARK_THRESH             (40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH             (80 * 5 * MAX_BYTES_PER_PACKET)
#define LARGE_PKT_DROP_THRESH   (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
#define MARK_REGION_SIZE        (LARGE_PKT_DROP_THRESH - MARK_THRESH)
#define LARGE_PKT_THRESH        120
#define MAX_CREDIT              (100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT             (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)

/* Time-based accounting for fq's EDT; all values are in nanoseconds */
#define BURST_SIZE_NS           100000 /* 100us */
#define MARK_THRESH_NS          50000 /* 50us */
#define DROP_THRESH_NS          500000 /* 500us */
/* Reserve 20us of queuing for small packets (less than 120 bytes) */
#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
#define MARK_REGION_SIZE_NS     (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)

/*
 * Conversions between elapsed time and bytes. "rate" is in bytes per ns,
 * scaled up by 2^20 (i.e. << 20), hence the shifts below.
 */
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
59
60 struct {
61         __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
62         __type(key, struct bpf_cgroup_storage_key);
63         __type(value, struct hbm_vqueue);
64 } queue_state SEC(".maps");
65
66 struct {
67         __uint(type, BPF_MAP_TYPE_ARRAY);
68         __uint(max_entries, 1);
69         __type(key, u32);
70         __type(value, struct hbm_queue_stats);
71 } queue_stats SEC(".maps");
72
/*
 * Per-packet information gathered by hbm_get_pkt_info() and
 * get_tcp_info().
 */
struct hbm_pkt_info {
        int     cwnd;           /* snd_cwnd of the TCP socket, else 0 */
        int     rtt;            /* srtt_us >> 3 of the TCP socket, else 0 */
        int     packets_out;    /* packets_out of the TCP socket, else 0 */
        bool    is_ip;          /* header version field is 4 or 6 */
        bool    is_tcp;         /* IP protocol / next header is TCP (6) */
        short   ecn;            /* header ECN bits (masked by INET_ECN_MASK) */
};
81
82 static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
83 {
84         struct bpf_sock *sk;
85         struct bpf_tcp_sock *tp;
86
87         sk = skb->sk;
88         if (sk) {
89                 sk = bpf_sk_fullsock(sk);
90                 if (sk) {
91                         if (sk->protocol == IPPROTO_TCP) {
92                                 tp = bpf_tcp_sock(sk);
93                                 if (tp) {
94                                         pkti->cwnd = tp->snd_cwnd;
95                                         pkti->rtt = tp->srtt_us >> 3;
96                                         pkti->packets_out = tp->packets_out;
97                                         return 0;
98                                 }
99                         }
100                 }
101         }
102         pkti->cwnd = 0;
103         pkti->rtt = 0;
104         pkti->packets_out = 0;
105         return 1;
106 }
107
108 static void hbm_get_pkt_info(struct __sk_buff *skb,
109                              struct hbm_pkt_info *pkti)
110 {
111         struct iphdr iph;
112         struct ipv6hdr *ip6h;
113
114         pkti->cwnd = 0;
115         pkti->rtt = 0;
116         bpf_skb_load_bytes(skb, 0, &iph, 12);
117         if (iph.version == 6) {
118                 ip6h = (struct ipv6hdr *)&iph;
119                 pkti->is_ip = true;
120                 pkti->is_tcp = (ip6h->nexthdr == 6);
121                 pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
122         } else if (iph.version == 4) {
123                 pkti->is_ip = true;
124                 pkti->is_tcp = (iph.protocol == 6);
125                 pkti->ecn = iph.tos & INET_ECN_MASK;
126         } else {
127                 pkti->is_ip = false;
128                 pkti->is_tcp = false;
129                 pkti->ecn = 0;
130         }
131         if (pkti->is_tcp)
132                 get_tcp_info(skb, pkti);
133 }
134
135 static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
136 {
137         bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
138         qdp->lasttime = bpf_ktime_get_ns();
139         qdp->credit = INIT_CREDIT;
140         qdp->rate = rate * 128;
141 }
142
143 static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
144                                                 int rate)
145 {
146         unsigned long long curtime;
147
148         curtime = bpf_ktime_get_ns();
149         bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
150         qdp->lasttime = curtime - BURST_SIZE_NS;        // support initial burst
151         qdp->credit = 0;                                // not used
152         qdp->rate = rate * 128;
153 }
154
155 static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
156                                              int len,
157                                              unsigned long long curtime,
158                                              bool congestion_flag,
159                                              bool drop_flag,
160                                              bool cwr_flag,
161                                              bool ecn_ce_flag,
162                                              struct hbm_pkt_info *pkti,
163                                              int credit)
164 {
165         int rv = ALLOW_PKT;
166
167         if (qsp != NULL) {
168                 // Following is needed for work conserving
169                 __sync_add_and_fetch(&(qsp->bytes_total), len);
170                 if (qsp->stats) {
171                         // Optionally update statistics
172                         if (qsp->firstPacketTime == 0)
173                                 qsp->firstPacketTime = curtime;
174                         qsp->lastPacketTime = curtime;
175                         __sync_add_and_fetch(&(qsp->pkts_total), 1);
176                         if (congestion_flag) {
177                                 __sync_add_and_fetch(&(qsp->pkts_marked), 1);
178                                 __sync_add_and_fetch(&(qsp->bytes_marked), len);
179                         }
180                         if (drop_flag) {
181                                 __sync_add_and_fetch(&(qsp->pkts_dropped), 1);
182                                 __sync_add_and_fetch(&(qsp->bytes_dropped),
183                                                      len);
184                         }
185                         if (ecn_ce_flag)
186                                 __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
187                         if (pkti->cwnd) {
188                                 __sync_add_and_fetch(&(qsp->sum_cwnd),
189                                                      pkti->cwnd);
190                                 __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
191                         }
192                         if (pkti->rtt)
193                                 __sync_add_and_fetch(&(qsp->sum_rtt),
194                                                      pkti->rtt);
195                         __sync_add_and_fetch(&(qsp->sum_credit), credit);
196
197                         if (drop_flag)
198                                 rv = DROP_PKT;
199                         if (cwr_flag)
200                                 rv |= 2;
201                         if (rv == DROP_PKT)
202                                 __sync_add_and_fetch(&(qsp->returnValCount[0]),
203                                                      1);
204                         else if (rv == ALLOW_PKT)
205                                 __sync_add_and_fetch(&(qsp->returnValCount[1]),
206                                                      1);
207                         else if (rv == 2)
208                                 __sync_add_and_fetch(&(qsp->returnValCount[2]),
209                                                      1);
210                         else if (rv == 3)
211                                 __sync_add_and_fetch(&(qsp->returnValCount[3]),
212                                                      1);
213                 }
214         }
215 }