Merge tag 'fixes-v5.10a' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris...
[linux-2.6-microblaze.git] / net / psample / psample.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * net/psample/psample.c - Netlink channel for packet sampling
4  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
5  */
6
7 #include <linux/types.h>
8 #include <linux/kernel.h>
9 #include <linux/skbuff.h>
10 #include <linux/module.h>
11 #include <net/net_namespace.h>
12 #include <net/sock.h>
13 #include <net/netlink.h>
14 #include <net/genetlink.h>
15 #include <net/psample.h>
16 #include <linux/spinlock.h>
17 #include <net/ip_tunnels.h>
18 #include <net/dst_metadata.h>
19
/* Upper bound, in bytes, for the attributes of one sample message: the
 * metadata attributes plus the packet-data attribute must not exceed it.
 */
#define PSAMPLE_MAX_PACKET_SIZE 0xffff

/* Every psample group that currently exists, for all network namespaces. */
static LIST_HEAD(psample_groups_list);
/* Taken (with BHs disabled) around walks/updates of psample_groups_list and
 * around changes to a group's refcount.
 */
static DEFINE_SPINLOCK(psample_groups_lock);
24
/* Multicast groups of the psample family; each enumerator is the index of
 * the matching entry in psample_nl_mcgrps[].
 */
enum psample_nl_multicast_groups {
        /* group creation/deletion notifications */
        PSAMPLE_NL_MCGRP_CONFIG,
        /* sampled packets */
        PSAMPLE_NL_MCGRP_SAMPLE,
};
30
31 static const struct genl_multicast_group psample_nl_mcgrps[] = {
32         [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME },
33         [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME },
34 };
35
36 static struct genl_family psample_nl_family __ro_after_init;
37
38 static int psample_group_nl_fill(struct sk_buff *msg,
39                                  struct psample_group *group,
40                                  enum psample_command cmd, u32 portid, u32 seq,
41                                  int flags)
42 {
43         void *hdr;
44         int ret;
45
46         hdr = genlmsg_put(msg, portid, seq, &psample_nl_family, flags, cmd);
47         if (!hdr)
48                 return -EMSGSIZE;
49
50         ret = nla_put_u32(msg, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num);
51         if (ret < 0)
52                 goto error;
53
54         ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_REFCOUNT, group->refcount);
55         if (ret < 0)
56                 goto error;
57
58         ret = nla_put_u32(msg, PSAMPLE_ATTR_GROUP_SEQ, group->seq);
59         if (ret < 0)
60                 goto error;
61
62         genlmsg_end(msg, hdr);
63         return 0;
64
65 error:
66         genlmsg_cancel(msg, hdr);
67         return -EMSGSIZE;
68 }
69
70 static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
71                                            struct netlink_callback *cb)
72 {
73         struct psample_group *group;
74         int start = cb->args[0];
75         int idx = 0;
76         int err;
77
78         spin_lock_bh(&psample_groups_lock);
79         list_for_each_entry(group, &psample_groups_list, list) {
80                 if (!net_eq(group->net, sock_net(msg->sk)))
81                         continue;
82                 if (idx < start) {
83                         idx++;
84                         continue;
85                 }
86                 err = psample_group_nl_fill(msg, group, PSAMPLE_CMD_NEW_GROUP,
87                                             NETLINK_CB(cb->skb).portid,
88                                             cb->nlh->nlmsg_seq, NLM_F_MULTI);
89                 if (err)
90                         break;
91                 idx++;
92         }
93
94         spin_unlock_bh(&psample_groups_lock);
95         cb->args[0] = idx;
96         return msg->len;
97 }
98
99 static const struct genl_small_ops psample_nl_ops[] = {
100         {
101                 .cmd = PSAMPLE_CMD_GET_GROUP,
102                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
103                 .dumpit = psample_nl_cmd_get_group_dumpit,
104                 /* can be retrieved by unprivileged users */
105         }
106 };
107
108 static struct genl_family psample_nl_family __ro_after_init = {
109         .name           = PSAMPLE_GENL_NAME,
110         .version        = PSAMPLE_GENL_VERSION,
111         .maxattr        = PSAMPLE_ATTR_MAX,
112         .netnsok        = true,
113         .module         = THIS_MODULE,
114         .mcgrps         = psample_nl_mcgrps,
115         .small_ops      = psample_nl_ops,
116         .n_small_ops    = ARRAY_SIZE(psample_nl_ops),
117         .n_mcgrps       = ARRAY_SIZE(psample_nl_mcgrps),
118 };
119
120 static void psample_group_notify(struct psample_group *group,
121                                  enum psample_command cmd)
122 {
123         struct sk_buff *msg;
124         int err;
125
126         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
127         if (!msg)
128                 return;
129
130         err = psample_group_nl_fill(msg, group, cmd, 0, 0, NLM_F_MULTI);
131         if (!err)
132                 genlmsg_multicast_netns(&psample_nl_family, group->net, msg, 0,
133                                         PSAMPLE_NL_MCGRP_CONFIG, GFP_ATOMIC);
134         else
135                 nlmsg_free(msg);
136 }
137
138 static struct psample_group *psample_group_create(struct net *net,
139                                                   u32 group_num)
140 {
141         struct psample_group *group;
142
143         group = kzalloc(sizeof(*group), GFP_ATOMIC);
144         if (!group)
145                 return NULL;
146
147         group->net = net;
148         group->group_num = group_num;
149         list_add_tail(&group->list, &psample_groups_list);
150
151         psample_group_notify(group, PSAMPLE_CMD_NEW_GROUP);
152         return group;
153 }
154
155 static void psample_group_destroy(struct psample_group *group)
156 {
157         psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP);
158         list_del(&group->list);
159         kfree_rcu(group, rcu);
160 }
161
162 static struct psample_group *
163 psample_group_lookup(struct net *net, u32 group_num)
164 {
165         struct psample_group *group;
166
167         list_for_each_entry(group, &psample_groups_list, list)
168                 if ((group->group_num == group_num) && (group->net == net))
169                         return group;
170         return NULL;
171 }
172
173 struct psample_group *psample_group_get(struct net *net, u32 group_num)
174 {
175         struct psample_group *group;
176
177         spin_lock_bh(&psample_groups_lock);
178
179         group = psample_group_lookup(net, group_num);
180         if (!group) {
181                 group = psample_group_create(net, group_num);
182                 if (!group)
183                         goto out;
184         }
185         group->refcount++;
186
187 out:
188         spin_unlock_bh(&psample_groups_lock);
189         return group;
190 }
191 EXPORT_SYMBOL_GPL(psample_group_get);
192
193 void psample_group_take(struct psample_group *group)
194 {
195         spin_lock_bh(&psample_groups_lock);
196         group->refcount++;
197         spin_unlock_bh(&psample_groups_lock);
198 }
199 EXPORT_SYMBOL_GPL(psample_group_take);
200
201 void psample_group_put(struct psample_group *group)
202 {
203         spin_lock_bh(&psample_groups_lock);
204
205         if (--group->refcount == 0)
206                 psample_group_destroy(group);
207
208         spin_unlock_bh(&psample_groups_lock);
209 }
210 EXPORT_SYMBOL_GPL(psample_group_put);
211
212 #ifdef CONFIG_INET
213 static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
214                               struct ip_tunnel_info *tun_info)
215 {
216         unsigned short tun_proto = ip_tunnel_info_af(tun_info);
217         const void *tun_opts = ip_tunnel_info_opts(tun_info);
218         const struct ip_tunnel_key *tun_key = &tun_info->key;
219         int tun_opts_len = tun_info->options_len;
220
221         if (tun_key->tun_flags & TUNNEL_KEY &&
222             nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id,
223                          PSAMPLE_TUNNEL_KEY_ATTR_PAD))
224                 return -EMSGSIZE;
225
226         if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE &&
227             nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE))
228                 return -EMSGSIZE;
229
230         switch (tun_proto) {
231         case AF_INET:
232                 if (tun_key->u.ipv4.src &&
233                     nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC,
234                                     tun_key->u.ipv4.src))
235                         return -EMSGSIZE;
236                 if (tun_key->u.ipv4.dst &&
237                     nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST,
238                                     tun_key->u.ipv4.dst))
239                         return -EMSGSIZE;
240                 break;
241         case AF_INET6:
242                 if (!ipv6_addr_any(&tun_key->u.ipv6.src) &&
243                     nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC,
244                                      &tun_key->u.ipv6.src))
245                         return -EMSGSIZE;
246                 if (!ipv6_addr_any(&tun_key->u.ipv6.dst) &&
247                     nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST,
248                                      &tun_key->u.ipv6.dst))
249                         return -EMSGSIZE;
250                 break;
251         }
252         if (tun_key->tos &&
253             nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos))
254                 return -EMSGSIZE;
255         if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl))
256                 return -EMSGSIZE;
257         if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
258             nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
259                 return -EMSGSIZE;
260         if ((tun_key->tun_flags & TUNNEL_CSUM) &&
261             nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM))
262                 return -EMSGSIZE;
263         if (tun_key->tp_src &&
264             nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src))
265                 return -EMSGSIZE;
266         if (tun_key->tp_dst &&
267             nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst))
268                 return -EMSGSIZE;
269         if ((tun_key->tun_flags & TUNNEL_OAM) &&
270             nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM))
271                 return -EMSGSIZE;
272         if (tun_opts_len) {
273                 if (tun_key->tun_flags & TUNNEL_GENEVE_OPT &&
274                     nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS,
275                             tun_opts_len, tun_opts))
276                         return -EMSGSIZE;
277                 else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT &&
278                          nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
279                                  tun_opts_len, tun_opts))
280                         return -EMSGSIZE;
281         }
282
283         return 0;
284 }
285
286 static int psample_ip_tun_to_nlattr(struct sk_buff *skb,
287                             struct ip_tunnel_info *tun_info)
288 {
289         struct nlattr *nla;
290         int err;
291
292         nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL);
293         if (!nla)
294                 return -EMSGSIZE;
295
296         err = __psample_ip_tun_to_nlattr(skb, tun_info);
297         if (err) {
298                 nla_nest_cancel(skb, nla);
299                 return err;
300         }
301
302         nla_nest_end(skb, nla);
303
304         return 0;
305 }
306
307 static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
308 {
309         unsigned short tun_proto = ip_tunnel_info_af(tun_info);
310         const struct ip_tunnel_key *tun_key = &tun_info->key;
311         int tun_opts_len = tun_info->options_len;
312         int sum = 0;
313
314         if (tun_key->tun_flags & TUNNEL_KEY)
315                 sum += nla_total_size(sizeof(u64));
316
317         if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
318                 sum += nla_total_size(0);
319
320         switch (tun_proto) {
321         case AF_INET:
322                 if (tun_key->u.ipv4.src)
323                         sum += nla_total_size(sizeof(u32));
324                 if (tun_key->u.ipv4.dst)
325                         sum += nla_total_size(sizeof(u32));
326                 break;
327         case AF_INET6:
328                 if (!ipv6_addr_any(&tun_key->u.ipv6.src))
329                         sum += nla_total_size(sizeof(struct in6_addr));
330                 if (!ipv6_addr_any(&tun_key->u.ipv6.dst))
331                         sum += nla_total_size(sizeof(struct in6_addr));
332                 break;
333         }
334         if (tun_key->tos)
335                 sum += nla_total_size(sizeof(u8));
336         sum += nla_total_size(sizeof(u8));      /* TTL */
337         if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT)
338                 sum += nla_total_size(0);
339         if (tun_key->tun_flags & TUNNEL_CSUM)
340                 sum += nla_total_size(0);
341         if (tun_key->tp_src)
342                 sum += nla_total_size(sizeof(u16));
343         if (tun_key->tp_dst)
344                 sum += nla_total_size(sizeof(u16));
345         if (tun_key->tun_flags & TUNNEL_OAM)
346                 sum += nla_total_size(0);
347         if (tun_opts_len) {
348                 if (tun_key->tun_flags & TUNNEL_GENEVE_OPT)
349                         sum += nla_total_size(tun_opts_len);
350                 else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT)
351                         sum += nla_total_size(tun_opts_len);
352         }
353
354         return sum;
355 }
356 #endif
357
358 void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
359                            u32 trunc_size, int in_ifindex, int out_ifindex,
360                            u32 sample_rate)
361 {
362 #ifdef CONFIG_INET
363         struct ip_tunnel_info *tun_info;
364 #endif
365         struct sk_buff *nl_skb;
366         int data_len;
367         int meta_len;
368         void *data;
369         int ret;
370
371         meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
372                    (out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
373                    nla_total_size(sizeof(u32)) +        /* sample_rate */
374                    nla_total_size(sizeof(u32)) +        /* orig_size */
375                    nla_total_size(sizeof(u32)) +        /* group_num */
376                    nla_total_size(sizeof(u32));         /* seq */
377
378 #ifdef CONFIG_INET
379         tun_info = skb_tunnel_info(skb);
380         if (tun_info)
381                 meta_len += psample_tunnel_meta_len(tun_info);
382 #endif
383
384         data_len = min(skb->len, trunc_size);
385         if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE)
386                 data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN
387                             - NLA_ALIGNTO;
388
389         nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC);
390         if (unlikely(!nl_skb))
391                 return;
392
393         data = genlmsg_put(nl_skb, 0, 0, &psample_nl_family, 0,
394                            PSAMPLE_CMD_SAMPLE);
395         if (unlikely(!data))
396                 goto error;
397
398         if (in_ifindex) {
399                 ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_IIFINDEX, in_ifindex);
400                 if (unlikely(ret < 0))
401                         goto error;
402         }
403
404         if (out_ifindex) {
405                 ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OIFINDEX, out_ifindex);
406                 if (unlikely(ret < 0))
407                         goto error;
408         }
409
410         ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_RATE, sample_rate);
411         if (unlikely(ret < 0))
412                 goto error;
413
414         ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_ORIGSIZE, skb->len);
415         if (unlikely(ret < 0))
416                 goto error;
417
418         ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_SAMPLE_GROUP, group->group_num);
419         if (unlikely(ret < 0))
420                 goto error;
421
422         ret = nla_put_u32(nl_skb, PSAMPLE_ATTR_GROUP_SEQ, group->seq++);
423         if (unlikely(ret < 0))
424                 goto error;
425
426         if (data_len) {
427                 int nla_len = nla_total_size(data_len);
428                 struct nlattr *nla;
429
430                 nla = skb_put(nl_skb, nla_len);
431                 nla->nla_type = PSAMPLE_ATTR_DATA;
432                 nla->nla_len = nla_attr_size(data_len);
433
434                 if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
435                         goto error;
436         }
437
438 #ifdef CONFIG_INET
439         if (tun_info) {
440                 ret = psample_ip_tun_to_nlattr(nl_skb, tun_info);
441                 if (unlikely(ret < 0))
442                         goto error;
443         }
444 #endif
445
446         genlmsg_end(nl_skb, data);
447         genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
448                                 PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
449
450         return;
451 error:
452         pr_err_ratelimited("Could not create psample log message\n");
453         nlmsg_free(nl_skb);
454 }
455 EXPORT_SYMBOL_GPL(psample_sample_packet);
456
457 static int __init psample_module_init(void)
458 {
459         return genl_register_family(&psample_nl_family);
460 }
461
462 static void __exit psample_module_exit(void)
463 {
464         genl_unregister_family(&psample_nl_family);
465 }
466
467 module_init(psample_module_init);
468 module_exit(psample_module_exit);
469
470 MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
471 MODULE_DESCRIPTION("netlink channel for packet sampling");
472 MODULE_LICENSE("GPL v2");