net/core/drop_monitor.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Monitoring code for network dropped packet alerts
4  *
5  * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
6  */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/netdevice.h>
11 #include <linux/etherdevice.h>
12 #include <linux/string.h>
13 #include <linux/if_arp.h>
14 #include <linux/inetdevice.h>
15 #include <linux/inet.h>
16 #include <linux/interrupt.h>
17 #include <linux/netpoll.h>
18 #include <linux/sched.h>
19 #include <linux/delay.h>
20 #include <linux/types.h>
21 #include <linux/workqueue.h>
22 #include <linux/netlink.h>
23 #include <linux/net_dropmon.h>
24 #include <linux/percpu.h>
25 #include <linux/timer.h>
26 #include <linux/bitops.h>
27 #include <linux/slab.h>
28 #include <linux/module.h>
29 #include <net/drop_monitor.h>
30 #include <net/genetlink.h>
31 #include <net/netevent.h>
32 #include <net/flow_offload.h>
33
34 #include <trace/events/skb.h>
35 #include <trace/events/napi.h>
36
37 #include <asm/unaligned.h>
38
39 #define TRACE_ON 1
40 #define TRACE_OFF 0
41
42 /*
43  * Globals: the software and hardware trace states,
44  * and the work handles that send netlink alerts
45  * up to user space.
46  */
47 static int trace_state = TRACE_OFF;
48 static bool monitor_hw;
49
50 /* net_dm_mutex
51  *
52  * An overall lock guarding every operation coming from userspace.
53  * It also guards the global 'hw_stats_list' list.
54  */
55 static DEFINE_MUTEX(net_dm_mutex);
56
57 struct net_dm_stats {
58         u64 dropped;
59         struct u64_stats_sync syncp;
60 };
61
62 #define NET_DM_MAX_HW_TRAP_NAME_LEN 40
63
64 struct net_dm_hw_entry {
65         char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
66         u32 count;
67 };
68
69 struct net_dm_hw_entries {
70         u32 num_entries;
71         struct net_dm_hw_entry entries[];
72 };
73
74 struct per_cpu_dm_data {
75         spinlock_t              lock;   /* Protects 'skb', 'hw_entries' and
76                                          * 'send_timer'
77                                          */
78         union {
79                 struct sk_buff                  *skb;
80                 struct net_dm_hw_entries        *hw_entries;
81         };
82         struct sk_buff_head     drop_queue;
83         struct work_struct      dm_alert_work;
84         struct timer_list       send_timer;
85         struct net_dm_stats     stats;
86 };
87
88 struct dm_hw_stat_delta {
89         struct net_device *dev;
90         unsigned long last_rx;
91         struct list_head list;
92         struct rcu_head rcu;
93         unsigned long last_drop_val;
94 };
95
96 static struct genl_family net_drop_monitor_family;
97
98 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
99 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
100
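/* Tunables:
 * dm_hit_limit      - max number of distinct drop points (or hardware trap
 *                     names) aggregated into one summary alert
 * dm_delay          - alert hysteresis period, in seconds
 * dm_hw_check_delta - minimum interval, in jiffies, between rx_dropped
 *                     checks for a given device in the NAPI poll probe
 */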
101 static int dm_hit_limit = 64;
102 static int dm_delay = 1;
103 static unsigned long dm_hw_check_delta = 2*HZ;
104 static LIST_HEAD(hw_stats_list);
105
106 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
107 static u32 net_dm_trunc_len;
108 static u32 net_dm_queue_len = 1000;
109
110 struct net_dm_alert_ops {
111         void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
112                                 void *location);
113         void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
114                                 int work, int budget);
115         void (*work_item_func)(struct work_struct *work);
116         void (*hw_work_item_func)(struct work_struct *work);
117         void (*hw_probe)(struct sk_buff *skb,
118                          const struct net_dm_hw_metadata *hw_metadata);
119 };
120
121 struct net_dm_skb_cb {
122         union {
123                 struct net_dm_hw_metadata *hw_metadata;
124                 void *pc;
125         };
126 };
127
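/* Overlays skb->cb; struct net_dm_skb_cb must fit within the 48 bytes of
 * sk_buff::cb.
 */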
128 #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
129
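/* Pre-build the per-CPU summary alert skb with room for dm_hit_limit drop
 * points so the tracepoint probe only has to fill in entries; the previous
 * skb (if any) is swapped out and returned to the caller. On allocation
 * failure the send timer is re-armed to retry in 1/10 second.
 */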
130 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
131 {
132         size_t al;
133         struct net_dm_alert_msg *msg;
134         struct nlattr *nla;
135         struct sk_buff *skb;
136         unsigned long flags;
137         void *msg_header;
138
139         al = sizeof(struct net_dm_alert_msg);
140         al += dm_hit_limit * sizeof(struct net_dm_drop_point);
141         al += sizeof(struct nlattr);
142
143         skb = genlmsg_new(al, GFP_KERNEL);
144
145         if (!skb)
146                 goto err;
147
148         msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
149                                  0, NET_DM_CMD_ALERT);
150         if (!msg_header) {
151                 nlmsg_free(skb);
152                 skb = NULL;
153                 goto err;
154         }
155         nla = nla_reserve(skb, NLA_UNSPEC,
156                           sizeof(struct net_dm_alert_msg));
157         if (!nla) {
158                 nlmsg_free(skb);
159                 skb = NULL;
160                 goto err;
161         }
162         msg = nla_data(nla);
163         memset(msg, 0, al);
164         goto out;
165
166 err:
167         mod_timer(&data->send_timer, jiffies + HZ / 10);
168 out:
169         spin_lock_irqsave(&data->lock, flags);
170         swap(data->skb, skb);
171         spin_unlock_irqrestore(&data->lock, flags);
172
173         if (skb) {
174                 struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
175                 struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh);
176
177                 genlmsg_end(skb, genlmsg_data(gnlh));
178         }
179
180         return skb;
181 }
182
183 static const struct genl_multicast_group dropmon_mcgrps[] = {
184         { .name = "events", },
185 };
186
187 static void send_dm_alert(struct work_struct *work)
188 {
189         struct sk_buff *skb;
190         struct per_cpu_dm_data *data;
191
192         data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
193
194         skb = reset_per_cpu_data(data);
195
196         if (skb)
197                 genlmsg_multicast(&net_drop_monitor_family, skb, 0,
198                                   0, GFP_KERNEL);
199 }
200
201 /*
202  * This is the timer function that delays the sending of an
203  * alert in case more drops arrive during the hysteresis
204  * period.
205  */
206 static void sched_send_work(struct timer_list *t)
207 {
208         struct per_cpu_dm_data *data = from_timer(data, t, send_timer);
209
210         schedule_work(&data->dm_alert_work);
211 }
212
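/* Summary-mode aggregation, as a hypothetical example: three drops at
 * ip_rcv+0x1b4 and one at tcp_v4_rcv+0x90 landing inside the same dm_delay
 * window are reported as a single NET_DM_CMD_ALERT carrying two
 * net_dm_drop_point entries, with counts of 3 and 1.
 */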
213 static void trace_drop_common(struct sk_buff *skb, void *location)
214 {
215         struct net_dm_alert_msg *msg;
216         struct nlmsghdr *nlh;
217         struct nlattr *nla;
218         int i;
219         struct sk_buff *dskb;
220         struct per_cpu_dm_data *data;
221         unsigned long flags;
222
223         local_irq_save(flags);
224         data = this_cpu_ptr(&dm_cpu_data);
225         spin_lock(&data->lock);
226         dskb = data->skb;
227
228         if (!dskb)
229                 goto out;
230
231         nlh = (struct nlmsghdr *)dskb->data;
232         nla = genlmsg_data(nlmsg_data(nlh));
233         msg = nla_data(nla);
234         for (i = 0; i < msg->entries; i++) {
235                 if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
236                         msg->points[i].count++;
237                         goto out;
238                 }
239         }
240         if (msg->entries == dm_hit_limit)
241                 goto out;
242         /*
243          * We need to create a new entry
244          */
245         __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
246         nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
247         memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
248         msg->points[msg->entries].count = 1;
249         msg->entries++;
250
251         if (!timer_pending(&data->send_timer)) {
252                 data->send_timer.expires = jiffies + dm_delay * HZ;
253                 add_timer(&data->send_timer);
254         }
255
256 out:
257         spin_unlock_irqrestore(&data->lock, flags);
258 }
259
260 static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
261 {
262         trace_drop_common(skb, location);
263 }
264
265 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
266                                 int work, int budget)
267 {
268         struct dm_hw_stat_delta *new_stat;
269
270         /*
271          * Don't check napi structures with no associated device
272          */
273         if (!napi->dev)
274                 return;
275
276         rcu_read_lock();
277         list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
278                 /*
279                  * only add a note to our monitor buffer if:
280                  * 1) this is the dev we received on
281                  * 2) it's after the last_rx delta
282                  * 3) our rx_dropped count has gone up
283                  */
284                 if ((new_stat->dev == napi->dev)  &&
285                     (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
286                     (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
287                         trace_drop_common(NULL, NULL);
288                         new_stat->last_drop_val = napi->dev->stats.rx_dropped;
289                         new_stat->last_rx = jiffies;
290                         break;
291                 }
292         }
293         rcu_read_unlock();
294 }
295
296 static struct net_dm_hw_entries *
297 net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
298 {
299         struct net_dm_hw_entries *hw_entries;
300         unsigned long flags;
301
302         hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
303                              GFP_KERNEL);
304         if (!hw_entries) {
305                 /* If the memory allocation failed, we try to perform another
306                  * allocation in 1/10 second. Otherwise, the probe function
307                  * will constantly bail out.
308                  */
309                 mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
310         }
311
312         spin_lock_irqsave(&hw_data->lock, flags);
313         swap(hw_data->hw_entries, hw_entries);
314         spin_unlock_irqrestore(&hw_data->lock, flags);
315
316         return hw_entries;
317 }
318
319 static int net_dm_hw_entry_put(struct sk_buff *msg,
320                                const struct net_dm_hw_entry *hw_entry)
321 {
322         struct nlattr *attr;
323
324         attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
325         if (!attr)
326                 return -EMSGSIZE;
327
328         if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
329                 goto nla_put_failure;
330
331         if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
332                 goto nla_put_failure;
333
334         nla_nest_end(msg, attr);
335
336         return 0;
337
338 nla_put_failure:
339         nla_nest_cancel(msg, attr);
340         return -EMSGSIZE;
341 }
342
343 static int net_dm_hw_entries_put(struct sk_buff *msg,
344                                  const struct net_dm_hw_entries *hw_entries)
345 {
346         struct nlattr *attr;
347         int i;
348
349         attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
350         if (!attr)
351                 return -EMSGSIZE;
352
353         for (i = 0; i < hw_entries->num_entries; i++) {
354                 int rc;
355
356                 rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
357                 if (rc)
358                         goto nla_put_failure;
359         }
360
361         nla_nest_end(msg, attr);
362
363         return 0;
364
365 nla_put_failure:
366         nla_nest_cancel(msg, attr);
367         return -EMSGSIZE;
368 }
369
370 static int
371 net_dm_hw_summary_report_fill(struct sk_buff *msg,
372                               const struct net_dm_hw_entries *hw_entries)
373 {
374         struct net_dm_alert_msg anc_hdr = { 0 };
375         void *hdr;
376         int rc;
377
378         hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
379                           NET_DM_CMD_ALERT);
380         if (!hdr)
381                 return -EMSGSIZE;
382
383         /* We need to put the ancillary header in order not to break user
384          * space.
385          */
386         if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
387                 goto nla_put_failure;
388
389         rc = net_dm_hw_entries_put(msg, hw_entries);
390         if (rc)
391                 goto nla_put_failure;
392
393         genlmsg_end(msg, hdr);
394
395         return 0;
396
397 nla_put_failure:
398         genlmsg_cancel(msg, hdr);
399         return -EMSGSIZE;
400 }
401
402 static void net_dm_hw_summary_work(struct work_struct *work)
403 {
404         struct net_dm_hw_entries *hw_entries;
405         struct per_cpu_dm_data *hw_data;
406         struct sk_buff *msg;
407         int rc;
408
409         hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
410
411         hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
412         if (!hw_entries)
413                 return;
414
415         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
416         if (!msg)
417                 goto out;
418
419         rc = net_dm_hw_summary_report_fill(msg, hw_entries);
420         if (rc) {
421                 nlmsg_free(msg);
422                 goto out;
423         }
424
425         genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
426
427 out:
428         kfree(hw_entries);
429 }
430
431 static void
432 net_dm_hw_summary_probe(struct sk_buff *skb,
433                         const struct net_dm_hw_metadata *hw_metadata)
434 {
435         struct net_dm_hw_entries *hw_entries;
436         struct net_dm_hw_entry *hw_entry;
437         struct per_cpu_dm_data *hw_data;
438         unsigned long flags;
439         int i;
440
441         hw_data = this_cpu_ptr(&dm_hw_cpu_data);
442         spin_lock_irqsave(&hw_data->lock, flags);
443         hw_entries = hw_data->hw_entries;
444
445         if (!hw_entries)
446                 goto out;
447
448         for (i = 0; i < hw_entries->num_entries; i++) {
449                 hw_entry = &hw_entries->entries[i];
450                 if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name,
451                              NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
452                         hw_entry->count++;
453                         goto out;
454                 }
455         }
456         if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
457                 goto out;
458
459         hw_entry = &hw_entries->entries[hw_entries->num_entries];
460         strlcpy(hw_entry->trap_name, hw_metadata->trap_name,
461                 NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
462         hw_entry->count = 1;
463         hw_entries->num_entries++;
464
465         if (!timer_pending(&hw_data->send_timer)) {
466                 hw_data->send_timer.expires = jiffies + dm_delay * HZ;
467                 add_timer(&hw_data->send_timer);
468         }
469
470 out:
471         spin_unlock_irqrestore(&hw_data->lock, flags);
472 }
473
474 static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
475         .kfree_skb_probe        = trace_kfree_skb_hit,
476         .napi_poll_probe        = trace_napi_poll_hit,
477         .work_item_func         = send_dm_alert,
478         .hw_work_item_func      = net_dm_hw_summary_work,
479         .hw_probe               = net_dm_hw_summary_probe,
480 };
481
482 static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
483                                               struct sk_buff *skb,
484                                               void *location)
485 {
486         ktime_t tstamp = ktime_get_real();
487         struct per_cpu_dm_data *data;
488         struct sk_buff *nskb;
489         unsigned long flags;
490
491         if (!skb_mac_header_was_set(skb))
492                 return;
493
494         nskb = skb_clone(skb, GFP_ATOMIC);
495         if (!nskb)
496                 return;
497
498         NET_DM_SKB_CB(nskb)->pc = location;
499         /* Override the timestamp because we care about the time when the
500          * packet was dropped.
501          */
502         nskb->tstamp = tstamp;
503
504         data = this_cpu_ptr(&dm_cpu_data);
505
506         spin_lock_irqsave(&data->drop_queue.lock, flags);
507         if (skb_queue_len(&data->drop_queue) < net_dm_queue_len)
508                 __skb_queue_tail(&data->drop_queue, nskb);
509         else
510                 goto unlock_free;
511         spin_unlock_irqrestore(&data->drop_queue.lock, flags);
512
513         schedule_work(&data->dm_alert_work);
514
515         return;
516
517 unlock_free:
518         spin_unlock_irqrestore(&data->drop_queue.lock, flags);
519         u64_stats_update_begin(&data->stats.syncp);
520         data->stats.dropped++;
521         u64_stats_update_end(&data->stats.syncp);
522         consume_skb(nskb);
523 }
524
525 static void net_dm_packet_trace_napi_poll_hit(void *ignore,
526                                               struct napi_struct *napi,
527                                               int work, int budget)
528 {
529 }
530
531 static size_t net_dm_in_port_size(void)
532 {
533                /* NET_DM_ATTR_IN_PORT nest */
534         return nla_total_size(0) +
535                /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
536                nla_total_size(sizeof(u32)) +
537                /* NET_DM_ATTR_PORT_NETDEV_NAME */
538                nla_total_size(IFNAMSIZ + 1);
539 }
540
541 #define NET_DM_MAX_SYMBOL_LEN 40
542
543 static size_t net_dm_packet_report_size(size_t payload_len)
544 {
545         size_t size;
546
547         size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
548
549         return NLMSG_ALIGN(size) +
550                /* NET_DM_ATTR_ORIGIN */
551                nla_total_size(sizeof(u16)) +
552                /* NET_DM_ATTR_PC */
553                nla_total_size(sizeof(u64)) +
554                /* NET_DM_ATTR_SYMBOL */
555                nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
556                /* NET_DM_ATTR_IN_PORT */
557                net_dm_in_port_size() +
558                /* NET_DM_ATTR_TIMESTAMP */
559                nla_total_size(sizeof(u64)) +
560                /* NET_DM_ATTR_ORIG_LEN */
561                nla_total_size(sizeof(u32)) +
562                /* NET_DM_ATTR_PROTO */
563                nla_total_size(sizeof(u16)) +
564                /* NET_DM_ATTR_PAYLOAD */
565                nla_total_size(payload_len);
566 }
567
568 static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
569                                             const char *name)
570 {
571         struct nlattr *attr;
572
573         attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT);
574         if (!attr)
575                 return -EMSGSIZE;
576
577         if (ifindex &&
578             nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
579                 goto nla_put_failure;
580
581         if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
582                 goto nla_put_failure;
583
584         nla_nest_end(msg, attr);
585
586         return 0;
587
588 nla_put_failure:
589         nla_nest_cancel(msg, attr);
590         return -EMSGSIZE;
591 }
592
593 static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
594                                      size_t payload_len)
595 {
596         u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
597         char buf[NET_DM_MAX_SYMBOL_LEN];
598         struct nlattr *attr;
599         void *hdr;
600         int rc;
601
602         hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
603                           NET_DM_CMD_PACKET_ALERT);
604         if (!hdr)
605                 return -EMSGSIZE;
606
607         if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
608                 goto nla_put_failure;
609
610         if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
611                 goto nla_put_failure;
612
613         snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
614         if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
615                 goto nla_put_failure;
616
617         rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
618         if (rc)
619                 goto nla_put_failure;
620
621         if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
622                               ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
623                 goto nla_put_failure;
624
625         if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
626                 goto nla_put_failure;
627
628         if (!payload_len)
629                 goto out;
630
631         if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
632                 goto nla_put_failure;
633
634         attr = skb_put(msg, nla_total_size(payload_len));
635         attr->nla_type = NET_DM_ATTR_PAYLOAD;
636         attr->nla_len = nla_attr_size(payload_len);
637         if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
638                 goto nla_put_failure;
639
640 out:
641         genlmsg_end(msg, hdr);
642
643         return 0;
644
645 nla_put_failure:
646         genlmsg_cancel(msg, hdr);
647         return -EMSGSIZE;
648 }
649
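/* The payload is reported in a single netlink attribute whose nla_len
 * field is 16 bits wide, hence the 0xffff ceiling minus the attribute
 * header and alignment padding.
 */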
650 #define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)
651
652 static void net_dm_packet_report(struct sk_buff *skb)
653 {
654         struct sk_buff *msg;
655         size_t payload_len;
656         int rc;
657
658         /* Make sure we start copying the packet from the MAC header */
659         if (skb->data > skb_mac_header(skb))
660                 skb_push(skb, skb->data - skb_mac_header(skb));
661         else
662                 skb_pull(skb, skb_mac_header(skb) - skb->data);
663
664         /* Ensure packet fits inside a single netlink attribute */
665         payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
666         if (net_dm_trunc_len)
667                 payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
668
669         msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
670         if (!msg)
671                 goto out;
672
673         rc = net_dm_packet_report_fill(msg, skb, payload_len);
674         if (rc) {
675                 nlmsg_free(msg);
676                 goto out;
677         }
678
679         genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
680
681 out:
682         consume_skb(skb);
683 }
684
685 static void net_dm_packet_work(struct work_struct *work)
686 {
687         struct per_cpu_dm_data *data;
688         struct sk_buff_head list;
689         struct sk_buff *skb;
690         unsigned long flags;
691
692         data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
693
694         __skb_queue_head_init(&list);
695
696         spin_lock_irqsave(&data->drop_queue.lock, flags);
697         skb_queue_splice_tail_init(&data->drop_queue, &list);
698         spin_unlock_irqrestore(&data->drop_queue.lock, flags);
699
700         while ((skb = __skb_dequeue(&list)))
701                 net_dm_packet_report(skb);
702 }
703
704 static size_t
705 net_dm_flow_action_cookie_size(const struct net_dm_hw_metadata *hw_metadata)
706 {
707         return hw_metadata->fa_cookie ?
708                nla_total_size(hw_metadata->fa_cookie->cookie_len) : 0;
709 }
710
711 static size_t
712 net_dm_hw_packet_report_size(size_t payload_len,
713                              const struct net_dm_hw_metadata *hw_metadata)
714 {
715         size_t size;
716
717         size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
718
719         return NLMSG_ALIGN(size) +
720                /* NET_DM_ATTR_ORIGIN */
721                nla_total_size(sizeof(u16)) +
722                /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
723                nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
724                /* NET_DM_ATTR_HW_TRAP_NAME */
725                nla_total_size(strlen(hw_metadata->trap_name) + 1) +
726                /* NET_DM_ATTR_IN_PORT */
727                net_dm_in_port_size() +
728                /* NET_DM_ATTR_FLOW_ACTION_COOKIE */
729                net_dm_flow_action_cookie_size(hw_metadata) +
730                /* NET_DM_ATTR_TIMESTAMP */
731                nla_total_size(sizeof(u64)) +
732                /* NET_DM_ATTR_ORIG_LEN */
733                nla_total_size(sizeof(u32)) +
734                /* NET_DM_ATTR_PROTO */
735                nla_total_size(sizeof(u16)) +
736                /* NET_DM_ATTR_PAYLOAD */
737                nla_total_size(payload_len);
738 }
739
740 static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
741                                         struct sk_buff *skb, size_t payload_len)
742 {
743         struct net_dm_hw_metadata *hw_metadata;
744         struct nlattr *attr;
745         void *hdr;
746
747         hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
748
749         hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
750                           NET_DM_CMD_PACKET_ALERT);
751         if (!hdr)
752                 return -EMSGSIZE;
753
754         if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
755                 goto nla_put_failure;
756
757         if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
758                            hw_metadata->trap_group_name))
759                 goto nla_put_failure;
760
761         if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
762                            hw_metadata->trap_name))
763                 goto nla_put_failure;
764
765         if (hw_metadata->input_dev) {
766                 struct net_device *dev = hw_metadata->input_dev;
767                 int rc;
768
769                 rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
770                                                       dev->name);
771                 if (rc)
772                         goto nla_put_failure;
773         }
774
775         if (hw_metadata->fa_cookie &&
776             nla_put(msg, NET_DM_ATTR_FLOW_ACTION_COOKIE,
777                     hw_metadata->fa_cookie->cookie_len,
778                     hw_metadata->fa_cookie->cookie))
779                 goto nla_put_failure;
780
781         if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
782                               ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
783                 goto nla_put_failure;
784
785         if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
786                 goto nla_put_failure;
787
788         if (!payload_len)
789                 goto out;
790
791         if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
792                 goto nla_put_failure;
793
794         attr = skb_put(msg, nla_total_size(payload_len));
795         attr->nla_type = NET_DM_ATTR_PAYLOAD;
796         attr->nla_len = nla_attr_size(payload_len);
797         if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
798                 goto nla_put_failure;
799
800 out:
801         genlmsg_end(msg, hdr);
802
803         return 0;
804
805 nla_put_failure:
806         genlmsg_cancel(msg, hdr);
807         return -EMSGSIZE;
808 }
809
810 static struct net_dm_hw_metadata *
811 net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
812 {
813         const struct flow_action_cookie *fa_cookie;
814         struct net_dm_hw_metadata *n_hw_metadata;
815         const char *trap_group_name;
816         const char *trap_name;
817
818         n_hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC);
819         if (!n_hw_metadata)
820                 return NULL;
821
822         trap_group_name = kstrdup(hw_metadata->trap_group_name, GFP_ATOMIC);
823         if (!trap_group_name)
824                 goto free_hw_metadata;
825         n_hw_metadata->trap_group_name = trap_group_name;
826
827         trap_name = kstrdup(hw_metadata->trap_name, GFP_ATOMIC);
828         if (!trap_name)
829                 goto free_trap_group;
830         n_hw_metadata->trap_name = trap_name;
831
832         if (hw_metadata->fa_cookie) {
833                 size_t cookie_size = sizeof(*fa_cookie) +
834                                      hw_metadata->fa_cookie->cookie_len;
835
836                 fa_cookie = kmemdup(hw_metadata->fa_cookie, cookie_size,
837                                     GFP_ATOMIC);
838                 if (!fa_cookie)
839                         goto free_trap_name;
840                 n_hw_metadata->fa_cookie = fa_cookie;
841         }
842
843         n_hw_metadata->input_dev = hw_metadata->input_dev;
844         if (n_hw_metadata->input_dev)
845                 dev_hold(n_hw_metadata->input_dev);
846
847         return n_hw_metadata;
848
849 free_trap_name:
850         kfree(trap_name);
851 free_trap_group:
852         kfree(trap_group_name);
853 free_hw_metadata:
854         kfree(n_hw_metadata);
855         return NULL;
856 }
857
858 static void
859 net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
860 {
861         if (hw_metadata->input_dev)
862                 dev_put(hw_metadata->input_dev);
863         kfree(hw_metadata->fa_cookie);
864         kfree(hw_metadata->trap_name);
865         kfree(hw_metadata->trap_group_name);
866         kfree(hw_metadata);
867 }
868
869 static void net_dm_hw_packet_report(struct sk_buff *skb)
870 {
871         struct net_dm_hw_metadata *hw_metadata;
872         struct sk_buff *msg;
873         size_t payload_len;
874         int rc;
875
876         if (skb->data > skb_mac_header(skb))
877                 skb_push(skb, skb->data - skb_mac_header(skb));
878         else
879                 skb_pull(skb, skb_mac_header(skb) - skb->data);
880
881         payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
882         if (net_dm_trunc_len)
883                 payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
884
885         hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
886         msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
887                         GFP_KERNEL);
888         if (!msg)
889                 goto out;
890
891         rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
892         if (rc) {
893                 nlmsg_free(msg);
894                 goto out;
895         }
896
897         genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
898
899 out:
900         net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
901         consume_skb(skb);
902 }
903
904 static void net_dm_hw_packet_work(struct work_struct *work)
905 {
906         struct per_cpu_dm_data *hw_data;
907         struct sk_buff_head list;
908         struct sk_buff *skb;
909         unsigned long flags;
910
911         hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
912
913         __skb_queue_head_init(&list);
914
915         spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
916         skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
917         spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
918
919         while ((skb = __skb_dequeue(&list)))
920                 net_dm_hw_packet_report(skb);
921 }
922
923 static void
924 net_dm_hw_packet_probe(struct sk_buff *skb,
925                        const struct net_dm_hw_metadata *hw_metadata)
926 {
927         struct net_dm_hw_metadata *n_hw_metadata;
928         ktime_t tstamp = ktime_get_real();
929         struct per_cpu_dm_data *hw_data;
930         struct sk_buff *nskb;
931         unsigned long flags;
932
933         if (!skb_mac_header_was_set(skb))
934                 return;
935
936         nskb = skb_clone(skb, GFP_ATOMIC);
937         if (!nskb)
938                 return;
939
940         n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata);
941         if (!n_hw_metadata)
942                 goto free;
943
944         NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
945         nskb->tstamp = tstamp;
946
947         hw_data = this_cpu_ptr(&dm_hw_cpu_data);
948
949         spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
950         if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
951                 __skb_queue_tail(&hw_data->drop_queue, nskb);
952         else
953                 goto unlock_free;
954         spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
955
956         schedule_work(&hw_data->dm_alert_work);
957
958         return;
959
960 unlock_free:
961         spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
962         u64_stats_update_begin(&hw_data->stats.syncp);
963         hw_data->stats.dropped++;
964         u64_stats_update_end(&hw_data->stats.syncp);
965         net_dm_hw_metadata_free(n_hw_metadata);
966 free:
967         consume_skb(nskb);
968 }
969
970 static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
971         .kfree_skb_probe        = net_dm_packet_trace_kfree_skb_hit,
972         .napi_poll_probe        = net_dm_packet_trace_napi_poll_hit,
973         .work_item_func         = net_dm_packet_work,
974         .hw_work_item_func      = net_dm_hw_packet_work,
975         .hw_probe               = net_dm_hw_packet_probe,
976 };
977
978 static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
979         [NET_DM_ALERT_MODE_SUMMARY]     = &net_dm_alert_summary_ops,
980         [NET_DM_ALERT_MODE_PACKET]      = &net_dm_alert_packet_ops,
981 };
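/* The active entry is selected by net_dm_alert_mode, which is configured
 * through NET_DM_ATTR_ALERT_MODE and may only change while monitoring is
 * stopped; see net_dm_cmd_config().
 */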
982
983 void net_dm_hw_report(struct sk_buff *skb,
984                       const struct net_dm_hw_metadata *hw_metadata)
985 {
986         rcu_read_lock();
987
988         if (!monitor_hw)
989                 goto out;
990
991         net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
992
993 out:
994         rcu_read_unlock();
995 }
996 EXPORT_SYMBOL_GPL(net_dm_hw_report);
997
998 static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
999 {
1000         const struct net_dm_alert_ops *ops;
1001         int cpu;
1002
1003         if (monitor_hw) {
1004                 NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
1005                 return -EAGAIN;
1006         }
1007
1008         ops = net_dm_alert_ops_arr[net_dm_alert_mode];
1009
1010         if (!try_module_get(THIS_MODULE)) {
1011                 NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
1012                 return -ENODEV;
1013         }
1014
1015         for_each_possible_cpu(cpu) {
1016                 struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1017                 struct net_dm_hw_entries *hw_entries;
1018
1019                 INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
1020                 timer_setup(&hw_data->send_timer, sched_send_work, 0);
1021                 hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
1022                 kfree(hw_entries);
1023         }
1024
1025         monitor_hw = true;
1026
1027         return 0;
1028 }
1029
1030 static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
1031 {
1032         int cpu;
1033
1034         if (!monitor_hw) {
1035                 NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
1036                 return;
1037         }
1038
1039         monitor_hw = false;
1040
1041         /* After this call returns we are guaranteed that no CPU is processing
1042          * any hardware drops.
1043          */
1044         synchronize_rcu();
1045
1046         for_each_possible_cpu(cpu) {
1047                 struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1048                 struct sk_buff *skb;
1049
1050                 del_timer_sync(&hw_data->send_timer);
1051                 cancel_work_sync(&hw_data->dm_alert_work);
1052                 while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
1053                         struct net_dm_hw_metadata *hw_metadata;
1054
1055                         hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
1056                         net_dm_hw_metadata_free(hw_metadata);
1057                         consume_skb(skb);
1058                 }
1059         }
1060
1061         module_put(THIS_MODULE);
1062 }
1063
1064 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
1065 {
1066         const struct net_dm_alert_ops *ops;
1067         int cpu, rc;
1068
1069         ops = net_dm_alert_ops_arr[net_dm_alert_mode];
1070
1071         if (!try_module_get(THIS_MODULE)) {
1072                 NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
1073                 return -ENODEV;
1074         }
1075
1076         for_each_possible_cpu(cpu) {
1077                 struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
1078                 struct sk_buff *skb;
1079
1080                 INIT_WORK(&data->dm_alert_work, ops->work_item_func);
1081                 timer_setup(&data->send_timer, sched_send_work, 0);
1082                 /* Allocate a new per-CPU skb for the summary alert message and
1083                  * free the old one which might contain stale data from
1084                  * previous tracing.
1085                  */
1086                 skb = reset_per_cpu_data(data);
1087                 consume_skb(skb);
1088         }
1089
1090         rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
1091         if (rc) {
1092                 NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
1093                 goto err_module_put;
1094         }
1095
1096         rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
1097         if (rc) {
1098                 NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
1099                 goto err_unregister_trace;
1100         }
1101
1102         return 0;
1103
1104 err_unregister_trace:
1105         unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
1106 err_module_put:
1107         module_put(THIS_MODULE);
1108         return rc;
1109 }
1110
1111 static void net_dm_trace_off_set(void)
1112 {
1113         struct dm_hw_stat_delta *new_stat, *temp;
1114         const struct net_dm_alert_ops *ops;
1115         int cpu;
1116
1117         ops = net_dm_alert_ops_arr[net_dm_alert_mode];
1118
1119         unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
1120         unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
1121
1122         tracepoint_synchronize_unregister();
1123
1124         /* Make sure we do not send notifications to user space after request
1125          * to stop tracing returns.
1126          */
1127         for_each_possible_cpu(cpu) {
1128                 struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
1129                 struct sk_buff *skb;
1130
1131                 del_timer_sync(&data->send_timer);
1132                 cancel_work_sync(&data->dm_alert_work);
1133                 while ((skb = __skb_dequeue(&data->drop_queue)))
1134                         consume_skb(skb);
1135         }
1136
1137         list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
1138                 if (new_stat->dev == NULL) {
1139                         list_del_rcu(&new_stat->list);
1140                         kfree_rcu(new_stat, rcu);
1141                 }
1142         }
1143
1144         module_put(THIS_MODULE);
1145 }
1146
1147 static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
1148 {
1149         int rc = 0;
1150
1151         if (state == trace_state) {
1152                 NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state");
1153                 return -EAGAIN;
1154         }
1155
1156         switch (state) {
1157         case TRACE_ON:
1158                 rc = net_dm_trace_on_set(extack);
1159                 break;
1160         case TRACE_OFF:
1161                 net_dm_trace_off_set();
1162                 break;
1163         default:
1164                 rc = 1;
1165                 break;
1166         }
1167
1168         if (!rc)
1169                 trace_state = state;
1170         else
1171                 rc = -EINPROGRESS;
1172
1173         return rc;
1174 }
1175
1176 static bool net_dm_is_monitoring(void)
1177 {
1178         return trace_state == TRACE_ON || monitor_hw;
1179 }
1180
1181 static int net_dm_alert_mode_get_from_info(struct genl_info *info,
1182                                            enum net_dm_alert_mode *p_alert_mode)
1183 {
1184         u8 val;
1185
1186         val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);
1187
1188         switch (val) {
1189         case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */
1190         case NET_DM_ALERT_MODE_PACKET:
1191                 *p_alert_mode = val;
1192                 break;
1193         default:
1194                 return -EINVAL;
1195         }
1196
1197         return 0;
1198 }
1199
1200 static int net_dm_alert_mode_set(struct genl_info *info)
1201 {
1202         struct netlink_ext_ack *extack = info->extack;
1203         enum net_dm_alert_mode alert_mode;
1204         int rc;
1205
1206         if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
1207                 return 0;
1208
1209         rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
1210         if (rc) {
1211                 NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
1212                 return -EINVAL;
1213         }
1214
1215         net_dm_alert_mode = alert_mode;
1216
1217         return 0;
1218 }
1219
1220 static void net_dm_trunc_len_set(struct genl_info *info)
1221 {
1222         if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
1223                 return;
1224
1225         net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
1226 }
1227
1228 static void net_dm_queue_len_set(struct genl_info *info)
1229 {
1230         if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
1231                 return;
1232
1233         net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]);
1234 }
1235
1236 static int net_dm_cmd_config(struct sk_buff *skb,
1237                         struct genl_info *info)
1238 {
1239         struct netlink_ext_ack *extack = info->extack;
1240         int rc;
1241
1242         if (net_dm_is_monitoring()) {
1243                 NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring");
1244                 return -EBUSY;
1245         }
1246
1247         rc = net_dm_alert_mode_set(info);
1248         if (rc)
1249                 return rc;
1250
1251         net_dm_trunc_len_set(info);
1252
1253         net_dm_queue_len_set(info);
1254
1255         return 0;
1256 }
1257
1258 static int net_dm_monitor_start(bool set_sw, bool set_hw,
1259                                 struct netlink_ext_ack *extack)
1260 {
1261         bool sw_set = false;
1262         int rc;
1263
1264         if (set_sw) {
1265                 rc = set_all_monitor_traces(TRACE_ON, extack);
1266                 if (rc)
1267                         return rc;
1268                 sw_set = true;
1269         }
1270
1271         if (set_hw) {
1272                 rc = net_dm_hw_monitor_start(extack);
1273                 if (rc)
1274                         goto err_monitor_hw;
1275         }
1276
1277         return 0;
1278
1279 err_monitor_hw:
1280         if (sw_set)
1281                 set_all_monitor_traces(TRACE_OFF, extack);
1282         return rc;
1283 }
1284
1285 static void net_dm_monitor_stop(bool set_sw, bool set_hw,
1286                                 struct netlink_ext_ack *extack)
1287 {
1288         if (set_hw)
1289                 net_dm_hw_monitor_stop(extack);
1290         if (set_sw)
1291                 set_all_monitor_traces(TRACE_OFF, extack);
1292 }
1293
1294 static int net_dm_cmd_trace(struct sk_buff *skb,
1295                         struct genl_info *info)
1296 {
1297         bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
1298         bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
1299         struct netlink_ext_ack *extack = info->extack;
1300
1301         /* To maintain backward compatibility, we start / stop monitoring of
1302          * software drops if no flag is specified.
1303          */
1304         if (!set_sw && !set_hw)
1305                 set_sw = true;
1306
1307         switch (info->genlhdr->cmd) {
1308         case NET_DM_CMD_START:
1309                 return net_dm_monitor_start(set_sw, set_hw, extack);
1310         case NET_DM_CMD_STOP:
1311                 net_dm_monitor_stop(set_sw, set_hw, extack);
1312                 return 0;
1313         }
1314
1315         return -EOPNOTSUPP;
1316 }
1317
1318 static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
1319 {
1320         void *hdr;
1321
1322         hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
1323                           &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW);
1324         if (!hdr)
1325                 return -EMSGSIZE;
1326
1327         if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode))
1328                 goto nla_put_failure;
1329
1330         if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
1331                 goto nla_put_failure;
1332
1333         if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len))
1334                 goto nla_put_failure;
1335
1336         genlmsg_end(msg, hdr);
1337
1338         return 0;
1339
1340 nla_put_failure:
1341         genlmsg_cancel(msg, hdr);
1342         return -EMSGSIZE;
1343 }
1344
1345 static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
1346 {
1347         struct sk_buff *msg;
1348         int rc;
1349
1350         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1351         if (!msg)
1352                 return -ENOMEM;
1353
1354         rc = net_dm_config_fill(msg, info);
1355         if (rc)
1356                 goto free_msg;
1357
1358         return genlmsg_reply(msg, info);
1359
1360 free_msg:
1361         nlmsg_free(msg);
1362         return rc;
1363 }
1364
1365 static void net_dm_stats_read(struct net_dm_stats *stats)
1366 {
1367         int cpu;
1368
1369         memset(stats, 0, sizeof(*stats));
1370         for_each_possible_cpu(cpu) {
1371                 struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
1372                 struct net_dm_stats *cpu_stats = &data->stats;
1373                 unsigned int start;
1374                 u64 dropped;
1375
1376                 do {
1377                         start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
1378                         dropped = cpu_stats->dropped;
1379                 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
1380
1381                 stats->dropped += dropped;
1382         }
1383 }
1384
1385 static int net_dm_stats_put(struct sk_buff *msg)
1386 {
1387         struct net_dm_stats stats;
1388         struct nlattr *attr;
1389
1390         net_dm_stats_read(&stats);
1391
1392         attr = nla_nest_start(msg, NET_DM_ATTR_STATS);
1393         if (!attr)
1394                 return -EMSGSIZE;
1395
1396         if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
1397                               stats.dropped, NET_DM_ATTR_PAD))
1398                 goto nla_put_failure;
1399
1400         nla_nest_end(msg, attr);
1401
1402         return 0;
1403
1404 nla_put_failure:
1405         nla_nest_cancel(msg, attr);
1406         return -EMSGSIZE;
1407 }
1408
1409 static void net_dm_hw_stats_read(struct net_dm_stats *stats)
1410 {
1411         int cpu;
1412
1413         memset(stats, 0, sizeof(*stats));
1414         for_each_possible_cpu(cpu) {
1415                 struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1416                 struct net_dm_stats *cpu_stats = &hw_data->stats;
1417                 unsigned int start;
1418                 u64 dropped;
1419
1420                 do {
1421                         start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
1422                         dropped = cpu_stats->dropped;
1423                 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
1424
1425                 stats->dropped += dropped;
1426         }
1427 }
1428
1429 static int net_dm_hw_stats_put(struct sk_buff *msg)
1430 {
1431         struct net_dm_stats stats;
1432         struct nlattr *attr;
1433
1434         net_dm_hw_stats_read(&stats);
1435
1436         attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
1437         if (!attr)
1438                 return -EMSGSIZE;
1439
1440         if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
1441                               stats.dropped, NET_DM_ATTR_PAD))
1442                 goto nla_put_failure;
1443
1444         nla_nest_end(msg, attr);
1445
1446         return 0;
1447
1448 nla_put_failure:
1449         nla_nest_cancel(msg, attr);
1450         return -EMSGSIZE;
1451 }
1452
1453 static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
1454 {
1455         void *hdr;
1456         int rc;
1457
1458         hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
1459                           &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW);
1460         if (!hdr)
1461                 return -EMSGSIZE;
1462
1463         rc = net_dm_stats_put(msg);
1464         if (rc)
1465                 goto nla_put_failure;
1466
1467         rc = net_dm_hw_stats_put(msg);
1468         if (rc)
1469                 goto nla_put_failure;
1470
1471         genlmsg_end(msg, hdr);
1472
1473         return 0;
1474
1475 nla_put_failure:
1476         genlmsg_cancel(msg, hdr);
1477         return -EMSGSIZE;
1478 }
1479
1480 static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info)
1481 {
1482         struct sk_buff *msg;
1483         int rc;
1484
1485         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1486         if (!msg)
1487                 return -ENOMEM;
1488
1489         rc = net_dm_stats_fill(msg, info);
1490         if (rc)
1491                 goto free_msg;
1492
1493         return genlmsg_reply(msg, info);
1494
1495 free_msg:
1496         nlmsg_free(msg);
1497         return rc;
1498 }
1499
1500 static int dropmon_net_event(struct notifier_block *ev_block,
1501                              unsigned long event, void *ptr)
1502 {
1503         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1504         struct dm_hw_stat_delta *new_stat = NULL;
1505         struct dm_hw_stat_delta *tmp;
1506
1507         switch (event) {
1508         case NETDEV_REGISTER:
1509                 new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
1510
1511                 if (!new_stat)
1512                         goto out;
1513
1514                 new_stat->dev = dev;
1515                 new_stat->last_rx = jiffies;
1516                 mutex_lock(&net_dm_mutex);
1517                 list_add_rcu(&new_stat->list, &hw_stats_list);
1518                 mutex_unlock(&net_dm_mutex);
1519                 break;
1520         case NETDEV_UNREGISTER:
1521                 mutex_lock(&net_dm_mutex);
1522                 list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
1523                         if (new_stat->dev == dev) {
1524                                 new_stat->dev = NULL;
1525                                 if (trace_state == TRACE_OFF) {
1526                                         list_del_rcu(&new_stat->list);
1527                                         kfree_rcu(new_stat, rcu);
1528                                         break;
1529                                 }
1530                         }
1531                 }
1532                 mutex_unlock(&net_dm_mutex);
1533                 break;
1534         }
1535 out:
1536         return NOTIFY_DONE;
1537 }
1538
1539 static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
1540         [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
1541         [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
1542         [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
1543         [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
1544         [NET_DM_ATTR_SW_DROPS]  = { .type = NLA_FLAG },
1545         [NET_DM_ATTR_HW_DROPS]  = { .type = NLA_FLAG },
1546 };
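/* strict_start_type subjects every attribute above NET_DM_ATTR_UNSPEC to
 * strict validation, including for commands registered with
 * GENL_DONT_VALIDATE_STRICT.
 */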
1547
1548 static const struct genl_ops dropmon_ops[] = {
1549         {
1550                 .cmd = NET_DM_CMD_CONFIG,
1551                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1552                 .doit = net_dm_cmd_config,
1553                 .flags = GENL_ADMIN_PERM,
1554         },
1555         {
1556                 .cmd = NET_DM_CMD_START,
1557                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1558                 .doit = net_dm_cmd_trace,
1559         },
1560         {
1561                 .cmd = NET_DM_CMD_STOP,
1562                 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1563                 .doit = net_dm_cmd_trace,
1564         },
1565         {
1566                 .cmd = NET_DM_CMD_CONFIG_GET,
1567                 .doit = net_dm_cmd_config_get,
1568         },
1569         {
1570                 .cmd = NET_DM_CMD_STATS_GET,
1571                 .doit = net_dm_cmd_stats_get,
1572         },
1573 };
1574
1575 static int net_dm_nl_pre_doit(const struct genl_ops *ops,
1576                               struct sk_buff *skb, struct genl_info *info)
1577 {
1578         mutex_lock(&net_dm_mutex);
1579
1580         return 0;
1581 }
1582
1583 static void net_dm_nl_post_doit(const struct genl_ops *ops,
1584                                 struct sk_buff *skb, struct genl_info *info)
1585 {
1586         mutex_unlock(&net_dm_mutex);
1587 }
1588
1589 static struct genl_family net_drop_monitor_family __ro_after_init = {
1590         .hdrsize        = 0,
1591         .name           = "NET_DM",
1592         .version        = 2,
1593         .maxattr        = NET_DM_ATTR_MAX,
1594         .policy         = net_dm_nl_policy,
1595         .pre_doit       = net_dm_nl_pre_doit,
1596         .post_doit      = net_dm_nl_post_doit,
1597         .module         = THIS_MODULE,
1598         .ops            = dropmon_ops,
1599         .n_ops          = ARRAY_SIZE(dropmon_ops),
1600         .mcgrps         = dropmon_mcgrps,
1601         .n_mcgrps       = ARRAY_SIZE(dropmon_mcgrps),
1602 };
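/* Userspace interaction, as a minimal sketch (assumes libnl-3; error
 * handling omitted): resolve the family, join the "events" multicast
 * group, then start monitoring software drops.
 *
 *	struct nl_sock *sk = nl_socket_alloc();
 *	struct nl_msg *msg;
 *
 *	genl_connect(sk);
 *	int family = genl_ctrl_resolve(sk, "NET_DM");
 *	int grp = genl_ctrl_resolve_grp(sk, "NET_DM", "events");
 *	nl_socket_add_membership(sk, grp);
 *
 *	msg = nlmsg_alloc();
 *	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
 *		    NET_DM_CMD_START, 2);
 *	nla_put_flag(msg, NET_DM_ATTR_SW_DROPS);
 *	nl_send_auto(sk, msg);
 *	nlmsg_free(msg);
 */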
1603
1604 static struct notifier_block dropmon_net_notifier = {
1605         .notifier_call = dropmon_net_event
1606 };
1607
1608 static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
1609 {
1610         spin_lock_init(&data->lock);
1611         skb_queue_head_init(&data->drop_queue);
1612         u64_stats_init(&data->stats.syncp);
1613 }
1614
1615 static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data)
1616 {
1617         WARN_ON(!skb_queue_empty(&data->drop_queue));
1618 }
1619
1620 static void net_dm_cpu_data_init(int cpu)
1621 {
1622         struct per_cpu_dm_data *data;
1623
1624         data = &per_cpu(dm_cpu_data, cpu);
1625         __net_dm_cpu_data_init(data);
1626 }
1627
1628 static void net_dm_cpu_data_fini(int cpu)
1629 {
1630         struct per_cpu_dm_data *data;
1631
1632         data = &per_cpu(dm_cpu_data, cpu);
1633         /* At this point, we should have exclusive access
1634          * to this struct and can free the skb inside it.
1635          */
1636         consume_skb(data->skb);
1637         __net_dm_cpu_data_fini(data);
1638 }
1639
1640 static void net_dm_hw_cpu_data_init(int cpu)
1641 {
1642         struct per_cpu_dm_data *hw_data;
1643
1644         hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1645         __net_dm_cpu_data_init(hw_data);
1646 }
1647
1648 static void net_dm_hw_cpu_data_fini(int cpu)
1649 {
1650         struct per_cpu_dm_data *hw_data;
1651
1652         hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1653         kfree(hw_data->hw_entries);
1654         __net_dm_cpu_data_fini(hw_data);
1655 }
1656
1657 static int __init init_net_drop_monitor(void)
1658 {
1659         int cpu, rc;
1660
1661         pr_info("Initializing network drop monitor service\n");
1662
1663         if (sizeof(void *) > 8) {
1664                 pr_err("Unable to store program counters on this arch; drop monitor failed\n");
1665                 return -ENOSPC;
1666         }
1667
1668         rc = genl_register_family(&net_drop_monitor_family);
1669         if (rc) {
1670                 pr_err("Could not create drop monitor netlink family\n");
1671                 return rc;
1672         }
1673         WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);
1674
1675         rc = register_netdevice_notifier(&dropmon_net_notifier);
1676         if (rc < 0) {
1677                 pr_crit("Failed to register netdevice notifier\n");
1678                 goto out_unreg;
1679         }
1680
1681         rc = 0;
1682
1683         for_each_possible_cpu(cpu) {
1684                 net_dm_cpu_data_init(cpu);
1685                 net_dm_hw_cpu_data_init(cpu);
1686         }
1687
1688         goto out;
1689
1690 out_unreg:
1691         genl_unregister_family(&net_drop_monitor_family);
1692 out:
1693         return rc;
1694 }
1695
1696 static void exit_net_drop_monitor(void)
1697 {
1698         int cpu;
1699
1700         BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
1701
1702         /*
1703  * Because of the module_get/put we do in the trace state change path,
1704  * we are guaranteed not to have any current users when we get here.
1705          */
1706
1707         for_each_possible_cpu(cpu) {
1708                 net_dm_hw_cpu_data_fini(cpu);
1709                 net_dm_cpu_data_fini(cpu);
1710         }
1711
1712         BUG_ON(genl_unregister_family(&net_drop_monitor_family));
1713 }
1714
1715 module_init(init_net_drop_monitor);
1716 module_exit(exit_net_drop_monitor);
1717
1718 MODULE_LICENSE("GPL v2");
1719 MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>");
1720 MODULE_ALIAS_GENL_FAMILY("NET_DM");