Merge tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client
[linux-2.6-microblaze.git] / net / sched / sch_teql.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
3  *
4  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
5  */
6
7 #include <linux/module.h>
8 #include <linux/types.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/if_arp.h>
14 #include <linux/netdevice.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/moduleparam.h>
18 #include <net/dst.h>
19 #include <net/neighbour.h>
20 #include <net/pkt_sched.h>
21
22 /*
23    How to setup it.
24    ----------------
25
26    After loading this module you will find a new device teqlN
27    and new qdisc with the same name. To join a slave to the equalizer
28    you should just set this qdisc on a device f.e.
29
30    # tc qdisc add dev eth0 root teql0
31    # tc qdisc add dev eth1 root teql0
32
33    That's all. Full PnP 8)
34
35    Applicability.
36    --------------
37
38    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
39       signal and generate EOI events. If you want to equalize virtual devices
40       like tunnels, use a normal eql device.
41    2. This device puts no limitations on physical slave characteristics
42       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
43       Certainly, large difference in link speeds will make the resulting
44       equalized link unusable, because of huge packet reordering.
45       I estimate an upper useful difference as ~10 times.
46    3. If the slave requires address resolution, only protocols using
47       neighbour cache (IPv4/IPv6) will work over the equalized link.
48       Other protocols are still allowed to use the slave device directly,
49       which will not break load balancing, though native slave
50       traffic will have the highest priority.  */
51
52 struct teql_master {
53         struct Qdisc_ops qops;
54         struct net_device *dev;
55         struct Qdisc *slaves;
56         struct list_head master_list;
57         unsigned long   tx_bytes;
58         unsigned long   tx_packets;
59         unsigned long   tx_errors;
60         unsigned long   tx_dropped;
61 };
62
63 struct teql_sched_data {
64         struct Qdisc *next;
65         struct teql_master *m;
66         struct sk_buff_head q;
67 };
68
/*
 * Successor of slave qdisc q in its master's circular list.
 * Expands to an lvalue, so it is also used to relink the list.
 */
#define NEXT_SLAVE(q)   (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device copies from its slaves. */
#define FMASK           (IFF_BROADCAST | IFF_POINTOPOINT)
72
73 /* "teql*" qdisc routines */
74
75 static int
76 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
77 {
78         struct net_device *dev = qdisc_dev(sch);
79         struct teql_sched_data *q = qdisc_priv(sch);
80
81         if (q->q.qlen < dev->tx_queue_len) {
82                 __skb_queue_tail(&q->q, skb);
83                 return NET_XMIT_SUCCESS;
84         }
85
86         return qdisc_drop(skb, sch, to_free);
87 }
88
89 static struct sk_buff *
90 teql_dequeue(struct Qdisc *sch)
91 {
92         struct teql_sched_data *dat = qdisc_priv(sch);
93         struct netdev_queue *dat_queue;
94         struct sk_buff *skb;
95         struct Qdisc *q;
96
97         skb = __skb_dequeue(&dat->q);
98         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
99         q = rcu_dereference_bh(dat_queue->qdisc);
100
101         if (skb == NULL) {
102                 struct net_device *m = qdisc_dev(q);
103                 if (m) {
104                         dat->m->slaves = sch;
105                         netif_wake_queue(m);
106                 }
107         } else {
108                 qdisc_bstats_update(sch, skb);
109         }
110         sch->q.qlen = dat->q.qlen + q->q.qlen;
111         return skb;
112 }
113
114 static struct sk_buff *
115 teql_peek(struct Qdisc *sch)
116 {
117         /* teql is meant to be used as root qdisc */
118         return NULL;
119 }
120
121 static void
122 teql_reset(struct Qdisc *sch)
123 {
124         struct teql_sched_data *dat = qdisc_priv(sch);
125
126         skb_queue_purge(&dat->q);
127         sch->q.qlen = 0;
128 }
129
130 static void
131 teql_destroy(struct Qdisc *sch)
132 {
133         struct Qdisc *q, *prev;
134         struct teql_sched_data *dat = qdisc_priv(sch);
135         struct teql_master *master = dat->m;
136
137         prev = master->slaves;
138         if (prev) {
139                 do {
140                         q = NEXT_SLAVE(prev);
141                         if (q == sch) {
142                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
143                                 if (q == master->slaves) {
144                                         master->slaves = NEXT_SLAVE(q);
145                                         if (q == master->slaves) {
146                                                 struct netdev_queue *txq;
147                                                 spinlock_t *root_lock;
148
149                                                 txq = netdev_get_tx_queue(master->dev, 0);
150                                                 master->slaves = NULL;
151
152                                                 root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
153                                                 spin_lock_bh(root_lock);
154                                                 qdisc_reset(rtnl_dereference(txq->qdisc));
155                                                 spin_unlock_bh(root_lock);
156                                         }
157                                 }
158                                 skb_queue_purge(&dat->q);
159                                 break;
160                         }
161
162                 } while ((prev = q) != master->slaves);
163         }
164 }
165
166 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
167                            struct netlink_ext_ack *extack)
168 {
169         struct net_device *dev = qdisc_dev(sch);
170         struct teql_master *m = (struct teql_master *)sch->ops;
171         struct teql_sched_data *q = qdisc_priv(sch);
172
173         if (dev->hard_header_len > m->dev->hard_header_len)
174                 return -EINVAL;
175
176         if (m->dev == dev)
177                 return -ELOOP;
178
179         q->m = m;
180
181         skb_queue_head_init(&q->q);
182
183         if (m->slaves) {
184                 if (m->dev->flags & IFF_UP) {
185                         if ((m->dev->flags & IFF_POINTOPOINT &&
186                              !(dev->flags & IFF_POINTOPOINT)) ||
187                             (m->dev->flags & IFF_BROADCAST &&
188                              !(dev->flags & IFF_BROADCAST)) ||
189                             (m->dev->flags & IFF_MULTICAST &&
190                              !(dev->flags & IFF_MULTICAST)) ||
191                             dev->mtu < m->dev->mtu)
192                                 return -EINVAL;
193                 } else {
194                         if (!(dev->flags&IFF_POINTOPOINT))
195                                 m->dev->flags &= ~IFF_POINTOPOINT;
196                         if (!(dev->flags&IFF_BROADCAST))
197                                 m->dev->flags &= ~IFF_BROADCAST;
198                         if (!(dev->flags&IFF_MULTICAST))
199                                 m->dev->flags &= ~IFF_MULTICAST;
200                         if (dev->mtu < m->dev->mtu)
201                                 m->dev->mtu = dev->mtu;
202                 }
203                 q->next = NEXT_SLAVE(m->slaves);
204                 NEXT_SLAVE(m->slaves) = sch;
205         } else {
206                 q->next = sch;
207                 m->slaves = sch;
208                 m->dev->mtu = dev->mtu;
209                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
210         }
211         return 0;
212 }
213
214
215 static int
216 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
217                struct net_device *dev, struct netdev_queue *txq,
218                struct dst_entry *dst)
219 {
220         struct neighbour *n;
221         int err = 0;
222
223         n = dst_neigh_lookup_skb(dst, skb);
224         if (!n)
225                 return -ENOENT;
226
227         if (dst->dev != dev) {
228                 struct neighbour *mn;
229
230                 mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
231                 neigh_release(n);
232                 if (IS_ERR(mn))
233                         return PTR_ERR(mn);
234                 n = mn;
235         }
236
237         if (neigh_event_send(n, skb_res) == 0) {
238                 int err;
239                 char haddr[MAX_ADDR_LEN];
240
241                 neigh_ha_snapshot(haddr, n, dev);
242                 err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
243                                       haddr, NULL, skb->len);
244
245                 if (err < 0)
246                         err = -EINVAL;
247         } else {
248                 err = (skb_res == NULL) ? -EAGAIN : 1;
249         }
250         neigh_release(n);
251         return err;
252 }
253
254 static inline int teql_resolve(struct sk_buff *skb,
255                                struct sk_buff *skb_res,
256                                struct net_device *dev,
257                                struct netdev_queue *txq)
258 {
259         struct dst_entry *dst = skb_dst(skb);
260         int res;
261
262         if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
263                 return -ENODEV;
264
265         if (!dev->header_ops || !dst)
266                 return 0;
267
268         rcu_read_lock();
269         res = __teql_resolve(skb, skb_res, dev, txq, dst);
270         rcu_read_unlock();
271
272         return res;
273 }
274
275 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
276 {
277         struct teql_master *master = netdev_priv(dev);
278         struct Qdisc *start, *q;
279         int busy;
280         int nores;
281         int subq = skb_get_queue_mapping(skb);
282         struct sk_buff *skb_res = NULL;
283
284         start = master->slaves;
285
286 restart:
287         nores = 0;
288         busy = 0;
289
290         q = start;
291         if (!q)
292                 goto drop;
293
294         do {
295                 struct net_device *slave = qdisc_dev(q);
296                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
297
298                 if (slave_txq->qdisc_sleeping != q)
299                         continue;
300                 if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
301                     !netif_running(slave)) {
302                         busy = 1;
303                         continue;
304                 }
305
306                 switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
307                 case 0:
308                         if (__netif_tx_trylock(slave_txq)) {
309                                 unsigned int length = qdisc_pkt_len(skb);
310
311                                 if (!netif_xmit_frozen_or_stopped(slave_txq) &&
312                                     netdev_start_xmit(skb, slave, slave_txq, false) ==
313                                     NETDEV_TX_OK) {
314                                         __netif_tx_unlock(slave_txq);
315                                         master->slaves = NEXT_SLAVE(q);
316                                         netif_wake_queue(dev);
317                                         master->tx_packets++;
318                                         master->tx_bytes += length;
319                                         return NETDEV_TX_OK;
320                                 }
321                                 __netif_tx_unlock(slave_txq);
322                         }
323                         if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
324                                 busy = 1;
325                         break;
326                 case 1:
327                         master->slaves = NEXT_SLAVE(q);
328                         return NETDEV_TX_OK;
329                 default:
330                         nores = 1;
331                         break;
332                 }
333                 __skb_pull(skb, skb_network_offset(skb));
334         } while ((q = NEXT_SLAVE(q)) != start);
335
336         if (nores && skb_res == NULL) {
337                 skb_res = skb;
338                 goto restart;
339         }
340
341         if (busy) {
342                 netif_stop_queue(dev);
343                 return NETDEV_TX_BUSY;
344         }
345         master->tx_errors++;
346
347 drop:
348         master->tx_dropped++;
349         dev_kfree_skb(skb);
350         return NETDEV_TX_OK;
351 }
352
353 static int teql_master_open(struct net_device *dev)
354 {
355         struct Qdisc *q;
356         struct teql_master *m = netdev_priv(dev);
357         int mtu = 0xFFFE;
358         unsigned int flags = IFF_NOARP | IFF_MULTICAST;
359
360         if (m->slaves == NULL)
361                 return -EUNATCH;
362
363         flags = FMASK;
364
365         q = m->slaves;
366         do {
367                 struct net_device *slave = qdisc_dev(q);
368
369                 if (slave == NULL)
370                         return -EUNATCH;
371
372                 if (slave->mtu < mtu)
373                         mtu = slave->mtu;
374                 if (slave->hard_header_len > LL_MAX_HEADER)
375                         return -EINVAL;
376
377                 /* If all the slaves are BROADCAST, master is BROADCAST
378                    If all the slaves are PtP, master is PtP
379                    Otherwise, master is NBMA.
380                  */
381                 if (!(slave->flags&IFF_POINTOPOINT))
382                         flags &= ~IFF_POINTOPOINT;
383                 if (!(slave->flags&IFF_BROADCAST))
384                         flags &= ~IFF_BROADCAST;
385                 if (!(slave->flags&IFF_MULTICAST))
386                         flags &= ~IFF_MULTICAST;
387         } while ((q = NEXT_SLAVE(q)) != m->slaves);
388
389         m->dev->mtu = mtu;
390         m->dev->flags = (m->dev->flags&~FMASK) | flags;
391         netif_start_queue(m->dev);
392         return 0;
393 }
394
/* ndo_stop of the master device: stop its transmit queue; always succeeds. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
400
401 static void teql_master_stats64(struct net_device *dev,
402                                 struct rtnl_link_stats64 *stats)
403 {
404         struct teql_master *m = netdev_priv(dev);
405
406         stats->tx_packets       = m->tx_packets;
407         stats->tx_bytes         = m->tx_bytes;
408         stats->tx_errors        = m->tx_errors;
409         stats->tx_dropped       = m->tx_dropped;
410 }
411
412 static int teql_master_mtu(struct net_device *dev, int new_mtu)
413 {
414         struct teql_master *m = netdev_priv(dev);
415         struct Qdisc *q;
416
417         q = m->slaves;
418         if (q) {
419                 do {
420                         if (new_mtu > qdisc_dev(q)->mtu)
421                                 return -EINVAL;
422                 } while ((q = NEXT_SLAVE(q)) != m->slaves);
423         }
424
425         dev->mtu = new_mtu;
426         return 0;
427 }
428
429 static const struct net_device_ops teql_netdev_ops = {
430         .ndo_open       = teql_master_open,
431         .ndo_stop       = teql_master_close,
432         .ndo_start_xmit = teql_master_xmit,
433         .ndo_get_stats64 = teql_master_stats64,
434         .ndo_change_mtu = teql_master_mtu,
435 };
436
437 static __init void teql_master_setup(struct net_device *dev)
438 {
439         struct teql_master *master = netdev_priv(dev);
440         struct Qdisc_ops *ops = &master->qops;
441
442         master->dev     = dev;
443         ops->priv_size  = sizeof(struct teql_sched_data);
444
445         ops->enqueue    =       teql_enqueue;
446         ops->dequeue    =       teql_dequeue;
447         ops->peek       =       teql_peek;
448         ops->init       =       teql_qdisc_init;
449         ops->reset      =       teql_reset;
450         ops->destroy    =       teql_destroy;
451         ops->owner      =       THIS_MODULE;
452
453         dev->netdev_ops =       &teql_netdev_ops;
454         dev->type               = ARPHRD_VOID;
455         dev->mtu                = 1500;
456         dev->min_mtu            = 68;
457         dev->max_mtu            = 65535;
458         dev->tx_queue_len       = 100;
459         dev->flags              = IFF_NOARP;
460         dev->hard_header_len    = LL_MAX_HEADER;
461         netif_keep_dst(dev);
462 }
463
464 static LIST_HEAD(master_dev_list);
465 static int max_equalizers = 1;
466 module_param(max_equalizers, int, 0);
467 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
468
469 static int __init teql_init(void)
470 {
471         int i;
472         int err = -ENODEV;
473
474         for (i = 0; i < max_equalizers; i++) {
475                 struct net_device *dev;
476                 struct teql_master *master;
477
478                 dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
479                                    NET_NAME_UNKNOWN, teql_master_setup);
480                 if (!dev) {
481                         err = -ENOMEM;
482                         break;
483                 }
484
485                 if ((err = register_netdev(dev))) {
486                         free_netdev(dev);
487                         break;
488                 }
489
490                 master = netdev_priv(dev);
491
492                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
493                 err = register_qdisc(&master->qops);
494
495                 if (err) {
496                         unregister_netdev(dev);
497                         free_netdev(dev);
498                         break;
499                 }
500
501                 list_add_tail(&master->master_list, &master_dev_list);
502         }
503         return i ? 0 : err;
504 }
505
506 static void __exit teql_exit(void)
507 {
508         struct teql_master *master, *nxt;
509
510         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
511
512                 list_del(&master->master_list);
513
514                 unregister_qdisc(&master->qops);
515                 unregister_netdev(master->dev);
516                 free_netdev(master->dev);
517         }
518 }
519
520 module_init(teql_init);
521 module_exit(teql_exit);
522
523 MODULE_LICENSE("GPL");