drm/etnaviv: potential NULL dereference
[linux-2.6-microblaze.git] / net / sched / sch_api.c
1 /*
2  * net/sched/sch_api.c  Packet scheduler API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/slab.h>
31 #include <linux/hashtable.h>
32
33 #include <net/net_namespace.h>
34 #include <net/sock.h>
35 #include <net/netlink.h>
36 #include <net/pkt_sched.h>
37 #include <net/pkt_cls.h>
38
39 /*
40
41    Short review.
42    -------------
43
44    This file consists of two interrelated parts:
45
46    1. queueing disciplines manager frontend.
47    2. traffic classes manager frontend.
48
49    Generally, queueing discipline ("qdisc") is a black box,
50    which is able to enqueue packets and to dequeue them (when
51    device is ready to send something) in order and at times
52    determined by algorithm hidden in it.
53
54    qdiscs are divided into two categories:
55    - "queues", which have no internal structure visible from outside.
56    - "schedulers", which split all the packets into "traffic classes",
57      using "packet classifiers" (look at cls_api.c)
58
59    In turn, classes may have child qdiscs (as a rule, queues)
60    attached to them etc. etc. etc.
61
62    The goal of the routines in this file is to translate
63    information supplied by the user in the form of handles
64    into a form more intelligible to the kernel, to perform some sanity
65    checks and the part of the work that is common to all qdiscs,
66    and to provide rtnetlink notifications.
67
68    All real intelligent work is done inside qdisc modules.
69
70
71
72    Every discipline has two major routines: enqueue and dequeue.
73
74    ---dequeue
75
76    dequeue usually returns a skb to send. It is allowed to return NULL,
77    but that does not mean that the queue is empty; it just means that the
78    discipline does not want to send anything this time.
79    The queue is really empty if q->q.qlen == 0.
80    For complicated disciplines with multiple queues q->q is not a
81    real packet queue, but q->q.qlen must nevertheless be valid.
82
83    ---enqueue
84
85    enqueue returns 0 if the packet was enqueued successfully.
86    If a packet (this one or another one) was dropped, it returns
87    a non-zero error code.
88    NET_XMIT_DROP        - this packet was dropped
89      Expected action: do not back off, but wait until the queue clears.
90    NET_XMIT_CN          - this packet was probably enqueued, but another one was dropped.
91      Expected action: back off or ignore
92
93    Auxiliary routines:
94
95    ---peek
96
97    like dequeue but without removing a packet from the queue
98
99    ---reset
100
101    returns qdisc to initial state: purge all buffers, clear all
102    timers, counters (except for statistics) etc.
103
104    ---init
105
106    initializes newly created qdisc.
107
108    ---destroy
109
110    destroys resources allocated by init and during lifetime of qdisc.
111
112    ---change
113
114    changes qdisc parameters.
115  */
116
/* Guards the list of registered qdisc ops (qdisc_base). It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);
119
120
121 /************************************************
122  *      Queueing disciplines manipulation.      *
123  ************************************************/
124
125
/* Head of the singly linked list of all installed queueing disciplines. */
static struct Qdisc_ops *qdisc_base;
129
130 /* Register/unregister queueing discipline */
131
132 int register_qdisc(struct Qdisc_ops *qops)
133 {
134         struct Qdisc_ops *q, **qp;
135         int rc = -EEXIST;
136
137         write_lock(&qdisc_mod_lock);
138         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
139                 if (!strcmp(qops->id, q->id))
140                         goto out;
141
142         if (qops->enqueue == NULL)
143                 qops->enqueue = noop_qdisc_ops.enqueue;
144         if (qops->peek == NULL) {
145                 if (qops->dequeue == NULL)
146                         qops->peek = noop_qdisc_ops.peek;
147                 else
148                         goto out_einval;
149         }
150         if (qops->dequeue == NULL)
151                 qops->dequeue = noop_qdisc_ops.dequeue;
152
153         if (qops->cl_ops) {
154                 const struct Qdisc_class_ops *cops = qops->cl_ops;
155
156                 if (!(cops->find && cops->walk && cops->leaf))
157                         goto out_einval;
158
159                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
160                         goto out_einval;
161         }
162
163         qops->next = NULL;
164         *qp = qops;
165         rc = 0;
166 out:
167         write_unlock(&qdisc_mod_lock);
168         return rc;
169
170 out_einval:
171         rc = -EINVAL;
172         goto out;
173 }
174 EXPORT_SYMBOL(register_qdisc);
175
176 int unregister_qdisc(struct Qdisc_ops *qops)
177 {
178         struct Qdisc_ops *q, **qp;
179         int err = -ENOENT;
180
181         write_lock(&qdisc_mod_lock);
182         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
183                 if (q == qops)
184                         break;
185         if (q) {
186                 *qp = q->next;
187                 q->next = NULL;
188                 err = 0;
189         }
190         write_unlock(&qdisc_mod_lock);
191         return err;
192 }
193 EXPORT_SYMBOL(unregister_qdisc);
194
195 /* Get default qdisc if not otherwise specified */
196 void qdisc_get_default(char *name, size_t len)
197 {
198         read_lock(&qdisc_mod_lock);
199         strlcpy(name, default_qdisc_ops->id, len);
200         read_unlock(&qdisc_mod_lock);
201 }
202
203 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204 {
205         struct Qdisc_ops *q = NULL;
206
207         for (q = qdisc_base; q; q = q->next) {
208                 if (!strcmp(name, q->id)) {
209                         if (!try_module_get(q->owner))
210                                 q = NULL;
211                         break;
212                 }
213         }
214
215         return q;
216 }
217
218 /* Set new default qdisc to use */
219 int qdisc_set_default(const char *name)
220 {
221         const struct Qdisc_ops *ops;
222
223         if (!capable(CAP_NET_ADMIN))
224                 return -EPERM;
225
226         write_lock(&qdisc_mod_lock);
227         ops = qdisc_lookup_default(name);
228         if (!ops) {
229                 /* Not found, drop lock and try to load module */
230                 write_unlock(&qdisc_mod_lock);
231                 request_module("sch_%s", name);
232                 write_lock(&qdisc_mod_lock);
233
234                 ops = qdisc_lookup_default(name);
235         }
236
237         if (ops) {
238                 /* Set new default */
239                 module_put(default_qdisc_ops->owner);
240                 default_qdisc_ops = ops;
241         }
242         write_unlock(&qdisc_mod_lock);
243
244         return ops ? 0 : -ENOENT;
245 }
246
#ifdef CONFIG_NET_SCH_DEFAULT
/* Late initcall: apply the default qdisc named by CONFIG_DEFAULT_NET_SCH. */
static int __init sch_default_qdisc(void)
{
	int err = qdisc_set_default(CONFIG_DEFAULT_NET_SCH);

	return err;
}
late_initcall(sch_default_qdisc);
#endif
255
256 /* We know handle. Find qdisc among all qdisc's attached to device
257  * (root qdisc, all its children, children of children etc.)
258  * Note: caller either uses rtnl or rcu_read_lock()
259  */
260
261 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
262 {
263         struct Qdisc *q;
264
265         if (!qdisc_dev(root))
266                 return (root->handle == handle ? root : NULL);
267
268         if (!(root->flags & TCQ_F_BUILTIN) &&
269             root->handle == handle)
270                 return root;
271
272         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
273                 if (q->handle == handle)
274                         return q;
275         }
276         return NULL;
277 }
278
279 void qdisc_hash_add(struct Qdisc *q, bool invisible)
280 {
281         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
282                 ASSERT_RTNL();
283                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
284                 if (invisible)
285                         q->flags |= TCQ_F_INVISIBLE;
286         }
287 }
288 EXPORT_SYMBOL(qdisc_hash_add);
289
290 void qdisc_hash_del(struct Qdisc *q)
291 {
292         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
293                 ASSERT_RTNL();
294                 hash_del_rcu(&q->hash);
295         }
296 }
297 EXPORT_SYMBOL(qdisc_hash_del);
298
299 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
300 {
301         struct Qdisc *q;
302
303         if (!handle)
304                 return NULL;
305         q = qdisc_match_from_root(dev->qdisc, handle);
306         if (q)
307                 goto out;
308
309         if (dev_ingress_queue(dev))
310                 q = qdisc_match_from_root(
311                         dev_ingress_queue(dev)->qdisc_sleeping,
312                         handle);
313 out:
314         return q;
315 }
316
317 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
318 {
319         struct netdev_queue *nq;
320         struct Qdisc *q;
321
322         if (!handle)
323                 return NULL;
324         q = qdisc_match_from_root(dev->qdisc, handle);
325         if (q)
326                 goto out;
327
328         nq = dev_ingress_queue_rcu(dev);
329         if (nq)
330                 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
331 out:
332         return q;
333 }
334
335 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
336 {
337         unsigned long cl;
338         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
339
340         if (cops == NULL)
341                 return NULL;
342         cl = cops->find(p, classid);
343
344         if (cl == 0)
345                 return NULL;
346         return cops->leaf(p, cl);
347 }
348
349 /* Find queueing discipline by name */
350
351 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
352 {
353         struct Qdisc_ops *q = NULL;
354
355         if (kind) {
356                 read_lock(&qdisc_mod_lock);
357                 for (q = qdisc_base; q; q = q->next) {
358                         if (nla_strcmp(kind, q->id) == 0) {
359                                 if (!try_module_get(q->owner))
360                                         q = NULL;
361                                 break;
362                         }
363                 }
364                 read_unlock(&qdisc_mod_lock);
365         }
366         return q;
367 }
368
369 /* The linklayer setting were not transferred from iproute2, in older
370  * versions, and the rate tables lookup systems have been dropped in
371  * the kernel. To keep backward compatible with older iproute2 tc
372  * utils, we detect the linklayer setting by detecting if the rate
373  * table were modified.
374  *
375  * For linklayer ATM table entries, the rate table will be aligned to
376  * 48 bytes, thus some table entries will contain the same value.  The
377  * mpu (min packet unit) is also encoded into the old rate table, thus
378  * starting from the mpu, we find low and high table entries for
379  * mapping this cell.  If these entries contain the same value, when
380  * the rate tables have been modified for linklayer ATM.
381  *
382  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
383  * and then roundup to the next cell, calc the table entry one below,
384  * and compare.
385  */
386 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
387 {
388         int low       = roundup(r->mpu, 48);
389         int high      = roundup(low+1, 48);
390         int cell_low  = low >> r->cell_log;
391         int cell_high = (high >> r->cell_log) - 1;
392
393         /* rtab is too inaccurate at rates > 100Mbit/s */
394         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
395                 pr_debug("TC linklayer: Giving up ATM detection\n");
396                 return TC_LINKLAYER_ETHERNET;
397         }
398
399         if ((cell_high > cell_low) && (cell_high < 256)
400             && (rtab[cell_low] == rtab[cell_high])) {
401                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
402                          cell_low, cell_high, rtab[cell_high]);
403                 return TC_LINKLAYER_ATM;
404         }
405         return TC_LINKLAYER_ETHERNET;
406 }
407
408 static struct qdisc_rate_table *qdisc_rtab_list;
409
410 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
411                                         struct nlattr *tab,
412                                         struct netlink_ext_ack *extack)
413 {
414         struct qdisc_rate_table *rtab;
415
416         if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
417             nla_len(tab) != TC_RTAB_SIZE) {
418                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
419                 return NULL;
420         }
421
422         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
423                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
424                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
425                         rtab->refcnt++;
426                         return rtab;
427                 }
428         }
429
430         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
431         if (rtab) {
432                 rtab->rate = *r;
433                 rtab->refcnt = 1;
434                 memcpy(rtab->data, nla_data(tab), 1024);
435                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
436                         r->linklayer = __detect_linklayer(r, rtab->data);
437                 rtab->next = qdisc_rtab_list;
438                 qdisc_rtab_list = rtab;
439         } else {
440                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
441         }
442         return rtab;
443 }
444 EXPORT_SYMBOL(qdisc_get_rtab);
445
446 void qdisc_put_rtab(struct qdisc_rate_table *tab)
447 {
448         struct qdisc_rate_table *rtab, **rtabp;
449
450         if (!tab || --tab->refcnt)
451                 return;
452
453         for (rtabp = &qdisc_rtab_list;
454              (rtab = *rtabp) != NULL;
455              rtabp = &rtab->next) {
456                 if (rtab == tab) {
457                         *rtabp = rtab->next;
458                         kfree(rtab);
459                         return;
460                 }
461         }
462 }
463 EXPORT_SYMBOL(qdisc_put_rtab);
464
465 static LIST_HEAD(qdisc_stab_list);
466
467 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
468         [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
469         [TCA_STAB_DATA] = { .type = NLA_BINARY },
470 };
471
472 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
473                                                struct netlink_ext_ack *extack)
474 {
475         struct nlattr *tb[TCA_STAB_MAX + 1];
476         struct qdisc_size_table *stab;
477         struct tc_sizespec *s;
478         unsigned int tsize = 0;
479         u16 *tab = NULL;
480         int err;
481
482         err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
483         if (err < 0)
484                 return ERR_PTR(err);
485         if (!tb[TCA_STAB_BASE]) {
486                 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
487                 return ERR_PTR(-EINVAL);
488         }
489
490         s = nla_data(tb[TCA_STAB_BASE]);
491
492         if (s->tsize > 0) {
493                 if (!tb[TCA_STAB_DATA]) {
494                         NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
495                         return ERR_PTR(-EINVAL);
496                 }
497                 tab = nla_data(tb[TCA_STAB_DATA]);
498                 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
499         }
500
501         if (tsize != s->tsize || (!tab && tsize > 0)) {
502                 NL_SET_ERR_MSG(extack, "Invalid size of size table");
503                 return ERR_PTR(-EINVAL);
504         }
505
506         list_for_each_entry(stab, &qdisc_stab_list, list) {
507                 if (memcmp(&stab->szopts, s, sizeof(*s)))
508                         continue;
509                 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
510                         continue;
511                 stab->refcnt++;
512                 return stab;
513         }
514
515         stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
516         if (!stab)
517                 return ERR_PTR(-ENOMEM);
518
519         stab->refcnt = 1;
520         stab->szopts = *s;
521         if (tsize > 0)
522                 memcpy(stab->data, tab, tsize * sizeof(u16));
523
524         list_add_tail(&stab->list, &qdisc_stab_list);
525
526         return stab;
527 }
528
529 static void stab_kfree_rcu(struct rcu_head *head)
530 {
531         kfree(container_of(head, struct qdisc_size_table, rcu));
532 }
533
534 void qdisc_put_stab(struct qdisc_size_table *tab)
535 {
536         if (!tab)
537                 return;
538
539         if (--tab->refcnt == 0) {
540                 list_del(&tab->list);
541                 call_rcu(&tab->rcu, stab_kfree_rcu);
542         }
543 }
544 EXPORT_SYMBOL(qdisc_put_stab);
545
546 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
547 {
548         struct nlattr *nest;
549
550         nest = nla_nest_start(skb, TCA_STAB);
551         if (nest == NULL)
552                 goto nla_put_failure;
553         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
554                 goto nla_put_failure;
555         nla_nest_end(skb, nest);
556
557         return skb->len;
558
559 nla_put_failure:
560         return -1;
561 }
562
563 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
564                                const struct qdisc_size_table *stab)
565 {
566         int pkt_len, slot;
567
568         pkt_len = skb->len + stab->szopts.overhead;
569         if (unlikely(!stab->szopts.tsize))
570                 goto out;
571
572         slot = pkt_len + stab->szopts.cell_align;
573         if (unlikely(slot < 0))
574                 slot = 0;
575
576         slot >>= stab->szopts.cell_log;
577         if (likely(slot < stab->szopts.tsize))
578                 pkt_len = stab->data[slot];
579         else
580                 pkt_len = stab->data[stab->szopts.tsize - 1] *
581                                 (slot / stab->szopts.tsize) +
582                                 stab->data[slot % stab->szopts.tsize];
583
584         pkt_len <<= stab->szopts.size_log;
585 out:
586         if (unlikely(pkt_len < 1))
587                 pkt_len = 1;
588         qdisc_skb_cb(skb)->pkt_len = pkt_len;
589 }
590 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
591
592 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
593 {
594         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
595                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
596                         txt, qdisc->ops->id, qdisc->handle >> 16);
597                 qdisc->flags |= TCQ_F_WARN_NONWC;
598         }
599 }
600 EXPORT_SYMBOL(qdisc_warn_nonwc);
601
602 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
603 {
604         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
605                                                  timer);
606
607         rcu_read_lock();
608         __netif_schedule(qdisc_root(wd->qdisc));
609         rcu_read_unlock();
610
611         return HRTIMER_NORESTART;
612 }
613
614 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
615                                  clockid_t clockid)
616 {
617         hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
618         wd->timer.function = qdisc_watchdog;
619         wd->qdisc = qdisc;
620 }
621 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
622
623 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
624 {
625         qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
626 }
627 EXPORT_SYMBOL(qdisc_watchdog_init);
628
629 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
630 {
631         if (test_bit(__QDISC_STATE_DEACTIVATED,
632                      &qdisc_root_sleeping(wd->qdisc)->state))
633                 return;
634
635         if (wd->last_expires == expires)
636                 return;
637
638         wd->last_expires = expires;
639         hrtimer_start(&wd->timer,
640                       ns_to_ktime(expires),
641                       HRTIMER_MODE_ABS_PINNED);
642 }
643 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
644
645 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
646 {
647         hrtimer_cancel(&wd->timer);
648 }
649 EXPORT_SYMBOL(qdisc_watchdog_cancel);
650
651 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
652 {
653         struct hlist_head *h;
654         unsigned int i;
655
656         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
657
658         if (h != NULL) {
659                 for (i = 0; i < n; i++)
660                         INIT_HLIST_HEAD(&h[i]);
661         }
662         return h;
663 }
664
665 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
666 {
667         struct Qdisc_class_common *cl;
668         struct hlist_node *next;
669         struct hlist_head *nhash, *ohash;
670         unsigned int nsize, nmask, osize;
671         unsigned int i, h;
672
673         /* Rehash when load factor exceeds 0.75 */
674         if (clhash->hashelems * 4 <= clhash->hashsize * 3)
675                 return;
676         nsize = clhash->hashsize * 2;
677         nmask = nsize - 1;
678         nhash = qdisc_class_hash_alloc(nsize);
679         if (nhash == NULL)
680                 return;
681
682         ohash = clhash->hash;
683         osize = clhash->hashsize;
684
685         sch_tree_lock(sch);
686         for (i = 0; i < osize; i++) {
687                 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
688                         h = qdisc_class_hash(cl->classid, nmask);
689                         hlist_add_head(&cl->hnode, &nhash[h]);
690                 }
691         }
692         clhash->hash     = nhash;
693         clhash->hashsize = nsize;
694         clhash->hashmask = nmask;
695         sch_tree_unlock(sch);
696
697         kvfree(ohash);
698 }
699 EXPORT_SYMBOL(qdisc_class_hash_grow);
700
701 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
702 {
703         unsigned int size = 4;
704
705         clhash->hash = qdisc_class_hash_alloc(size);
706         if (!clhash->hash)
707                 return -ENOMEM;
708         clhash->hashsize  = size;
709         clhash->hashmask  = size - 1;
710         clhash->hashelems = 0;
711         return 0;
712 }
713 EXPORT_SYMBOL(qdisc_class_hash_init);
714
715 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
716 {
717         kvfree(clhash->hash);
718 }
719 EXPORT_SYMBOL(qdisc_class_hash_destroy);
720
721 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
722                              struct Qdisc_class_common *cl)
723 {
724         unsigned int h;
725
726         INIT_HLIST_NODE(&cl->hnode);
727         h = qdisc_class_hash(cl->classid, clhash->hashmask);
728         hlist_add_head(&cl->hnode, &clhash->hash[h]);
729         clhash->hashelems++;
730 }
731 EXPORT_SYMBOL(qdisc_class_hash_insert);
732
733 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
734                              struct Qdisc_class_common *cl)
735 {
736         hlist_del(&cl->hnode);
737         clhash->hashelems--;
738 }
739 EXPORT_SYMBOL(qdisc_class_hash_remove);
740
741 /* Allocate an unique handle from space managed by kernel
742  * Possible range is [8000-FFFF]:0000 (0x8000 values)
743  */
744 static u32 qdisc_alloc_handle(struct net_device *dev)
745 {
746         int i = 0x8000;
747         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
748
749         do {
750                 autohandle += TC_H_MAKE(0x10000U, 0);
751                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
752                         autohandle = TC_H_MAKE(0x80000000U, 0);
753                 if (!qdisc_lookup(dev, autohandle))
754                         return autohandle;
755                 cond_resched();
756         } while (--i > 0);
757
758         return 0;
759 }
760
761 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
762                                unsigned int len)
763 {
764         bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
765         const struct Qdisc_class_ops *cops;
766         unsigned long cl;
767         u32 parentid;
768         bool notify;
769         int drops;
770
771         if (n == 0 && len == 0)
772                 return;
773         drops = max_t(int, n, 0);
774         rcu_read_lock();
775         while ((parentid = sch->parent)) {
776                 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
777                         break;
778
779                 if (sch->flags & TCQ_F_NOPARENT)
780                         break;
781                 /* Notify parent qdisc only if child qdisc becomes empty.
782                  *
783                  * If child was empty even before update then backlog
784                  * counter is screwed and we skip notification because
785                  * parent class is already passive.
786                  *
787                  * If the original child was offloaded then it is allowed
788                  * to be seem as empty, so the parent is notified anyway.
789                  */
790                 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
791                                                        !qdisc_is_offloaded);
792                 /* TODO: perform the search on a per txq basis */
793                 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
794                 if (sch == NULL) {
795                         WARN_ON_ONCE(parentid != TC_H_ROOT);
796                         break;
797                 }
798                 cops = sch->ops->cl_ops;
799                 if (notify && cops->qlen_notify) {
800                         cl = cops->find(sch, parentid);
801                         cops->qlen_notify(sch, cl);
802                 }
803                 sch->q.qlen -= n;
804                 sch->qstats.backlog -= len;
805                 __qdisc_qstats_drop(sch, drops);
806         }
807         rcu_read_unlock();
808 }
809 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
810
811 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
812                               void *type_data)
813 {
814         struct net_device *dev = qdisc_dev(sch);
815         int err;
816
817         sch->flags &= ~TCQ_F_OFFLOADED;
818         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
819                 return 0;
820
821         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
822         if (err == -EOPNOTSUPP)
823                 return 0;
824
825         if (!err)
826                 sch->flags |= TCQ_F_OFFLOADED;
827
828         return err;
829 }
830 EXPORT_SYMBOL(qdisc_offload_dump_helper);
831
832 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
833                                 struct Qdisc *new, struct Qdisc *old,
834                                 enum tc_setup_type type, void *type_data,
835                                 struct netlink_ext_ack *extack)
836 {
837         bool any_qdisc_is_offloaded;
838         int err;
839
840         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
841                 return;
842
843         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
844
845         /* Don't report error if the graft is part of destroy operation. */
846         if (!err || !new || new == &noop_qdisc)
847                 return;
848
849         /* Don't report error if the parent, the old child and the new
850          * one are not offloaded.
851          */
852         any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
853         any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
854         any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
855
856         if (any_qdisc_is_offloaded)
857                 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
858 }
859 EXPORT_SYMBOL(qdisc_offload_graft_helper);
860
861 static void qdisc_offload_graft_root(struct net_device *dev,
862                                      struct Qdisc *new, struct Qdisc *old,
863                                      struct netlink_ext_ack *extack)
864 {
865         struct tc_root_qopt_offload graft_offload = {
866                 .command        = TC_ROOT_GRAFT,
867                 .handle         = new ? new->handle : 0,
868                 .ingress        = (new && new->flags & TCQ_F_INGRESS) ||
869                                   (old && old->flags & TCQ_F_INGRESS),
870         };
871
872         qdisc_offload_graft_helper(dev, NULL, new, old,
873                                    TC_SETUP_ROOT_QDISC, &graft_offload, extack);
874 }
875
876 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
877                          u32 portid, u32 seq, u16 flags, int event)
878 {
879         struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
880         struct gnet_stats_queue __percpu *cpu_qstats = NULL;
881         struct tcmsg *tcm;
882         struct nlmsghdr  *nlh;
883         unsigned char *b = skb_tail_pointer(skb);
884         struct gnet_dump d;
885         struct qdisc_size_table *stab;
886         u32 block_index;
887         __u32 qlen;
888
889         cond_resched();
890         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
891         if (!nlh)
892                 goto out_nlmsg_trim;
893         tcm = nlmsg_data(nlh);
894         tcm->tcm_family = AF_UNSPEC;
895         tcm->tcm__pad1 = 0;
896         tcm->tcm__pad2 = 0;
897         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
898         tcm->tcm_parent = clid;
899         tcm->tcm_handle = q->handle;
900         tcm->tcm_info = refcount_read(&q->refcnt);
901         if (nla_put_string(skb, TCA_KIND, q->ops->id))
902                 goto nla_put_failure;
903         if (q->ops->ingress_block_get) {
904                 block_index = q->ops->ingress_block_get(q);
905                 if (block_index &&
906                     nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
907                         goto nla_put_failure;
908         }
909         if (q->ops->egress_block_get) {
910                 block_index = q->ops->egress_block_get(q);
911                 if (block_index &&
912                     nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
913                         goto nla_put_failure;
914         }
915         if (q->ops->dump && q->ops->dump(q, skb) < 0)
916                 goto nla_put_failure;
917         if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
918                 goto nla_put_failure;
919         qlen = qdisc_qlen_sum(q);
920
921         stab = rtnl_dereference(q->stab);
922         if (stab && qdisc_dump_stab(skb, stab) < 0)
923                 goto nla_put_failure;
924
925         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
926                                          NULL, &d, TCA_PAD) < 0)
927                 goto nla_put_failure;
928
929         if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
930                 goto nla_put_failure;
931
932         if (qdisc_is_percpu_stats(q)) {
933                 cpu_bstats = q->cpu_bstats;
934                 cpu_qstats = q->cpu_qstats;
935         }
936
937         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
938                                   &d, cpu_bstats, &q->bstats) < 0 ||
939             gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
940             gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
941                 goto nla_put_failure;
942
943         if (gnet_stats_finish_copy(&d) < 0)
944                 goto nla_put_failure;
945
946         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
947         return skb->len;
948
949 out_nlmsg_trim:
950 nla_put_failure:
951         nlmsg_trim(skb, b);
952         return -1;
953 }
954
955 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
956 {
957         if (q->flags & TCQ_F_BUILTIN)
958                 return true;
959         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
960                 return true;
961
962         return false;
963 }
964
965 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
966                         struct nlmsghdr *n, u32 clid,
967                         struct Qdisc *old, struct Qdisc *new)
968 {
969         struct sk_buff *skb;
970         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
971
972         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
973         if (!skb)
974                 return -ENOBUFS;
975
976         if (old && !tc_qdisc_dump_ignore(old, false)) {
977                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
978                                   0, RTM_DELQDISC) < 0)
979                         goto err_out;
980         }
981         if (new && !tc_qdisc_dump_ignore(new, false)) {
982                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
983                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
984                         goto err_out;
985         }
986
987         if (skb->len)
988                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
989                                       n->nlmsg_flags & NLM_F_ECHO);
990
991 err_out:
992         kfree_skb(skb);
993         return -EINVAL;
994 }
995
996 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
997                                struct nlmsghdr *n, u32 clid,
998                                struct Qdisc *old, struct Qdisc *new)
999 {
1000         if (new || old)
1001                 qdisc_notify(net, skb, n, clid, old, new);
1002
1003         if (old)
1004                 qdisc_put(old);
1005 }
1006
1007 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
1008  * to device "dev".
1009  *
1010  * When appropriate send a netlink notification using 'skb'
1011  * and "n".
1012  *
1013  * On success, destroy old qdisc.
1014  */
1015
1016 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1017                        struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1018                        struct Qdisc *new, struct Qdisc *old,
1019                        struct netlink_ext_ack *extack)
1020 {
1021         struct Qdisc *q = old;
1022         struct net *net = dev_net(dev);
1023
1024         if (parent == NULL) {
1025                 unsigned int i, num_q, ingress;
1026
1027                 ingress = 0;
1028                 num_q = dev->num_tx_queues;
1029                 if ((q && q->flags & TCQ_F_INGRESS) ||
1030                     (new && new->flags & TCQ_F_INGRESS)) {
1031                         num_q = 1;
1032                         ingress = 1;
1033                         if (!dev_ingress_queue(dev)) {
1034                                 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1035                                 return -ENOENT;
1036                         }
1037                 }
1038
1039                 if (dev->flags & IFF_UP)
1040                         dev_deactivate(dev);
1041
1042                 qdisc_offload_graft_root(dev, new, old, extack);
1043
1044                 if (new && new->ops->attach)
1045                         goto skip;
1046
1047                 for (i = 0; i < num_q; i++) {
1048                         struct netdev_queue *dev_queue = dev_ingress_queue(dev);
1049
1050                         if (!ingress)
1051                                 dev_queue = netdev_get_tx_queue(dev, i);
1052
1053                         old = dev_graft_qdisc(dev_queue, new);
1054                         if (new && i > 0)
1055                                 qdisc_refcount_inc(new);
1056
1057                         if (!ingress)
1058                                 qdisc_put(old);
1059                 }
1060
1061 skip:
1062                 if (!ingress) {
1063                         notify_and_destroy(net, skb, n, classid,
1064                                            dev->qdisc, new);
1065                         if (new && !new->ops->attach)
1066                                 qdisc_refcount_inc(new);
1067                         dev->qdisc = new ? : &noop_qdisc;
1068
1069                         if (new && new->ops->attach)
1070                                 new->ops->attach(new);
1071                 } else {
1072                         notify_and_destroy(net, skb, n, classid, old, new);
1073                 }
1074
1075                 if (dev->flags & IFF_UP)
1076                         dev_activate(dev);
1077         } else {
1078                 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1079                 unsigned long cl;
1080                 int err;
1081
1082                 /* Only support running class lockless if parent is lockless */
1083                 if (new && (new->flags & TCQ_F_NOLOCK) &&
1084                     parent && !(parent->flags & TCQ_F_NOLOCK))
1085                         new->flags &= ~TCQ_F_NOLOCK;
1086
1087                 if (!cops || !cops->graft)
1088                         return -EOPNOTSUPP;
1089
1090                 cl = cops->find(parent, classid);
1091                 if (!cl) {
1092                         NL_SET_ERR_MSG(extack, "Specified class not found");
1093                         return -ENOENT;
1094                 }
1095
1096                 err = cops->graft(parent, cl, new, &old, extack);
1097                 if (err)
1098                         return err;
1099                 notify_and_destroy(net, skb, n, classid, old, new);
1100         }
1101         return 0;
1102 }
1103
1104 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1105                                    struct netlink_ext_ack *extack)
1106 {
1107         u32 block_index;
1108
1109         if (tca[TCA_INGRESS_BLOCK]) {
1110                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1111
1112                 if (!block_index) {
1113                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1114                         return -EINVAL;
1115                 }
1116                 if (!sch->ops->ingress_block_set) {
1117                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1118                         return -EOPNOTSUPP;
1119                 }
1120                 sch->ops->ingress_block_set(sch, block_index);
1121         }
1122         if (tca[TCA_EGRESS_BLOCK]) {
1123                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1124
1125                 if (!block_index) {
1126                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1127                         return -EINVAL;
1128                 }
1129                 if (!sch->ops->egress_block_set) {
1130                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1131                         return -EOPNOTSUPP;
1132                 }
1133                 sch->ops->egress_block_set(sch, block_index);
1134         }
1135         return 0;
1136 }
1137
1138 /*
1139    Allocate and initialize new qdisc.
1140
1141    Parameters are passed via opt.
1142  */
1143
1144 static struct Qdisc *qdisc_create(struct net_device *dev,
1145                                   struct netdev_queue *dev_queue,
1146                                   struct Qdisc *p, u32 parent, u32 handle,
1147                                   struct nlattr **tca, int *errp,
1148                                   struct netlink_ext_ack *extack)
1149 {
1150         int err;
1151         struct nlattr *kind = tca[TCA_KIND];
1152         struct Qdisc *sch;
1153         struct Qdisc_ops *ops;
1154         struct qdisc_size_table *stab;
1155
1156         ops = qdisc_lookup_ops(kind);
1157 #ifdef CONFIG_MODULES
1158         if (ops == NULL && kind != NULL) {
1159                 char name[IFNAMSIZ];
1160                 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1161                         /* We dropped the RTNL semaphore in order to
1162                          * perform the module load.  So, even if we
1163                          * succeeded in loading the module we have to
1164                          * tell the caller to replay the request.  We
1165                          * indicate this using -EAGAIN.
1166                          * We replay the request because the device may
1167                          * go away in the mean time.
1168                          */
1169                         rtnl_unlock();
1170                         request_module("sch_%s", name);
1171                         rtnl_lock();
1172                         ops = qdisc_lookup_ops(kind);
1173                         if (ops != NULL) {
1174                                 /* We will try again qdisc_lookup_ops,
1175                                  * so don't keep a reference.
1176                                  */
1177                                 module_put(ops->owner);
1178                                 err = -EAGAIN;
1179                                 goto err_out;
1180                         }
1181                 }
1182         }
1183 #endif
1184
1185         err = -ENOENT;
1186         if (!ops) {
1187                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1188                 goto err_out;
1189         }
1190
1191         sch = qdisc_alloc(dev_queue, ops, extack);
1192         if (IS_ERR(sch)) {
1193                 err = PTR_ERR(sch);
1194                 goto err_out2;
1195         }
1196
1197         sch->parent = parent;
1198
1199         if (handle == TC_H_INGRESS) {
1200                 sch->flags |= TCQ_F_INGRESS;
1201                 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1202         } else {
1203                 if (handle == 0) {
1204                         handle = qdisc_alloc_handle(dev);
1205                         err = -ENOMEM;
1206                         if (handle == 0)
1207                                 goto err_out3;
1208                 }
1209                 if (!netif_is_multiqueue(dev))
1210                         sch->flags |= TCQ_F_ONETXQUEUE;
1211         }
1212
1213         sch->handle = handle;
1214
1215         /* This exist to keep backward compatible with a userspace
1216          * loophole, what allowed userspace to get IFF_NO_QUEUE
1217          * facility on older kernels by setting tx_queue_len=0 (prior
1218          * to qdisc init), and then forgot to reinit tx_queue_len
1219          * before again attaching a qdisc.
1220          */
1221         if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1222                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1223                 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1224         }
1225
1226         err = qdisc_block_indexes_set(sch, tca, extack);
1227         if (err)
1228                 goto err_out3;
1229
1230         if (ops->init) {
1231                 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1232                 if (err != 0)
1233                         goto err_out5;
1234         }
1235
1236         if (tca[TCA_STAB]) {
1237                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1238                 if (IS_ERR(stab)) {
1239                         err = PTR_ERR(stab);
1240                         goto err_out4;
1241                 }
1242                 rcu_assign_pointer(sch->stab, stab);
1243         }
1244         if (tca[TCA_RATE]) {
1245                 seqcount_t *running;
1246
1247                 err = -EOPNOTSUPP;
1248                 if (sch->flags & TCQ_F_MQROOT) {
1249                         NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1250                         goto err_out4;
1251                 }
1252
1253                 if (sch->parent != TC_H_ROOT &&
1254                     !(sch->flags & TCQ_F_INGRESS) &&
1255                     (!p || !(p->flags & TCQ_F_MQROOT)))
1256                         running = qdisc_root_sleeping_running(sch);
1257                 else
1258                         running = &sch->running;
1259
1260                 err = gen_new_estimator(&sch->bstats,
1261                                         sch->cpu_bstats,
1262                                         &sch->rate_est,
1263                                         NULL,
1264                                         running,
1265                                         tca[TCA_RATE]);
1266                 if (err) {
1267                         NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1268                         goto err_out4;
1269                 }
1270         }
1271
1272         qdisc_hash_add(sch, false);
1273
1274         return sch;
1275
1276 err_out5:
1277         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1278         if (ops->destroy)
1279                 ops->destroy(sch);
1280 err_out3:
1281         dev_put(dev);
1282         qdisc_free(sch);
1283 err_out2:
1284         module_put(ops->owner);
1285 err_out:
1286         *errp = err;
1287         return NULL;
1288
1289 err_out4:
1290         /*
1291          * Any broken qdiscs that would require a ops->reset() here?
1292          * The qdisc was never in action so it shouldn't be necessary.
1293          */
1294         qdisc_put_stab(rtnl_dereference(sch->stab));
1295         if (ops->destroy)
1296                 ops->destroy(sch);
1297         goto err_out3;
1298 }
1299
1300 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1301                         struct netlink_ext_ack *extack)
1302 {
1303         struct qdisc_size_table *ostab, *stab = NULL;
1304         int err = 0;
1305
1306         if (tca[TCA_OPTIONS]) {
1307                 if (!sch->ops->change) {
1308                         NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1309                         return -EINVAL;
1310                 }
1311                 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1312                         NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1313                         return -EOPNOTSUPP;
1314                 }
1315                 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1316                 if (err)
1317                         return err;
1318         }
1319
1320         if (tca[TCA_STAB]) {
1321                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1322                 if (IS_ERR(stab))
1323                         return PTR_ERR(stab);
1324         }
1325
1326         ostab = rtnl_dereference(sch->stab);
1327         rcu_assign_pointer(sch->stab, stab);
1328         qdisc_put_stab(ostab);
1329
1330         if (tca[TCA_RATE]) {
1331                 /* NB: ignores errors from replace_estimator
1332                    because change can't be undone. */
1333                 if (sch->flags & TCQ_F_MQROOT)
1334                         goto out;
1335                 gen_replace_estimator(&sch->bstats,
1336                                       sch->cpu_bstats,
1337                                       &sch->rate_est,
1338                                       NULL,
1339                                       qdisc_root_sleeping_running(sch),
1340                                       tca[TCA_RATE]);
1341         }
1342 out:
1343         return 0;
1344 }
1345
1346 struct check_loop_arg {
1347         struct qdisc_walker     w;
1348         struct Qdisc            *p;
1349         int                     depth;
1350 };
1351
1352 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1353                          struct qdisc_walker *w);
1354
1355 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1356 {
1357         struct check_loop_arg   arg;
1358
1359         if (q->ops->cl_ops == NULL)
1360                 return 0;
1361
1362         arg.w.stop = arg.w.skip = arg.w.count = 0;
1363         arg.w.fn = check_loop_fn;
1364         arg.depth = depth;
1365         arg.p = p;
1366         q->ops->cl_ops->walk(q, &arg.w);
1367         return arg.w.stop ? -ELOOP : 0;
1368 }
1369
1370 static int
1371 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1372 {
1373         struct Qdisc *leaf;
1374         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1375         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1376
1377         leaf = cops->leaf(q, cl);
1378         if (leaf) {
1379                 if (leaf == arg->p || arg->depth > 7)
1380                         return -ELOOP;
1381                 return check_loop(leaf, arg->p, arg->depth + 1);
1382         }
1383         return 0;
1384 }
1385
1386 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1387         [TCA_KIND]              = { .type = NLA_STRING },
1388         [TCA_RATE]              = { .type = NLA_BINARY,
1389                                     .len = sizeof(struct tc_estimator) },
1390         [TCA_STAB]              = { .type = NLA_NESTED },
1391         [TCA_DUMP_INVISIBLE]    = { .type = NLA_FLAG },
1392         [TCA_CHAIN]             = { .type = NLA_U32 },
1393         [TCA_INGRESS_BLOCK]     = { .type = NLA_U32 },
1394         [TCA_EGRESS_BLOCK]      = { .type = NLA_U32 },
1395 };
1396
1397 /*
1398  * Delete/get qdisc.
1399  */
1400
1401 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1402                         struct netlink_ext_ack *extack)
1403 {
1404         struct net *net = sock_net(skb->sk);
1405         struct tcmsg *tcm = nlmsg_data(n);
1406         struct nlattr *tca[TCA_MAX + 1];
1407         struct net_device *dev;
1408         u32 clid;
1409         struct Qdisc *q = NULL;
1410         struct Qdisc *p = NULL;
1411         int err;
1412
1413         if ((n->nlmsg_type != RTM_GETQDISC) &&
1414             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1415                 return -EPERM;
1416
1417         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1418                           extack);
1419         if (err < 0)
1420                 return err;
1421
1422         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1423         if (!dev)
1424                 return -ENODEV;
1425
1426         clid = tcm->tcm_parent;
1427         if (clid) {
1428                 if (clid != TC_H_ROOT) {
1429                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1430                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1431                                 if (!p) {
1432                                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1433                                         return -ENOENT;
1434                                 }
1435                                 q = qdisc_leaf(p, clid);
1436                         } else if (dev_ingress_queue(dev)) {
1437                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1438                         }
1439                 } else {
1440                         q = dev->qdisc;
1441                 }
1442                 if (!q) {
1443                         NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1444                         return -ENOENT;
1445                 }
1446
1447                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1448                         NL_SET_ERR_MSG(extack, "Invalid handle");
1449                         return -EINVAL;
1450                 }
1451         } else {
1452                 q = qdisc_lookup(dev, tcm->tcm_handle);
1453                 if (!q) {
1454                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1455                         return -ENOENT;
1456                 }
1457         }
1458
1459         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1460                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1461                 return -EINVAL;
1462         }
1463
1464         if (n->nlmsg_type == RTM_DELQDISC) {
1465                 if (!clid) {
1466                         NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1467                         return -EINVAL;
1468                 }
1469                 if (q->handle == 0) {
1470                         NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1471                         return -ENOENT;
1472                 }
1473                 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1474                 if (err != 0)
1475                         return err;
1476         } else {
1477                 qdisc_notify(net, skb, n, clid, NULL, q);
1478         }
1479         return 0;
1480 }
1481
1482 /*
1483  * Create/change qdisc.
1484  */
1485
1486 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1487                            struct netlink_ext_ack *extack)
1488 {
1489         struct net *net = sock_net(skb->sk);
1490         struct tcmsg *tcm;
1491         struct nlattr *tca[TCA_MAX + 1];
1492         struct net_device *dev;
1493         u32 clid;
1494         struct Qdisc *q, *p;
1495         int err;
1496
1497         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1498                 return -EPERM;
1499
1500 replay:
1501         /* Reinit, just in case something touches this. */
1502         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1503                           extack);
1504         if (err < 0)
1505                 return err;
1506
1507         tcm = nlmsg_data(n);
1508         clid = tcm->tcm_parent;
1509         q = p = NULL;
1510
1511         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1512         if (!dev)
1513                 return -ENODEV;
1514
1515
1516         if (clid) {
1517                 if (clid != TC_H_ROOT) {
1518                         if (clid != TC_H_INGRESS) {
1519                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1520                                 if (!p) {
1521                                         NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1522                                         return -ENOENT;
1523                                 }
1524                                 q = qdisc_leaf(p, clid);
1525                         } else if (dev_ingress_queue_create(dev)) {
1526                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1527                         }
1528                 } else {
1529                         q = dev->qdisc;
1530                 }
1531
1532                 /* It may be default qdisc, ignore it */
1533                 if (q && q->handle == 0)
1534                         q = NULL;
1535
1536                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1537                         if (tcm->tcm_handle) {
1538                                 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1539                                         NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1540                                         return -EEXIST;
1541                                 }
1542                                 if (TC_H_MIN(tcm->tcm_handle)) {
1543                                         NL_SET_ERR_MSG(extack, "Invalid minor handle");
1544                                         return -EINVAL;
1545                                 }
1546                                 q = qdisc_lookup(dev, tcm->tcm_handle);
1547                                 if (!q)
1548                                         goto create_n_graft;
1549                                 if (n->nlmsg_flags & NLM_F_EXCL) {
1550                                         NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1551                                         return -EEXIST;
1552                                 }
1553                                 if (tca[TCA_KIND] &&
1554                                     nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1555                                         NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1556                                         return -EINVAL;
1557                                 }
1558                                 if (q == p ||
1559                                     (p && check_loop(q, p, 0))) {
1560                                         NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1561                                         return -ELOOP;
1562                                 }
1563                                 qdisc_refcount_inc(q);
1564                                 goto graft;
1565                         } else {
1566                                 if (!q)
1567                                         goto create_n_graft;
1568
1569                                 /* This magic test requires explanation.
1570                                  *
1571                                  *   We know, that some child q is already
1572                                  *   attached to this parent and have choice:
1573                                  *   either to change it or to create/graft new one.
1574                                  *
1575                                  *   1. We are allowed to create/graft only
1576                                  *   if CREATE and REPLACE flags are set.
1577                                  *
1578                                  *   2. If EXCL is set, requestor wanted to say,
1579                                  *   that qdisc tcm_handle is not expected
1580                                  *   to exist, so that we choose create/graft too.
1581                                  *
1582                                  *   3. The last case is when no flags are set.
1583                                  *   Alas, it is sort of hole in API, we
1584                                  *   cannot decide what to do unambiguously.
1585                                  *   For now we select create/graft, if
1586                                  *   user gave KIND, which does not match existing.
1587                                  */
1588                                 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1589                                     (n->nlmsg_flags & NLM_F_REPLACE) &&
1590                                     ((n->nlmsg_flags & NLM_F_EXCL) ||
1591                                      (tca[TCA_KIND] &&
1592                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
1593                                         goto create_n_graft;
1594                         }
1595                 }
1596         } else {
1597                 if (!tcm->tcm_handle) {
1598                         NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1599                         return -EINVAL;
1600                 }
1601                 q = qdisc_lookup(dev, tcm->tcm_handle);
1602         }
1603
1604         /* Change qdisc parameters */
1605         if (!q) {
1606                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1607                 return -ENOENT;
1608         }
1609         if (n->nlmsg_flags & NLM_F_EXCL) {
1610                 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1611                 return -EEXIST;
1612         }
1613         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1614                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1615                 return -EINVAL;
1616         }
1617         err = qdisc_change(q, tca, extack);
1618         if (err == 0)
1619                 qdisc_notify(net, skb, n, clid, NULL, q);
1620         return err;
1621
1622 create_n_graft:
1623         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1624                 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1625                 return -ENOENT;
1626         }
1627         if (clid == TC_H_INGRESS) {
1628                 if (dev_ingress_queue(dev)) {
1629                         q = qdisc_create(dev, dev_ingress_queue(dev), p,
1630                                          tcm->tcm_parent, tcm->tcm_parent,
1631                                          tca, &err, extack);
1632                 } else {
1633                         NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1634                         err = -ENOENT;
1635                 }
1636         } else {
1637                 struct netdev_queue *dev_queue;
1638
1639                 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1640                         dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1641                 else if (p)
1642                         dev_queue = p->dev_queue;
1643                 else
1644                         dev_queue = netdev_get_tx_queue(dev, 0);
1645
1646                 q = qdisc_create(dev, dev_queue, p,
1647                                  tcm->tcm_parent, tcm->tcm_handle,
1648                                  tca, &err, extack);
1649         }
1650         if (q == NULL) {
1651                 if (err == -EAGAIN)
1652                         goto replay;
1653                 return err;
1654         }
1655
1656 graft:
1657         err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1658         if (err) {
1659                 if (q)
1660                         qdisc_put(q);
1661                 return err;
1662         }
1663
1664         return 0;
1665 }
1666
1667 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1668                               struct netlink_callback *cb,
1669                               int *q_idx_p, int s_q_idx, bool recur,
1670                               bool dump_invisible)
1671 {
1672         int ret = 0, q_idx = *q_idx_p;
1673         struct Qdisc *q;
1674         int b;
1675
1676         if (!root)
1677                 return 0;
1678
1679         q = root;
1680         if (q_idx < s_q_idx) {
1681                 q_idx++;
1682         } else {
1683                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1684                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1685                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1686                                   RTM_NEWQDISC) <= 0)
1687                         goto done;
1688                 q_idx++;
1689         }
1690
1691         /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1692          * itself has already been dumped.
1693          *
1694          * If we've already dumped the top-level (ingress) qdisc above and the global
1695          * qdisc hashtable, we don't want to hit it again
1696          */
1697         if (!qdisc_dev(root) || !recur)
1698                 goto out;
1699
1700         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1701                 if (q_idx < s_q_idx) {
1702                         q_idx++;
1703                         continue;
1704                 }
1705                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1706                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1707                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1708                                   RTM_NEWQDISC) <= 0)
1709                         goto done;
1710                 q_idx++;
1711         }
1712
1713 out:
1714         *q_idx_p = q_idx;
1715         return ret;
1716 done:
1717         ret = -1;
1718         goto out;
1719 }
1720
1721 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1722 {
1723         struct net *net = sock_net(skb->sk);
1724         int idx, q_idx;
1725         int s_idx, s_q_idx;
1726         struct net_device *dev;
1727         const struct nlmsghdr *nlh = cb->nlh;
1728         struct nlattr *tca[TCA_MAX + 1];
1729         int err;
1730
1731         s_idx = cb->args[0];
1732         s_q_idx = q_idx = cb->args[1];
1733
1734         idx = 0;
1735         ASSERT_RTNL();
1736
1737         err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1738                           rtm_tca_policy, cb->extack);
1739         if (err < 0)
1740                 return err;
1741
1742         for_each_netdev(net, dev) {
1743                 struct netdev_queue *dev_queue;
1744
1745                 if (idx < s_idx)
1746                         goto cont;
1747                 if (idx > s_idx)
1748                         s_q_idx = 0;
1749                 q_idx = 0;
1750
1751                 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1752                                        true, tca[TCA_DUMP_INVISIBLE]) < 0)
1753                         goto done;
1754
1755                 dev_queue = dev_ingress_queue(dev);
1756                 if (dev_queue &&
1757                     tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1758                                        &q_idx, s_q_idx, false,
1759                                        tca[TCA_DUMP_INVISIBLE]) < 0)
1760                         goto done;
1761
1762 cont:
1763                 idx++;
1764         }
1765
1766 done:
1767         cb->args[0] = idx;
1768         cb->args[1] = q_idx;
1769
1770         return skb->len;
1771 }
1772
1773
1774
1775 /************************************************
1776  *      Traffic classes manipulation.           *
1777  ************************************************/
1778
1779 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1780                           unsigned long cl,
1781                           u32 portid, u32 seq, u16 flags, int event)
1782 {
1783         struct tcmsg *tcm;
1784         struct nlmsghdr  *nlh;
1785         unsigned char *b = skb_tail_pointer(skb);
1786         struct gnet_dump d;
1787         const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1788
1789         cond_resched();
1790         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1791         if (!nlh)
1792                 goto out_nlmsg_trim;
1793         tcm = nlmsg_data(nlh);
1794         tcm->tcm_family = AF_UNSPEC;
1795         tcm->tcm__pad1 = 0;
1796         tcm->tcm__pad2 = 0;
1797         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1798         tcm->tcm_parent = q->handle;
1799         tcm->tcm_handle = q->handle;
1800         tcm->tcm_info = 0;
1801         if (nla_put_string(skb, TCA_KIND, q->ops->id))
1802                 goto nla_put_failure;
1803         if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1804                 goto nla_put_failure;
1805
1806         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1807                                          NULL, &d, TCA_PAD) < 0)
1808                 goto nla_put_failure;
1809
1810         if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1811                 goto nla_put_failure;
1812
1813         if (gnet_stats_finish_copy(&d) < 0)
1814                 goto nla_put_failure;
1815
1816         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1817         return skb->len;
1818
1819 out_nlmsg_trim:
1820 nla_put_failure:
1821         nlmsg_trim(skb, b);
1822         return -1;
1823 }
1824
1825 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1826                          struct nlmsghdr *n, struct Qdisc *q,
1827                          unsigned long cl, int event)
1828 {
1829         struct sk_buff *skb;
1830         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1831
1832         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1833         if (!skb)
1834                 return -ENOBUFS;
1835
1836         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1837                 kfree_skb(skb);
1838                 return -EINVAL;
1839         }
1840
1841         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1842                               n->nlmsg_flags & NLM_F_ECHO);
1843 }
1844
1845 static int tclass_del_notify(struct net *net,
1846                              const struct Qdisc_class_ops *cops,
1847                              struct sk_buff *oskb, struct nlmsghdr *n,
1848                              struct Qdisc *q, unsigned long cl)
1849 {
1850         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1851         struct sk_buff *skb;
1852         int err = 0;
1853
1854         if (!cops->delete)
1855                 return -EOPNOTSUPP;
1856
1857         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1858         if (!skb)
1859                 return -ENOBUFS;
1860
1861         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1862                            RTM_DELTCLASS) < 0) {
1863                 kfree_skb(skb);
1864                 return -EINVAL;
1865         }
1866
1867         err = cops->delete(q, cl);
1868         if (err) {
1869                 kfree_skb(skb);
1870                 return err;
1871         }
1872
1873         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1874                               n->nlmsg_flags & NLM_F_ECHO);
1875 }
1876
1877 #ifdef CONFIG_NET_CLS
1878
1879 struct tcf_bind_args {
1880         struct tcf_walker w;
1881         u32 classid;
1882         unsigned long cl;
1883 };
1884
1885 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1886 {
1887         struct tcf_bind_args *a = (void *)arg;
1888
1889         if (tp->ops->bind_class) {
1890                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1891
1892                 sch_tree_lock(q);
1893                 tp->ops->bind_class(n, a->classid, a->cl);
1894                 sch_tree_unlock(q);
1895         }
1896         return 0;
1897 }
1898
1899 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1900                            unsigned long new_cl)
1901 {
1902         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1903         struct tcf_block *block;
1904         struct tcf_chain *chain;
1905         unsigned long cl;
1906
1907         cl = cops->find(q, portid);
1908         if (!cl)
1909                 return;
1910         block = cops->tcf_block(q, cl, NULL);
1911         if (!block)
1912                 return;
1913         list_for_each_entry(chain, &block->chain_list, list) {
1914                 struct tcf_proto *tp;
1915
1916                 for (tp = rtnl_dereference(chain->filter_chain);
1917                      tp; tp = rtnl_dereference(tp->next)) {
1918                         struct tcf_bind_args arg = {};
1919
1920                         arg.w.fn = tcf_node_bind;
1921                         arg.classid = clid;
1922                         arg.cl = new_cl;
1923                         tp->ops->walk(tp, &arg.w);
1924                 }
1925         }
1926 }
1927
1928 #else
1929
1930 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1931                            unsigned long new_cl)
1932 {
1933 }
1934
1935 #endif
1936
1937 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1938                          struct netlink_ext_ack *extack)
1939 {
1940         struct net *net = sock_net(skb->sk);
1941         struct tcmsg *tcm = nlmsg_data(n);
1942         struct nlattr *tca[TCA_MAX + 1];
1943         struct net_device *dev;
1944         struct Qdisc *q = NULL;
1945         const struct Qdisc_class_ops *cops;
1946         unsigned long cl = 0;
1947         unsigned long new_cl;
1948         u32 portid;
1949         u32 clid;
1950         u32 qid;
1951         int err;
1952
1953         if ((n->nlmsg_type != RTM_GETTCLASS) &&
1954             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1955                 return -EPERM;
1956
1957         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1958                           extack);
1959         if (err < 0)
1960                 return err;
1961
1962         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1963         if (!dev)
1964                 return -ENODEV;
1965
1966         /*
1967            parent == TC_H_UNSPEC - unspecified parent.
1968            parent == TC_H_ROOT   - class is root, which has no parent.
1969            parent == X:0         - parent is root class.
1970            parent == X:Y         - parent is a node in hierarchy.
1971            parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
1972
1973            handle == 0:0         - generate handle from kernel pool.
1974            handle == 0:Y         - class is X:Y, where X:0 is qdisc.
1975            handle == X:Y         - clear.
1976            handle == X:0         - root class.
1977          */
1978
1979         /* Step 1. Determine qdisc handle X:0 */
1980
1981         portid = tcm->tcm_parent;
1982         clid = tcm->tcm_handle;
1983         qid = TC_H_MAJ(clid);
1984
1985         if (portid != TC_H_ROOT) {
1986                 u32 qid1 = TC_H_MAJ(portid);
1987
1988                 if (qid && qid1) {
1989                         /* If both majors are known, they must be identical. */
1990                         if (qid != qid1)
1991                                 return -EINVAL;
1992                 } else if (qid1) {
1993                         qid = qid1;
1994                 } else if (qid == 0)
1995                         qid = dev->qdisc->handle;
1996
1997                 /* Now qid is genuine qdisc handle consistent
1998                  * both with parent and child.
1999                  *
2000                  * TC_H_MAJ(portid) still may be unspecified, complete it now.
2001                  */
2002                 if (portid)
2003                         portid = TC_H_MAKE(qid, portid);
2004         } else {
2005                 if (qid == 0)
2006                         qid = dev->qdisc->handle;
2007         }
2008
2009         /* OK. Locate qdisc */
2010         q = qdisc_lookup(dev, qid);
2011         if (!q)
2012                 return -ENOENT;
2013
2014         /* An check that it supports classes */
2015         cops = q->ops->cl_ops;
2016         if (cops == NULL)
2017                 return -EINVAL;
2018
2019         /* Now try to get class */
2020         if (clid == 0) {
2021                 if (portid == TC_H_ROOT)
2022                         clid = qid;
2023         } else
2024                 clid = TC_H_MAKE(qid, clid);
2025
2026         if (clid)
2027                 cl = cops->find(q, clid);
2028
2029         if (cl == 0) {
2030                 err = -ENOENT;
2031                 if (n->nlmsg_type != RTM_NEWTCLASS ||
2032                     !(n->nlmsg_flags & NLM_F_CREATE))
2033                         goto out;
2034         } else {
2035                 switch (n->nlmsg_type) {
2036                 case RTM_NEWTCLASS:
2037                         err = -EEXIST;
2038                         if (n->nlmsg_flags & NLM_F_EXCL)
2039                                 goto out;
2040                         break;
2041                 case RTM_DELTCLASS:
2042                         err = tclass_del_notify(net, cops, skb, n, q, cl);
2043                         /* Unbind the class with flilters with 0 */
2044                         tc_bind_tclass(q, portid, clid, 0);
2045                         goto out;
2046                 case RTM_GETTCLASS:
2047                         err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2048                         goto out;
2049                 default:
2050                         err = -EINVAL;
2051                         goto out;
2052                 }
2053         }
2054
2055         if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2056                 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2057                 return -EOPNOTSUPP;
2058         }
2059
2060         new_cl = cl;
2061         err = -EOPNOTSUPP;
2062         if (cops->change)
2063                 err = cops->change(q, clid, portid, tca, &new_cl, extack);
2064         if (err == 0) {
2065                 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2066                 /* We just create a new class, need to do reverse binding. */
2067                 if (cl != new_cl)
2068                         tc_bind_tclass(q, portid, clid, new_cl);
2069         }
2070 out:
2071         return err;
2072 }
2073
2074 struct qdisc_dump_args {
2075         struct qdisc_walker     w;
2076         struct sk_buff          *skb;
2077         struct netlink_callback *cb;
2078 };
2079
2080 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2081                             struct qdisc_walker *arg)
2082 {
2083         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2084
2085         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2086                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2087                               RTM_NEWTCLASS);
2088 }
2089
2090 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2091                                 struct tcmsg *tcm, struct netlink_callback *cb,
2092                                 int *t_p, int s_t)
2093 {
2094         struct qdisc_dump_args arg;
2095
2096         if (tc_qdisc_dump_ignore(q, false) ||
2097             *t_p < s_t || !q->ops->cl_ops ||
2098             (tcm->tcm_parent &&
2099              TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2100                 (*t_p)++;
2101                 return 0;
2102         }
2103         if (*t_p > s_t)
2104                 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2105         arg.w.fn = qdisc_class_dump;
2106         arg.skb = skb;
2107         arg.cb = cb;
2108         arg.w.stop  = 0;
2109         arg.w.skip = cb->args[1];
2110         arg.w.count = 0;
2111         q->ops->cl_ops->walk(q, &arg.w);
2112         cb->args[1] = arg.w.count;
2113         if (arg.w.stop)
2114                 return -1;
2115         (*t_p)++;
2116         return 0;
2117 }
2118
2119 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2120                                struct tcmsg *tcm, struct netlink_callback *cb,
2121                                int *t_p, int s_t)
2122 {
2123         struct Qdisc *q;
2124         int b;
2125
2126         if (!root)
2127                 return 0;
2128
2129         if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2130                 return -1;
2131
2132         if (!qdisc_dev(root))
2133                 return 0;
2134
2135         if (tcm->tcm_parent) {
2136                 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2137                 if (q && q != root &&
2138                     tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2139                         return -1;
2140                 return 0;
2141         }
2142         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2143                 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2144                         return -1;
2145         }
2146
2147         return 0;
2148 }
2149
2150 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2151 {
2152         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2153         struct net *net = sock_net(skb->sk);
2154         struct netdev_queue *dev_queue;
2155         struct net_device *dev;
2156         int t, s_t;
2157
2158         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2159                 return 0;
2160         dev = dev_get_by_index(net, tcm->tcm_ifindex);
2161         if (!dev)
2162                 return 0;
2163
2164         s_t = cb->args[0];
2165         t = 0;
2166
2167         if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
2168                 goto done;
2169
2170         dev_queue = dev_ingress_queue(dev);
2171         if (dev_queue &&
2172             tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2173                                 &t, s_t) < 0)
2174                 goto done;
2175
2176 done:
2177         cb->args[0] = t;
2178
2179         dev_put(dev);
2180         return skb->len;
2181 }
2182
2183 #ifdef CONFIG_PROC_FS
2184 static int psched_show(struct seq_file *seq, void *v)
2185 {
2186         seq_printf(seq, "%08x %08x %08x %08x\n",
2187                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2188                    1000000,
2189                    (u32)NSEC_PER_SEC / hrtimer_resolution);
2190
2191         return 0;
2192 }
2193
2194 static int __net_init psched_net_init(struct net *net)
2195 {
2196         struct proc_dir_entry *e;
2197
2198         e = proc_create_single("psched", 0, net->proc_net, psched_show);
2199         if (e == NULL)
2200                 return -ENOMEM;
2201
2202         return 0;
2203 }
2204
2205 static void __net_exit psched_net_exit(struct net *net)
2206 {
2207         remove_proc_entry("psched", net->proc_net);
2208 }
2209 #else
2210 static int __net_init psched_net_init(struct net *net)
2211 {
2212         return 0;
2213 }
2214
2215 static void __net_exit psched_net_exit(struct net *net)
2216 {
2217 }
2218 #endif
2219
2220 static struct pernet_operations psched_net_ops = {
2221         .init = psched_net_init,
2222         .exit = psched_net_exit,
2223 };
2224
2225 static int __init pktsched_init(void)
2226 {
2227         int err;
2228
2229         err = register_pernet_subsys(&psched_net_ops);
2230         if (err) {
2231                 pr_err("pktsched_init: "
2232                        "cannot initialize per netns operations\n");
2233                 return err;
2234         }
2235
2236         register_qdisc(&pfifo_fast_ops);
2237         register_qdisc(&pfifo_qdisc_ops);
2238         register_qdisc(&bfifo_qdisc_ops);
2239         register_qdisc(&pfifo_head_drop_qdisc_ops);
2240         register_qdisc(&mq_qdisc_ops);
2241         register_qdisc(&noqueue_qdisc_ops);
2242
2243         rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2244         rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2245         rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2246                       0);
2247         rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2248         rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2249         rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2250                       0);
2251
2252         return 0;
2253 }
2254
2255 subsys_initcall(pktsched_init);