1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
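/* For illustration only (interface name, classids and rates are made up),
 * a typical HTB hierarchy is configured from userspace roughly like this:
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 60mbit  ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 40mbit  ceil 100mbit
 *
 * 1:10 and 1:20 are leaves; traffic that classifies to neither goes to the
 * default class 1:20, and idle bandwidth of 1:1 may be borrowed by either
 * leaf up to its ceil.
 */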
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
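/* For illustration (assuming sch_htb is built as a module), both parameters
 * above can be set at load time or, thanks to the 0640 permissions, changed
 * at runtime by root through sysfs:
 *
 *   modprobe sch_htb htb_hysteresis=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 */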
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When a class changes from state 1->2 and disconnects from
83          * its parent's feed, we lose the ptr value and would otherwise
84          * start from the first child again. Here we store the classid of
85          * the last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_packed bstats;
117         struct gnet_stats_basic_packed bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                 } leaf;
129                 struct htb_class_inner {
130                         struct htb_prio clprio[TC_HTB_NUMPRIO];
131                 } inner;
132         };
133         s64                     pq_key;
134
135         int                     prio_activity;  /* for which prios are we active */
136         enum htb_cmode          cmode;          /* current mode of the class */
137         struct rb_node          pq_node;        /* node for event queue */
138         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
139
140         unsigned int drops ____cacheline_aligned_in_smp;
141         unsigned int            overlimits;
142 };
143
144 struct htb_level {
145         struct rb_root  wait_pq;
146         struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148
149 struct htb_sched {
150         struct Qdisc_class_hash clhash;
151         int                     defcls;         /* class where unclassified flows go to */
152         int                     rate2quantum;   /* quant = rate / rate2quantum */
153
154         /* filters for qdisc itself */
155         struct tcf_proto __rcu  *filter_list;
156         struct tcf_block        *block;
157
158 #define HTB_WARN_TOOMANYEVENTS  0x1
159         unsigned int            warned; /* only one warning */
160         int                     direct_qlen;
161         struct work_struct      work;
162
163         /* non shaped skbs; let them go directly thru */
164         struct qdisc_skb_head   direct_queue;
165         u32                     direct_pkts;
166         u32                     overlimits;
167
168         struct qdisc_watchdog   watchdog;
169
170         s64                     now;    /* cached dequeue time */
171
172         /* time of nearest event per level (row) */
173         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
174
175         int                     row_mask[TC_HTB_MAXDEPTH];
176
177         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
178
179         struct Qdisc            **direct_qdiscs;
180         unsigned int            num_direct_qdiscs;
181
182         bool                    offload;
183 };
184
185 /* find class in global hash table using given handle */
186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188         struct htb_sched *q = qdisc_priv(sch);
189         struct Qdisc_class_common *clc;
190
191         clc = qdisc_class_find(&q->clhash, handle);
192         if (clc == NULL)
193                 return NULL;
194         return container_of(clc, struct htb_class, common);
195 }
196
197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199         return (unsigned long)htb_find(handle, sch);
200 }
201 /**
202  * htb_classify - classify a packet into class
203  *
204  * It returns NULL if the packet should be dropped or -1 if the packet
205  * should be passed directly thru. In all other cases a leaf class is returned.
206  * We allow direct class selection by classid in skb->priority. Then we examine
207  * filters in the qdisc and in inner nodes (if a higher filter points to the
208  * inner node). If we end up with classid MAJOR:0 we enqueue the skb into the
209  * special internal fifo (direct). These packets then go directly thru. If we
210  * still have no valid leaf we try to use the MAJOR:default leaf. If that is
211  * still unsuccessful we finish and return the direct queue.
212  */
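/* Worked example with illustrative classids: assume "htb default 20" on
 * handle 1:.  A packet with skb->priority == TC_H_MAKE(1 << 16, 0x10)
 * (i.e. 1:10) is returned immediately if 1:10 is a leaf; a packet matching
 * no filter falls back to the default leaf 1:20; and a packet classified to
 * 1:0 (the qdisc handle itself) goes to the internal direct queue
 * (HTB_DIRECT below).
 */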
213 #define HTB_DIRECT ((struct htb_class *)-1L)
214
215 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216                                       int *qerr)
217 {
218         struct htb_sched *q = qdisc_priv(sch);
219         struct htb_class *cl;
220         struct tcf_result res;
221         struct tcf_proto *tcf;
222         int result;
223
224         /* allow selecting the class by setting skb->priority to a valid classid;
225          * note that nfmark can be used too by attaching filter fw with no
226          * rules in it
227          */
228         if (skb->priority == sch->handle)
229                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
230         cl = htb_find(skb->priority, sch);
231         if (cl) {
232                 if (cl->level == 0)
233                         return cl;
234                 /* Start with inner filter chain if a non-leaf class is selected */
235                 tcf = rcu_dereference_bh(cl->filter_list);
236         } else {
237                 tcf = rcu_dereference_bh(q->filter_list);
238         }
239
240         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
241         while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
242 #ifdef CONFIG_NET_CLS_ACT
243                 switch (result) {
244                 case TC_ACT_QUEUED:
245                 case TC_ACT_STOLEN:
246                 case TC_ACT_TRAP:
247                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
248                         fallthrough;
249                 case TC_ACT_SHOT:
250                         return NULL;
251                 }
252 #endif
253                 cl = (void *)res.class;
254                 if (!cl) {
255                         if (res.classid == sch->handle)
256                                 return HTB_DIRECT;      /* X:0 (direct flow) */
257                         cl = htb_find(res.classid, sch);
258                         if (!cl)
259                                 break;  /* filter selected invalid classid */
260                 }
261                 if (!cl->level)
262                         return cl;      /* we hit leaf; return it */
263
264                 /* we have got inner class; apply inner filter chain */
265                 tcf = rcu_dereference_bh(cl->filter_list);
266         }
267         /* classification failed; try to use default class */
268         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
269         if (!cl || cl->level)
270                 return HTB_DIRECT;      /* bad default .. this is safe bet */
271         return cl;
272 }
273
274 /**
275  * htb_add_to_id_tree - adds class to the round robin list
276  * @root: the root of the tree
277  * @cl: the class to add
278  * @prio: the given prio of the class
279  *
280  * The routine adds the class to the list (actually a tree) sorted by classid.
281  * Make sure that the class is not already on such a list for the given prio.
282  */
283 static void htb_add_to_id_tree(struct rb_root *root,
284                                struct htb_class *cl, int prio)
285 {
286         struct rb_node **p = &root->rb_node, *parent = NULL;
287
288         while (*p) {
289                 struct htb_class *c;
290                 parent = *p;
291                 c = rb_entry(parent, struct htb_class, node[prio]);
292
293                 if (cl->common.classid > c->common.classid)
294                         p = &parent->rb_right;
295                 else
296                         p = &parent->rb_left;
297         }
298         rb_link_node(&cl->node[prio], parent, p);
299         rb_insert_color(&cl->node[prio], root);
300 }
301
302 /**
303  * htb_add_to_wait_tree - adds class to the event queue with delay
304  * @q: the priority event queue
305  * @cl: the class to add
306  * @delay: delay in nanoseconds
307  *
308  * The class is added to the priority event queue to indicate that the class
309  * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
310  * the class is not already in the queue.
311  */
312 static void htb_add_to_wait_tree(struct htb_sched *q,
313                                  struct htb_class *cl, s64 delay)
314 {
315         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
316
317         cl->pq_key = q->now + delay;
318         if (cl->pq_key == q->now)
319                 cl->pq_key++;
320
321         /* update the nearest event cache */
322         if (q->near_ev_cache[cl->level] > cl->pq_key)
323                 q->near_ev_cache[cl->level] = cl->pq_key;
324
325         while (*p) {
326                 struct htb_class *c;
327                 parent = *p;
328                 c = rb_entry(parent, struct htb_class, pq_node);
329                 if (cl->pq_key >= c->pq_key)
330                         p = &parent->rb_right;
331                 else
332                         p = &parent->rb_left;
333         }
334         rb_link_node(&cl->pq_node, parent, p);
335         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
336 }
337
338 /**
339  * htb_next_rb_node - advances *n to the next node in the rbtree
340  * @n: the current node in the rbtree
341  *
342  * When we are past the last key, *n becomes NULL.
343  * Average complexity is 2 steps per call.
344  */
345 static inline void htb_next_rb_node(struct rb_node **n)
346 {
347         *n = rb_next(*n);
348 }
349
350 /**
351  * htb_add_class_to_row - add class to its row
352  * @q: the priority event queue
353  * @cl: the class to add
354  * @mask: the bitmap of priorities to add
355  *
356  * The class is added to row at priorities marked in mask.
357  * It does nothing if mask == 0.
358  */
359 static inline void htb_add_class_to_row(struct htb_sched *q,
360                                         struct htb_class *cl, int mask)
361 {
362         q->row_mask[cl->level] |= mask;
363         while (mask) {
364                 int prio = ffz(~mask);
365                 mask &= ~(1 << prio);
366                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
367         }
368 }
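/* Example of the mask walk above: for mask == 0x5 (prios 0 and 2 active),
 * ffz(~mask) yields 0 first, that bit is cleared, then 2, so the class is
 * linked into the row trees of prio 0 and prio 2 only.
 */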
369
370 /* If this triggers, it is a bug in this code, but it need not be fatal */
371 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
372 {
373         if (RB_EMPTY_NODE(rb)) {
374                 WARN_ON(1);
375         } else {
376                 rb_erase(rb, root);
377                 RB_CLEAR_NODE(rb);
378         }
379 }
380
381
382 /**
383  * htb_remove_class_from_row - removes class from its row
384  * @q: the priority event queue
385  * @cl: the class to remove
386  * @mask: the bitmap of priorities to remove
387  *
388  * The class is removed from row at priorities marked in mask.
389  * It does nothing if mask == 0.
390  */
391 static inline void htb_remove_class_from_row(struct htb_sched *q,
392                                                  struct htb_class *cl, int mask)
393 {
394         int m = 0;
395         struct htb_level *hlevel = &q->hlevel[cl->level];
396
397         while (mask) {
398                 int prio = ffz(~mask);
399                 struct htb_prio *hprio = &hlevel->hprio[prio];
400
401                 mask &= ~(1 << prio);
402                 if (hprio->ptr == cl->node + prio)
403                         htb_next_rb_node(&hprio->ptr);
404
405                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
406                 if (!hprio->row.rb_node)
407                         m |= 1 << prio;
408         }
409         q->row_mask[cl->level] &= ~m;
410 }
411
412 /**
413  * htb_activate_prios - creates the active class's feed chain
414  * @q: the priority event queue
415  * @cl: the class to activate
416  *
417  * The class is connected to its ancestors and/or the appropriate rows
418  * for the priorities it participates in. cl->cmode must be the new
419  * (activated) mode. It does nothing if cl->prio_activity == 0.
420  */
421 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
422 {
423         struct htb_class *p = cl->parent;
424         long m, mask = cl->prio_activity;
425
426         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
427                 m = mask;
428                 while (m) {
429                         int prio = ffz(~m);
430                         m &= ~(1 << prio);
431
432                         if (p->inner.clprio[prio].feed.rb_node)
433                                 /* parent already has its feed in use, so
434                                  * reset the bit in mask as the parent is already ok
435                                  */
436                                 mask &= ~(1 << prio);
437
438                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
439                 }
440                 p->prio_activity |= mask;
441                 cl = p;
442                 p = cl->parent;
443
444         }
445         if (cl->cmode == HTB_CAN_SEND && mask)
446                 htb_add_class_to_row(q, cl, mask);
447 }
448
449 /**
450  * htb_deactivate_prios - removes the class from its feed chain
451  * @q: the priority event queue
452  * @cl: the class to deactivate
453  *
454  * cl->cmode must represent the old mode (before deactivation). It does
455  * nothing if cl->prio_activity == 0. The class is removed from all feed
456  * chains and rows.
457  */
458 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
459 {
460         struct htb_class *p = cl->parent;
461         long m, mask = cl->prio_activity;
462
463         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
464                 m = mask;
465                 mask = 0;
466                 while (m) {
467                         int prio = ffz(~m);
468                         m &= ~(1 << prio);
469
470                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
471                                 /* we are removing child which is pointed to from
472                                  * parent feed - forget the pointer but remember
473                                  * classid
474                                  */
475                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
476                                 p->inner.clprio[prio].ptr = NULL;
477                         }
478
479                         htb_safe_rb_erase(cl->node + prio,
480                                           &p->inner.clprio[prio].feed);
481
482                         if (!p->inner.clprio[prio].feed.rb_node)
483                                 mask |= 1 << prio;
484                 }
485
486                 p->prio_activity &= ~mask;
487                 cl = p;
488                 p = cl->parent;
489
490         }
491         if (cl->cmode == HTB_CAN_SEND && mask)
492                 htb_remove_class_from_row(q, cl, mask);
493 }
494
495 static inline s64 htb_lowater(const struct htb_class *cl)
496 {
497         if (htb_hysteresis)
498                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
499         else
500                 return 0;
501 }
502 static inline s64 htb_hiwater(const struct htb_class *cl)
503 {
504         if (htb_hysteresis)
505                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
506         else
507                 return 0;
508 }
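/* With hysteresis enabled the watermarks above make mode changes sticky:
 * a sending/borrowing class only enters HTB_CANT_SEND once ctokens drops
 * below -cbuffer, but must climb back to 0 to leave it again; likewise
 * HTB_CAN_SEND is kept until tokens fall below -buffer yet is only
 * re-entered at tokens >= 0 (see htb_class_mode() below).
 */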
509
510
511 /**
512  * htb_class_mode - computes and returns current class mode
513  * @cl: the target class
514  * @diff: diff time in nanoseconds
515  *
516  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
517  * is not HTB_CAN_SEND then *diff is updated to the time difference from
518  * now to the time when cl will change its state.
519  * Also it is worth noting that the class mode doesn't change simply
520  * at cl->{c,}tokens == 0; rather, there can be hysteresis in the
521  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
522  * mode transitions per time unit. The speed gain is about 1/6.
523  */
524 static inline enum htb_cmode
525 htb_class_mode(struct htb_class *cl, s64 *diff)
526 {
527         s64 toks;
528
529         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
530                 *diff = -toks;
531                 return HTB_CANT_SEND;
532         }
533
534         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
535                 return HTB_CAN_SEND;
536
537         *diff = -toks;
538         return HTB_MAY_BORROW;
539 }
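/* Numeric example (hysteresis off, so both watermarks are 0): on entry
 * cl->ctokens = 2000, cl->tokens = -5000, *diff = 1000.  ctokens + diff =
 * 3000 >= 0, so the class is not HTB_CANT_SEND; tokens + diff = -4000 < 0,
 * so the result is HTB_MAY_BORROW and *diff is set to 4000 ns, the deficit
 * that callers such as htb_charge_class() pass to htb_add_to_wait_tree()
 * as the delay.
 */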
540
541 /**
542  * htb_change_class_mode - changes the class's mode
543  * @q: the priority event queue
544  * @cl: the target class
545  * @diff: diff time in nanoseconds
546  *
547  * This should be the only way to change a class's mode under normal
548  * circumstances. The routine will update the feed list linkage, change the
549  * mode and add the class to the wait event queue if appropriate. The new
550  * mode should be different from the old one and cl->pq_key has to be valid
551  * if changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
552  */
553 static void
554 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
555 {
556         enum htb_cmode new_mode = htb_class_mode(cl, diff);
557
558         if (new_mode == cl->cmode)
559                 return;
560
561         if (new_mode == HTB_CANT_SEND) {
562                 cl->overlimits++;
563                 q->overlimits++;
564         }
565
566         if (cl->prio_activity) {        /* not necessary: speed optimization */
567                 if (cl->cmode != HTB_CANT_SEND)
568                         htb_deactivate_prios(q, cl);
569                 cl->cmode = new_mode;
570                 if (new_mode != HTB_CANT_SEND)
571                         htb_activate_prios(q, cl);
572         } else
573                 cl->cmode = new_mode;
574 }
575
576 /**
577  * htb_activate - inserts leaf cl into appropriate active feeds
578  * @q: the priority event queue
579  * @cl: the target class
580  *
581  * The routine learns the (new) priority of the leaf and activates the feed
582  * chain for that prio. It can safely be called on an already active leaf.
584  */
585 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
586 {
587         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
588
589         if (!cl->prio_activity) {
590                 cl->prio_activity = 1 << cl->prio;
591                 htb_activate_prios(q, cl);
592         }
593 }
594
595 /**
596  * htb_deactivate - remove leaf cl from active feeds
597  * @q: the priority event queue
598  * @cl: the target class
599  *
600  * Make sure that the leaf is active. In other words it can't be called
601  * with a non-active leaf.
602  */
603 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
604 {
605         WARN_ON(!cl->prio_activity);
606
607         htb_deactivate_prios(q, cl);
608         cl->prio_activity = 0;
609 }
610
611 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
612                        struct sk_buff **to_free)
613 {
614         int ret;
615         unsigned int len = qdisc_pkt_len(skb);
616         struct htb_sched *q = qdisc_priv(sch);
617         struct htb_class *cl = htb_classify(skb, sch, &ret);
618
619         if (cl == HTB_DIRECT) {
620                 /* enqueue to helper queue */
621                 if (q->direct_queue.qlen < q->direct_qlen) {
622                         __qdisc_enqueue_tail(skb, &q->direct_queue);
623                         q->direct_pkts++;
624                 } else {
625                         return qdisc_drop(skb, sch, to_free);
626                 }
627 #ifdef CONFIG_NET_CLS_ACT
628         } else if (!cl) {
629                 if (ret & __NET_XMIT_BYPASS)
630                         qdisc_qstats_drop(sch);
631                 __qdisc_drop(skb, to_free);
632                 return ret;
633 #endif
634         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
635                                         to_free)) != NET_XMIT_SUCCESS) {
636                 if (net_xmit_drop_count(ret)) {
637                         qdisc_qstats_drop(sch);
638                         cl->drops++;
639                 }
640                 return ret;
641         } else {
642                 htb_activate(q, cl);
643         }
644
645         sch->qstats.backlog += len;
646         sch->q.qlen++;
647         return NET_XMIT_SUCCESS;
648 }
649
650 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
651 {
652         s64 toks = diff + cl->tokens;
653
654         if (toks > cl->buffer)
655                 toks = cl->buffer;
656         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
657         if (toks <= -cl->mbuffer)
658                 toks = 1 - cl->mbuffer;
659
660         cl->tokens = toks;
661 }
662
663 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
664 {
665         s64 toks = diff + cl->ctokens;
666
667         if (toks > cl->cbuffer)
668                 toks = cl->cbuffer;
669         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
670         if (toks <= -cl->mbuffer)
671                 toks = 1 - cl->mbuffer;
672
673         cl->ctokens = toks;
674 }
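/* Example of the accounting above (made-up numbers): with cl->buffer = 10000,
 * cl->tokens = 9000 and diff = 3000, toks is first clamped down to 10000,
 * then psched_l2t_ns(&cl->rate, bytes) nanoseconds are subtracted for the
 * packet; the lower clamp at 1 - cl->mbuffer keeps very large packets from
 * driving the bucket arbitrarily negative.
 */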
675
676 /**
677  * htb_charge_class - charges amount "bytes" to leaf and ancestors
678  * @q: the priority event queue
679  * @cl: the class to start iterating from
680  * @level: the minimum level to account
681  * @skb: the socket buffer
682  *
683  * The routine assumes that a packet "bytes" long was dequeued from leaf cl
684  * borrowing from "level". It accounts bytes to the ceil leaky bucket for
685  * the leaf and all ancestors and to the rate bucket for ancestors at levels
686  * "level" and higher. It also handles a possible change of mode resulting
687  * from the update. Note that the mode can also increase here (MAY_BORROW to
688  * CAN_SEND) because we can use a more precise clock than the event queue here.
689  * In such a case we remove the class from the event queue first.
690  */
691 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
692                              int level, struct sk_buff *skb)
693 {
694         int bytes = qdisc_pkt_len(skb);
695         enum htb_cmode old_mode;
696         s64 diff;
697
698         while (cl) {
699                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
700                 if (cl->level >= level) {
701                         if (cl->level == level)
702                                 cl->xstats.lends++;
703                         htb_accnt_tokens(cl, bytes, diff);
704                 } else {
705                         cl->xstats.borrows++;
706                         cl->tokens += diff;     /* we moved t_c; update tokens */
707                 }
708                 htb_accnt_ctokens(cl, bytes, diff);
709                 cl->t_c = q->now;
710
711                 old_mode = cl->cmode;
712                 diff = 0;
713                 htb_change_class_mode(q, cl, &diff);
714                 if (old_mode != cl->cmode) {
715                         if (old_mode != HTB_CAN_SEND)
716                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
717                         if (cl->cmode != HTB_CAN_SEND)
718                                 htb_add_to_wait_tree(q, cl, diff);
719                 }
720
721                 /* update basic stats except for leaves which are already updated */
722                 if (cl->level)
723                         bstats_update(&cl->bstats, skb);
724
725                 cl = cl->parent;
726         }
727 }
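/* Example of a charge: a packet leaves leaf 1:10 (level 0), which borrowed
 * from its grandparent at level 2.  Walking up from the leaf, ceil tokens
 * (ctokens) are charged at every ancestor, while rate tokens are charged
 * only at level 2 and above; the leaf and its level-1 parent just count a
 * "borrow" and have their rate tokens topped up by the elapsed time instead
 * of being charged.
 */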
728
729 /**
730  * htb_do_events - make mode changes to classes at the level
731  * @q: the priority event queue
732  * @level: which wait_pq in 'q->hlevel'
733  * @start: start jiffies
734  *
735  * Scans the event queue for pending events and applies them. Returns the time
736  * of the next pending event (0 for no event in pq, q->now for too many events).
737  * Note: only events with cl->pq_key <= q->now are applied.
738  */
739 static s64 htb_do_events(struct htb_sched *q, const int level,
740                          unsigned long start)
741 {
742         /* don't run for longer than 2 jiffies; 2 is used instead of
743          * 1 to simplify things when jiffy is going to be incremented
744          * too soon
745          */
746         unsigned long stop_at = start + 2;
747         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
748
749         while (time_before(jiffies, stop_at)) {
750                 struct htb_class *cl;
751                 s64 diff;
752                 struct rb_node *p = rb_first(wait_pq);
753
754                 if (!p)
755                         return 0;
756
757                 cl = rb_entry(p, struct htb_class, pq_node);
758                 if (cl->pq_key > q->now)
759                         return cl->pq_key;
760
761                 htb_safe_rb_erase(p, wait_pq);
762                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
763                 htb_change_class_mode(q, cl, &diff);
764                 if (cl->cmode != HTB_CAN_SEND)
765                         htb_add_to_wait_tree(q, cl, diff);
766         }
767
768         /* too much load - let's continue after a break for scheduling */
769         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
770                 pr_warn("htb: too many events!\n");
771                 q->warned |= HTB_WARN_TOOMANYEVENTS;
772         }
773
774         return q->now;
775 }
776
777 /* Returns class->node+prio from the id-tree where the class's id is >= id.
778  * NULL if no such one exists.
779  */
780 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
781                                               u32 id)
782 {
783         struct rb_node *r = NULL;
784         while (n) {
785                 struct htb_class *cl =
786                     rb_entry(n, struct htb_class, node[prio]);
787
788                 if (id > cl->common.classid) {
789                         n = n->rb_right;
790                 } else if (id < cl->common.classid) {
791                         r = n;
792                         n = n->rb_left;
793                 } else {
794                         return n;
795                 }
796         }
797         return r;
798 }
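/* Example with illustrative classids: if the prio tree holds 1:10, 1:20 and
 * 1:30, a lookup with id == TC_H_MAKE(1 << 16, 0x15) returns the node of
 * 1:20 (the smallest classid >= id), while an id greater than 1:30 yields
 * NULL.
 */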
799
800 /**
801  * htb_lookup_leaf - returns next leaf class in DRR order
802  * @hprio: the current htb_prio (row or feed) to search
803  * @prio: which prio to look up
804  *
805  * Find the leaf that the current feed pointer points to.
806  */
807 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
808 {
809         int i;
810         struct {
811                 struct rb_node *root;
812                 struct rb_node **pptr;
813                 u32 *pid;
814         } stk[TC_HTB_MAXDEPTH], *sp = stk;
815
816         BUG_ON(!hprio->row.rb_node);
817         sp->root = hprio->row.rb_node;
818         sp->pptr = &hprio->ptr;
819         sp->pid = &hprio->last_ptr_id;
820
821         for (i = 0; i < 65535; i++) {
822                 if (!*sp->pptr && *sp->pid) {
823                         /* ptr was invalidated but id is valid - try to recover
824                          * the original or next ptr
825                          */
826                         *sp->pptr =
827                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
828                 }
829                 *sp->pid = 0;   /* ptr is valid now so that remove this hint as it
830                                  * can become out of date quickly
831                                  */
832                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
833                         *sp->pptr = sp->root;
834                         while ((*sp->pptr)->rb_left)
835                                 *sp->pptr = (*sp->pptr)->rb_left;
836                         if (sp > stk) {
837                                 sp--;
838                                 if (!*sp->pptr) {
839                                         WARN_ON(1);
840                                         return NULL;
841                                 }
842                                 htb_next_rb_node(sp->pptr);
843                         }
844                 } else {
845                         struct htb_class *cl;
846                         struct htb_prio *clp;
847
848                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
849                         if (!cl->level)
850                                 return cl;
851                         clp = &cl->inner.clprio[prio];
852                         (++sp)->root = clp->feed.rb_node;
853                         sp->pptr = &clp->ptr;
854                         sp->pid = &clp->last_ptr_id;
855                 }
856         }
857         WARN_ON(1);
858         return NULL;
859 }
860
861 /* dequeues a packet at the given priority and level; call only if
862  * you are sure that there is an active class at prio/level
863  */
864 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
865                                         const int level)
866 {
867         struct sk_buff *skb = NULL;
868         struct htb_class *cl, *start;
869         struct htb_level *hlevel = &q->hlevel[level];
870         struct htb_prio *hprio = &hlevel->hprio[prio];
871
872         /* look initial class up in the row */
873         start = cl = htb_lookup_leaf(hprio, prio);
874
875         do {
876 next:
877                 if (unlikely(!cl))
878                         return NULL;
879
880                 /* class can be empty - it is unlikely but can be true if leaf
881                  * qdisc drops packets in enqueue routine or if someone used
882                  * graft operation on the leaf since last dequeue;
883                  * simply deactivate and skip such class
884                  */
885                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
886                         struct htb_class *next;
887                         htb_deactivate(q, cl);
888
889                         /* row/level might become empty */
890                         if ((q->row_mask[level] & (1 << prio)) == 0)
891                                 return NULL;
892
893                         next = htb_lookup_leaf(hprio, prio);
894
895                         if (cl == start)        /* fix start if we just deleted it */
896                                 start = next;
897                         cl = next;
898                         goto next;
899                 }
900
901                 skb = cl->leaf.q->dequeue(cl->leaf.q);
902                 if (likely(skb != NULL))
903                         break;
904
905                 qdisc_warn_nonwc("htb", cl->leaf.q);
906                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
907                                          &q->hlevel[0].hprio[prio].ptr);
908                 cl = htb_lookup_leaf(hprio, prio);
909
910         } while (cl != start);
911
912         if (likely(skb != NULL)) {
913                 bstats_update(&cl->bstats, skb);
914                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
915                 if (cl->leaf.deficit[level] < 0) {
916                         cl->leaf.deficit[level] += cl->quantum;
917                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
918                                                  &q->hlevel[0].hprio[prio].ptr);
919                 }
920                 /* this used to be after charge_class but this constellation
921                  * gives us slightly better performance
922                  */
923                 if (!cl->leaf.q->q.qlen)
924                         htb_deactivate(q, cl);
925                 htb_charge_class(q, cl, level, skb);
926         }
927         return skb;
928 }
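/* DRR example for the deficit handling above (illustrative numbers): with
 * cl->quantum = 20000 and cl->leaf.deficit[level] = 200, dequeuing a 1500
 * byte packet drives the deficit to -1300, so one quantum is added (deficit
 * becomes 18700) and the round-robin pointer advances to the next class at
 * this prio/level; packets that leave the deficit non-negative keep the
 * pointer on the same class.
 */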
929
930 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
931 {
932         struct sk_buff *skb;
933         struct htb_sched *q = qdisc_priv(sch);
934         int level;
935         s64 next_event;
936         unsigned long start_at;
937
938         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
939         skb = __qdisc_dequeue_head(&q->direct_queue);
940         if (skb != NULL) {
941 ok:
942                 qdisc_bstats_update(sch, skb);
943                 qdisc_qstats_backlog_dec(sch, skb);
944                 sch->q.qlen--;
945                 return skb;
946         }
947
948         if (!sch->q.qlen)
949                 goto fin;
950         q->now = ktime_get_ns();
951         start_at = jiffies;
952
953         next_event = q->now + 5LLU * NSEC_PER_SEC;
954
955         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
956                 /* common case optimization - skip event handler quickly */
957                 int m;
958                 s64 event = q->near_ev_cache[level];
959
960                 if (q->now >= event) {
961                         event = htb_do_events(q, level, start_at);
962                         if (!event)
963                                 event = q->now + NSEC_PER_SEC;
964                         q->near_ev_cache[level] = event;
965                 }
966
967                 if (next_event > event)
968                         next_event = event;
969
970                 m = ~q->row_mask[level];
971                 while (m != (int)(-1)) {
972                         int prio = ffz(m);
973
974                         m |= 1 << prio;
975                         skb = htb_dequeue_tree(q, prio, level);
976                         if (likely(skb != NULL))
977                                 goto ok;
978                 }
979         }
980         if (likely(next_event > q->now))
981                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
982         else
983                 schedule_work(&q->work);
984 fin:
985         return skb;
986 }
987
988 /* reset all classes */
989 /* always called under BH & queue lock */
990 static void htb_reset(struct Qdisc *sch)
991 {
992         struct htb_sched *q = qdisc_priv(sch);
993         struct htb_class *cl;
994         unsigned int i;
995
996         for (i = 0; i < q->clhash.hashsize; i++) {
997                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
998                         if (cl->level)
999                                 memset(&cl->inner, 0, sizeof(cl->inner));
1000                         else {
1001                                 if (cl->leaf.q && !q->offload)
1002                                         qdisc_reset(cl->leaf.q);
1003                         }
1004                         cl->prio_activity = 0;
1005                         cl->cmode = HTB_CAN_SEND;
1006                 }
1007         }
1008         qdisc_watchdog_cancel(&q->watchdog);
1009         __qdisc_reset_queue(&q->direct_queue);
1010         sch->q.qlen = 0;
1011         sch->qstats.backlog = 0;
1012         memset(q->hlevel, 0, sizeof(q->hlevel));
1013         memset(q->row_mask, 0, sizeof(q->row_mask));
1014 }
1015
1016 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1017         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1018         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1019         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1020         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1021         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1022         [TCA_HTB_RATE64] = { .type = NLA_U64 },
1023         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1024         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1025 };
1026
1027 static void htb_work_func(struct work_struct *work)
1028 {
1029         struct htb_sched *q = container_of(work, struct htb_sched, work);
1030         struct Qdisc *sch = q->watchdog.qdisc;
1031
1032         rcu_read_lock();
1033         __netif_schedule(qdisc_root(sch));
1034         rcu_read_unlock();
1035 }
1036
1037 static void htb_set_lockdep_class_child(struct Qdisc *q)
1038 {
1039         static struct lock_class_key child_key;
1040
1041         lockdep_set_class(qdisc_lock(q), &child_key);
1042 }
1043
1044 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1045 {
1046         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1047 }
1048
1049 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1050                     struct netlink_ext_ack *extack)
1051 {
1052         struct net_device *dev = qdisc_dev(sch);
1053         struct tc_htb_qopt_offload offload_opt;
1054         struct htb_sched *q = qdisc_priv(sch);
1055         struct nlattr *tb[TCA_HTB_MAX + 1];
1056         struct tc_htb_glob *gopt;
1057         unsigned int ntx;
1058         bool offload;
1059         int err;
1060
1061         qdisc_watchdog_init(&q->watchdog, sch);
1062         INIT_WORK(&q->work, htb_work_func);
1063
1064         if (!opt)
1065                 return -EINVAL;
1066
1067         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1068         if (err)
1069                 return err;
1070
1071         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1072                                           NULL);
1073         if (err < 0)
1074                 return err;
1075
1076         if (!tb[TCA_HTB_INIT])
1077                 return -EINVAL;
1078
1079         gopt = nla_data(tb[TCA_HTB_INIT]);
1080         if (gopt->version != HTB_VER >> 16)
1081                 return -EINVAL;
1082
1083         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1084
1085         if (offload) {
1086                 if (sch->parent != TC_H_ROOT)
1087                         return -EOPNOTSUPP;
1088
1089                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1090                         return -EOPNOTSUPP;
1091
1092                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1093                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1094                                            sizeof(*q->direct_qdiscs),
1095                                            GFP_KERNEL);
1096                 if (!q->direct_qdiscs)
1097                         return -ENOMEM;
1098         }
1099
1100         err = qdisc_class_hash_init(&q->clhash);
1101         if (err < 0)
1102                 goto err_free_direct_qdiscs;
1103
1104         qdisc_skb_head_init(&q->direct_queue);
1105
1106         if (tb[TCA_HTB_DIRECT_QLEN])
1107                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1108         else
1109                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1110
1111         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1112                 q->rate2quantum = 1;
1113         q->defcls = gopt->defcls;
1114
1115         if (!offload)
1116                 return 0;
1117
1118         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1119                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1120                 struct Qdisc *qdisc;
1121
1122                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1123                                           TC_H_MAKE(sch->handle, 0), extack);
1124                 if (!qdisc) {
1125                         err = -ENOMEM;
1126                         goto err_free_qdiscs;
1127                 }
1128
1129                 htb_set_lockdep_class_child(qdisc);
1130                 q->direct_qdiscs[ntx] = qdisc;
1131                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1132         }
1133
1134         sch->flags |= TCQ_F_MQROOT;
1135
1136         offload_opt = (struct tc_htb_qopt_offload) {
1137                 .command = TC_HTB_CREATE,
1138                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1139                 .classid = TC_H_MIN(q->defcls),
1140                 .extack = extack,
1141         };
1142         err = htb_offload(dev, &offload_opt);
1143         if (err)
1144                 goto err_free_qdiscs;
1145
1146         /* Defer this assignment, so that htb_destroy skips offload-related
1147          * parts (especially calling ndo_setup_tc) on errors.
1148          */
1149         q->offload = true;
1150
1151         return 0;
1152
1153 err_free_qdiscs:
1154         for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1155              ntx++)
1156                 qdisc_put(q->direct_qdiscs[ntx]);
1157
1158         qdisc_class_hash_destroy(&q->clhash);
1159         /* Prevent use-after-free and double-free when htb_destroy gets called.
1160          */
1161         q->clhash.hash = NULL;
1162         q->clhash.hashsize = 0;
1163
1164 err_free_direct_qdiscs:
1165         kfree(q->direct_qdiscs);
1166         q->direct_qdiscs = NULL;
1167         return err;
1168 }
1169
1170 static void htb_attach_offload(struct Qdisc *sch)
1171 {
1172         struct net_device *dev = qdisc_dev(sch);
1173         struct htb_sched *q = qdisc_priv(sch);
1174         unsigned int ntx;
1175
1176         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1177                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1178
1179                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1180                 qdisc_put(old);
1181                 qdisc_hash_add(qdisc, false);
1182         }
1183         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1184                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1185                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1186
1187                 qdisc_put(old);
1188         }
1189
1190         kfree(q->direct_qdiscs);
1191         q->direct_qdiscs = NULL;
1192 }
1193
1194 static void htb_attach_software(struct Qdisc *sch)
1195 {
1196         struct net_device *dev = qdisc_dev(sch);
1197         unsigned int ntx;
1198
1199         /* Resemble qdisc_graft behavior. */
1200         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1201                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1202                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1203
1204                 qdisc_refcount_inc(sch);
1205
1206                 qdisc_put(old);
1207         }
1208 }
1209
1210 static void htb_attach(struct Qdisc *sch)
1211 {
1212         struct htb_sched *q = qdisc_priv(sch);
1213
1214         if (q->offload)
1215                 htb_attach_offload(sch);
1216         else
1217                 htb_attach_software(sch);
1218 }
1219
1220 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1221 {
1222         struct htb_sched *q = qdisc_priv(sch);
1223         struct nlattr *nest;
1224         struct tc_htb_glob gopt;
1225
1226         if (q->offload)
1227                 sch->flags |= TCQ_F_OFFLOADED;
1228         else
1229                 sch->flags &= ~TCQ_F_OFFLOADED;
1230
1231         sch->qstats.overlimits = q->overlimits;
1232         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1233          * no change can happen to the qdisc parameters.
1234          */
1235
1236         gopt.direct_pkts = q->direct_pkts;
1237         gopt.version = HTB_VER;
1238         gopt.rate2quantum = q->rate2quantum;
1239         gopt.defcls = q->defcls;
1240         gopt.debug = 0;
1241
1242         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1243         if (nest == NULL)
1244                 goto nla_put_failure;
1245         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1246             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1247                 goto nla_put_failure;
1248         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1249                 goto nla_put_failure;
1250
1251         return nla_nest_end(skb, nest);
1252
1253 nla_put_failure:
1254         nla_nest_cancel(skb, nest);
1255         return -1;
1256 }
1257
1258 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1259                           struct sk_buff *skb, struct tcmsg *tcm)
1260 {
1261         struct htb_class *cl = (struct htb_class *)arg;
1262         struct htb_sched *q = qdisc_priv(sch);
1263         struct nlattr *nest;
1264         struct tc_htb_opt opt;
1265
1266         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1267          * no change can happen to the class parameters.
1268          */
1269         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1270         tcm->tcm_handle = cl->common.classid;
1271         if (!cl->level && cl->leaf.q)
1272                 tcm->tcm_info = cl->leaf.q->handle;
1273
1274         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1275         if (nest == NULL)
1276                 goto nla_put_failure;
1277
1278         memset(&opt, 0, sizeof(opt));
1279
1280         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1281         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1282         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1283         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1284         opt.quantum = cl->quantum;
1285         opt.prio = cl->prio;
1286         opt.level = cl->level;
1287         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1288                 goto nla_put_failure;
1289         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1290                 goto nla_put_failure;
1291         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1292             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1293                               TCA_HTB_PAD))
1294                 goto nla_put_failure;
1295         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1296             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1297                               TCA_HTB_PAD))
1298                 goto nla_put_failure;
1299
1300         return nla_nest_end(skb, nest);
1301
1302 nla_put_failure:
1303         nla_nest_cancel(skb, nest);
1304         return -1;
1305 }
1306
1307 static void htb_offload_aggregate_stats(struct htb_sched *q,
1308                                         struct htb_class *cl)
1309 {
1310         struct htb_class *c;
1311         unsigned int i;
1312
1313         memset(&cl->bstats, 0, sizeof(cl->bstats));
1314
1315         for (i = 0; i < q->clhash.hashsize; i++) {
1316                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1317                         struct htb_class *p = c;
1318
1319                         while (p && p->level < cl->level)
1320                                 p = p->parent;
1321
1322                         if (p != cl)
1323                                 continue;
1324
1325                         cl->bstats.bytes += c->bstats_bias.bytes;
1326                         cl->bstats.packets += c->bstats_bias.packets;
1327                         if (c->level == 0) {
1328                                 cl->bstats.bytes += c->leaf.q->bstats.bytes;
1329                                 cl->bstats.packets += c->leaf.q->bstats.packets;
1330                         }
1331                 }
1332         }
1333 }
1334
1335 static int
1336 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1337 {
1338         struct htb_class *cl = (struct htb_class *)arg;
1339         struct htb_sched *q = qdisc_priv(sch);
1340         struct gnet_stats_queue qs = {
1341                 .drops = cl->drops,
1342                 .overlimits = cl->overlimits,
1343         };
1344         __u32 qlen = 0;
1345
1346         if (!cl->level && cl->leaf.q)
1347                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1348
1349         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1350                                     INT_MIN, INT_MAX);
1351         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1352                                      INT_MIN, INT_MAX);
1353
1354         if (q->offload) {
1355                 if (!cl->level) {
1356                         if (cl->leaf.q)
1357                                 cl->bstats = cl->leaf.q->bstats;
1358                         else
1359                                 memset(&cl->bstats, 0, sizeof(cl->bstats));
1360                         cl->bstats.bytes += cl->bstats_bias.bytes;
1361                         cl->bstats.packets += cl->bstats_bias.packets;
1362                 } else {
1363                         htb_offload_aggregate_stats(q, cl);
1364                 }
1365         }
1366
1367         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1368                                   d, NULL, &cl->bstats) < 0 ||
1369             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1370             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1371                 return -1;
1372
1373         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1374 }
1375
1376 static struct netdev_queue *
1377 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1378 {
1379         struct net_device *dev = qdisc_dev(sch);
1380         struct tc_htb_qopt_offload offload_opt;
1381         struct htb_sched *q = qdisc_priv(sch);
1382         int err;
1383
1384         if (!q->offload)
1385                 return sch->dev_queue;
1386
1387         offload_opt = (struct tc_htb_qopt_offload) {
1388                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1389                 .classid = TC_H_MIN(tcm->tcm_parent),
1390         };
1391         err = htb_offload(dev, &offload_opt);
1392         if (err || offload_opt.qid >= dev->num_tx_queues)
1393                 return NULL;
1394         return netdev_get_tx_queue(dev, offload_opt.qid);
1395 }
1396
1397 static struct Qdisc *
1398 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1399 {
1400         struct net_device *dev = dev_queue->dev;
1401         struct Qdisc *old_q;
1402
1403         if (dev->flags & IFF_UP)
1404                 dev_deactivate(dev);
1405         old_q = dev_graft_qdisc(dev_queue, new_q);
1406         if (new_q)
1407                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1408         if (dev->flags & IFF_UP)
1409                 dev_activate(dev);
1410
1411         return old_q;
1412 }
1413
1414 static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1415 {
1416         struct netdev_queue *queue_old, *queue_new;
1417         struct net_device *dev = qdisc_dev(sch);
1418         struct Qdisc *qdisc;
1419
1420         queue_old = netdev_get_tx_queue(dev, qid_old);
1421         queue_new = netdev_get_tx_queue(dev, qid_new);
1422
1423         if (dev->flags & IFF_UP)
1424                 dev_deactivate(dev);
1425         qdisc = dev_graft_qdisc(queue_old, NULL);
1426         qdisc->dev_queue = queue_new;
1427         qdisc = dev_graft_qdisc(queue_new, qdisc);
1428         if (dev->flags & IFF_UP)
1429                 dev_activate(dev);
1430
1431         WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1432 }
1433
1434 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1435                      struct Qdisc **old, struct netlink_ext_ack *extack)
1436 {
1437         struct netdev_queue *dev_queue = sch->dev_queue;
1438         struct htb_class *cl = (struct htb_class *)arg;
1439         struct htb_sched *q = qdisc_priv(sch);
1440         struct Qdisc *old_q;
1441
1442         if (cl->level)
1443                 return -EINVAL;
1444
1445         if (q->offload) {
1446                 dev_queue = new->dev_queue;
1447                 WARN_ON(dev_queue != cl->leaf.q->dev_queue);
1448         }
1449
1450         if (!new) {
1451                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1452                                         cl->common.classid, extack);
1453                 if (!new)
1454                         return -ENOBUFS;
1455         }
1456
1457         if (q->offload) {
1458                 htb_set_lockdep_class_child(new);
1459                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1460                 qdisc_refcount_inc(new);
1461                 old_q = htb_graft_helper(dev_queue, new);
1462         }
1463
1464         *old = qdisc_replace(sch, new, &cl->leaf.q);
1465
1466         if (q->offload) {
1467                 WARN_ON(old_q != *old);
1468                 qdisc_put(old_q);
1469         }
1470
1471         return 0;
1472 }
1473
1474 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1475 {
1476         struct htb_class *cl = (struct htb_class *)arg;
1477         return !cl->level ? cl->leaf.q : NULL;
1478 }
1479
1480 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1481 {
1482         struct htb_class *cl = (struct htb_class *)arg;
1483
1484         htb_deactivate(qdisc_priv(sch), cl);
1485 }
1486
1487 static inline int htb_parent_last_child(struct htb_class *cl)
1488 {
1489         if (!cl->parent)
1490                 /* the root class */
1491                 return 0;
1492         if (cl->parent->children > 1)
1493                 /* not the last child */
1494                 return 0;
1495         return 1;
1496 }
1497
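/* @cl was the last child of its parent: convert the parent back into a leaf.
 * Drop it from the wait queue if it was throttled, reset its token buckets
 * and let it enqueue into @new_q (or noop_qdisc) from now on.
 */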
1498 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1499                                struct Qdisc *new_q)
1500 {
1501         struct htb_sched *q = qdisc_priv(sch);
1502         struct htb_class *parent = cl->parent;
1503
1504         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1505
1506         if (parent->cmode != HTB_CAN_SEND)
1507                 htb_safe_rb_erase(&parent->pq_node,
1508                                   &q->hlevel[parent->level].wait_pq);
1509
1510         parent->level = 0;
1511         memset(&parent->inner, 0, sizeof(parent->inner));
1512         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1513         parent->tokens = parent->buffer;
1514         parent->ctokens = parent->cbuffer;
1515         parent->t_c = ktime_get_ns();
1516         parent->cmode = HTB_CAN_SEND;
1517 }
1518
1519 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1520                                        struct netdev_queue *dev_queue,
1521                                        struct Qdisc *new_q)
1522 {
1523         struct Qdisc *old_q;
1524
1525         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1526         if (new_q)
1527                 qdisc_refcount_inc(new_q);
1528         old_q = htb_graft_helper(dev_queue, new_q);
1529         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1530 }
1531
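/* Undo the offloaded state of leaf @cl: detach its per-queue qdisc, fold the
 * queue's byte/packet counters into the parent's bstats_bias, and send the
 * matching TC_HTB_LEAF_DEL* command to the driver.  Unless the whole qdisc is
 * being destroyed, a driver error re-grafts the old qdisc and is returned.
 */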
1532 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1533                                      bool last_child, bool destroying,
1534                                      struct netlink_ext_ack *extack)
1535 {
1536         struct tc_htb_qopt_offload offload_opt;
1537         struct Qdisc *q = cl->leaf.q;
1538         struct Qdisc *old = NULL;
1539         int err;
1540
1541         if (cl->level)
1542                 return -EINVAL;
1543
1544         WARN_ON(!q);
1545         if (!destroying) {
1546                 /* On destroy of HTB, two cases are possible:
1547                  * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
1548                  * 2. q is a noop qdisc (for nodes that were inner),
1549                  *    q->dev_queue is noop_netdev_queue.
1550                  */
1551                 old = htb_graft_helper(q->dev_queue, NULL);
1552                 WARN_ON(!old);
1553                 WARN_ON(old != q);
1554         }
1555
1556         if (cl->parent) {
1557                 cl->parent->bstats_bias.bytes += q->bstats.bytes;
1558                 cl->parent->bstats_bias.packets += q->bstats.packets;
1559         }
1560
1561         offload_opt = (struct tc_htb_qopt_offload) {
1562                 .command = !last_child ? TC_HTB_LEAF_DEL :
1563                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1564                            TC_HTB_LEAF_DEL_LAST,
1565                 .classid = cl->common.classid,
1566                 .extack = extack,
1567         };
1568         err = htb_offload(qdisc_dev(sch), &offload_opt);
1569
1570         if (!err || destroying)
1571                 qdisc_put(old);
1572         else
1573                 htb_graft_helper(q->dev_queue, old);
1574
1575         if (last_child)
1576                 return err;
1577
1578         if (!err && offload_opt.moved_qid != 0) {
1579                 if (destroying)
1580                         q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1581                                                            offload_opt.qid);
1582                 else
1583                         htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1584                                                offload_opt.qid);
1585         }
1586
1587         return err;
1588 }
1589
1590 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1591 {
1592         if (!cl->level) {
1593                 WARN_ON(!cl->leaf.q);
1594                 qdisc_put(cl->leaf.q);
1595         }
1596         gen_kill_estimator(&cl->rate_est);
1597         tcf_block_put(cl->block);
1598         kfree(cl);
1599 }
1600
1601 static void htb_destroy(struct Qdisc *sch)
1602 {
1603         struct net_device *dev = qdisc_dev(sch);
1604         struct tc_htb_qopt_offload offload_opt;
1605         struct htb_sched *q = qdisc_priv(sch);
1606         struct hlist_node *next;
1607         bool nonempty, changed;
1608         struct htb_class *cl;
1609         unsigned int i;
1610
1611         cancel_work_sync(&q->work);
1612         qdisc_watchdog_cancel(&q->watchdog);
1613         /* This line used to be after the htb_destroy_class() call below,
1614          * and surprisingly it worked in 2.4. But it must precede it,
1615          * because filters need their target class alive to be able to call
1616          * unbind_filter on it (without an Oops).
1617          */
1618         tcf_block_put(q->block);
1619
1620         for (i = 0; i < q->clhash.hashsize; i++) {
1621                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1622                         tcf_block_put(cl->block);
1623                         cl->block = NULL;
1624                 }
1625         }
1626
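        /* Offloaded classes must be torn down leaves-first: each pass deletes
         * the current leaves and may turn their parents into new leaves, so
         * keep scanning until a pass changes nothing.
         */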
1627         do {
1628                 nonempty = false;
1629                 changed = false;
1630                 for (i = 0; i < q->clhash.hashsize; i++) {
1631                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1632                                                   common.hnode) {
1633                                 bool last_child;
1634
1635                                 if (!q->offload) {
1636                                         htb_destroy_class(sch, cl);
1637                                         continue;
1638                                 }
1639
1640                                 nonempty = true;
1641
1642                                 if (cl->level)
1643                                         continue;
1644
1645                                 changed = true;
1646
1647                                 last_child = htb_parent_last_child(cl);
1648                                 htb_destroy_class_offload(sch, cl, last_child,
1649                                                           true, NULL);
1650                                 qdisc_class_hash_remove(&q->clhash,
1651                                                         &cl->common);
1652                                 if (cl->parent)
1653                                         cl->parent->children--;
1654                                 if (last_child)
1655                                         htb_parent_to_leaf(sch, cl, NULL);
1656                                 htb_destroy_class(sch, cl);
1657                         }
1658                 }
1659         } while (changed);
1660         WARN_ON(nonempty);
1661
1662         qdisc_class_hash_destroy(&q->clhash);
1663         __qdisc_reset_queue(&q->direct_queue);
1664
1665         if (!q->offload)
1666                 return;
1667
1668         offload_opt = (struct tc_htb_qopt_offload) {
1669                 .command = TC_HTB_DESTROY,
1670         };
1671         htb_offload(dev, &offload_opt);
1672
1673         if (!q->direct_qdiscs)
1674                 return;
1675         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1676                 qdisc_put(q->direct_qdiscs[i]);
1677         kfree(q->direct_qdiscs);
1678 }
1679
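/* "tc class del": only a class with no children and no filters attached may
 * be deleted.  In offload mode the hardware node goes first; if this was the
 * parent's last child, the parent is turned back into a leaf with a fresh
 * pfifo qdisc.
 */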
1680 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1681                       struct netlink_ext_ack *extack)
1682 {
1683         struct htb_sched *q = qdisc_priv(sch);
1684         struct htb_class *cl = (struct htb_class *)arg;
1685         struct Qdisc *new_q = NULL;
1686         int last_child = 0;
1687         int err;
1688
1689         /* TODO: why don't we allow deleting a subtree? References? Does the
1690          * tc subsystem guarantee us that in htb_destroy it holds no class
1691          * refs, so that we can remove children safely there?
1692          */
1693         if (cl->children || cl->filter_cnt)
1694                 return -EBUSY;
1695
1696         if (!cl->level && htb_parent_last_child(cl))
1697                 last_child = 1;
1698
1699         if (q->offload) {
1700                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1701                                                 extack);
1702                 if (err)
1703                         return err;
1704         }
1705
1706         if (last_child) {
1707                 struct netdev_queue *dev_queue;
1708
1709                 dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1710                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1711                                           cl->parent->common.classid,
1712                                           NULL);
1713                 if (q->offload) {
1714                         if (new_q)
1715                                 htb_set_lockdep_class_child(new_q);
1716                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1717                 }
1718         }
1719
1720         sch_tree_lock(sch);
1721
1722         if (!cl->level)
1723                 qdisc_purge_queue(cl->leaf.q);
1724
1725         /* delete from the hash and active lists; the rest is done in htb_destroy_class */
1726         qdisc_class_hash_remove(&q->clhash, &cl->common);
1727         if (cl->parent)
1728                 cl->parent->children--;
1729
1730         if (cl->prio_activity)
1731                 htb_deactivate(q, cl);
1732
1733         if (cl->cmode != HTB_CAN_SEND)
1734                 htb_safe_rb_erase(&cl->pq_node,
1735                                   &q->hlevel[cl->level].wait_pq);
1736
1737         if (last_child)
1738                 htb_parent_to_leaf(sch, cl, new_q);
1739
1740         sch_tree_unlock(sch);
1741
1742         htb_destroy_class(sch, cl);
1743         return 0;
1744 }
1745
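/* "tc class add/change": create a new class or reconfigure an existing one.
 * A new class gets a default pfifo leaf; with offload this also allocates a
 * device queue (TC_HTB_LEAF_ALLOC_QUEUE) or converts the parent from a leaf
 * into an inner node (TC_HTB_LEAF_TO_INNER).  Rate and ceil are precomputed
 * into psched_ratecfg, and quantum is derived from the rate unless supplied.
 */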
1746 static int htb_change_class(struct Qdisc *sch, u32 classid,
1747                             u32 parentid, struct nlattr **tca,
1748                             unsigned long *arg, struct netlink_ext_ack *extack)
1749 {
1750         int err = -EINVAL;
1751         struct htb_sched *q = qdisc_priv(sch);
1752         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1753         struct tc_htb_qopt_offload offload_opt;
1754         struct nlattr *opt = tca[TCA_OPTIONS];
1755         struct nlattr *tb[TCA_HTB_MAX + 1];
1756         struct Qdisc *parent_qdisc = NULL;
1757         struct netdev_queue *dev_queue;
1758         struct tc_htb_opt *hopt;
1759         u64 rate64, ceil64;
1760         int warn = 0;
1761
1762         /* extract all subattrs from opt attr */
1763         if (!opt)
1764                 goto failure;
1765
1766         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1767                                           NULL);
1768         if (err < 0)
1769                 goto failure;
1770
1771         err = -EINVAL;
1772         if (tb[TCA_HTB_PARMS] == NULL)
1773                 goto failure;
1774
1775         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1776
1777         hopt = nla_data(tb[TCA_HTB_PARMS]);
1778         if (!hopt->rate.rate || !hopt->ceil.rate)
1779                 goto failure;
1780
1781         /* Keep backward compatibility with rate_table-based iproute2 tc */
1782         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1783                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1784                                               NULL));
1785
1786         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1787                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1788                                               NULL));
1789
1790         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1791         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1792
1793         if (!cl) {              /* new class */
1794                 struct net_device *dev = qdisc_dev(sch);
1795                 struct Qdisc *new_q, *old_q;
1796                 int prio;
1797                 struct {
1798                         struct nlattr           nla;
1799                         struct gnet_estimator   opt;
1800                 } est = {
1801                         .nla = {
1802                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1803                                 .nla_type       = TCA_RATE,
1804                         },
1805                         .opt = {
1806                                 /* 4s interval, 16s averaging constant */
1807                                 .interval       = 2,
1808                                 .ewma_log       = 2,
1809                         },
1810                 };
1811
1812                 /* check for valid classid */
1813                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1814                     htb_find(classid, sch))
1815                         goto failure;
1816
1817                 /* check maximal depth */
1818                 if (parent && parent->parent && parent->parent->level < 2) {
1819                         pr_err("htb: tree is too deep\n");
1820                         goto failure;
1821                 }
1822                 err = -ENOBUFS;
1823                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1824                 if (!cl)
1825                         goto failure;
1826
1827                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1828                 if (err) {
1829                         kfree(cl);
1830                         goto failure;
1831                 }
1832                 if (htb_rate_est || tca[TCA_RATE]) {
1833                         err = gen_new_estimator(&cl->bstats, NULL,
1834                                                 &cl->rate_est,
1835                                                 NULL,
1836                                                 qdisc_root_sleeping_running(sch),
1837                                                 tca[TCA_RATE] ? : &est.nla);
1838                         if (err)
1839                                 goto err_block_put;
1840                 }
1841
1842                 cl->children = 0;
1843                 RB_CLEAR_NODE(&cl->pq_node);
1844
1845                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1846                         RB_CLEAR_NODE(&cl->node[prio]);
1847
1848                 cl->common.classid = classid;
1849
1850                 /* Make sure nothing interrupts us between the two
1851                  * ndo_setup_tc calls.
1852                  */
1853                 ASSERT_RTNL();
1854
1855                 /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1856                  * which can't be used inside of sch_tree_lock
1857                  * -- thanks to Karlis Peisenieks
1858                  */
1859                 if (!q->offload) {
1860                         dev_queue = sch->dev_queue;
1861                 } else if (!(parent && !parent->level)) {
1862                         /* Assign a dev_queue to this classid. */
1863                         offload_opt = (struct tc_htb_qopt_offload) {
1864                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1865                                 .classid = cl->common.classid,
1866                                 .parent_classid = parent ?
1867                                         TC_H_MIN(parent->common.classid) :
1868                                         TC_HTB_CLASSID_ROOT,
1869                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1870                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1871                                 .extack = extack,
1872                         };
1873                         err = htb_offload(dev, &offload_opt);
1874                         if (err) {
1875                                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1876                                        err);
1877                                 goto err_kill_estimator;
1878                         }
1879                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1880                 } else { /* First child. */
1881                         dev_queue = parent->leaf.q->dev_queue;
1882                         old_q = htb_graft_helper(dev_queue, NULL);
1883                         WARN_ON(old_q != parent->leaf.q);
1884                         offload_opt = (struct tc_htb_qopt_offload) {
1885                                 .command = TC_HTB_LEAF_TO_INNER,
1886                                 .classid = cl->common.classid,
1887                                 .parent_classid =
1888                                         TC_H_MIN(parent->common.classid),
1889                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1890                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1891                                 .extack = extack,
1892                         };
1893                         err = htb_offload(dev, &offload_opt);
1894                         if (err) {
1895                                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1896                                        err);
1897                                 htb_graft_helper(dev_queue, old_q);
1898                                 goto err_kill_estimator;
1899                         }
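                        /* The old leaf qdisc goes away together with its
                         * counters; fold them into the parent's bstats_bias
                         * so the dumped totals stay consistent.
                         */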
1900                         parent->bstats_bias.bytes += old_q->bstats.bytes;
1901                         parent->bstats_bias.packets += old_q->bstats.packets;
1902                         qdisc_put(old_q);
1903                 }
1904                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1905                                           classid, NULL);
1906                 if (q->offload) {
1907                         if (new_q) {
1908                                 htb_set_lockdep_class_child(new_q);
1909                                 /* One ref for cl->leaf.q, the other for
1910                                  * dev_queue->qdisc.
1911                                  */
1912                                 qdisc_refcount_inc(new_q);
1913                         }
1914                         old_q = htb_graft_helper(dev_queue, new_q);
1915                         /* No qdisc_put needed. */
1916                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1917                 }
1918                 sch_tree_lock(sch);
1919                 if (parent && !parent->level) {
1920                         /* turn parent into inner node */
1921                         qdisc_purge_queue(parent->leaf.q);
1922                         parent_qdisc = parent->leaf.q;
1923                         if (parent->prio_activity)
1924                                 htb_deactivate(q, parent);
1925
1926                         /* remove from evt list because of level change */
1927                         if (parent->cmode != HTB_CAN_SEND) {
1928                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1929                                 parent->cmode = HTB_CAN_SEND;
1930                         }
1931                         parent->level = (parent->parent ? parent->parent->level
1932                                          : TC_HTB_MAXDEPTH) - 1;
1933                         memset(&parent->inner, 0, sizeof(parent->inner));
1934                 }
1935
1936                 /* the leaf (i.e. us) needs an elementary qdisc */
1937                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1938
1939                 cl->parent = parent;
1940
1941                 /* set class to be in HTB_CAN_SEND state */
1942                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1943                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1944                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1945                 cl->t_c = ktime_get_ns();
1946                 cl->cmode = HTB_CAN_SEND;
1947
1948                 /* attach to the hash list and parent's family */
1949                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1950                 if (parent)
1951                         parent->children++;
1952                 if (cl->leaf.q != &noop_qdisc)
1953                         qdisc_hash_add(cl->leaf.q, true);
1954         } else {
1955                 if (tca[TCA_RATE]) {
1956                         err = gen_replace_estimator(&cl->bstats, NULL,
1957                                                     &cl->rate_est,
1958                                                     NULL,
1959                                                     qdisc_root_sleeping_running(sch),
1960                                                     tca[TCA_RATE]);
1961                         if (err)
1962                                 return err;
1963                 }
1964
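                /* Push the new rate/ceil to the device first; if the driver
                 * rejects it, bail out before touching the software shaping
                 * parameters below.
                 */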
1965                 if (q->offload) {
1966                         struct net_device *dev = qdisc_dev(sch);
1967
1968                         offload_opt = (struct tc_htb_qopt_offload) {
1969                                 .command = TC_HTB_NODE_MODIFY,
1970                                 .classid = cl->common.classid,
1971                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1972                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1973                                 .extack = extack,
1974                         };
1975                         err = htb_offload(dev, &offload_opt);
1976                         if (err)
1977                                 /* Estimator was replaced, and rollback may fail
1978                                  * as well, so we don't try to recover it, and
1979                                  * the estimator won't work properly with the
1980                                  * offload anyway, because bstats are updated
1981                                  * only when the stats are queried.
1982                                  */
1983                                 return err;
1984                 }
1985
1986                 sch_tree_lock(sch);
1987         }
1988
1989         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1990         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1991
1992         /* There used to be a nasty bug here: we have to check that the node
1993          * is really a leaf before changing cl->leaf!
1994          */
1995         if (!cl->level) {
1996                 u64 quantum = cl->rate.rate_bytes_ps;
1997
1998                 do_div(quantum, q->rate2quantum);
1999                 cl->quantum = min_t(u64, quantum, INT_MAX);
2000
2001                 if (!hopt->quantum && cl->quantum < 1000) {
2002                         warn = -1;
2003                         cl->quantum = 1000;
2004                 }
2005                 if (!hopt->quantum && cl->quantum > 200000) {
2006                         warn = 1;
2007                         cl->quantum = 200000;
2008                 }
2009                 if (hopt->quantum)
2010                         cl->quantum = hopt->quantum;
2011                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2012                         cl->prio = TC_HTB_NUMPRIO - 1;
2013         }
2014
2015         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2016         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2017
2018         sch_tree_unlock(sch);
2019         qdisc_put(parent_qdisc);
2020
2021         if (warn)
2022                 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2023                             cl->common.classid, (warn == -1 ? "small" : "big"));
2024
2025         qdisc_class_hash_grow(sch, &q->clhash);
2026
2027         *arg = (unsigned long)cl;
2028         return 0;
2029
2030 err_kill_estimator:
2031         gen_kill_estimator(&cl->rate_est);
2032 err_block_put:
2033         tcf_block_put(cl->block);
2034         kfree(cl);
2035 failure:
2036         return err;
2037 }
2038
2039 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2040                                        struct netlink_ext_ack *extack)
2041 {
2042         struct htb_sched *q = qdisc_priv(sch);
2043         struct htb_class *cl = (struct htb_class *)arg;
2044
2045         return cl ? cl->block : q->block;
2046 }
2047
2048 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2049                                      u32 classid)
2050 {
2051         struct htb_class *cl = htb_find(classid, sch);
2052
2053         /*if (cl && !cl->level) return 0;
2054          * The line above used to be there to prevent attaching filters to
2055          * leaves. But at least the tc_index filter uses this just to get the
2056          * class for other reasons, so we have to allow it.
2057          * ----
2058          * 19.6.2002 As Werner explained, it is OK - bind filter is just
2059          * another way to "lock" the class - unlike "get", this lock can
2060          * be broken by the class during destroy, IIUC.
2061          */
2062         if (cl)
2063                 cl->filter_cnt++;
2064         return (unsigned long)cl;
2065 }
2066
2067 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2068 {
2069         struct htb_class *cl = (struct htb_class *)arg;
2070
2071         if (cl)
2072                 cl->filter_cnt--;
2073 }
2074
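/* Walk all classes for dumping, honouring the skip/count cursor in @arg. */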
2075 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2076 {
2077         struct htb_sched *q = qdisc_priv(sch);
2078         struct htb_class *cl;
2079         unsigned int i;
2080
2081         if (arg->stop)
2082                 return;
2083
2084         for (i = 0; i < q->clhash.hashsize; i++) {
2085                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2086                         if (arg->count < arg->skip) {
2087                                 arg->count++;
2088                                 continue;
2089                         }
2090                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2091                                 arg->stop = 1;
2092                                 return;
2093                         }
2094                         arg->count++;
2095                 }
2096         }
2097 }
2098
2099 static const struct Qdisc_class_ops htb_class_ops = {
2100         .select_queue   =       htb_select_queue,
2101         .graft          =       htb_graft,
2102         .leaf           =       htb_leaf,
2103         .qlen_notify    =       htb_qlen_notify,
2104         .find           =       htb_search,
2105         .change         =       htb_change_class,
2106         .delete         =       htb_delete,
2107         .walk           =       htb_walk,
2108         .tcf_block      =       htb_tcf_block,
2109         .bind_tcf       =       htb_bind_filter,
2110         .unbind_tcf     =       htb_unbind_filter,
2111         .dump           =       htb_dump_class,
2112         .dump_stats     =       htb_dump_class_stats,
2113 };
2114
2115 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2116         .cl_ops         =       &htb_class_ops,
2117         .id             =       "htb",
2118         .priv_size      =       sizeof(struct htb_sched),
2119         .enqueue        =       htb_enqueue,
2120         .dequeue        =       htb_dequeue,
2121         .peek           =       qdisc_peek_dequeued,
2122         .init           =       htb_init,
2123         .attach         =       htb_attach,
2124         .reset          =       htb_reset,
2125         .destroy        =       htb_destroy,
2126         .dump           =       htb_dump,
2127         .owner          =       THIS_MODULE,
2128 };
2129
2130 static int __init htb_module_init(void)
2131 {
2132         return register_qdisc(&htb_qdisc_ops);
2133 }
2134 static void __exit htb_module_exit(void)
2135 {
2136         unregister_qdisc(&htb_qdisc_ops);
2137 }
2138
2139 module_init(htb_module_init)
2140 module_exit(htb_module_exit)
2141 MODULE_LICENSE("GPL");