1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
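/* Illustrative example (assuming TC_HTB_MAXDEPTH is 8, its current UAPI
 * value): for the hierarchy
 *   1:1 (root) -> 1:10 (inner) -> 1:100, 1:101 (leaves)
 * class 1:1 gets level 7, 1:10 gets level 6 and both leaves get level 0,
 * so levels are not necessarily contiguous.
 */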
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must match the version number supplied by TC */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "set up a default rate estimator (4sec 16sec) for htb classes");
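/* Usage sketch (illustrative): with the 0640 permissions above, both knobs
 * can be set at module load time or later via sysfs, e.g.
 *   modprobe sch_htb htb_hysteresis=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_rate_est
 */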
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When a class changes from state 1->2 and disconnects from
83          * its parent's feed, we lose the ptr value and start from the
84          * first child again. Here we store the classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_packed bstats;
117         struct gnet_stats_basic_packed bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                 } leaf;
129                 struct htb_class_inner {
130                         struct htb_prio clprio[TC_HTB_NUMPRIO];
131                 } inner;
132         };
133         s64                     pq_key;
134
135         int                     prio_activity;  /* for which prios are we active */
136         enum htb_cmode          cmode;          /* current mode of the class */
137         struct rb_node          pq_node;        /* node for event queue */
138         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
139
140         unsigned int drops ____cacheline_aligned_in_smp;
141         unsigned int            overlimits;
142 };
143
144 struct htb_level {
145         struct rb_root  wait_pq;
146         struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148
149 struct htb_sched {
150         struct Qdisc_class_hash clhash;
151         int                     defcls;         /* class where unclassified flows go to */
152         int                     rate2quantum;   /* quant = rate / rate2quantum */
153
154         /* filters for qdisc itself */
155         struct tcf_proto __rcu  *filter_list;
156         struct tcf_block        *block;
157
158 #define HTB_WARN_TOOMANYEVENTS  0x1
159         unsigned int            warned; /* only one warning */
160         int                     direct_qlen;
161         struct work_struct      work;
162
163         /* non shaped skbs; let them go directly thru */
164         struct qdisc_skb_head   direct_queue;
165         u32                     direct_pkts;
166         u32                     overlimits;
167
168         struct qdisc_watchdog   watchdog;
169
170         s64                     now;    /* cached dequeue time */
171
172         /* time of nearest event per level (row) */
173         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
174
175         int                     row_mask[TC_HTB_MAXDEPTH];
176
177         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
178
179         struct Qdisc            **direct_qdiscs;
180         unsigned int            num_direct_qdiscs;
181
182         bool                    offload;
183 };
184
185 /* find class in global hash table using given handle */
186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188         struct htb_sched *q = qdisc_priv(sch);
189         struct Qdisc_class_common *clc;
190
191         clc = qdisc_class_find(&q->clhash, handle);
192         if (clc == NULL)
193                 return NULL;
194         return container_of(clc, struct htb_class, common);
195 }
196
197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199         return (unsigned long)htb_find(handle, sch);
200 }
201 /**
202  * htb_classify - classify a packet into class
203  *
204  * It returns NULL if the packet should be dropped or -1 if the packet
205  * should be passed directly thru. In all other cases a leaf class is returned.
206  * We allow direct class selection by classid in skb->priority. Then we examine
207  * filters in the qdisc and in inner nodes (if a higher filter points to the inner
208  * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
209  * internal fifo (direct). These packets then go directly thru. If we still
210  * have no valid leaf we try to use the MAJOR:default leaf. If still unsuccessful,
211  * we finish and return the direct queue.
212  */
213 #define HTB_DIRECT ((struct htb_class *)-1L)
214
215 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216                                       int *qerr)
217 {
218         struct htb_sched *q = qdisc_priv(sch);
219         struct htb_class *cl;
220         struct tcf_result res;
221         struct tcf_proto *tcf;
222         int result;
223
224         /* allow selecting a class by setting skb->priority to a valid classid;
225          * note that nfmark can be used too by attaching an fw filter with no
226          * rules in it
227          */
228         if (skb->priority == sch->handle)
229                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
230         cl = htb_find(skb->priority, sch);
231         if (cl) {
232                 if (cl->level == 0)
233                         return cl;
234                 /* Start with inner filter chain if a non-leaf class is selected */
235                 tcf = rcu_dereference_bh(cl->filter_list);
236         } else {
237                 tcf = rcu_dereference_bh(q->filter_list);
238         }
239
240         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
241         while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
242 #ifdef CONFIG_NET_CLS_ACT
243                 switch (result) {
244                 case TC_ACT_QUEUED:
245                 case TC_ACT_STOLEN:
246                 case TC_ACT_TRAP:
247                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
248                         fallthrough;
249                 case TC_ACT_SHOT:
250                         return NULL;
251                 }
252 #endif
253                 cl = (void *)res.class;
254                 if (!cl) {
255                         if (res.classid == sch->handle)
256                                 return HTB_DIRECT;      /* X:0 (direct flow) */
257                         cl = htb_find(res.classid, sch);
258                         if (!cl)
259                                 break;  /* filter selected invalid classid */
260                 }
261                 if (!cl->level)
262                         return cl;      /* we hit leaf; return it */
263
264                 /* we have got inner class; apply inner filter chain */
265                 tcf = rcu_dereference_bh(cl->filter_list);
266         }
267         /* classification failed; try to use default class */
268         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
269         if (!cl || cl->level)
270                 return HTB_DIRECT;      /* bad default .. this is safe bet */
271         return cl;
272 }
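/* Sketch of how the return convention above is consumed (this mirrors
 * htb_enqueue() below and is illustrative only):
 *
 *	cl = htb_classify(skb, sch, &ret);
 *	if (cl == HTB_DIRECT)
 *		...enqueue to q->direct_queue...
 *	else if (!cl)
 *		...drop; ret already carries the NET_XMIT verdict...
 *	else
 *		...enqueue to the leaf qdisc cl->leaf.q...
 */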
273
274 /**
275  * htb_add_to_id_tree - adds class to the round robin list
276  * @root: the root of the tree
277  * @cl: the class to add
278  * @prio: the given prio in class
279  *
280  * Routine adds class to the list (actually tree) sorted by classid.
281  * Make sure that the class is not already on such a list for the given prio.
282  */
283 static void htb_add_to_id_tree(struct rb_root *root,
284                                struct htb_class *cl, int prio)
285 {
286         struct rb_node **p = &root->rb_node, *parent = NULL;
287
288         while (*p) {
289                 struct htb_class *c;
290                 parent = *p;
291                 c = rb_entry(parent, struct htb_class, node[prio]);
292
293                 if (cl->common.classid > c->common.classid)
294                         p = &parent->rb_right;
295                 else
296                         p = &parent->rb_left;
297         }
298         rb_link_node(&cl->node[prio], parent, p);
299         rb_insert_color(&cl->node[prio], root);
300 }
301
302 /**
303  * htb_add_to_wait_tree - adds class to the event queue with delay
304  * @q: the priority event queue
305  * @cl: the class to add
306  * @delay: delay in nanoseconds
307  *
308  * The class is added to the priority event queue to indicate that the class will
309  * change its mode at time cl->pq_key (in nanoseconds). Make sure that the class is not
310  * already in the queue.
311  */
312 static void htb_add_to_wait_tree(struct htb_sched *q,
313                                  struct htb_class *cl, s64 delay)
314 {
315         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
316
317         cl->pq_key = q->now + delay;
318         if (cl->pq_key == q->now)
319                 cl->pq_key++;
320
321         /* update the nearest event cache */
322         if (q->near_ev_cache[cl->level] > cl->pq_key)
323                 q->near_ev_cache[cl->level] = cl->pq_key;
324
325         while (*p) {
326                 struct htb_class *c;
327                 parent = *p;
328                 c = rb_entry(parent, struct htb_class, pq_node);
329                 if (cl->pq_key >= c->pq_key)
330                         p = &parent->rb_right;
331                 else
332                         p = &parent->rb_left;
333         }
334         rb_link_node(&cl->pq_node, parent, p);
335         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
336 }
337
338 /**
339  * htb_next_rb_node - finds next node in binary tree
340  * @n: the current node in binary tree
341  *
342  * When we are past the last key, *n is set to NULL.
343  * Average complexity is 2 steps per call.
344  */
345 static inline void htb_next_rb_node(struct rb_node **n)
346 {
347         *n = rb_next(*n);
348 }
349
350 /**
351  * htb_add_class_to_row - add class to its row
352  * @q: the priority event queue
353  * @cl: the class to add
354  * @mask: the given priorities in class in bitmap
355  *
356  * The class is added to row at priorities marked in mask.
357  * It does nothing if mask == 0.
358  */
359 static inline void htb_add_class_to_row(struct htb_sched *q,
360                                         struct htb_class *cl, int mask)
361 {
362         q->row_mask[cl->level] |= mask;
363         while (mask) {
364                 int prio = ffz(~mask);
365                 mask &= ~(1 << prio);
366                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
367         }
368 }
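/* Example (illustrative): mask == 0x5 inserts the class into the prio 0
 * and prio 2 row trees of its level and sets those two bits in
 * q->row_mask[cl->level].
 */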
369
370 /* If this triggers, it is a bug in this code, but it need not be fatal */
371 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
372 {
373         if (RB_EMPTY_NODE(rb)) {
374                 WARN_ON(1);
375         } else {
376                 rb_erase(rb, root);
377                 RB_CLEAR_NODE(rb);
378         }
379 }
380
381
382 /**
383  * htb_remove_class_from_row - removes class from its row
384  * @q: the priority event queue
385  * @cl: the class to remove
386  * @mask: the given priorities in class in bitmap
387  *
388  * The class is removed from row at priorities marked in mask.
389  * It does nothing if mask == 0.
390  */
391 static inline void htb_remove_class_from_row(struct htb_sched *q,
392                                                  struct htb_class *cl, int mask)
393 {
394         int m = 0;
395         struct htb_level *hlevel = &q->hlevel[cl->level];
396
397         while (mask) {
398                 int prio = ffz(~mask);
399                 struct htb_prio *hprio = &hlevel->hprio[prio];
400
401                 mask &= ~(1 << prio);
402                 if (hprio->ptr == cl->node + prio)
403                         htb_next_rb_node(&hprio->ptr);
404
405                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
406                 if (!hprio->row.rb_node)
407                         m |= 1 << prio;
408         }
409         q->row_mask[cl->level] &= ~m;
410 }
411
412 /**
413  * htb_activate_prios - creates active class's feed chain
414  * @q: the priority event queue
415  * @cl: the class to activate
416  *
417  * The class is connected to ancestors and/or appropriate rows
418  * for priorities it is participating in. cl->cmode must be the new
419  * (activated) mode. It does nothing if cl->prio_activity == 0.
420  */
421 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
422 {
423         struct htb_class *p = cl->parent;
424         long m, mask = cl->prio_activity;
425
426         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
427                 m = mask;
428                 while (m) {
429                         int prio = ffz(~m);
430                         m &= ~(1 << prio);
431
432                         if (p->inner.clprio[prio].feed.rb_node)
433                                 /* parent already has its feed in use so
434                                  * reset the bit in mask as parent is already ok
435                                  */
436                                 mask &= ~(1 << prio);
437
438                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
439                 }
440                 p->prio_activity |= mask;
441                 cl = p;
442                 p = cl->parent;
443
444         }
445         if (cl->cmode == HTB_CAN_SEND && mask)
446                 htb_add_class_to_row(q, cl, mask);
447 }
448
449 /**
450  * htb_deactivate_prios - remove class from feed chain
451  * @q: the priority event queue
452  * @cl: the class to deactivate
453  *
454  * cl->cmode must represent old mode (before deactivation). It does
455  * nothing if cl->prio_activity == 0. Class is removed from all feed
456  * chains and rows.
457  */
458 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
459 {
460         struct htb_class *p = cl->parent;
461         long m, mask = cl->prio_activity;
462
463         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
464                 m = mask;
465                 mask = 0;
466                 while (m) {
467                         int prio = ffz(~m);
468                         m &= ~(1 << prio);
469
470                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
471                                 /* we are removing child which is pointed to from
472                                  * parent feed - forget the pointer but remember
473                                  * classid
474                                  */
475                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
476                                 p->inner.clprio[prio].ptr = NULL;
477                         }
478
479                         htb_safe_rb_erase(cl->node + prio,
480                                           &p->inner.clprio[prio].feed);
481
482                         if (!p->inner.clprio[prio].feed.rb_node)
483                                 mask |= 1 << prio;
484                 }
485
486                 p->prio_activity &= ~mask;
487                 cl = p;
488                 p = cl->parent;
489
490         }
491         if (cl->cmode == HTB_CAN_SEND && mask)
492                 htb_remove_class_from_row(q, cl, mask);
493 }
494
495 static inline s64 htb_lowater(const struct htb_class *cl)
496 {
497         if (htb_hysteresis)
498                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
499         else
500                 return 0;
501 }
502 static inline s64 htb_hiwater(const struct htb_class *cl)
503 {
504         if (htb_hysteresis)
505                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
506         else
507                 return 0;
508 }
509
510
511 /**
512  * htb_class_mode - computes and returns current class mode
513  * @cl: the target class
514  * @diff: diff time in nanoseconds
515  *
516  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
517  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
518  * from now to time when cl will change its state.
519  * Also it is worth noting that class mode doesn't change simply
520  * at cl->{c,}tokens == 0 but there can rather be hysteresis of
521  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
522  * mode transitions per time unit. The speed gain is about 1/6.
523  */
524 static inline enum htb_cmode
525 htb_class_mode(struct htb_class *cl, s64 *diff)
526 {
527         s64 toks;
528
529         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
530                 *diff = -toks;
531                 return HTB_CANT_SEND;
532         }
533
534         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
535                 return HTB_CAN_SEND;
536
537         *diff = -toks;
538         return HTB_MAY_BORROW;
539 }
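/* Worked example (illustrative numbers, htb_hysteresis == 0 so both
 * thresholds are 0): tokens = -3000, ctokens = 5000, *diff = 1000 gives
 * ctokens + diff = 6000 >= 0 (ceil not exceeded) but tokens + diff = -2000
 * < 0, so the result is HTB_MAY_BORROW and *diff becomes 2000, the time
 * left until the rate bucket reaches zero again.
 */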
540
541 /**
542  * htb_change_class_mode - changes class's mode
543  * @q: the priority event queue
544  * @cl: the target class
545  * @diff: diff time in nanoseconds
546  *
547  * This should be the only way to change a class's mode under normal
548  * circumstances. Routine will update feed lists linkage, change mode
549  * and add class to the wait event queue if appropriate. New mode should
550  * be different from old one and cl->pq_key has to be valid if changing
551  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
552  */
553 static void
554 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
555 {
556         enum htb_cmode new_mode = htb_class_mode(cl, diff);
557
558         if (new_mode == cl->cmode)
559                 return;
560
561         if (new_mode == HTB_CANT_SEND) {
562                 cl->overlimits++;
563                 q->overlimits++;
564         }
565
566         if (cl->prio_activity) {        /* not necessary: speed optimization */
567                 if (cl->cmode != HTB_CANT_SEND)
568                         htb_deactivate_prios(q, cl);
569                 cl->cmode = new_mode;
570                 if (new_mode != HTB_CANT_SEND)
571                         htb_activate_prios(q, cl);
572         } else
573                 cl->cmode = new_mode;
574 }
575
576 /**
577  * htb_activate - inserts leaf cl into appropriate active feeds
578  *
579  * Routine learns the (new) priority of the leaf and activates the feed
580  * chain for that prio. It can be called safely on an already active
581  * leaf.
582  */
583 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
584 {
585         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
586
587         if (!cl->prio_activity) {
588                 cl->prio_activity = 1 << cl->prio;
589                 htb_activate_prios(q, cl);
590         }
591 }
592
593 /**
594  * htb_deactivate - remove leaf cl from active feeds
595  *
596  * Make sure that the leaf is active. In other words it can't be called
597  * with a non-active leaf.
598  */
599 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
600 {
601         WARN_ON(!cl->prio_activity);
602
603         htb_deactivate_prios(q, cl);
604         cl->prio_activity = 0;
605 }
606
607 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
608                        struct sk_buff **to_free)
609 {
610         int ret;
611         unsigned int len = qdisc_pkt_len(skb);
612         struct htb_sched *q = qdisc_priv(sch);
613         struct htb_class *cl = htb_classify(skb, sch, &ret);
614
615         if (cl == HTB_DIRECT) {
616                 /* enqueue to helper queue */
617                 if (q->direct_queue.qlen < q->direct_qlen) {
618                         __qdisc_enqueue_tail(skb, &q->direct_queue);
619                         q->direct_pkts++;
620                 } else {
621                         return qdisc_drop(skb, sch, to_free);
622                 }
623 #ifdef CONFIG_NET_CLS_ACT
624         } else if (!cl) {
625                 if (ret & __NET_XMIT_BYPASS)
626                         qdisc_qstats_drop(sch);
627                 __qdisc_drop(skb, to_free);
628                 return ret;
629 #endif
630         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
631                                         to_free)) != NET_XMIT_SUCCESS) {
632                 if (net_xmit_drop_count(ret)) {
633                         qdisc_qstats_drop(sch);
634                         cl->drops++;
635                 }
636                 return ret;
637         } else {
638                 htb_activate(q, cl);
639         }
640
641         sch->qstats.backlog += len;
642         sch->q.qlen++;
643         return NET_XMIT_SUCCESS;
644 }
645
646 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
647 {
648         s64 toks = diff + cl->tokens;
649
650         if (toks > cl->buffer)
651                 toks = cl->buffer;
652         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
653         if (toks <= -cl->mbuffer)
654                 toks = 1 - cl->mbuffer;
655
656         cl->tokens = toks;
657 }
658
659 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
660 {
661         s64 toks = diff + cl->ctokens;
662
663         if (toks > cl->cbuffer)
664                 toks = cl->cbuffer;
665         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
666         if (toks <= -cl->mbuffer)
667                 toks = 1 - cl->mbuffer;
668
669         cl->ctokens = toks;
670 }
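/* Worked example (illustrative numbers): at 1 Gbit/s psched_l2t_ns()
 * charges 8000 ns for a 1000-byte packet, so tokens = 2000, diff = 5000
 * and buffer = 10000 yield min(2000 + 5000, 10000) - 8000 = -1000 ns;
 * only a result below -mbuffer would be clamped to 1 - mbuffer.
 */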
671
672 /**
673  * htb_charge_class - charges amount "bytes" to leaf and ancestors
674  *
675  * Routine assumes that a packet "bytes" long was dequeued from leaf cl
676  * borrowing from "level". It accounts bytes to the ceil leaky bucket for
677  * the leaf and all ancestors and to the rate bucket for ancestors at levels
678  * "level" and higher. It also handles possible change of mode resulting
679  * from the update. Note that mode can also increase here (MAY_BORROW to
680  * CAN_SEND) because we can use a more precise clock than the event queue here.
681  * In such a case we remove the class from the event queue first.
682  */
683 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
684                              int level, struct sk_buff *skb)
685 {
686         int bytes = qdisc_pkt_len(skb);
687         enum htb_cmode old_mode;
688         s64 diff;
689
690         while (cl) {
691                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
692                 if (cl->level >= level) {
693                         if (cl->level == level)
694                                 cl->xstats.lends++;
695                         htb_accnt_tokens(cl, bytes, diff);
696                 } else {
697                         cl->xstats.borrows++;
698                         cl->tokens += diff;     /* we moved t_c; update tokens */
699                 }
700                 htb_accnt_ctokens(cl, bytes, diff);
701                 cl->t_c = q->now;
702
703                 old_mode = cl->cmode;
704                 diff = 0;
705                 htb_change_class_mode(q, cl, &diff);
706                 if (old_mode != cl->cmode) {
707                         if (old_mode != HTB_CAN_SEND)
708                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
709                         if (cl->cmode != HTB_CAN_SEND)
710                                 htb_add_to_wait_tree(q, cl, diff);
711                 }
712
713                 /* update basic stats except for leaves which are already updated */
714                 if (cl->level)
715                         bstats_update(&cl->bstats, skb);
716
717                 cl = cl->parent;
718         }
719 }
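/* Example (illustrative): a packet dequeued from a leaf that borrowed
 * from its grandparent ("level" == grandparent's level) charges ctokens
 * (ceil) on the leaf, the parent and the grandparent, but charges rate
 * tokens only on the grandparent and anything above it; the leaf and the
 * parent just count a "borrows" event and have their tokens refreshed by
 * the elapsed time.
 */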
720
721 /**
722  * htb_do_events - make mode changes to classes at the level
723  *
724  * Scans event queue for pending events and applies them. Returns time of
725  * next pending event (0 for no event in pq, q->now for too many events).
726  * Note: only events with cl->pq_key <= q->now are applied.
727  */
728 static s64 htb_do_events(struct htb_sched *q, const int level,
729                          unsigned long start)
730 {
731         /* don't run for longer than 2 jiffies; 2 is used instead of
732          * 1 to simplify things when jiffy is going to be incremented
733          * too soon
734          */
735         unsigned long stop_at = start + 2;
736         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
737
738         while (time_before(jiffies, stop_at)) {
739                 struct htb_class *cl;
740                 s64 diff;
741                 struct rb_node *p = rb_first(wait_pq);
742
743                 if (!p)
744                         return 0;
745
746                 cl = rb_entry(p, struct htb_class, pq_node);
747                 if (cl->pq_key > q->now)
748                         return cl->pq_key;
749
750                 htb_safe_rb_erase(p, wait_pq);
751                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
752                 htb_change_class_mode(q, cl, &diff);
753                 if (cl->cmode != HTB_CAN_SEND)
754                         htb_add_to_wait_tree(q, cl, diff);
755         }
756
757         /* too much load - let's continue after a break for scheduling */
758         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
759                 pr_warn("htb: too many events!\n");
760                 q->warned |= HTB_WARN_TOOMANYEVENTS;
761         }
762
763         return q->now;
764 }
765
766 /* Returns class->node+prio from the id-tree where the class's id is >= id. NULL
767  * if no such one exists.
768  */
769 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
770                                               u32 id)
771 {
772         struct rb_node *r = NULL;
773         while (n) {
774                 struct htb_class *cl =
775                     rb_entry(n, struct htb_class, node[prio]);
776
777                 if (id > cl->common.classid) {
778                         n = n->rb_right;
779                 } else if (id < cl->common.classid) {
780                         r = n;
781                         n = n->rb_left;
782                 } else {
783                         return n;
784                 }
785         }
786         return r;
787 }
788
789 /**
790  * htb_lookup_leaf - returns next leaf class in DRR order
791  *
792  * Find the leaf the current feed pointer points to.
793  */
794 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
795 {
796         int i;
797         struct {
798                 struct rb_node *root;
799                 struct rb_node **pptr;
800                 u32 *pid;
801         } stk[TC_HTB_MAXDEPTH], *sp = stk;
802
803         BUG_ON(!hprio->row.rb_node);
804         sp->root = hprio->row.rb_node;
805         sp->pptr = &hprio->ptr;
806         sp->pid = &hprio->last_ptr_id;
807
808         for (i = 0; i < 65535; i++) {
809                 if (!*sp->pptr && *sp->pid) {
810                         /* ptr was invalidated but id is valid - try to recover
811                          * the original or next ptr
812                          */
813                         *sp->pptr =
814                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
815                 }
816                 *sp->pid = 0;   /* ptr is valid now so remove this hint as it
817                                  * can become out of date quickly
818                                  */
819                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
820                         *sp->pptr = sp->root;
821                         while ((*sp->pptr)->rb_left)
822                                 *sp->pptr = (*sp->pptr)->rb_left;
823                         if (sp > stk) {
824                                 sp--;
825                                 if (!*sp->pptr) {
826                                         WARN_ON(1);
827                                         return NULL;
828                                 }
829                                 htb_next_rb_node(sp->pptr);
830                         }
831                 } else {
832                         struct htb_class *cl;
833                         struct htb_prio *clp;
834
835                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
836                         if (!cl->level)
837                                 return cl;
838                         clp = &cl->inner.clprio[prio];
839                         (++sp)->root = clp->feed.rb_node;
840                         sp->pptr = &clp->ptr;
841                         sp->pid = &clp->last_ptr_id;
842                 }
843         }
844         WARN_ON(1);
845         return NULL;
846 }
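/* Example (illustrative): the walk starts at hprio->ptr in the row of the
 * given level and descends through the inner.clprio[prio].feed trees until
 * it reaches a leaf; whenever a ptr has been invalidated, last_ptr_id is
 * used to resume at the next classid >= the remembered one.
 */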
847
848 /* dequeues packet at given priority and level; call only if
849  * you are sure that there is an active class at prio/level
850  */
851 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
852                                         const int level)
853 {
854         struct sk_buff *skb = NULL;
855         struct htb_class *cl, *start;
856         struct htb_level *hlevel = &q->hlevel[level];
857         struct htb_prio *hprio = &hlevel->hprio[prio];
858
859         /* look initial class up in the row */
860         start = cl = htb_lookup_leaf(hprio, prio);
861
862         do {
863 next:
864                 if (unlikely(!cl))
865                         return NULL;
866
867                 /* class can be empty - it is unlikely but can be true if leaf
868                  * qdisc drops packets in enqueue routine or if someone used
869                  * graft operation on the leaf since last dequeue;
870                  * simply deactivate and skip such class
871                  */
872                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
873                         struct htb_class *next;
874                         htb_deactivate(q, cl);
875
876                         /* row/level might become empty */
877                         if ((q->row_mask[level] & (1 << prio)) == 0)
878                                 return NULL;
879
880                         next = htb_lookup_leaf(hprio, prio);
881
882                         if (cl == start)        /* fix start if we just deleted it */
883                                 start = next;
884                         cl = next;
885                         goto next;
886                 }
887
888                 skb = cl->leaf.q->dequeue(cl->leaf.q);
889                 if (likely(skb != NULL))
890                         break;
891
892                 qdisc_warn_nonwc("htb", cl->leaf.q);
893                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
894                                          &q->hlevel[0].hprio[prio].ptr);
895                 cl = htb_lookup_leaf(hprio, prio);
896
897         } while (cl != start);
898
899         if (likely(skb != NULL)) {
900                 bstats_update(&cl->bstats, skb);
901                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
902                 if (cl->leaf.deficit[level] < 0) {
903                         cl->leaf.deficit[level] += cl->quantum;
904                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
905                                                  &q->hlevel[0].hprio[prio].ptr);
906                 }
907                 /* this used to be after charge_class but this constellation
908                  * gives us slightly better performance
909                  */
910                 if (!cl->leaf.q->q.qlen)
911                         htb_deactivate(q, cl);
912                 htb_charge_class(q, cl, level, skb);
913         }
914         return skb;
915 }
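/* DRR example (illustrative): with quantum = 1500 a leaf keeps being
 * selected until the bytes it dequeues drive leaf.deficit[level] below
 * zero; only then is the quantum added back and the round-robin ptr for
 * this prio advanced to the next class.
 */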
916
917 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
918 {
919         struct sk_buff *skb;
920         struct htb_sched *q = qdisc_priv(sch);
921         int level;
922         s64 next_event;
923         unsigned long start_at;
924
925         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
926         skb = __qdisc_dequeue_head(&q->direct_queue);
927         if (skb != NULL) {
928 ok:
929                 qdisc_bstats_update(sch, skb);
930                 qdisc_qstats_backlog_dec(sch, skb);
931                 sch->q.qlen--;
932                 return skb;
933         }
934
935         if (!sch->q.qlen)
936                 goto fin;
937         q->now = ktime_get_ns();
938         start_at = jiffies;
939
940         next_event = q->now + 5LLU * NSEC_PER_SEC;
941
942         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
943                 /* common case optimization - skip event handler quickly */
944                 int m;
945                 s64 event = q->near_ev_cache[level];
946
947                 if (q->now >= event) {
948                         event = htb_do_events(q, level, start_at);
949                         if (!event)
950                                 event = q->now + NSEC_PER_SEC;
951                         q->near_ev_cache[level] = event;
952                 }
953
954                 if (next_event > event)
955                         next_event = event;
956
957                 m = ~q->row_mask[level];
958                 while (m != (int)(-1)) {
959                         int prio = ffz(m);
960
961                         m |= 1 << prio;
962                         skb = htb_dequeue_tree(q, prio, level);
963                         if (likely(skb != NULL))
964                                 goto ok;
965                 }
966         }
967         if (likely(next_event > q->now))
968                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
969         else
970                 schedule_work(&q->work);
971 fin:
972         return skb;
973 }
974
975 /* reset all classes */
976 /* always called under BH & queue lock */
977 static void htb_reset(struct Qdisc *sch)
978 {
979         struct htb_sched *q = qdisc_priv(sch);
980         struct htb_class *cl;
981         unsigned int i;
982
983         for (i = 0; i < q->clhash.hashsize; i++) {
984                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
985                         if (cl->level)
986                                 memset(&cl->inner, 0, sizeof(cl->inner));
987                         else {
988                                 if (cl->leaf.q && !q->offload)
989                                         qdisc_reset(cl->leaf.q);
990                         }
991                         cl->prio_activity = 0;
992                         cl->cmode = HTB_CAN_SEND;
993                 }
994         }
995         qdisc_watchdog_cancel(&q->watchdog);
996         __qdisc_reset_queue(&q->direct_queue);
997         sch->q.qlen = 0;
998         sch->qstats.backlog = 0;
999         memset(q->hlevel, 0, sizeof(q->hlevel));
1000         memset(q->row_mask, 0, sizeof(q->row_mask));
1001 }
1002
1003 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1004         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1005         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1006         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1007         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1008         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1009         [TCA_HTB_RATE64] = { .type = NLA_U64 },
1010         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1011         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1012 };
1013
1014 static void htb_work_func(struct work_struct *work)
1015 {
1016         struct htb_sched *q = container_of(work, struct htb_sched, work);
1017         struct Qdisc *sch = q->watchdog.qdisc;
1018
1019         rcu_read_lock();
1020         __netif_schedule(qdisc_root(sch));
1021         rcu_read_unlock();
1022 }
1023
1024 static void htb_set_lockdep_class_child(struct Qdisc *q)
1025 {
1026         static struct lock_class_key child_key;
1027
1028         lockdep_set_class(qdisc_lock(q), &child_key);
1029 }
1030
1031 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1032 {
1033         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1034 }
1035
1036 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1037                     struct netlink_ext_ack *extack)
1038 {
1039         struct net_device *dev = qdisc_dev(sch);
1040         struct tc_htb_qopt_offload offload_opt;
1041         struct htb_sched *q = qdisc_priv(sch);
1042         struct nlattr *tb[TCA_HTB_MAX + 1];
1043         struct tc_htb_glob *gopt;
1044         unsigned int ntx;
1045         bool offload;
1046         int err;
1047
1048         qdisc_watchdog_init(&q->watchdog, sch);
1049         INIT_WORK(&q->work, htb_work_func);
1050
1051         if (!opt)
1052                 return -EINVAL;
1053
1054         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1055         if (err)
1056                 return err;
1057
1058         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1059                                           NULL);
1060         if (err < 0)
1061                 return err;
1062
1063         if (!tb[TCA_HTB_INIT])
1064                 return -EINVAL;
1065
1066         gopt = nla_data(tb[TCA_HTB_INIT]);
1067         if (gopt->version != HTB_VER >> 16)
1068                 return -EINVAL;
1069
1070         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1071
1072         if (offload) {
1073                 if (sch->parent != TC_H_ROOT)
1074                         return -EOPNOTSUPP;
1075
1076                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1077                         return -EOPNOTSUPP;
1078
1079                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1080                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1081                                            sizeof(*q->direct_qdiscs),
1082                                            GFP_KERNEL);
1083                 if (!q->direct_qdiscs)
1084                         return -ENOMEM;
1085         }
1086
1087         err = qdisc_class_hash_init(&q->clhash);
1088         if (err < 0)
1089                 goto err_free_direct_qdiscs;
1090
1091         qdisc_skb_head_init(&q->direct_queue);
1092
1093         if (tb[TCA_HTB_DIRECT_QLEN])
1094                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1095         else
1096                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1097
1098         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1099                 q->rate2quantum = 1;
1100         q->defcls = gopt->defcls;
1101
1102         if (!offload)
1103                 return 0;
1104
1105         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1106                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1107                 struct Qdisc *qdisc;
1108
1109                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1110                                           TC_H_MAKE(sch->handle, 0), extack);
1111                 if (!qdisc) {
1112                         err = -ENOMEM;
1113                         goto err_free_qdiscs;
1114                 }
1115
1116                 htb_set_lockdep_class_child(qdisc);
1117                 q->direct_qdiscs[ntx] = qdisc;
1118                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1119         }
1120
1121         sch->flags |= TCQ_F_MQROOT;
1122
1123         offload_opt = (struct tc_htb_qopt_offload) {
1124                 .command = TC_HTB_CREATE,
1125                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1126                 .classid = TC_H_MIN(q->defcls),
1127                 .extack = extack,
1128         };
1129         err = htb_offload(dev, &offload_opt);
1130         if (err)
1131                 goto err_free_qdiscs;
1132
1133         /* Defer this assignment, so that htb_destroy skips offload-related
1134          * parts (especially calling ndo_setup_tc) on errors.
1135          */
1136         q->offload = true;
1137
1138         return 0;
1139
1140 err_free_qdiscs:
1141         for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1142              ntx++)
1143                 qdisc_put(q->direct_qdiscs[ntx]);
1144
1145         qdisc_class_hash_destroy(&q->clhash);
1146         /* Prevent use-after-free and double-free when htb_destroy gets called.
1147          */
1148         q->clhash.hash = NULL;
1149         q->clhash.hashsize = 0;
1150
1151 err_free_direct_qdiscs:
1152         kfree(q->direct_qdiscs);
1153         q->direct_qdiscs = NULL;
1154         return err;
1155 }
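/* Typical user-space setup handled above (illustrative; tc parses the
 * default minor id as hex):
 *   tc qdisc add dev eth0 root handle 1: htb default 10 r2q 10
 * arrives as TCA_HTB_INIT with defcls == 0x10 and rate2quantum == 10 and is
 * copied into q->defcls and q->rate2quantum; adding the "offload" keyword
 * (where the tc binary supports it) sets TCA_HTB_OFFLOAD and takes the
 * offload path above.
 */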
1156
1157 static void htb_attach_offload(struct Qdisc *sch)
1158 {
1159         struct net_device *dev = qdisc_dev(sch);
1160         struct htb_sched *q = qdisc_priv(sch);
1161         unsigned int ntx;
1162
1163         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1164                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1165
1166                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1167                 qdisc_put(old);
1168                 qdisc_hash_add(qdisc, false);
1169         }
1170         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1171                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1172                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1173
1174                 qdisc_put(old);
1175         }
1176
1177         kfree(q->direct_qdiscs);
1178         q->direct_qdiscs = NULL;
1179 }
1180
1181 static void htb_attach_software(struct Qdisc *sch)
1182 {
1183         struct net_device *dev = qdisc_dev(sch);
1184         unsigned int ntx;
1185
1186         /* Resemble qdisc_graft behavior. */
1187         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1188                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1189                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1190
1191                 qdisc_refcount_inc(sch);
1192
1193                 qdisc_put(old);
1194         }
1195 }
1196
1197 static void htb_attach(struct Qdisc *sch)
1198 {
1199         struct htb_sched *q = qdisc_priv(sch);
1200
1201         if (q->offload)
1202                 htb_attach_offload(sch);
1203         else
1204                 htb_attach_software(sch);
1205 }
1206
1207 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1208 {
1209         struct htb_sched *q = qdisc_priv(sch);
1210         struct nlattr *nest;
1211         struct tc_htb_glob gopt;
1212
1213         if (q->offload)
1214                 sch->flags |= TCQ_F_OFFLOADED;
1215         else
1216                 sch->flags &= ~TCQ_F_OFFLOADED;
1217
1218         sch->qstats.overlimits = q->overlimits;
1219         /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1220          * no change can happen on the qdisc parameters.
1221          */
1222
1223         gopt.direct_pkts = q->direct_pkts;
1224         gopt.version = HTB_VER;
1225         gopt.rate2quantum = q->rate2quantum;
1226         gopt.defcls = q->defcls;
1227         gopt.debug = 0;
1228
1229         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1230         if (nest == NULL)
1231                 goto nla_put_failure;
1232         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1233             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1234                 goto nla_put_failure;
1235         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1236                 goto nla_put_failure;
1237
1238         return nla_nest_end(skb, nest);
1239
1240 nla_put_failure:
1241         nla_nest_cancel(skb, nest);
1242         return -1;
1243 }
1244
1245 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1246                           struct sk_buff *skb, struct tcmsg *tcm)
1247 {
1248         struct htb_class *cl = (struct htb_class *)arg;
1249         struct htb_sched *q = qdisc_priv(sch);
1250         struct nlattr *nest;
1251         struct tc_htb_opt opt;
1252
1253         /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1254          * no change can happen on the class parameters.
1255          */
1256         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1257         tcm->tcm_handle = cl->common.classid;
1258         if (!cl->level && cl->leaf.q)
1259                 tcm->tcm_info = cl->leaf.q->handle;
1260
1261         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1262         if (nest == NULL)
1263                 goto nla_put_failure;
1264
1265         memset(&opt, 0, sizeof(opt));
1266
1267         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1268         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1269         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1270         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1271         opt.quantum = cl->quantum;
1272         opt.prio = cl->prio;
1273         opt.level = cl->level;
1274         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1275                 goto nla_put_failure;
1276         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1277                 goto nla_put_failure;
1278         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1279             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1280                               TCA_HTB_PAD))
1281                 goto nla_put_failure;
1282         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1283             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1284                               TCA_HTB_PAD))
1285                 goto nla_put_failure;
1286
1287         return nla_nest_end(skb, nest);
1288
1289 nla_put_failure:
1290         nla_nest_cancel(skb, nest);
1291         return -1;
1292 }
1293
1294 static void htb_offload_aggregate_stats(struct htb_sched *q,
1295                                         struct htb_class *cl)
1296 {
1297         struct htb_class *c;
1298         unsigned int i;
1299
1300         memset(&cl->bstats, 0, sizeof(cl->bstats));
1301
1302         for (i = 0; i < q->clhash.hashsize; i++) {
1303                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1304                         struct htb_class *p = c;
1305
1306                         while (p && p->level < cl->level)
1307                                 p = p->parent;
1308
1309                         if (p != cl)
1310                                 continue;
1311
1312                         cl->bstats.bytes += c->bstats_bias.bytes;
1313                         cl->bstats.packets += c->bstats_bias.packets;
1314                         if (c->level == 0) {
1315                                 cl->bstats.bytes += c->leaf.q->bstats.bytes;
1316                                 cl->bstats.packets += c->leaf.q->bstats.packets;
1317                         }
1318                 }
1319         }
1320 }
1321
1322 static int
1323 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1324 {
1325         struct htb_class *cl = (struct htb_class *)arg;
1326         struct htb_sched *q = qdisc_priv(sch);
1327         struct gnet_stats_queue qs = {
1328                 .drops = cl->drops,
1329                 .overlimits = cl->overlimits,
1330         };
1331         __u32 qlen = 0;
1332
1333         if (!cl->level && cl->leaf.q)
1334                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1335
1336         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1337                                     INT_MIN, INT_MAX);
1338         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1339                                      INT_MIN, INT_MAX);
1340
1341         if (q->offload) {
1342                 if (!cl->level) {
1343                         if (cl->leaf.q)
1344                                 cl->bstats = cl->leaf.q->bstats;
1345                         else
1346                                 memset(&cl->bstats, 0, sizeof(cl->bstats));
1347                         cl->bstats.bytes += cl->bstats_bias.bytes;
1348                         cl->bstats.packets += cl->bstats_bias.packets;
1349                 } else {
1350                         htb_offload_aggregate_stats(q, cl);
1351                 }
1352         }
1353
1354         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1355                                   d, NULL, &cl->bstats) < 0 ||
1356             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1357             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1358                 return -1;
1359
1360         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1361 }
1362
1363 static struct netdev_queue *
1364 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1365 {
1366         struct net_device *dev = qdisc_dev(sch);
1367         struct tc_htb_qopt_offload offload_opt;
1368         struct htb_sched *q = qdisc_priv(sch);
1369         int err;
1370
1371         if (!q->offload)
1372                 return sch->dev_queue;
1373
1374         offload_opt = (struct tc_htb_qopt_offload) {
1375                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1376                 .classid = TC_H_MIN(tcm->tcm_parent),
1377         };
1378         err = htb_offload(dev, &offload_opt);
1379         if (err || offload_opt.qid >= dev->num_tx_queues)
1380                 return NULL;
1381         return netdev_get_tx_queue(dev, offload_opt.qid);
1382 }
1383
1384 static struct Qdisc *
1385 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1386 {
1387         struct net_device *dev = dev_queue->dev;
1388         struct Qdisc *old_q;
1389
1390         if (dev->flags & IFF_UP)
1391                 dev_deactivate(dev);
1392         old_q = dev_graft_qdisc(dev_queue, new_q);
1393         if (new_q)
1394                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1395         if (dev->flags & IFF_UP)
1396                 dev_activate(dev);
1397
1398         return old_q;
1399 }
1400
1401 static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1402 {
1403         struct netdev_queue *queue_old, *queue_new;
1404         struct net_device *dev = qdisc_dev(sch);
1405         struct Qdisc *qdisc;
1406
1407         queue_old = netdev_get_tx_queue(dev, qid_old);
1408         queue_new = netdev_get_tx_queue(dev, qid_new);
1409
1410         if (dev->flags & IFF_UP)
1411                 dev_deactivate(dev);
1412         qdisc = dev_graft_qdisc(queue_old, NULL);
1413         qdisc->dev_queue = queue_new;
1414         qdisc = dev_graft_qdisc(queue_new, qdisc);
1415         if (dev->flags & IFF_UP)
1416                 dev_activate(dev);
1417
1418         WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1419 }
1420
1421 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1422                      struct Qdisc **old, struct netlink_ext_ack *extack)
1423 {
1424         struct netdev_queue *dev_queue = sch->dev_queue;
1425         struct htb_class *cl = (struct htb_class *)arg;
1426         struct htb_sched *q = qdisc_priv(sch);
1427         struct Qdisc *old_q;
1428
1429         if (cl->level)
1430                 return -EINVAL;
1431
1432         if (q->offload) {
1433                 dev_queue = cl->leaf.q->dev_queue;
1434                 WARN_ON(new && dev_queue != new->dev_queue);
1435         }
1436
1437         if (!new) {
1438                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1439                                         cl->common.classid, extack);
1440                 if (!new)
1441                         return -ENOBUFS;
1442         }
1443
1444         if (q->offload) {
1445                 htb_set_lockdep_class_child(new);
1446                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1447                 qdisc_refcount_inc(new);
1448                 old_q = htb_graft_helper(dev_queue, new);
1449         }
1450
1451         *old = qdisc_replace(sch, new, &cl->leaf.q);
1452
1453         if (q->offload) {
1454                 WARN_ON(old_q != *old);
1455                 qdisc_put(old_q);
1456         }
1457
1458         return 0;
1459 }
1460
1461 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1462 {
1463         struct htb_class *cl = (struct htb_class *)arg;
1464         return !cl->level ? cl->leaf.q : NULL;
1465 }
1466
1467 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1468 {
1469         struct htb_class *cl = (struct htb_class *)arg;
1470
1471         htb_deactivate(qdisc_priv(sch), cl);
1472 }
1473
1474 static inline int htb_parent_last_child(struct htb_class *cl)
1475 {
1476         if (!cl->parent)
1477                 /* the root class */
1478                 return 0;
1479         if (cl->parent->children > 1)
1480                 /* not the last child */
1481                 return 0;
1482         return 1;
1483 }
1484
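/* Called when @cl is the last child of its parent and is being removed: the
 * parent becomes a leaf again.  It is taken off the wait queue if it was
 * throttled, its level drops to 0, @new_q (or noop_qdisc) becomes its
 * elementary qdisc, and its tokens are refilled so it restarts in
 * HTB_CAN_SEND state.
 */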
1485 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1486                                struct Qdisc *new_q)
1487 {
1488         struct htb_sched *q = qdisc_priv(sch);
1489         struct htb_class *parent = cl->parent;
1490
1491         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1492
1493         if (parent->cmode != HTB_CAN_SEND)
1494                 htb_safe_rb_erase(&parent->pq_node,
1495                                   &q->hlevel[parent->level].wait_pq);
1496
1497         parent->level = 0;
1498         memset(&parent->inner, 0, sizeof(parent->inner));
1499         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1500         parent->tokens = parent->buffer;
1501         parent->ctokens = parent->cbuffer;
1502         parent->t_c = ktime_get_ns();
1503         parent->cmode = HTB_CAN_SEND;
1504 }
1505
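/* Offload counterpart of htb_parent_to_leaf(): graft @new_q onto the
 * parent's tx queue and take the extra reference so that both cl->leaf.q
 * and dev_queue->qdisc hold one.  The queue is expected to carry only a
 * builtin qdisc at this point.
 */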
1506 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1507                                        struct netdev_queue *dev_queue,
1508                                        struct Qdisc *new_q)
1509 {
1510         struct Qdisc *old_q;
1511
1512         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1513         qdisc_refcount_inc(new_q);
1514         old_q = htb_graft_helper(dev_queue, new_q);
1515         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1516 }
1517
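/* Per-class teardown for the offload case.  Unless the whole qdisc is being
 * destroyed, the leaf qdisc is first taken back from its tx queue.  The
 * class's byte/packet counts are folded into the parent's bstats_bias so
 * they are not lost when the class goes away, and the driver is notified
 * with TC_HTB_LEAF_DEL or TC_HTB_LEAF_DEL_LAST(_FORCE); if the driver
 * reports a moved queue id, the affected qdisc is re-pointed below.
 */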
1518 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1519                                      bool last_child, bool destroying,
1520                                      struct netlink_ext_ack *extack)
1521 {
1522         struct tc_htb_qopt_offload offload_opt;
1523         struct Qdisc *q = cl->leaf.q;
1524         struct Qdisc *old = NULL;
1525         int err;
1526
1527         if (cl->level)
1528                 return -EINVAL;
1529
1530         WARN_ON(!q);
1531         if (!destroying) {
1532                 /* On destroy of HTB, two cases are possible:
1533                  * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
1534                  * 2. q is a noop qdisc (for nodes that were inner),
1535                  *    q->dev_queue is noop_netdev_queue.
1536                  */
1537                 old = htb_graft_helper(q->dev_queue, NULL);
1538                 WARN_ON(!old);
1539                 WARN_ON(old != q);
1540         }
1541
1542         if (cl->parent) {
1543                 cl->parent->bstats_bias.bytes += q->bstats.bytes;
1544                 cl->parent->bstats_bias.packets += q->bstats.packets;
1545         }
1546
1547         offload_opt = (struct tc_htb_qopt_offload) {
1548                 .command = !last_child ? TC_HTB_LEAF_DEL :
1549                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1550                            TC_HTB_LEAF_DEL_LAST,
1551                 .classid = cl->common.classid,
1552                 .extack = extack,
1553         };
1554         err = htb_offload(qdisc_dev(sch), &offload_opt);
1555
1556         if (!err || destroying)
1557                 qdisc_put(old);
1558         else
1559                 htb_graft_helper(q->dev_queue, old);
1560
1561         if (last_child)
1562                 return err;
1563
1564         if (!err && offload_opt.moved_qid != 0) {
1565                 if (destroying)
1566                         q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1567                                                            offload_opt.qid);
1568                 else
1569                         htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1570                                                offload_opt.qid);
1571         }
1572
1573         return err;
1574 }
1575
1576 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1577 {
1578         if (!cl->level) {
1579                 WARN_ON(!cl->leaf.q);
1580                 qdisc_put(cl->leaf.q);
1581         }
1582         gen_kill_estimator(&cl->rate_est);
1583         tcf_block_put(cl->block);
1584         kfree(cl);
1585 }
1586
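/* ->destroy handler.  Filters are dropped before the classes they may point
 * to (see the comment below), then classes are destroyed leaves-first so
 * that, in offload mode, inner classes can be folded back into leaves
 * before being freed; the offload itself is torn down with TC_HTB_DESTROY
 * at the end.
 */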
1587 static void htb_destroy(struct Qdisc *sch)
1588 {
1589         struct net_device *dev = qdisc_dev(sch);
1590         struct tc_htb_qopt_offload offload_opt;
1591         struct htb_sched *q = qdisc_priv(sch);
1592         struct hlist_node *next;
1593         bool nonempty, changed;
1594         struct htb_class *cl;
1595         unsigned int i;
1596
1597         cancel_work_sync(&q->work);
1598         qdisc_watchdog_cancel(&q->watchdog);
1599         /* This line used to be after htb_destroy_class call below
1600          * and, surprisingly, it worked in 2.4. But it must precede it,
1601          * because filters need their target class alive to be able to
1602          * call unbind_filter on it (without an Oops).
1603          */
1604         tcf_block_put(q->block);
1605
1606         for (i = 0; i < q->clhash.hashsize; i++) {
1607                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1608                         tcf_block_put(cl->block);
1609                         cl->block = NULL;
1610                 }
1611         }
1612
1613         do {
1614                 nonempty = false;
1615                 changed = false;
1616                 for (i = 0; i < q->clhash.hashsize; i++) {
1617                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1618                                                   common.hnode) {
1619                                 bool last_child;
1620
1621                                 if (!q->offload) {
1622                                         htb_destroy_class(sch, cl);
1623                                         continue;
1624                                 }
1625
1626                                 nonempty = true;
1627
1628                                 if (cl->level)
1629                                         continue;
1630
1631                                 changed = true;
1632
1633                                 last_child = htb_parent_last_child(cl);
1634                                 htb_destroy_class_offload(sch, cl, last_child,
1635                                                           true, NULL);
1636                                 qdisc_class_hash_remove(&q->clhash,
1637                                                         &cl->common);
1638                                 if (cl->parent)
1639                                         cl->parent->children--;
1640                                 if (last_child)
1641                                         htb_parent_to_leaf(sch, cl, NULL);
1642                                 htb_destroy_class(sch, cl);
1643                         }
1644                 }
1645         } while (changed);
1646         WARN_ON(nonempty);
1647
1648         qdisc_class_hash_destroy(&q->clhash);
1649         __qdisc_reset_queue(&q->direct_queue);
1650
1651         if (!q->offload)
1652                 return;
1653
1654         offload_opt = (struct tc_htb_qopt_offload) {
1655                 .command = TC_HTB_DESTROY,
1656         };
1657         htb_offload(dev, &offload_opt);
1658
1659         if (!q->direct_qdiscs)
1660                 return;
1661         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1662                 qdisc_put(q->direct_qdiscs[i]);
1663         kfree(q->direct_qdiscs);
1664 }
1665
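/* ->delete handler.  A class with children or with filters still bound to
 * it cannot be removed.  If the deleted class is the last child of its
 * parent, the parent is turned back into a leaf with a fresh pfifo; in
 * offload mode that pfifo is grafted onto the parent's tx queue first.
 */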
1666 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1667                       struct netlink_ext_ack *extack)
1668 {
1669         struct htb_sched *q = qdisc_priv(sch);
1670         struct htb_class *cl = (struct htb_class *)arg;
1671         struct Qdisc *new_q = NULL;
1672         int last_child = 0;
1673         int err;
1674
1675         /* TODO: why don't we allow deleting a subtree? references? does
1676          * the tc subsystem guarantee that in htb_destroy it holds no class
1677          * refs, so that we could remove the children safely there?
1678          */
1679         if (cl->children || cl->filter_cnt)
1680                 return -EBUSY;
1681
1682         if (!cl->level && htb_parent_last_child(cl))
1683                 last_child = 1;
1684
1685         if (q->offload) {
1686                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1687                                                 extack);
1688                 if (err)
1689                         return err;
1690         }
1691
1692         if (last_child) {
1693                 struct netdev_queue *dev_queue;
1694
1695                 dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1696                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1697                                           cl->parent->common.classid,
1698                                           NULL);
1699                 if (q->offload) {
1700                         if (new_q) {
1701                                 htb_set_lockdep_class_child(new_q);
1702                                 htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1703                         }
1704                 }
1705         }
1706
1707         sch_tree_lock(sch);
1708
1709         if (!cl->level)
1710                 qdisc_purge_queue(cl->leaf.q);
1711
1712         /* delete from hash and active lists; the rest is in htb_destroy_class */
1713         qdisc_class_hash_remove(&q->clhash, &cl->common);
1714         if (cl->parent)
1715                 cl->parent->children--;
1716
1717         if (cl->prio_activity)
1718                 htb_deactivate(q, cl);
1719
1720         if (cl->cmode != HTB_CAN_SEND)
1721                 htb_safe_rb_erase(&cl->pq_node,
1722                                   &q->hlevel[cl->level].wait_pq);
1723
1724         if (last_child)
1725                 htb_parent_to_leaf(sch, cl, new_q);
1726
1727         sch_tree_unlock(sch);
1728
1729         htb_destroy_class(sch, cl);
1730         return 0;
1731 }
1732
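/* ->change handler: create a new class or modify an existing one.  An
 * illustrative tc invocation (not taken from this file) would be roughly:
 *
 *   tc class add dev eth0 parent 1: classid 1:10 htb rate 100mbit ceil 200mbit
 *
 * When a new class is created under an existing leaf, that leaf is turned
 * into an inner node; in offload mode this maps to TC_HTB_LEAF_TO_INNER,
 * while other new leaves get a tx queue via TC_HTB_LEAF_ALLOC_QUEUE and
 * modifications are passed down as TC_HTB_NODE_MODIFY.
 */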
1733 static int htb_change_class(struct Qdisc *sch, u32 classid,
1734                             u32 parentid, struct nlattr **tca,
1735                             unsigned long *arg, struct netlink_ext_ack *extack)
1736 {
1737         int err = -EINVAL;
1738         struct htb_sched *q = qdisc_priv(sch);
1739         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1740         struct tc_htb_qopt_offload offload_opt;
1741         struct nlattr *opt = tca[TCA_OPTIONS];
1742         struct nlattr *tb[TCA_HTB_MAX + 1];
1743         struct Qdisc *parent_qdisc = NULL;
1744         struct netdev_queue *dev_queue;
1745         struct tc_htb_opt *hopt;
1746         u64 rate64, ceil64;
1747         int warn = 0;
1748
1749         /* extract all subattrs from opt attr */
1750         if (!opt)
1751                 goto failure;
1752
1753         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1754                                           NULL);
1755         if (err < 0)
1756                 goto failure;
1757
1758         err = -EINVAL;
1759         if (tb[TCA_HTB_PARMS] == NULL)
1760                 goto failure;
1761
1762         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1763
1764         hopt = nla_data(tb[TCA_HTB_PARMS]);
1765         if (!hopt->rate.rate || !hopt->ceil.rate)
1766                 goto failure;
1767
1768         /* Keep backward compatibility with rate_table based iproute2 tc */
1769         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1770                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1771                                               NULL));
1772
1773         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1774                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1775                                               NULL));
1776
1777         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1778         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1779
1780         if (!cl) {              /* new class */
1781                 struct net_device *dev = qdisc_dev(sch);
1782                 struct Qdisc *new_q, *old_q;
1783                 int prio;
1784                 struct {
1785                         struct nlattr           nla;
1786                         struct gnet_estimator   opt;
1787                 } est = {
1788                         .nla = {
1789                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1790                                 .nla_type       = TCA_RATE,
1791                         },
1792                         .opt = {
1793                                 /* 4s interval, 16s averaging constant */
1794                                 .interval       = 2,
1795                                 .ewma_log       = 2,
1796                         },
1797                 };
1798
1799                 /* check for valid classid */
1800                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1801                     htb_find(classid, sch))
1802                         goto failure;
1803
1804                 /* check maximal depth */
1805                 if (parent && parent->parent && parent->parent->level < 2) {
1806                         pr_err("htb: tree is too deep\n");
1807                         goto failure;
1808                 }
1809                 err = -ENOBUFS;
1810                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1811                 if (!cl)
1812                         goto failure;
1813
1814                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1815                 if (err) {
1816                         kfree(cl);
1817                         goto failure;
1818                 }
1819                 if (htb_rate_est || tca[TCA_RATE]) {
1820                         err = gen_new_estimator(&cl->bstats, NULL,
1821                                                 &cl->rate_est,
1822                                                 NULL,
1823                                                 qdisc_root_sleeping_running(sch),
1824                                                 tca[TCA_RATE] ? : &est.nla);
1825                         if (err)
1826                                 goto err_block_put;
1827                 }
1828
1829                 cl->children = 0;
1830                 RB_CLEAR_NODE(&cl->pq_node);
1831
1832                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1833                         RB_CLEAR_NODE(&cl->node[prio]);
1834
1835                 cl->common.classid = classid;
1836
1837                 /* Make sure nothing interrupts us between the two
1838                  * ndo_setup_tc calls.
1839                  */
1840                 ASSERT_RTNL();
1841
1842                 /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1843                  * which can't be used inside of sch_tree_lock
1844                  * -- thanks to Karlis Peisenieks
1845                  */
1846                 if (!q->offload) {
1847                         dev_queue = sch->dev_queue;
1848                 } else if (!(parent && !parent->level)) {
1849                         /* Assign a dev_queue to this classid. */
1850                         offload_opt = (struct tc_htb_qopt_offload) {
1851                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1852                                 .classid = cl->common.classid,
1853                                 .parent_classid = parent ?
1854                                         TC_H_MIN(parent->common.classid) :
1855                                         TC_HTB_CLASSID_ROOT,
1856                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1857                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1858                                 .extack = extack,
1859                         };
1860                         err = htb_offload(dev, &offload_opt);
1861                         if (err) {
1862                                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1863                                        err);
1864                                 goto err_kill_estimator;
1865                         }
1866                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1867                 } else { /* First child. */
1868                         dev_queue = parent->leaf.q->dev_queue;
1869                         old_q = htb_graft_helper(dev_queue, NULL);
1870                         WARN_ON(old_q != parent->leaf.q);
1871                         offload_opt = (struct tc_htb_qopt_offload) {
1872                                 .command = TC_HTB_LEAF_TO_INNER,
1873                                 .classid = cl->common.classid,
1874                                 .parent_classid =
1875                                         TC_H_MIN(parent->common.classid),
1876                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1877                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1878                                 .extack = extack,
1879                         };
1880                         err = htb_offload(dev, &offload_opt);
1881                         if (err) {
1882                                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1883                                        err);
1884                                 htb_graft_helper(dev_queue, old_q);
1885                                 goto err_kill_estimator;
1886                         }
1887                         parent->bstats_bias.bytes += old_q->bstats.bytes;
1888                         parent->bstats_bias.packets += old_q->bstats.packets;
1889                         qdisc_put(old_q);
1890                 }
1891                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1892                                           classid, NULL);
1893                 if (q->offload) {
1894                         if (new_q) {
1895                                 htb_set_lockdep_class_child(new_q);
1896                                 /* One ref for cl->leaf.q, the other for
1897                                  * dev_queue->qdisc.
1898                                  */
1899                                 qdisc_refcount_inc(new_q);
1900                         }
1901                         old_q = htb_graft_helper(dev_queue, new_q);
1902                         /* No qdisc_put needed. */
1903                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1904                 }
1905                 sch_tree_lock(sch);
1906                 if (parent && !parent->level) {
1907                         /* turn parent into inner node */
1908                         qdisc_purge_queue(parent->leaf.q);
1909                         parent_qdisc = parent->leaf.q;
1910                         if (parent->prio_activity)
1911                                 htb_deactivate(q, parent);
1912
1913                         /* remove from the event list because of the level change */
1914                         if (parent->cmode != HTB_CAN_SEND) {
1915                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1916                                 parent->cmode = HTB_CAN_SEND;
1917                         }
1918                         parent->level = (parent->parent ? parent->parent->level
1919                                          : TC_HTB_MAXDEPTH) - 1;
1920                         memset(&parent->inner, 0, sizeof(parent->inner));
1921                 }
1922
1923                 /* the leaf (this class) needs an elementary qdisc */
1924                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1925
1926                 cl->parent = parent;
1927
1928                 /* set class to be in HTB_CAN_SEND state */
1929                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1930                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1931                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1932                 cl->t_c = ktime_get_ns();
1933                 cl->cmode = HTB_CAN_SEND;
1934
1935                 /* attach to the hash list and parent's family */
1936                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1937                 if (parent)
1938                         parent->children++;
1939                 if (cl->leaf.q != &noop_qdisc)
1940                         qdisc_hash_add(cl->leaf.q, true);
1941         } else {
1942                 if (tca[TCA_RATE]) {
1943                         err = gen_replace_estimator(&cl->bstats, NULL,
1944                                                     &cl->rate_est,
1945                                                     NULL,
1946                                                     qdisc_root_sleeping_running(sch),
1947                                                     tca[TCA_RATE]);
1948                         if (err)
1949                                 return err;
1950                 }
1951
1952                 if (q->offload) {
1953                         struct net_device *dev = qdisc_dev(sch);
1954
1955                         offload_opt = (struct tc_htb_qopt_offload) {
1956                                 .command = TC_HTB_NODE_MODIFY,
1957                                 .classid = cl->common.classid,
1958                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1959                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1960                                 .extack = extack,
1961                         };
1962                         err = htb_offload(dev, &offload_opt);
1963                         if (err)
1964                                 /* Estimator was replaced, and rollback may fail
1965                                  * as well, so we don't try to recover it, and
1966                                  * the estimator won't work properly with the
1967                                  * offload anyway, because bstats are updated
1968                                  * only when the stats are queried.
1969                                  */
1970                                 return err;
1971                 }
1972
1973                 sch_tree_lock(sch);
1974         }
1975
1976         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1977         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1978
1979         /* there used to be a nasty bug here: we have to check that the node
1980          * is really a leaf before changing cl->leaf !
1981          */
1982         if (!cl->level) {
1983                 u64 quantum = cl->rate.rate_bytes_ps;
1984
1985                 do_div(quantum, q->rate2quantum);
1986                 cl->quantum = min_t(u64, quantum, INT_MAX);
1987
1988                 if (!hopt->quantum && cl->quantum < 1000) {
1989                         warn = -1;
1990                         cl->quantum = 1000;
1991                 }
1992                 if (!hopt->quantum && cl->quantum > 200000) {
1993                         warn = 1;
1994                         cl->quantum = 200000;
1995                 }
1996                 if (hopt->quantum)
1997                         cl->quantum = hopt->quantum;
1998                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1999                         cl->prio = TC_HTB_NUMPRIO - 1;
2000         }
2001
2002         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2003         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2004
2005         sch_tree_unlock(sch);
2006         qdisc_put(parent_qdisc);
2007
2008         if (warn)
2009                 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2010                             cl->common.classid, (warn == -1 ? "small" : "big"));
2011
2012         qdisc_class_hash_grow(sch, &q->clhash);
2013
2014         *arg = (unsigned long)cl;
2015         return 0;
2016
2017 err_kill_estimator:
2018         gen_kill_estimator(&cl->rate_est);
2019 err_block_put:
2020         tcf_block_put(cl->block);
2021         kfree(cl);
2022 failure:
2023         return err;
2024 }
2025
2026 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2027                                        struct netlink_ext_ack *extack)
2028 {
2029         struct htb_sched *q = qdisc_priv(sch);
2030         struct htb_class *cl = (struct htb_class *)arg;
2031
2032         return cl ? cl->block : q->block;
2033 }
2034
2035 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2036                                      u32 classid)
2037 {
2038         struct htb_class *cl = htb_find(classid, sch);
2039
2040         /*if (cl && !cl->level) return 0;
2041          * The line above used to be there to prevent attaching filters to
2042          * leaves. But at least the tc_index filter uses this just to get the
2043          * class for other reasons, so we have to allow for it.
2044          * ----
2045          * 19.6.2002 As Werner explained, it is OK - bind filter is just
2046          * another way to "lock" the class - unlike "get", this lock can
2047          * be broken by the class during destroy, IIUC.
2048          */
2049         if (cl)
2050                 cl->filter_cnt++;
2051         return (unsigned long)cl;
2052 }
2053
2054 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2055 {
2056         struct htb_class *cl = (struct htb_class *)arg;
2057
2058         if (cl)
2059                 cl->filter_cnt--;
2060 }
2061
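/* Walk all classes for dumps, following the usual qdisc_walker
 * skip/count/stop protocol over the class hash table.
 */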
2062 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2063 {
2064         struct htb_sched *q = qdisc_priv(sch);
2065         struct htb_class *cl;
2066         unsigned int i;
2067
2068         if (arg->stop)
2069                 return;
2070
2071         for (i = 0; i < q->clhash.hashsize; i++) {
2072                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2073                         if (arg->count < arg->skip) {
2074                                 arg->count++;
2075                                 continue;
2076                         }
2077                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2078                                 arg->stop = 1;
2079                                 return;
2080                         }
2081                         arg->count++;
2082                 }
2083         }
2084 }
2085
2086 static const struct Qdisc_class_ops htb_class_ops = {
2087         .select_queue   =       htb_select_queue,
2088         .graft          =       htb_graft,
2089         .leaf           =       htb_leaf,
2090         .qlen_notify    =       htb_qlen_notify,
2091         .find           =       htb_search,
2092         .change         =       htb_change_class,
2093         .delete         =       htb_delete,
2094         .walk           =       htb_walk,
2095         .tcf_block      =       htb_tcf_block,
2096         .bind_tcf       =       htb_bind_filter,
2097         .unbind_tcf     =       htb_unbind_filter,
2098         .dump           =       htb_dump_class,
2099         .dump_stats     =       htb_dump_class_stats,
2100 };
2101
2102 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2103         .cl_ops         =       &htb_class_ops,
2104         .id             =       "htb",
2105         .priv_size      =       sizeof(struct htb_sched),
2106         .enqueue        =       htb_enqueue,
2107         .dequeue        =       htb_dequeue,
2108         .peek           =       qdisc_peek_dequeued,
2109         .init           =       htb_init,
2110         .attach         =       htb_attach,
2111         .reset          =       htb_reset,
2112         .destroy        =       htb_destroy,
2113         .dump           =       htb_dump,
2114         .owner          =       THIS_MODULE,
2115 };
2116
2117 static int __init htb_module_init(void)
2118 {
2119         return register_qdisc(&htb_qdisc_ops);
2120 }
2121 static void __exit htb_module_exit(void)
2122 {
2123         unregister_qdisc(&htb_qdisc_ops);
2124 }
2125
2126 module_init(htb_module_init)
2127 module_exit(htb_module_exit)
2128 MODULE_LICENSE("GPL");