1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. Leaves ALWAYS have level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
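/*
 * Illustrative userspace configuration (the interface name, rates and
 * classids below are examples, not part of the original source):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 60mbit  ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 40mbit  ceil 100mbit
 *
 * 1:10 and 1:20 are leaves (level 0), unclassified traffic falls back to
 * the "default 20" leaf, and both leaves may borrow from 1:1 up to ceil.
 */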
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
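/* Illustrative ways to flip the knob above (standard module-parameter
 * interfaces, shown as an example rather than taken from this file):
 *
 *   modprobe sch_htb htb_hysteresis=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 */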
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When a class changes from state 1->2 and disconnects from
83          * its parent's feed, we lose the ptr value and would start from
84          * the first child again. Here we store the classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_packed bstats;
117         struct gnet_stats_basic_packed bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                 } leaf;
129                 struct htb_class_inner {
130                         struct htb_prio clprio[TC_HTB_NUMPRIO];
131                 } inner;
132         };
133         s64                     pq_key;
134
135         int                     prio_activity;  /* for which prios are we active */
136         enum htb_cmode          cmode;          /* current mode of the class */
137         struct rb_node          pq_node;        /* node for event queue */
138         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
139
140         unsigned int drops ____cacheline_aligned_in_smp;
141         unsigned int            overlimits;
142 };
143
144 struct htb_level {
145         struct rb_root  wait_pq;
146         struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148
149 struct htb_sched {
150         struct Qdisc_class_hash clhash;
151         int                     defcls;         /* class where unclassified flows go to */
152         int                     rate2quantum;   /* quant = rate / rate2quantum */
153
154         /* filters for qdisc itself */
155         struct tcf_proto __rcu  *filter_list;
156         struct tcf_block        *block;
157
158 #define HTB_WARN_TOOMANYEVENTS  0x1
159         unsigned int            warned; /* only one warning */
160         int                     direct_qlen;
161         struct work_struct      work;
162
163         /* non shaped skbs; let them go directly thru */
164         struct qdisc_skb_head   direct_queue;
165         u32                     direct_pkts;
166         u32                     overlimits;
167
168         struct qdisc_watchdog   watchdog;
169
170         s64                     now;    /* cached dequeue time */
171
172         /* time of nearest event per level (row) */
173         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
174
175         int                     row_mask[TC_HTB_MAXDEPTH];
176
177         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
178
179         struct Qdisc            **direct_qdiscs;
180         unsigned int            num_direct_qdiscs;
181
182         bool                    offload;
183 };
184
185 /* find class in global hash table using given handle */
186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188         struct htb_sched *q = qdisc_priv(sch);
189         struct Qdisc_class_common *clc;
190
191         clc = qdisc_class_find(&q->clhash, handle);
192         if (clc == NULL)
193                 return NULL;
194         return container_of(clc, struct htb_class, common);
195 }
196
197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199         return (unsigned long)htb_find(handle, sch);
200 }
201 /**
202  * htb_classify - classify a packet into class
203  *
204  * It returns NULL if the packet should be dropped or -1 if the packet
205  * should be passed directly thru. In all other cases a leaf class is returned.
206  * We allow direct class selection by classid in skb->priority. Then we examine
207  * filters in the qdisc and in inner nodes (if a higher filter points to an
208  * inner node). If we end up with classid MAJOR:0 we enqueue the skb into the
209  * special internal fifo (direct). These packets then go directly thru. If we
210  * still have no valid leaf we try to use the MAJOR:default leaf. If that is
211  * still unsuccessful, we finish and return the direct queue.
212  */
213 #define HTB_DIRECT ((struct htb_class *)-1L)
214
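/*
 * Minimal caller sketch (illustrative; it mirrors what htb_enqueue() below
 * already does with the return value and is not an additional API):
 *
 *	cl = htb_classify(skb, sch, &ret);
 *	if (cl == HTB_DIRECT)	->	use q->direct_queue
 *	else if (!cl)		->	drop skb, report ret
 *	else			->	qdisc_enqueue() to cl->leaf.q
 */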
215 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216                                       int *qerr)
217 {
218         struct htb_sched *q = qdisc_priv(sch);
219         struct htb_class *cl;
220         struct tcf_result res;
221         struct tcf_proto *tcf;
222         int result;
223
224         /* allow selecting a class by setting skb->priority to a valid classid;
225          * note that nfmark can be used too by attaching a filter fw with no
226          * rules in it
227          */
228         if (skb->priority == sch->handle)
229                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
230         cl = htb_find(skb->priority, sch);
231         if (cl) {
232                 if (cl->level == 0)
233                         return cl;
234                 /* Start with inner filter chain if a non-leaf class is selected */
235                 tcf = rcu_dereference_bh(cl->filter_list);
236         } else {
237                 tcf = rcu_dereference_bh(q->filter_list);
238         }
239
240         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
241         while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
242 #ifdef CONFIG_NET_CLS_ACT
243                 switch (result) {
244                 case TC_ACT_QUEUED:
245                 case TC_ACT_STOLEN:
246                 case TC_ACT_TRAP:
247                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
248                         fallthrough;
249                 case TC_ACT_SHOT:
250                         return NULL;
251                 }
252 #endif
253                 cl = (void *)res.class;
254                 if (!cl) {
255                         if (res.classid == sch->handle)
256                                 return HTB_DIRECT;      /* X:0 (direct flow) */
257                         cl = htb_find(res.classid, sch);
258                         if (!cl)
259                                 break;  /* filter selected invalid classid */
260                 }
261                 if (!cl->level)
262                         return cl;      /* we hit leaf; return it */
263
264                 /* we have got inner class; apply inner filter chain */
265                 tcf = rcu_dereference_bh(cl->filter_list);
266         }
267         /* classification failed; try to use default class */
268         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
269         if (!cl || cl->level)
270                 return HTB_DIRECT;      /* bad default .. this is safe bet */
271         return cl;
272 }
273
274 /**
275  * htb_add_to_id_tree - adds class to the round robin list
276  *
277  * Routine adds the class to the list (actually a tree) sorted by classid.
278  * Make sure that the class is not already on such a list for the given prio.
279  */
280 static void htb_add_to_id_tree(struct rb_root *root,
281                                struct htb_class *cl, int prio)
282 {
283         struct rb_node **p = &root->rb_node, *parent = NULL;
284
285         while (*p) {
286                 struct htb_class *c;
287                 parent = *p;
288                 c = rb_entry(parent, struct htb_class, node[prio]);
289
290                 if (cl->common.classid > c->common.classid)
291                         p = &parent->rb_right;
292                 else
293                         p = &parent->rb_left;
294         }
295         rb_link_node(&cl->node[prio], parent, p);
296         rb_insert_color(&cl->node[prio], root);
297 }
298
299 /**
300  * htb_add_to_wait_tree - adds class to the event queue with delay
301  *
302  * The class is added to the priority event queue to indicate that the
303  * class will change its mode at time cl->pq_key (q->now + delay). Make
304  * sure that the class is not already in the queue.
305  */
306 static void htb_add_to_wait_tree(struct htb_sched *q,
307                                  struct htb_class *cl, s64 delay)
308 {
309         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
310
311         cl->pq_key = q->now + delay;
312         if (cl->pq_key == q->now)
313                 cl->pq_key++;
314
315         /* update the nearest event cache */
316         if (q->near_ev_cache[cl->level] > cl->pq_key)
317                 q->near_ev_cache[cl->level] = cl->pq_key;
318
319         while (*p) {
320                 struct htb_class *c;
321                 parent = *p;
322                 c = rb_entry(parent, struct htb_class, pq_node);
323                 if (cl->pq_key >= c->pq_key)
324                         p = &parent->rb_right;
325                 else
326                         p = &parent->rb_left;
327         }
328         rb_link_node(&cl->pq_node, parent, p);
329         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
330 }
331
332 /**
333  * htb_next_rb_node - finds next node in binary tree
334  *
335  * When we are past last key we return NULL.
336  * Average complexity is 2 steps per call.
337  */
338 static inline void htb_next_rb_node(struct rb_node **n)
339 {
340         *n = rb_next(*n);
341 }
342
343 /**
344  * htb_add_class_to_row - add class to its row
345  *
346  * The class is added to row at priorities marked in mask.
347  * It does nothing if mask == 0.
348  */
349 static inline void htb_add_class_to_row(struct htb_sched *q,
350                                         struct htb_class *cl, int mask)
351 {
352         q->row_mask[cl->level] |= mask;
353         while (mask) {
354                 int prio = ffz(~mask);
355                 mask &= ~(1 << prio);
356                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
357         }
358 }
359
360 /* If this triggers, it is a bug in this code, but it need not be fatal */
361 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
362 {
363         if (RB_EMPTY_NODE(rb)) {
364                 WARN_ON(1);
365         } else {
366                 rb_erase(rb, root);
367                 RB_CLEAR_NODE(rb);
368         }
369 }
370
371
372 /**
373  * htb_remove_class_from_row - removes class from its row
374  *
375  * The class is removed from row at priorities marked in mask.
376  * It does nothing if mask == 0.
377  */
378 static inline void htb_remove_class_from_row(struct htb_sched *q,
379                                                  struct htb_class *cl, int mask)
380 {
381         int m = 0;
382         struct htb_level *hlevel = &q->hlevel[cl->level];
383
384         while (mask) {
385                 int prio = ffz(~mask);
386                 struct htb_prio *hprio = &hlevel->hprio[prio];
387
388                 mask &= ~(1 << prio);
389                 if (hprio->ptr == cl->node + prio)
390                         htb_next_rb_node(&hprio->ptr);
391
392                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
393                 if (!hprio->row.rb_node)
394                         m |= 1 << prio;
395         }
396         q->row_mask[cl->level] &= ~m;
397 }
398
399 /**
400  * htb_activate_prios - creates active class's feed chain
401  *
402  * The class is connected to ancestors and/or appropriate rows
403  * for the priorities it is participating in. cl->cmode must be the new
404  * (activated) mode. It does nothing if cl->prio_activity == 0.
405  */
406 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
407 {
408         struct htb_class *p = cl->parent;
409         long m, mask = cl->prio_activity;
410
411         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
412                 m = mask;
413                 while (m) {
414                         int prio = ffz(~m);
415                         m &= ~(1 << prio);
416
417                         if (p->inner.clprio[prio].feed.rb_node)
418                                 /* parent already has its feed in use, so
419                                  * reset the bit in mask as the parent is already ok
420                                  */
421                                 mask &= ~(1 << prio);
422
423                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
424                 }
425                 p->prio_activity |= mask;
426                 cl = p;
427                 p = cl->parent;
428
429         }
430         if (cl->cmode == HTB_CAN_SEND && mask)
431                 htb_add_class_to_row(q, cl, mask);
432 }
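/*
 * Illustrative walk-through (classids are examples only): a MAY_BORROW leaf
 * 1:10 that is active for prio 2 (prio_activity 0x4) is attached to its
 * parent 1:1's feed[2]; if that feed was empty, the parent's prio_activity
 * gains bit 2 and the walk continues upward. The walk stops at the first
 * ancestor that is not MAY_BORROW; if that ancestor is CAN_SEND and the
 * prio bit survived in mask, it is inserted into its level's row via
 * htb_add_class_to_row().
 */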
433
434 /**
435  * htb_deactivate_prios - remove class from feed chain
436  *
437  * cl->cmode must represent old mode (before deactivation). It does
438  * nothing if cl->prio_activity == 0. Class is removed from all feed
439  * chains and rows.
440  */
441 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
442 {
443         struct htb_class *p = cl->parent;
444         long m, mask = cl->prio_activity;
445
446         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
447                 m = mask;
448                 mask = 0;
449                 while (m) {
450                         int prio = ffz(~m);
451                         m &= ~(1 << prio);
452
453                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
454                                 /* we are removing a child which is pointed to
455                                  * from the parent's feed - forget the pointer
456                                  * but remember the classid
457                                  */
458                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
459                                 p->inner.clprio[prio].ptr = NULL;
460                         }
461
462                         htb_safe_rb_erase(cl->node + prio,
463                                           &p->inner.clprio[prio].feed);
464
465                         if (!p->inner.clprio[prio].feed.rb_node)
466                                 mask |= 1 << prio;
467                 }
468
469                 p->prio_activity &= ~mask;
470                 cl = p;
471                 p = cl->parent;
472
473         }
474         if (cl->cmode == HTB_CAN_SEND && mask)
475                 htb_remove_class_from_row(q, cl, mask);
476 }
477
478 static inline s64 htb_lowater(const struct htb_class *cl)
479 {
480         if (htb_hysteresis)
481                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
482         else
483                 return 0;
484 }
485 static inline s64 htb_hiwater(const struct htb_class *cl)
486 {
487         if (htb_hysteresis)
488                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
489         else
490                 return 0;
491 }
492
493
494 /**
495  * htb_class_mode - computes and returns current class mode
496  *
497  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
498  * is not HTB_CAN_SEND then *diff is set to the time difference from now
499  * to the time when cl will change its state (later used for cl->pq_key).
500  * It is also worth noting that the class mode doesn't change simply
501  * at cl->{c,}tokens == 0; there can be a hysteresis in the
502  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
503  * mode transitions per time unit. The speed gain is about 1/6.
504  */
505 static inline enum htb_cmode
506 htb_class_mode(struct htb_class *cl, s64 *diff)
507 {
508         s64 toks;
509
510         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
511                 *diff = -toks;
512                 return HTB_CANT_SEND;
513         }
514
515         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
516                 return HTB_CAN_SEND;
517
518         *diff = -toks;
519         return HTB_MAY_BORROW;
520 }
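/*
 * Worked example with illustrative numbers (hysteresis off, so both water
 * marks are 0): with cl->ctokens == -3000 and *diff == 1000, toks is -2000,
 * which is below htb_lowater(), so the class is HTB_CANT_SEND and *diff is
 * set to 2000, i.e. the nanoseconds that still have to pass before the ceil
 * bucket becomes non-negative again.
 */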
521
522 /**
523  * htb_change_class_mode - changes class's mode
524  *
525  * This should be the only way to change a class's mode under normal
526  * circumstances. The routine will update the feed list linkage, change the
527  * mode and add the class to the wait event queue if appropriate. The new
528  * mode should be different from the old one and cl->pq_key has to be valid
529  * if changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
530  */
531 static void
532 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
533 {
534         enum htb_cmode new_mode = htb_class_mode(cl, diff);
535
536         if (new_mode == cl->cmode)
537                 return;
538
539         if (new_mode == HTB_CANT_SEND) {
540                 cl->overlimits++;
541                 q->overlimits++;
542         }
543
544         if (cl->prio_activity) {        /* not necessary: speed optimization */
545                 if (cl->cmode != HTB_CANT_SEND)
546                         htb_deactivate_prios(q, cl);
547                 cl->cmode = new_mode;
548                 if (new_mode != HTB_CANT_SEND)
549                         htb_activate_prios(q, cl);
550         } else
551                 cl->cmode = new_mode;
552 }
553
554 /**
555  * htb_activate - inserts leaf cl into appropriate active feeds
556  *
557  * Routine learns the (new) priority of the leaf and activates the feed
558  * chain for that prio. It can safely be called on an already active leaf.
560  */
561 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
562 {
563         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
564
565         if (!cl->prio_activity) {
566                 cl->prio_activity = 1 << cl->prio;
567                 htb_activate_prios(q, cl);
568         }
569 }
570
571 /**
572  * htb_deactivate - remove leaf cl from active feeds
573  *
574  * Make sure that the leaf is active. In other words, it can't be called
575  * with a non-active leaf.
576  */
577 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
578 {
579         WARN_ON(!cl->prio_activity);
580
581         htb_deactivate_prios(q, cl);
582         cl->prio_activity = 0;
583 }
584
585 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
586                        struct sk_buff **to_free)
587 {
588         int ret;
589         unsigned int len = qdisc_pkt_len(skb);
590         struct htb_sched *q = qdisc_priv(sch);
591         struct htb_class *cl = htb_classify(skb, sch, &ret);
592
593         if (cl == HTB_DIRECT) {
594                 /* enqueue to helper queue */
595                 if (q->direct_queue.qlen < q->direct_qlen) {
596                         __qdisc_enqueue_tail(skb, &q->direct_queue);
597                         q->direct_pkts++;
598                 } else {
599                         return qdisc_drop(skb, sch, to_free);
600                 }
601 #ifdef CONFIG_NET_CLS_ACT
602         } else if (!cl) {
603                 if (ret & __NET_XMIT_BYPASS)
604                         qdisc_qstats_drop(sch);
605                 __qdisc_drop(skb, to_free);
606                 return ret;
607 #endif
608         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
609                                         to_free)) != NET_XMIT_SUCCESS) {
610                 if (net_xmit_drop_count(ret)) {
611                         qdisc_qstats_drop(sch);
612                         cl->drops++;
613                 }
614                 return ret;
615         } else {
616                 htb_activate(q, cl);
617         }
618
619         sch->qstats.backlog += len;
620         sch->q.qlen++;
621         return NET_XMIT_SUCCESS;
622 }
623
624 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
625 {
626         s64 toks = diff + cl->tokens;
627
628         if (toks > cl->buffer)
629                 toks = cl->buffer;
630         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
631         if (toks <= -cl->mbuffer)
632                 toks = 1 - cl->mbuffer;
633
634         cl->tokens = toks;
635 }
636
637 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
638 {
639         s64 toks = diff + cl->ctokens;
640
641         if (toks > cl->cbuffer)
642                 toks = cl->cbuffer;
643         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
644         if (toks <= -cl->mbuffer)
645                 toks = 1 - cl->mbuffer;
646
647         cl->ctokens = toks;
648 }
649
650 /**
651  * htb_charge_class - charges amount "bytes" to leaf and ancestors
652  *
653  * Routine assumes that a packet "bytes" long was dequeued from leaf cl
654  * borrowing from "level". It accounts the bytes to the ceil leaky bucket
655  * for the leaf and all ancestors and to the rate bucket for ancestors at
656  * levels "level" and higher. It also handles a possible change of mode
657  * resulting from the update. Note that the mode can also increase here
658  * (MAY_BORROW to CAN_SEND) because we can use a more precise clock than
659  * the event queue. In such a case we remove the class from the event queue first.
660  */
661 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
662                              int level, struct sk_buff *skb)
663 {
664         int bytes = qdisc_pkt_len(skb);
665         enum htb_cmode old_mode;
666         s64 diff;
667
668         while (cl) {
669                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
670                 if (cl->level >= level) {
671                         if (cl->level == level)
672                                 cl->xstats.lends++;
673                         htb_accnt_tokens(cl, bytes, diff);
674                 } else {
675                         cl->xstats.borrows++;
676                         cl->tokens += diff;     /* we moved t_c; update tokens */
677                 }
678                 htb_accnt_ctokens(cl, bytes, diff);
679                 cl->t_c = q->now;
680
681                 old_mode = cl->cmode;
682                 diff = 0;
683                 htb_change_class_mode(q, cl, &diff);
684                 if (old_mode != cl->cmode) {
685                         if (old_mode != HTB_CAN_SEND)
686                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
687                         if (cl->cmode != HTB_CAN_SEND)
688                                 htb_add_to_wait_tree(q, cl, diff);
689                 }
690
691                 /* update basic stats except for leaves which are already updated */
692                 if (cl->level)
693                         bstats_update(&cl->bstats, skb);
694
695                 cl = cl->parent;
696         }
697 }
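/*
 * Illustrative charge (example, not from the original source): if a level-0
 * leaf transmits while borrowing at level 1, the leaf (level 0 < "level")
 * counts a borrow and is charged only on its ceil bucket, its level-1
 * parent (== "level") counts a lend and is charged on both rate and ceil,
 * and any higher ancestor is likewise charged on both buckets.
 */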
698
699 /**
700  * htb_do_events - make mode changes to classes at the level
701  *
702  * Scans event queue for pending events and applies them. Returns time of
703  * next pending event (0 for no event in pq, q->now for too many events).
704  * Note: Only events with cl->pq_key <= q->now are applied.
705  */
706 static s64 htb_do_events(struct htb_sched *q, const int level,
707                          unsigned long start)
708 {
709         /* don't run for longer than 2 jiffies; 2 is used instead of
710          * 1 to simplify things when jiffy is going to be incremented
711          * too soon
712          */
713         unsigned long stop_at = start + 2;
714         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
715
716         while (time_before(jiffies, stop_at)) {
717                 struct htb_class *cl;
718                 s64 diff;
719                 struct rb_node *p = rb_first(wait_pq);
720
721                 if (!p)
722                         return 0;
723
724                 cl = rb_entry(p, struct htb_class, pq_node);
725                 if (cl->pq_key > q->now)
726                         return cl->pq_key;
727
728                 htb_safe_rb_erase(p, wait_pq);
729                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
730                 htb_change_class_mode(q, cl, &diff);
731                 if (cl->cmode != HTB_CAN_SEND)
732                         htb_add_to_wait_tree(q, cl, diff);
733         }
734
735         /* too much load - let's continue after a break for scheduling */
736         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
737                 pr_warn("htb: too many events!\n");
738                 q->warned |= HTB_WARN_TOOMANYEVENTS;
739         }
740
741         return q->now;
742 }
743
744 /* Returns class->node+prio from the id-tree where the class's id is >= id.
745  * Returns NULL if no such class exists.
746  */
747 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
748                                               u32 id)
749 {
750         struct rb_node *r = NULL;
751         while (n) {
752                 struct htb_class *cl =
753                     rb_entry(n, struct htb_class, node[prio]);
754
755                 if (id > cl->common.classid) {
756                         n = n->rb_right;
757                 } else if (id < cl->common.classid) {
758                         r = n;
759                         n = n->rb_left;
760                 } else {
761                         return n;
762                 }
763         }
764         return r;
765 }
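/*
 * Example with illustrative classids: if the tree for this prio holds
 * classes 0x10010, 0x10020 and 0x10030, then id == 0x10015 returns the
 * 0x10020 node, id == 0x10020 returns that exact node, and any id greater
 * than 0x10030 returns NULL.
 */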
766
767 /**
768  * htb_lookup_leaf - returns next leaf class in DRR order
769  *
770  * Find the leaf that the current feed pointer points to.
771  */
772 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
773 {
774         int i;
775         struct {
776                 struct rb_node *root;
777                 struct rb_node **pptr;
778                 u32 *pid;
779         } stk[TC_HTB_MAXDEPTH], *sp = stk;
780
781         BUG_ON(!hprio->row.rb_node);
782         sp->root = hprio->row.rb_node;
783         sp->pptr = &hprio->ptr;
784         sp->pid = &hprio->last_ptr_id;
785
786         for (i = 0; i < 65535; i++) {
787                 if (!*sp->pptr && *sp->pid) {
788                         /* ptr was invalidated but id is valid - try to recover
789                          * the original or next ptr
790                          */
791                         *sp->pptr =
792                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
793                 }
794                 *sp->pid = 0;   /* ptr is valid now, so remove this hint as
795                                  * it can become out of date quickly
796                                  */
797                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
798                         *sp->pptr = sp->root;
799                         while ((*sp->pptr)->rb_left)
800                                 *sp->pptr = (*sp->pptr)->rb_left;
801                         if (sp > stk) {
802                                 sp--;
803                                 if (!*sp->pptr) {
804                                         WARN_ON(1);
805                                         return NULL;
806                                 }
807                                 htb_next_rb_node(sp->pptr);
808                         }
809                 } else {
810                         struct htb_class *cl;
811                         struct htb_prio *clp;
812
813                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
814                         if (!cl->level)
815                                 return cl;
816                         clp = &cl->inner.clprio[prio];
817                         (++sp)->root = clp->feed.rb_node;
818                         sp->pptr = &clp->ptr;
819                         sp->pid = &clp->last_ptr_id;
820                 }
821         }
822         WARN_ON(1);
823         return NULL;
824 }
825
826 /* dequeues a packet at the given priority and level; call only if
827  * you are sure that there is an active class at prio/level
828  */
829 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
830                                         const int level)
831 {
832         struct sk_buff *skb = NULL;
833         struct htb_class *cl, *start;
834         struct htb_level *hlevel = &q->hlevel[level];
835         struct htb_prio *hprio = &hlevel->hprio[prio];
836
837         /* look initial class up in the row */
838         start = cl = htb_lookup_leaf(hprio, prio);
839
840         do {
841 next:
842                 if (unlikely(!cl))
843                         return NULL;
844
845                 /* class can be empty - it is unlikely but can be true if leaf
846                  * qdisc drops packets in enqueue routine or if someone used
847                  * graft operation on the leaf since last dequeue;
848                  * simply deactivate and skip such class
849                  */
850                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
851                         struct htb_class *next;
852                         htb_deactivate(q, cl);
853
854                         /* row/level might become empty */
855                         if ((q->row_mask[level] & (1 << prio)) == 0)
856                                 return NULL;
857
858                         next = htb_lookup_leaf(hprio, prio);
859
860                         if (cl == start)        /* fix start if we just deleted it */
861                                 start = next;
862                         cl = next;
863                         goto next;
864                 }
865
866                 skb = cl->leaf.q->dequeue(cl->leaf.q);
867                 if (likely(skb != NULL))
868                         break;
869
870                 qdisc_warn_nonwc("htb", cl->leaf.q);
871                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
872                                          &q->hlevel[0].hprio[prio].ptr);
873                 cl = htb_lookup_leaf(hprio, prio);
874
875         } while (cl != start);
876
877         if (likely(skb != NULL)) {
878                 bstats_update(&cl->bstats, skb);
879                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
880                 if (cl->leaf.deficit[level] < 0) {
881                         cl->leaf.deficit[level] += cl->quantum;
882                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
883                                                  &q->hlevel[0].hprio[prio].ptr);
884                 }
885                 /* this used to be after charge_class but this arrangement
886                  * gives us slightly better performance
887                  */
888                 if (!cl->leaf.q->q.qlen)
889                         htb_deactivate(q, cl);
890                 htb_charge_class(q, cl, level, skb);
891         }
892         return skb;
893 }
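/*
 * DRR bookkeeping above, with illustrative numbers: with quantum == 1500
 * and deficit[level] == 300, dequeuing a 1000-byte packet drives the
 * deficit to -700, so one quantum is added (deficit becomes 800) and the
 * feed pointer is advanced; while the deficit stays non-negative the same
 * leaf keeps being returned by htb_lookup_leaf().
 */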
894
895 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
896 {
897         struct sk_buff *skb;
898         struct htb_sched *q = qdisc_priv(sch);
899         int level;
900         s64 next_event;
901         unsigned long start_at;
902
903         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
904         skb = __qdisc_dequeue_head(&q->direct_queue);
905         if (skb != NULL) {
906 ok:
907                 qdisc_bstats_update(sch, skb);
908                 qdisc_qstats_backlog_dec(sch, skb);
909                 sch->q.qlen--;
910                 return skb;
911         }
912
913         if (!sch->q.qlen)
914                 goto fin;
915         q->now = ktime_get_ns();
916         start_at = jiffies;
917
918         next_event = q->now + 5LLU * NSEC_PER_SEC;
919
920         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
921                 /* common case optimization - skip event handler quickly */
922                 int m;
923                 s64 event = q->near_ev_cache[level];
924
925                 if (q->now >= event) {
926                         event = htb_do_events(q, level, start_at);
927                         if (!event)
928                                 event = q->now + NSEC_PER_SEC;
929                         q->near_ev_cache[level] = event;
930                 }
931
932                 if (next_event > event)
933                         next_event = event;
934
935                 m = ~q->row_mask[level];
936                 while (m != (int)(-1)) {
937                         int prio = ffz(m);
938
939                         m |= 1 << prio;
940                         skb = htb_dequeue_tree(q, prio, level);
941                         if (likely(skb != NULL))
942                                 goto ok;
943                 }
944         }
945         if (likely(next_event > q->now))
946                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
947         else
948                 schedule_work(&q->work);
949 fin:
950         return skb;
951 }
952
953 /* reset all classes */
954 /* always called under BH & queue lock */
955 static void htb_reset(struct Qdisc *sch)
956 {
957         struct htb_sched *q = qdisc_priv(sch);
958         struct htb_class *cl;
959         unsigned int i;
960
961         for (i = 0; i < q->clhash.hashsize; i++) {
962                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
963                         if (cl->level)
964                                 memset(&cl->inner, 0, sizeof(cl->inner));
965                         else {
966                                 if (cl->leaf.q && !q->offload)
967                                         qdisc_reset(cl->leaf.q);
968                         }
969                         cl->prio_activity = 0;
970                         cl->cmode = HTB_CAN_SEND;
971                 }
972         }
973         qdisc_watchdog_cancel(&q->watchdog);
974         __qdisc_reset_queue(&q->direct_queue);
975         sch->q.qlen = 0;
976         sch->qstats.backlog = 0;
977         memset(q->hlevel, 0, sizeof(q->hlevel));
978         memset(q->row_mask, 0, sizeof(q->row_mask));
979 }
980
981 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
982         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
983         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
984         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
985         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
986         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
987         [TCA_HTB_RATE64] = { .type = NLA_U64 },
988         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
989         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
990 };
991
992 static void htb_work_func(struct work_struct *work)
993 {
994         struct htb_sched *q = container_of(work, struct htb_sched, work);
995         struct Qdisc *sch = q->watchdog.qdisc;
996
997         rcu_read_lock();
998         __netif_schedule(qdisc_root(sch));
999         rcu_read_unlock();
1000 }
1001
1002 static void htb_set_lockdep_class_child(struct Qdisc *q)
1003 {
1004         static struct lock_class_key child_key;
1005
1006         lockdep_set_class(qdisc_lock(q), &child_key);
1007 }
1008
1009 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1010 {
1011         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1012 }
1013
1014 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1015                     struct netlink_ext_ack *extack)
1016 {
1017         struct net_device *dev = qdisc_dev(sch);
1018         struct tc_htb_qopt_offload offload_opt;
1019         struct htb_sched *q = qdisc_priv(sch);
1020         struct nlattr *tb[TCA_HTB_MAX + 1];
1021         struct tc_htb_glob *gopt;
1022         unsigned int ntx;
1023         bool offload;
1024         int err;
1025
1026         qdisc_watchdog_init(&q->watchdog, sch);
1027         INIT_WORK(&q->work, htb_work_func);
1028
1029         if (!opt)
1030                 return -EINVAL;
1031
1032         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1033         if (err)
1034                 return err;
1035
1036         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1037                                           NULL);
1038         if (err < 0)
1039                 return err;
1040
1041         if (!tb[TCA_HTB_INIT])
1042                 return -EINVAL;
1043
1044         gopt = nla_data(tb[TCA_HTB_INIT]);
1045         if (gopt->version != HTB_VER >> 16)
1046                 return -EINVAL;
1047
1048         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1049
1050         if (offload) {
1051                 if (sch->parent != TC_H_ROOT)
1052                         return -EOPNOTSUPP;
1053
1054                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1055                         return -EOPNOTSUPP;
1056
1057                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1058                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1059                                            sizeof(*q->direct_qdiscs),
1060                                            GFP_KERNEL);
1061                 if (!q->direct_qdiscs)
1062                         return -ENOMEM;
1063         }
1064
1065         err = qdisc_class_hash_init(&q->clhash);
1066         if (err < 0)
1067                 goto err_free_direct_qdiscs;
1068
1069         qdisc_skb_head_init(&q->direct_queue);
1070
1071         if (tb[TCA_HTB_DIRECT_QLEN])
1072                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1073         else
1074                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1075
1076         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1077                 q->rate2quantum = 1;
1078         q->defcls = gopt->defcls;
1079
1080         if (!offload)
1081                 return 0;
1082
1083         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1084                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1085                 struct Qdisc *qdisc;
1086
1087                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1088                                           TC_H_MAKE(sch->handle, 0), extack);
1089                 if (!qdisc) {
1090                         err = -ENOMEM;
1091                         goto err_free_qdiscs;
1092                 }
1093
1094                 htb_set_lockdep_class_child(qdisc);
1095                 q->direct_qdiscs[ntx] = qdisc;
1096                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1097         }
1098
1099         sch->flags |= TCQ_F_MQROOT;
1100
1101         offload_opt = (struct tc_htb_qopt_offload) {
1102                 .command = TC_HTB_CREATE,
1103                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1104                 .classid = TC_H_MIN(q->defcls),
1105                 .extack = extack,
1106         };
1107         err = htb_offload(dev, &offload_opt);
1108         if (err)
1109                 goto err_free_qdiscs;
1110
1111         /* Defer this assignment, so that htb_destroy skips offload-related
1112          * parts (especially calling ndo_setup_tc) on errors.
1113          */
1114         q->offload = true;
1115
1116         return 0;
1117
1118 err_free_qdiscs:
1119         for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1120              ntx++)
1121                 qdisc_put(q->direct_qdiscs[ntx]);
1122
1123         qdisc_class_hash_destroy(&q->clhash);
1124         /* Prevent use-after-free and double-free when htb_destroy gets called.
1125          */
1126         q->clhash.hash = NULL;
1127         q->clhash.hashsize = 0;
1128
1129 err_free_direct_qdiscs:
1130         kfree(q->direct_qdiscs);
1131         q->direct_qdiscs = NULL;
1132         return err;
1133 }
1134
1135 static void htb_attach_offload(struct Qdisc *sch)
1136 {
1137         struct net_device *dev = qdisc_dev(sch);
1138         struct htb_sched *q = qdisc_priv(sch);
1139         unsigned int ntx;
1140
1141         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1142                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1143
1144                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1145                 qdisc_put(old);
1146                 qdisc_hash_add(qdisc, false);
1147         }
1148         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1149                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1150                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1151
1152                 qdisc_put(old);
1153         }
1154
1155         kfree(q->direct_qdiscs);
1156         q->direct_qdiscs = NULL;
1157 }
1158
1159 static void htb_attach_software(struct Qdisc *sch)
1160 {
1161         struct net_device *dev = qdisc_dev(sch);
1162         unsigned int ntx;
1163
1164         /* Resemble qdisc_graft behavior. */
1165         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1166                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1167                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1168
1169                 qdisc_refcount_inc(sch);
1170
1171                 qdisc_put(old);
1172         }
1173 }
1174
1175 static void htb_attach(struct Qdisc *sch)
1176 {
1177         struct htb_sched *q = qdisc_priv(sch);
1178
1179         if (q->offload)
1180                 htb_attach_offload(sch);
1181         else
1182                 htb_attach_software(sch);
1183 }
1184
1185 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1186 {
1187         struct htb_sched *q = qdisc_priv(sch);
1188         struct nlattr *nest;
1189         struct tc_htb_glob gopt;
1190
1191         if (q->offload)
1192                 sch->flags |= TCQ_F_OFFLOADED;
1193         else
1194                 sch->flags &= ~TCQ_F_OFFLOADED;
1195
1196         sch->qstats.overlimits = q->overlimits;
1197         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1198          * no change can happen to the qdisc parameters.
1199          */
1200
1201         gopt.direct_pkts = q->direct_pkts;
1202         gopt.version = HTB_VER;
1203         gopt.rate2quantum = q->rate2quantum;
1204         gopt.defcls = q->defcls;
1205         gopt.debug = 0;
1206
1207         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1208         if (nest == NULL)
1209                 goto nla_put_failure;
1210         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1211             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1212                 goto nla_put_failure;
1213         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1214                 goto nla_put_failure;
1215
1216         return nla_nest_end(skb, nest);
1217
1218 nla_put_failure:
1219         nla_nest_cancel(skb, nest);
1220         return -1;
1221 }
1222
1223 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1224                           struct sk_buff *skb, struct tcmsg *tcm)
1225 {
1226         struct htb_class *cl = (struct htb_class *)arg;
1227         struct htb_sched *q = qdisc_priv(sch);
1228         struct nlattr *nest;
1229         struct tc_htb_opt opt;
1230
1231         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1232          * no change can happen to the class parameters.
1233          */
1234         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1235         tcm->tcm_handle = cl->common.classid;
1236         if (!cl->level && cl->leaf.q)
1237                 tcm->tcm_info = cl->leaf.q->handle;
1238
1239         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1240         if (nest == NULL)
1241                 goto nla_put_failure;
1242
1243         memset(&opt, 0, sizeof(opt));
1244
1245         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1246         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1247         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1248         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1249         opt.quantum = cl->quantum;
1250         opt.prio = cl->prio;
1251         opt.level = cl->level;
1252         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1253                 goto nla_put_failure;
1254         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1255                 goto nla_put_failure;
1256         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1257             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1258                               TCA_HTB_PAD))
1259                 goto nla_put_failure;
1260         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1261             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1262                               TCA_HTB_PAD))
1263                 goto nla_put_failure;
1264
1265         return nla_nest_end(skb, nest);
1266
1267 nla_put_failure:
1268         nla_nest_cancel(skb, nest);
1269         return -1;
1270 }
1271
1272 static void htb_offload_aggregate_stats(struct htb_sched *q,
1273                                         struct htb_class *cl)
1274 {
1275         struct htb_class *c;
1276         unsigned int i;
1277
1278         memset(&cl->bstats, 0, sizeof(cl->bstats));
1279
1280         for (i = 0; i < q->clhash.hashsize; i++) {
1281                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1282                         struct htb_class *p = c;
1283
1284                         while (p && p->level < cl->level)
1285                                 p = p->parent;
1286
1287                         if (p != cl)
1288                                 continue;
1289
1290                         cl->bstats.bytes += c->bstats_bias.bytes;
1291                         cl->bstats.packets += c->bstats_bias.packets;
1292                         if (c->level == 0) {
1293                                 cl->bstats.bytes += c->leaf.q->bstats.bytes;
1294                                 cl->bstats.packets += c->leaf.q->bstats.packets;
1295                         }
1296                 }
1297         }
1298 }
1299
1300 static int
1301 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1302 {
1303         struct htb_class *cl = (struct htb_class *)arg;
1304         struct htb_sched *q = qdisc_priv(sch);
1305         struct gnet_stats_queue qs = {
1306                 .drops = cl->drops,
1307                 .overlimits = cl->overlimits,
1308         };
1309         __u32 qlen = 0;
1310
1311         if (!cl->level && cl->leaf.q)
1312                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1313
1314         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1315                                     INT_MIN, INT_MAX);
1316         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1317                                      INT_MIN, INT_MAX);
1318
1319         if (q->offload) {
1320                 if (!cl->level) {
1321                         if (cl->leaf.q)
1322                                 cl->bstats = cl->leaf.q->bstats;
1323                         else
1324                                 memset(&cl->bstats, 0, sizeof(cl->bstats));
1325                         cl->bstats.bytes += cl->bstats_bias.bytes;
1326                         cl->bstats.packets += cl->bstats_bias.packets;
1327                 } else {
1328                         htb_offload_aggregate_stats(q, cl);
1329                 }
1330         }
1331
1332         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1333                                   d, NULL, &cl->bstats) < 0 ||
1334             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1335             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1336                 return -1;
1337
1338         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1339 }
1340
1341 static struct netdev_queue *
1342 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1343 {
1344         struct net_device *dev = qdisc_dev(sch);
1345         struct tc_htb_qopt_offload offload_opt;
1346         struct htb_sched *q = qdisc_priv(sch);
1347         int err;
1348
1349         if (!q->offload)
1350                 return sch->dev_queue;
1351
1352         offload_opt = (struct tc_htb_qopt_offload) {
1353                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1354                 .classid = TC_H_MIN(tcm->tcm_parent),
1355         };
1356         err = htb_offload(dev, &offload_opt);
1357         if (err || offload_opt.qid >= dev->num_tx_queues)
1358                 return NULL;
1359         return netdev_get_tx_queue(dev, offload_opt.qid);
1360 }
1361
1362 static struct Qdisc *
1363 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1364 {
1365         struct net_device *dev = dev_queue->dev;
1366         struct Qdisc *old_q;
1367
1368         if (dev->flags & IFF_UP)
1369                 dev_deactivate(dev);
1370         old_q = dev_graft_qdisc(dev_queue, new_q);
1371         if (new_q)
1372                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1373         if (dev->flags & IFF_UP)
1374                 dev_activate(dev);
1375
1376         return old_q;
1377 }
1378
1379 static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1380 {
1381         struct netdev_queue *queue_old, *queue_new;
1382         struct net_device *dev = qdisc_dev(sch);
1383         struct Qdisc *qdisc;
1384
1385         queue_old = netdev_get_tx_queue(dev, qid_old);
1386         queue_new = netdev_get_tx_queue(dev, qid_new);
1387
1388         if (dev->flags & IFF_UP)
1389                 dev_deactivate(dev);
1390         qdisc = dev_graft_qdisc(queue_old, NULL);
1391         qdisc->dev_queue = queue_new;
1392         qdisc = dev_graft_qdisc(queue_new, qdisc);
1393         if (dev->flags & IFF_UP)
1394                 dev_activate(dev);
1395
1396         WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1397 }
1398
1399 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1400                      struct Qdisc **old, struct netlink_ext_ack *extack)
1401 {
1402         struct netdev_queue *dev_queue = sch->dev_queue;
1403         struct htb_class *cl = (struct htb_class *)arg;
1404         struct htb_sched *q = qdisc_priv(sch);
1405         struct Qdisc *old_q;
1406
1407         if (cl->level)
1408                 return -EINVAL;
1409
1410         if (q->offload) {
1411                 dev_queue = new->dev_queue;
1412                 WARN_ON(dev_queue != cl->leaf.q->dev_queue);
1413         }
1414
1415         if (!new) {
1416                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1417                                         cl->common.classid, extack);
1418                 if (!new)
1419                         return -ENOBUFS;
1420         }
1421
1422         if (q->offload) {
1423                 htb_set_lockdep_class_child(new);
1424                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1425                 qdisc_refcount_inc(new);
1426                 old_q = htb_graft_helper(dev_queue, new);
1427         }
1428
1429         *old = qdisc_replace(sch, new, &cl->leaf.q);
1430
1431         if (q->offload) {
1432                 WARN_ON(old_q != *old);
1433                 qdisc_put(old_q);
1434         }
1435
1436         return 0;
1437 }
1438
1439 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1440 {
1441         struct htb_class *cl = (struct htb_class *)arg;
1442         return !cl->level ? cl->leaf.q : NULL;
1443 }
1444
1445 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1446 {
1447         struct htb_class *cl = (struct htb_class *)arg;
1448
1449         htb_deactivate(qdisc_priv(sch), cl);
1450 }
1451
1452 static inline int htb_parent_last_child(struct htb_class *cl)
1453 {
1454         if (!cl->parent)
1455                 /* the root class */
1456                 return 0;
1457         if (cl->parent->children > 1)
1458                 /* not the last child */
1459                 return 0;
1460         return 1;
1461 }
1462
1463 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1464                                struct Qdisc *new_q)
1465 {
1466         struct htb_sched *q = qdisc_priv(sch);
1467         struct htb_class *parent = cl->parent;
1468
1469         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1470
1471         if (parent->cmode != HTB_CAN_SEND)
1472                 htb_safe_rb_erase(&parent->pq_node,
1473                                   &q->hlevel[parent->level].wait_pq);
1474
1475         parent->level = 0;
1476         memset(&parent->inner, 0, sizeof(parent->inner));
1477         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1478         parent->tokens = parent->buffer;
1479         parent->ctokens = parent->cbuffer;
1480         parent->t_c = ktime_get_ns();
1481         parent->cmode = HTB_CAN_SEND;
1482 }
1483
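/* Offload counterpart of htb_parent_to_leaf(): also graft @new_q onto the TX
 * queue, taking the second reference that dev_queue->qdisc will hold.
 */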
1484 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1485                                        struct netdev_queue *dev_queue,
1486                                        struct Qdisc *new_q)
1487 {
1488         struct Qdisc *old_q;
1489
1490         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1491         if (new_q)
1492                 qdisc_refcount_inc(new_q);
1493         old_q = htb_graft_helper(dev_queue, new_q);
1494         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1495 }
1496
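/* Undo the offloaded state of a leaf class: detach its qdisc from the TX
 * queue (unless the whole qdisc is being destroyed), fold its counters into
 * the parent's bstats_bias and send the appropriate TC_HTB_LEAF_DEL* command
 * to the driver, relocating a moved queue's qdisc if necessary.
 */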
1497 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1498                                      bool last_child, bool destroying,
1499                                      struct netlink_ext_ack *extack)
1500 {
1501         struct tc_htb_qopt_offload offload_opt;
1502         struct Qdisc *q = cl->leaf.q;
1503         struct Qdisc *old = NULL;
1504         int err;
1505
1506         if (cl->level)
1507                 return -EINVAL;
1508
1509         WARN_ON(!q);
1510         if (!destroying) {
1511                 /* On destroy of the whole HTB, two cases are possible:
1512                  * 1. q is a normal qdisc, but q->dev_queue has a noop qdisc;
1513                  * 2. q is a noop qdisc (for nodes that were inner) and
1514                  *    q->dev_queue is noop_netdev_queue; nothing to graft back.
1515                  */
1516                 old = htb_graft_helper(q->dev_queue, NULL);
1517                 WARN_ON(!old);
1518                 WARN_ON(old != q);
1519         }
1520
1521         if (cl->parent) {
1522                 cl->parent->bstats_bias.bytes += q->bstats.bytes;
1523                 cl->parent->bstats_bias.packets += q->bstats.packets;
1524         }
1525
1526         offload_opt = (struct tc_htb_qopt_offload) {
1527                 .command = !last_child ? TC_HTB_LEAF_DEL :
1528                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1529                            TC_HTB_LEAF_DEL_LAST,
1530                 .classid = cl->common.classid,
1531                 .extack = extack,
1532         };
1533         err = htb_offload(qdisc_dev(sch), &offload_opt);
1534
1535         if (!err || destroying)
1536                 qdisc_put(old);
1537         else
1538                 htb_graft_helper(q->dev_queue, old);
1539
1540         if (last_child)
1541                 return err;
1542
1543         if (!err && offload_opt.moved_qid != 0) {
1544                 if (destroying)
1545                         q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1546                                                            offload_opt.qid);
1547                 else
1548                         htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1549                                                offload_opt.qid);
1550         }
1551
1552         return err;
1553 }
1554
1555 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1556 {
1557         if (!cl->level) {
1558                 WARN_ON(!cl->leaf.q);
1559                 qdisc_put(cl->leaf.q);
1560         }
1561         gen_kill_estimator(&cl->rate_est);
1562         tcf_block_put(cl->block);
1563         kfree(cl);
1564 }
1565
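/* Destroy the whole qdisc. In offload mode leaves must be removed before
 * their parents, so the loop below repeatedly deletes the current leaves
 * (turning last-child parents into new leaves) until no classes remain.
 */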
1566 static void htb_destroy(struct Qdisc *sch)
1567 {
1568         struct net_device *dev = qdisc_dev(sch);
1569         struct tc_htb_qopt_offload offload_opt;
1570         struct htb_sched *q = qdisc_priv(sch);
1571         struct hlist_node *next;
1572         bool nonempty, changed;
1573         struct htb_class *cl;
1574         unsigned int i;
1575
1576         cancel_work_sync(&q->work);
1577         qdisc_watchdog_cancel(&q->watchdog);
1578         /* This call used to come after the htb_destroy_class calls below,
1579          * and surprisingly it worked in 2.4. But it must precede them,
1580          * because filters need their target class alive to be able to call
1581          * unbind_filter on it (without an Oops).
1582          */
1583         tcf_block_put(q->block);
1584
1585         for (i = 0; i < q->clhash.hashsize; i++) {
1586                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1587                         tcf_block_put(cl->block);
1588                         cl->block = NULL;
1589                 }
1590         }
1591
1592         do {
1593                 nonempty = false;
1594                 changed = false;
1595                 for (i = 0; i < q->clhash.hashsize; i++) {
1596                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1597                                                   common.hnode) {
1598                                 bool last_child;
1599
1600                                 if (!q->offload) {
1601                                         htb_destroy_class(sch, cl);
1602                                         continue;
1603                                 }
1604
1605                                 nonempty = true;
1606
1607                                 if (cl->level)
1608                                         continue;
1609
1610                                 changed = true;
1611
1612                                 last_child = htb_parent_last_child(cl);
1613                                 htb_destroy_class_offload(sch, cl, last_child,
1614                                                           true, NULL);
1615                                 qdisc_class_hash_remove(&q->clhash,
1616                                                         &cl->common);
1617                                 if (cl->parent)
1618                                         cl->parent->children--;
1619                                 if (last_child)
1620                                         htb_parent_to_leaf(sch, cl, NULL);
1621                                 htb_destroy_class(sch, cl);
1622                         }
1623                 }
1624         } while (changed);
1625         WARN_ON(nonempty);
1626
1627         qdisc_class_hash_destroy(&q->clhash);
1628         __qdisc_reset_queue(&q->direct_queue);
1629
1630         if (!q->offload)
1631                 return;
1632
1633         offload_opt = (struct tc_htb_qopt_offload) {
1634                 .command = TC_HTB_DESTROY,
1635         };
1636         htb_offload(dev, &offload_opt);
1637
1638         if (!q->direct_qdiscs)
1639                 return;
1640         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1641                 qdisc_put(q->direct_qdiscs[i]);
1642         kfree(q->direct_qdiscs);
1643 }
1644
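/* Delete a class. Only classes with no children and no filters bound to them
 * can be deleted; if the class was its parent's last child, the parent is
 * turned back into a leaf with a fresh default pfifo, which is allocated
 * before sch_tree_lock is taken.
 */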
1645 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1646                       struct netlink_ext_ack *extack)
1647 {
1648         struct htb_sched *q = qdisc_priv(sch);
1649         struct htb_class *cl = (struct htb_class *)arg;
1650         struct Qdisc *new_q = NULL;
1651         int last_child = 0;
1652         int err;
1653
1654         /* TODO: why don't we allow deleting a whole subtree? References?
1655          * Does the tc subsystem guarantee that no class refs are held during
1656          * htb_destroy, so that children could be removed safely there?
1657          */
1658         if (cl->children || cl->filter_cnt)
1659                 return -EBUSY;
1660
1661         if (!cl->level && htb_parent_last_child(cl))
1662                 last_child = 1;
1663
1664         if (q->offload) {
1665                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1666                                                 extack);
1667                 if (err)
1668                         return err;
1669         }
1670
1671         if (last_child) {
1672                 struct netdev_queue *dev_queue;
1673
1674                 dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1675                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1676                                           cl->parent->common.classid,
1677                                           NULL);
1678                 if (q->offload) {
1679                         if (new_q)
1680                                 htb_set_lockdep_class_child(new_q);
1681                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1682                 }
1683         }
1684
1685         sch_tree_lock(sch);
1686
1687         if (!cl->level)
1688                 qdisc_purge_queue(cl->leaf.q);
1689
1690         /* remove from hash and active lists; the rest happens in htb_destroy_class */
1691         qdisc_class_hash_remove(&q->clhash, &cl->common);
1692         if (cl->parent)
1693                 cl->parent->children--;
1694
1695         if (cl->prio_activity)
1696                 htb_deactivate(q, cl);
1697
1698         if (cl->cmode != HTB_CAN_SEND)
1699                 htb_safe_rb_erase(&cl->pq_node,
1700                                   &q->hlevel[cl->level].wait_pq);
1701
1702         if (last_child)
1703                 htb_parent_to_leaf(sch, cl, new_q);
1704
1705         sch_tree_unlock(sch);
1706
1707         htb_destroy_class(sch, cl);
1708         return 0;
1709 }
1710
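/* Create a new class or modify an existing one. In offload mode this is also
 * where TX queues are allocated for new leaves (TC_HTB_LEAF_ALLOC_QUEUE),
 * parents of a first child are converted to inner nodes (TC_HTB_LEAF_TO_INNER)
 * and rate/ceil changes are pushed down to the driver (TC_HTB_NODE_MODIFY).
 */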
1711 static int htb_change_class(struct Qdisc *sch, u32 classid,
1712                             u32 parentid, struct nlattr **tca,
1713                             unsigned long *arg, struct netlink_ext_ack *extack)
1714 {
1715         int err = -EINVAL;
1716         struct htb_sched *q = qdisc_priv(sch);
1717         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1718         struct tc_htb_qopt_offload offload_opt;
1719         struct nlattr *opt = tca[TCA_OPTIONS];
1720         struct nlattr *tb[TCA_HTB_MAX + 1];
1721         struct Qdisc *parent_qdisc = NULL;
1722         struct netdev_queue *dev_queue;
1723         struct tc_htb_opt *hopt;
1724         u64 rate64, ceil64;
1725         int warn = 0;
1726
1727         /* extract all subattrs from opt attr */
1728         if (!opt)
1729                 goto failure;
1730
1731         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1732                                           NULL);
1733         if (err < 0)
1734                 goto failure;
1735
1736         err = -EINVAL;
1737         if (tb[TCA_HTB_PARMS] == NULL)
1738                 goto failure;
1739
1740         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1741
1742         hopt = nla_data(tb[TCA_HTB_PARMS]);
1743         if (!hopt->rate.rate || !hopt->ceil.rate)
1744                 goto failure;
1745
1746         /* Keep backward compatibility with rate_table based iproute2 tc */
1747         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1748                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1749                                               NULL));
1750
1751         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1752                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1753                                               NULL));
1754
1755         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1756         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1757
1758         if (!cl) {              /* new class */
1759                 struct net_device *dev = qdisc_dev(sch);
1760                 struct Qdisc *new_q, *old_q;
1761                 int prio;
1762                 struct {
1763                         struct nlattr           nla;
1764                         struct gnet_estimator   opt;
1765                 } est = {
1766                         .nla = {
1767                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1768                                 .nla_type       = TCA_RATE,
1769                         },
1770                         .opt = {
1771                                 /* 4s interval, 16s averaging constant */
1772                                 .interval       = 2,
1773                                 .ewma_log       = 2,
1774                         },
1775                 };
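                /* interval and ewma_log are the generic estimator's log-scale
                 * knobs; the values 2 and 2 correspond to the 4 s sampling
                 * interval and ~16 s averaging constant mentioned above.
                 */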
1776
1777                 /* check for valid classid */
1778                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1779                     htb_find(classid, sch))
1780                         goto failure;
1781
1782                 /* check maximal depth */
1783                 if (parent && parent->parent && parent->parent->level < 2) {
1784                         pr_err("htb: tree is too deep\n");
1785                         goto failure;
1786                 }
1787                 err = -ENOBUFS;
1788                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1789                 if (!cl)
1790                         goto failure;
1791
1792                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1793                 if (err) {
1794                         kfree(cl);
1795                         goto failure;
1796                 }
1797                 if (htb_rate_est || tca[TCA_RATE]) {
1798                         err = gen_new_estimator(&cl->bstats, NULL,
1799                                                 &cl->rate_est,
1800                                                 NULL,
1801                                                 qdisc_root_sleeping_running(sch),
1802                                                 tca[TCA_RATE] ? : &est.nla);
1803                         if (err)
1804                                 goto err_block_put;
1805                 }
1806
1807                 cl->children = 0;
1808                 RB_CLEAR_NODE(&cl->pq_node);
1809
1810                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1811                         RB_CLEAR_NODE(&cl->node[prio]);
1812
1813                 cl->common.classid = classid;
1814
1815                 /* Make sure nothing interrupts us between two
1816                  * ndo_setup_tc calls.
1817                  */
1818                 ASSERT_RTNL();
1819
1820                 /* Create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1821                  * which can't be used while sch_tree_lock is held
1822                  * -- thanks to Karlis Peisenieks
1823                  */
1824                 if (!q->offload) {
1825                         dev_queue = sch->dev_queue;
1826                 } else if (!(parent && !parent->level)) {
1827                         /* Assign a dev_queue to this classid. */
1828                         offload_opt = (struct tc_htb_qopt_offload) {
1829                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1830                                 .classid = cl->common.classid,
1831                                 .parent_classid = parent ?
1832                                         TC_H_MIN(parent->common.classid) :
1833                                         TC_HTB_CLASSID_ROOT,
1834                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1835                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1836                                 .extack = extack,
1837                         };
1838                         err = htb_offload(dev, &offload_opt);
1839                         if (err) {
1840                                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1841                                        err);
1842                                 goto err_kill_estimator;
1843                         }
1844                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1845                 } else { /* First child. */
1846                         dev_queue = parent->leaf.q->dev_queue;
1847                         old_q = htb_graft_helper(dev_queue, NULL);
1848                         WARN_ON(old_q != parent->leaf.q);
1849                         offload_opt = (struct tc_htb_qopt_offload) {
1850                                 .command = TC_HTB_LEAF_TO_INNER,
1851                                 .classid = cl->common.classid,
1852                                 .parent_classid =
1853                                         TC_H_MIN(parent->common.classid),
1854                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1855                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1856                                 .extack = extack,
1857                         };
1858                         err = htb_offload(dev, &offload_opt);
1859                         if (err) {
1860                                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1861                                        err);
1862                                 htb_graft_helper(dev_queue, old_q);
1863                                 goto err_kill_estimator;
1864                         }
1865                         parent->bstats_bias.bytes += old_q->bstats.bytes;
1866                         parent->bstats_bias.packets += old_q->bstats.packets;
1867                         qdisc_put(old_q);
1868                 }
1869                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1870                                           classid, NULL);
1871                 if (q->offload) {
1872                         if (new_q) {
1873                                 htb_set_lockdep_class_child(new_q);
1874                                 /* One ref for cl->leaf.q, the other for
1875                                  * dev_queue->qdisc.
1876                                  */
1877                                 qdisc_refcount_inc(new_q);
1878                         }
1879                         old_q = htb_graft_helper(dev_queue, new_q);
1880                         /* No qdisc_put needed. */
1881                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1882                 }
1883                 sch_tree_lock(sch);
1884                 if (parent && !parent->level) {
1885                         /* turn parent into inner node */
1886                         qdisc_purge_queue(parent->leaf.q);
1887                         parent_qdisc = parent->leaf.q;
1888                         if (parent->prio_activity)
1889                                 htb_deactivate(q, parent);
1890
1891                         /* remove from the event list because of the level change */
1892                         if (parent->cmode != HTB_CAN_SEND) {
1893                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1894                                 parent->cmode = HTB_CAN_SEND;
1895                         }
1896                         parent->level = (parent->parent ? parent->parent->level
1897                                          : TC_HTB_MAXDEPTH) - 1;
1898                         memset(&parent->inner, 0, sizeof(parent->inner));
1899                 }
1900
1901                 /* the leaf (this class) needs an elementary qdisc */
1902                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1903
1904                 cl->parent = parent;
1905
1906                 /* set class to be in HTB_CAN_SEND state */
1907                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1908                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1909                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1910                 cl->t_c = ktime_get_ns();
1911                 cl->cmode = HTB_CAN_SEND;
1912
1913                 /* attach to the hash list and parent's family */
1914                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1915                 if (parent)
1916                         parent->children++;
1917                 if (cl->leaf.q != &noop_qdisc)
1918                         qdisc_hash_add(cl->leaf.q, true);
1919         } else {
1920                 if (tca[TCA_RATE]) {
1921                         err = gen_replace_estimator(&cl->bstats, NULL,
1922                                                     &cl->rate_est,
1923                                                     NULL,
1924                                                     qdisc_root_sleeping_running(sch),
1925                                                     tca[TCA_RATE]);
1926                         if (err)
1927                                 return err;
1928                 }
1929
1930                 if (q->offload) {
1931                         struct net_device *dev = qdisc_dev(sch);
1932
1933                         offload_opt = (struct tc_htb_qopt_offload) {
1934                                 .command = TC_HTB_NODE_MODIFY,
1935                                 .classid = cl->common.classid,
1936                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1937                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1938                                 .extack = extack,
1939                         };
1940                         err = htb_offload(dev, &offload_opt);
1941                         if (err)
1942                                 /* The estimator was replaced, and rollback may fail
1943                                  * as well, so we don't try to recover it, and
1944                                  * the estimator won't work properly with the
1945                                  * offload anyway, because bstats are updated
1946                                  * only when the stats are queried.
1947                                  */
1948                                 return err;
1949                 }
1950
1951                 sch_tree_lock(sch);
1952         }
1953
1954         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1955         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1956
1957         /* There used to be a nasty bug here: we have to check that the node
1958          * really is a leaf before changing cl->leaf!
1959          */
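        /* The quantum (bytes a class may send per round) defaults to
         * rate / r2q and is clamped to [1000, 200000] bytes unless set
         * explicitly, e.g.:
         *   tc qdisc add dev eth0 root handle 1: htb r2q 10
         *   tc class add dev eth0 parent 1: classid 1:10 htb rate 50mbit quantum 3000
         */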
1960         if (!cl->level) {
1961                 u64 quantum = cl->rate.rate_bytes_ps;
1962
1963                 do_div(quantum, q->rate2quantum);
1964                 cl->quantum = min_t(u64, quantum, INT_MAX);
1965
1966                 if (!hopt->quantum && cl->quantum < 1000) {
1967                         warn = -1;
1968                         cl->quantum = 1000;
1969                 }
1970                 if (!hopt->quantum && cl->quantum > 200000) {
1971                         warn = 1;
1972                         cl->quantum = 200000;
1973                 }
1974                 if (hopt->quantum)
1975                         cl->quantum = hopt->quantum;
1976                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1977                         cl->prio = TC_HTB_NUMPRIO - 1;
1978         }
1979
1980         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1981         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1982
1983         sch_tree_unlock(sch);
1984         qdisc_put(parent_qdisc);
1985
1986         if (warn)
1987                 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
1988                             cl->common.classid, (warn == -1 ? "small" : "big"));
1989
1990         qdisc_class_hash_grow(sch, &q->clhash);
1991
1992         *arg = (unsigned long)cl;
1993         return 0;
1994
1995 err_kill_estimator:
1996         gen_kill_estimator(&cl->rate_est);
1997 err_block_put:
1998         tcf_block_put(cl->block);
1999         kfree(cl);
2000 failure:
2001         return err;
2002 }
2003
2004 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2005                                        struct netlink_ext_ack *extack)
2006 {
2007         struct htb_sched *q = qdisc_priv(sch);
2008         struct htb_class *cl = (struct htb_class *)arg;
2009
2010         return cl ? cl->block : q->block;
2011 }
2012
2013 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2014                                      u32 classid)
2015 {
2016         struct htb_class *cl = htb_find(classid, sch);
2017
2018         /*if (cl && !cl->level) return 0;
2019          * The line above used to be here to prevent attaching filters to
2020          * leaves. But at least the tc_index filter uses it just to look up
2021          * the class for other reasons, so we have to allow it.
2022          * ----
2023          * 19.6.2002 As Werner explained, it is OK - binding a filter is just
2024          * another way to "lock" the class - unlike "get", this lock can
2025          * be broken by the class during destroy, IIUC.
2026          */
2027         if (cl)
2028                 cl->filter_cnt++;
2029         return (unsigned long)cl;
2030 }
2031
2032 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2033 {
2034         struct htb_class *cl = (struct htb_class *)arg;
2035
2036         if (cl)
2037                 cl->filter_cnt--;
2038 }
2039
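/* Walk all classes in the hash, honouring the walker's skip/count/stop
 * protocol.
 */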
2040 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2041 {
2042         struct htb_sched *q = qdisc_priv(sch);
2043         struct htb_class *cl;
2044         unsigned int i;
2045
2046         if (arg->stop)
2047                 return;
2048
2049         for (i = 0; i < q->clhash.hashsize; i++) {
2050                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2051                         if (arg->count < arg->skip) {
2052                                 arg->count++;
2053                                 continue;
2054                         }
2055                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2056                                 arg->stop = 1;
2057                                 return;
2058                         }
2059                         arg->count++;
2060                 }
2061         }
2062 }
2063
2064 static const struct Qdisc_class_ops htb_class_ops = {
2065         .select_queue   =       htb_select_queue,
2066         .graft          =       htb_graft,
2067         .leaf           =       htb_leaf,
2068         .qlen_notify    =       htb_qlen_notify,
2069         .find           =       htb_search,
2070         .change         =       htb_change_class,
2071         .delete         =       htb_delete,
2072         .walk           =       htb_walk,
2073         .tcf_block      =       htb_tcf_block,
2074         .bind_tcf       =       htb_bind_filter,
2075         .unbind_tcf     =       htb_unbind_filter,
2076         .dump           =       htb_dump_class,
2077         .dump_stats     =       htb_dump_class_stats,
2078 };
2079
2080 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2081         .cl_ops         =       &htb_class_ops,
2082         .id             =       "htb",
2083         .priv_size      =       sizeof(struct htb_sched),
2084         .enqueue        =       htb_enqueue,
2085         .dequeue        =       htb_dequeue,
2086         .peek           =       qdisc_peek_dequeued,
2087         .init           =       htb_init,
2088         .attach         =       htb_attach,
2089         .reset          =       htb_reset,
2090         .destroy        =       htb_destroy,
2091         .dump           =       htb_dump,
2092         .owner          =       THIS_MODULE,
2093 };
2094
2095 static int __init htb_module_init(void)
2096 {
2097         return register_qdisc(&htb_qdisc_ops);
2098 }
2099 static void __exit htb_module_exit(void)
2100 {
2101         unregister_qdisc(&htb_qdisc_ops);
2102 }
2103
2104 module_init(htb_module_init)
2105 module_exit(htb_module_exit)
2106 MODULE_LICENSE("GPL");