1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must match the version number supplied by TC */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
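
/* Illustrative user-space sketch (not part of the kernel source): with
 * module_param(..., 0640) above, both knobs appear under
 * /sys/module/sch_htb/parameters/ and can be changed at runtime by a
 * privileged process, e.g. to enable hysteresis:
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int toy_set_htb_hysteresis(int on)
{
	const char *path = "/sys/module/sch_htb/parameters/htb_hysteresis";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return -1;
	}
	if (write(fd, on ? "1\n" : "0\n", 2) != 2) {
		perror("write");
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}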
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When a class changes from state 1->2 and disconnects from
83          * its parent's feed, we lose the ptr value and start from the
84          * first child again. Here we store the classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_packed bstats;
117         struct gnet_stats_basic_packed bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                 } leaf;
129                 struct htb_class_inner {
130                         struct htb_prio clprio[TC_HTB_NUMPRIO];
131                 } inner;
132         };
133         s64                     pq_key;
134
135         int                     prio_activity;  /* for which prios are we active */
136         enum htb_cmode          cmode;          /* current mode of the class */
137         struct rb_node          pq_node;        /* node for event queue */
138         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
139
140         unsigned int drops ____cacheline_aligned_in_smp;
141         unsigned int            overlimits;
142 };
143
144 struct htb_level {
145         struct rb_root  wait_pq;
146         struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148
149 struct htb_sched {
150         struct Qdisc_class_hash clhash;
151         int                     defcls;         /* class where unclassified flows go to */
152         int                     rate2quantum;   /* quant = rate / rate2quantum */
153
154         /* filters for qdisc itself */
155         struct tcf_proto __rcu  *filter_list;
156         struct tcf_block        *block;
157
158 #define HTB_WARN_TOOMANYEVENTS  0x1
159         unsigned int            warned; /* only one warning */
160         int                     direct_qlen;
161         struct work_struct      work;
162
163         /* non shaped skbs; let them go directly thru */
164         struct qdisc_skb_head   direct_queue;
165         u32                     direct_pkts;
166         u32                     overlimits;
167
168         struct qdisc_watchdog   watchdog;
169
170         s64                     now;    /* cached dequeue time */
171
172         /* time of nearest event per level (row) */
173         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
174
175         int                     row_mask[TC_HTB_MAXDEPTH];
176
177         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
178
179         struct Qdisc            **direct_qdiscs;
180         unsigned int            num_direct_qdiscs;
181
182         bool                    offload;
183 };
184
185 /* find class in global hash table using given handle */
186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188         struct htb_sched *q = qdisc_priv(sch);
189         struct Qdisc_class_common *clc;
190
191         clc = qdisc_class_find(&q->clhash, handle);
192         if (clc == NULL)
193                 return NULL;
194         return container_of(clc, struct htb_class, common);
195 }
196
197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199         return (unsigned long)htb_find(handle, sch);
200 }
201 /**
202  * htb_classify - classify a packet into class
203  *
204  * It returns NULL if the packet should be dropped or -1 if the packet
205  * should be passed directly thru. In all other cases leaf class is returned.
206  * We allow direct class selection by classid in priority. Then we examine
207  * filters in qdisc and in inner nodes (if higher filter points to the inner
208  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
209  * internal fifo (direct). These packets then go directly thru. If we still
210  * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful,
211  * we finish and return the direct queue.
212  */
213 #define HTB_DIRECT ((struct htb_class *)-1L)
214
215 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216                                       int *qerr)
217 {
218         struct htb_sched *q = qdisc_priv(sch);
219         struct htb_class *cl;
220         struct tcf_result res;
221         struct tcf_proto *tcf;
222         int result;
223
224         /* allow to select class by setting skb->priority to valid classid;
225          * note that nfmark can be used too by attaching filter fw with no
226          * rules in it
227          */
228         if (skb->priority == sch->handle)
229                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
230         cl = htb_find(skb->priority, sch);
231         if (cl) {
232                 if (cl->level == 0)
233                         return cl;
234                 /* Start with inner filter chain if a non-leaf class is selected */
235                 tcf = rcu_dereference_bh(cl->filter_list);
236         } else {
237                 tcf = rcu_dereference_bh(q->filter_list);
238         }
239
240         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
241         while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
242 #ifdef CONFIG_NET_CLS_ACT
243                 switch (result) {
244                 case TC_ACT_QUEUED:
245                 case TC_ACT_STOLEN:
246                 case TC_ACT_TRAP:
247                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
248                         fallthrough;
249                 case TC_ACT_SHOT:
250                         return NULL;
251                 }
252 #endif
253                 cl = (void *)res.class;
254                 if (!cl) {
255                         if (res.classid == sch->handle)
256                                 return HTB_DIRECT;      /* X:0 (direct flow) */
257                         cl = htb_find(res.classid, sch);
258                         if (!cl)
259                                 break;  /* filter selected invalid classid */
260                 }
261                 if (!cl->level)
262                         return cl;      /* we hit leaf; return it */
263
264                 /* we have got inner class; apply inner filter chain */
265                 tcf = rcu_dereference_bh(cl->filter_list);
266         }
267         /* classification failed; try to use default class */
268         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
269         if (!cl || cl->level)
270                 return HTB_DIRECT;      /* bad default .. this is safe bet */
271         return cl;
272 }
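
/* Illustrative user-space sketch (not kernel code) of the classid-in-priority
 * shortcut honoured at the top of htb_classify(): a process with
 * CAP_NET_ADMIN (required for priority values above 6) can steer a socket's
 * traffic into a chosen HTB leaf by writing the classid into SO_PRIORITY,
 * which the stack copies into skb->priority. The class 1:10 is hypothetical.
 */
#include <stdio.h>
#include <sys/socket.h>

static int toy_bind_socket_to_htb_class(int sockfd)
{
	/* tc handle "1:10" = major 0x0001, minor 0x0010 (tc minors are hex) */
	const unsigned int classid = (0x0001U << 16) | 0x0010U;

	if (setsockopt(sockfd, SOL_SOCKET, SO_PRIORITY,
		       &classid, sizeof(classid)) < 0) {
		perror("setsockopt(SO_PRIORITY)");
		return -1;
	}
	return 0;
}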
273
274 /**
275  * htb_add_to_id_tree - adds class to the round robin list
276  *
277  * Routine adds class to the list (actually tree) sorted by classid.
278  * Make sure that the class is not already on such a list for the given prio.
279  */
280 static void htb_add_to_id_tree(struct rb_root *root,
281                                struct htb_class *cl, int prio)
282 {
283         struct rb_node **p = &root->rb_node, *parent = NULL;
284
285         while (*p) {
286                 struct htb_class *c;
287                 parent = *p;
288                 c = rb_entry(parent, struct htb_class, node[prio]);
289
290                 if (cl->common.classid > c->common.classid)
291                         p = &parent->rb_right;
292                 else
293                         p = &parent->rb_left;
294         }
295         rb_link_node(&cl->node[prio], parent, p);
296         rb_insert_color(&cl->node[prio], root);
297 }
298
299 /**
300  * htb_add_to_wait_tree - adds class to the event queue with delay
301  *
302  * The class is added to the priority event queue to indicate that the class
303  * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
304  * the class is not already in the queue.
305  */
306 static void htb_add_to_wait_tree(struct htb_sched *q,
307                                  struct htb_class *cl, s64 delay)
308 {
309         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
310
311         cl->pq_key = q->now + delay;
312         if (cl->pq_key == q->now)
313                 cl->pq_key++;
314
315         /* update the nearest event cache */
316         if (q->near_ev_cache[cl->level] > cl->pq_key)
317                 q->near_ev_cache[cl->level] = cl->pq_key;
318
319         while (*p) {
320                 struct htb_class *c;
321                 parent = *p;
322                 c = rb_entry(parent, struct htb_class, pq_node);
323                 if (cl->pq_key >= c->pq_key)
324                         p = &parent->rb_right;
325                 else
326                         p = &parent->rb_left;
327         }
328         rb_link_node(&cl->pq_node, parent, p);
329         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
330 }
331
332 /**
333  * htb_next_rb_node - finds next node in binary tree
334  *
335  * When we are past the last key, *n is set to NULL.
336  * Average complexity is 2 steps per call.
337  */
338 static inline void htb_next_rb_node(struct rb_node **n)
339 {
340         *n = rb_next(*n);
341 }
342
343 /**
344  * htb_add_class_to_row - add class to its row
345  *
346  * The class is added to row at priorities marked in mask.
347  * It does nothing if mask == 0.
348  */
349 static inline void htb_add_class_to_row(struct htb_sched *q,
350                                         struct htb_class *cl, int mask)
351 {
352         q->row_mask[cl->level] |= mask;
353         while (mask) {
354                 int prio = ffz(~mask);
355                 mask &= ~(1 << prio);
356                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
357         }
358 }
359
360 /* If this triggers, it is a bug in this code, but it need not be fatal */
361 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
362 {
363         if (RB_EMPTY_NODE(rb)) {
364                 WARN_ON(1);
365         } else {
366                 rb_erase(rb, root);
367                 RB_CLEAR_NODE(rb);
368         }
369 }
370
371
372 /**
373  * htb_remove_class_from_row - removes class from its row
374  *
375  * The class is removed from row at priorities marked in mask.
376  * It does nothing if mask == 0.
377  */
378 static inline void htb_remove_class_from_row(struct htb_sched *q,
379                                                  struct htb_class *cl, int mask)
380 {
381         int m = 0;
382         struct htb_level *hlevel = &q->hlevel[cl->level];
383
384         while (mask) {
385                 int prio = ffz(~mask);
386                 struct htb_prio *hprio = &hlevel->hprio[prio];
387
388                 mask &= ~(1 << prio);
389                 if (hprio->ptr == cl->node + prio)
390                         htb_next_rb_node(&hprio->ptr);
391
392                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
393                 if (!hprio->row.rb_node)
394                         m |= 1 << prio;
395         }
396         q->row_mask[cl->level] &= ~m;
397 }
398
399 /**
400  * htb_activate_prios - creates the active class's feed chain
401  *
402  * The class is connected to ancestors and/or appropriate rows
403  * for the priorities it participates in. cl->cmode must be the new
404  * (activated) mode. It does nothing if cl->prio_activity == 0.
405  */
406 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
407 {
408         struct htb_class *p = cl->parent;
409         long m, mask = cl->prio_activity;
410
411         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
412                 m = mask;
413                 while (m) {
414                         int prio = ffz(~m);
415                         m &= ~(1 << prio);
416
417                         if (p->inner.clprio[prio].feed.rb_node)
418                                 /* parent already has its feed in use, so
419                                  * reset the bit in mask as the parent is already ok
420                                  */
421                                 mask &= ~(1 << prio);
422
423                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
424                 }
425                 p->prio_activity |= mask;
426                 cl = p;
427                 p = cl->parent;
428
429         }
430         if (cl->cmode == HTB_CAN_SEND && mask)
431                 htb_add_class_to_row(q, cl, mask);
432 }
433
434 /**
435  * htb_deactivate_prios - remove class from feed chain
436  *
437  * cl->cmode must represent old mode (before deactivation). It does
438  * nothing if cl->prio_activity == 0. Class is removed from all feed
439  * chains and rows.
440  */
441 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
442 {
443         struct htb_class *p = cl->parent;
444         long m, mask = cl->prio_activity;
445
446         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
447                 m = mask;
448                 mask = 0;
449                 while (m) {
450                         int prio = ffz(~m);
451                         m &= ~(1 << prio);
452
453                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
454                                 /* we are removing a child that the parent's
455                                  * feed points to - forget the pointer but
456                                  * remember the classid
457                                  */
458                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
459                                 p->inner.clprio[prio].ptr = NULL;
460                         }
461
462                         htb_safe_rb_erase(cl->node + prio,
463                                           &p->inner.clprio[prio].feed);
464
465                         if (!p->inner.clprio[prio].feed.rb_node)
466                                 mask |= 1 << prio;
467                 }
468
469                 p->prio_activity &= ~mask;
470                 cl = p;
471                 p = cl->parent;
472
473         }
474         if (cl->cmode == HTB_CAN_SEND && mask)
475                 htb_remove_class_from_row(q, cl, mask);
476 }
477
478 static inline s64 htb_lowater(const struct htb_class *cl)
479 {
480         if (htb_hysteresis)
481                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
482         else
483                 return 0;
484 }
485 static inline s64 htb_hiwater(const struct htb_class *cl)
486 {
487         if (htb_hysteresis)
488                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
489         else
490                 return 0;
491 }
492
493
494 /**
495  * htb_class_mode - computes and returns current class mode
496  *
497  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
498  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
499  * from now to time when cl will change its state.
500  * Also it is worth noting that the class mode doesn't change simply
501  * at cl->{c,}tokens == 0; rather there is hysteresis over the
502  * 0 .. -cl->{c,}buffer range, which is meant to limit the number of
503  * mode transitions per time unit. The speed gain is about 1/6.
504  */
505 static inline enum htb_cmode
506 htb_class_mode(struct htb_class *cl, s64 *diff)
507 {
508         s64 toks;
509
510         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
511                 *diff = -toks;
512                 return HTB_CANT_SEND;
513         }
514
515         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
516                 return HTB_CAN_SEND;
517
518         *diff = -toks;
519         return HTB_MAY_BORROW;
520 }
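
/* Illustrative stand-alone sketch (toy numbers, user space, not kernel code):
 * how the hysteresis window shifts the mode thresholds. With htb_hysteresis
 * on, a class that is sending (HTB_CAN_SEND) only drops to HTB_MAY_BORROW
 * once its rate tokens fall below -buffer rather than below 0, and only
 * returns once they climb back above 0 (mirrors htb_lowater()/htb_hiwater()).
 */
#include <stdio.h>

typedef long long s64_t;              /* stand-in for the kernel's s64 */

enum toy_cmode { TOY_CANT_SEND, TOY_MAY_BORROW, TOY_CAN_SEND };

static enum toy_cmode toy_mode(s64_t tokens, s64_t ctokens,
			       s64_t buffer, s64_t cbuffer,
			       enum toy_cmode cur, int hysteresis)
{
	s64_t lo = hysteresis && cur != TOY_CANT_SEND ? -cbuffer : 0;
	s64_t hi = hysteresis && cur == TOY_CAN_SEND ? -buffer : 0;

	if (ctokens < lo)                /* ceil exhausted: cannot even borrow */
		return TOY_CANT_SEND;
	if (tokens >= hi)                /* own rate still covered */
		return TOY_CAN_SEND;
	return TOY_MAY_BORROW;           /* ceil ok, rate exhausted */
}

int main(void)
{
	/* tokens slightly negative: without hysteresis we would already borrow */
	printf("no hyst: %d\n", toy_mode(-10, 500, 100, 100, TOY_CAN_SEND, 0));
	printf("hyst:    %d\n", toy_mode(-10, 500, 100, 100, TOY_CAN_SEND, 1));
	return 0;
}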
521
522 /**
523  * htb_change_class_mode - changes class's mode
524  *
525  * This should be the only way to change a class's mode under normal
526  * circumstances. The routine will update feed list linkage, change the mode
527  * and add class to the wait event queue if appropriate. New mode should
528  * be different from old one and cl->pq_key has to be valid if changing
529  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
530  */
531 static void
532 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
533 {
534         enum htb_cmode new_mode = htb_class_mode(cl, diff);
535
536         if (new_mode == cl->cmode)
537                 return;
538
539         if (new_mode == HTB_CANT_SEND) {
540                 cl->overlimits++;
541                 q->overlimits++;
542         }
543
544         if (cl->prio_activity) {        /* not necessary: speed optimization */
545                 if (cl->cmode != HTB_CANT_SEND)
546                         htb_deactivate_prios(q, cl);
547                 cl->cmode = new_mode;
548                 if (new_mode != HTB_CANT_SEND)
549                         htb_activate_prios(q, cl);
550         } else
551                 cl->cmode = new_mode;
552 }
553
554 /**
555  * htb_activate - inserts leaf cl into appropriate active feeds
556  *
557  * Routine learns the (new) priority of the leaf and activates the feed
558  * chain for that prio. It can safely be called on an already active
559  * leaf.
560  */
561 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
562 {
563         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
564
565         if (!cl->prio_activity) {
566                 cl->prio_activity = 1 << cl->prio;
567                 htb_activate_prios(q, cl);
568         }
569 }
570
571 /**
572  * htb_deactivate - remove leaf cl from active feeds
573  *
574  * Make sure that the leaf is active. In other words, it can't be called
575  * with a non-active leaf.
576  */
577 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
578 {
579         WARN_ON(!cl->prio_activity);
580
581         htb_deactivate_prios(q, cl);
582         cl->prio_activity = 0;
583 }
584
585 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
586                        struct sk_buff **to_free)
587 {
588         int ret;
589         unsigned int len = qdisc_pkt_len(skb);
590         struct htb_sched *q = qdisc_priv(sch);
591         struct htb_class *cl = htb_classify(skb, sch, &ret);
592
593         if (cl == HTB_DIRECT) {
594                 /* enqueue to helper queue */
595                 if (q->direct_queue.qlen < q->direct_qlen) {
596                         __qdisc_enqueue_tail(skb, &q->direct_queue);
597                         q->direct_pkts++;
598                 } else {
599                         return qdisc_drop(skb, sch, to_free);
600                 }
601 #ifdef CONFIG_NET_CLS_ACT
602         } else if (!cl) {
603                 if (ret & __NET_XMIT_BYPASS)
604                         qdisc_qstats_drop(sch);
605                 __qdisc_drop(skb, to_free);
606                 return ret;
607 #endif
608         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
609                                         to_free)) != NET_XMIT_SUCCESS) {
610                 if (net_xmit_drop_count(ret)) {
611                         qdisc_qstats_drop(sch);
612                         cl->drops++;
613                 }
614                 return ret;
615         } else {
616                 htb_activate(q, cl);
617         }
618
619         sch->qstats.backlog += len;
620         sch->q.qlen++;
621         return NET_XMIT_SUCCESS;
622 }
623
624 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
625 {
626         s64 toks = diff + cl->tokens;
627
628         if (toks > cl->buffer)
629                 toks = cl->buffer;
630         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
631         if (toks <= -cl->mbuffer)
632                 toks = 1 - cl->mbuffer;
633
634         cl->tokens = toks;
635 }
636
637 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
638 {
639         s64 toks = diff + cl->ctokens;
640
641         if (toks > cl->cbuffer)
642                 toks = cl->cbuffer;
643         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
644         if (toks <= -cl->mbuffer)
645                 toks = 1 - cl->mbuffer;
646
647         cl->ctokens = toks;
648 }
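
/* Illustrative stand-alone sketch (user space, not kernel code): the token
 * update above in scalar form. Tokens are nanoseconds of transmit time;
 * sending `bytes` at `rate_bytes_per_sec` costs roughly
 * bytes * NSEC_PER_SEC / rate_bytes_per_sec ns (what psched_l2t_ns()
 * approximates via a mult/shift), elapsed wall time refills the bucket, and
 * the result is capped at buffer and floored at 1 - mbuffer.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_NSEC_PER_SEC 1000000000LL

static int64_t toy_l2t_ns(uint64_t rate_bytes_per_sec, unsigned int bytes)
{
	return (int64_t)bytes * TOY_NSEC_PER_SEC / (int64_t)rate_bytes_per_sec;
}

static int64_t toy_account(int64_t tokens, int64_t elapsed_ns,
			   int64_t buffer_ns, int64_t mbuffer_ns,
			   uint64_t rate_bytes_per_sec, unsigned int bytes)
{
	int64_t toks = tokens + elapsed_ns;

	if (toks > buffer_ns)            /* burst limit: bucket depth */
		toks = buffer_ns;
	toks -= toy_l2t_ns(rate_bytes_per_sec, bytes);
	if (toks <= -mbuffer_ns)         /* don't build unbounded debt */
		toks = 1 - mbuffer_ns;
	return toks;
}

int main(void)
{
	/* a 1500-byte packet at 1 MB/s costs 1.5 ms worth of tokens */
	printf("%lld\n", (long long)toy_account(0, 2000000, 3000000,
						60LL * TOY_NSEC_PER_SEC,
						1000000, 1500));
	return 0;
}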
649
650 /**
651  * htb_charge_class - charges amount "bytes" to leaf and ancestors
652  *
653  * Routine assumes that packet "bytes" long was dequeued from leaf cl
654  * borrowing from "level". It accounts bytes to ceil leaky bucket for
655  * leaf and all ancestors and to rate bucket for ancestors at levels
656  * "level" and higher. It also handles possible change of mode resulting
657  * from the update. Note that mode can also increase here (MAY_BORROW to
658  * CAN_SEND) because we can use a more precise clock than the event queue.
659  * In such a case we remove the class from the event queue first.
660  */
661 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
662                              int level, struct sk_buff *skb)
663 {
664         int bytes = qdisc_pkt_len(skb);
665         enum htb_cmode old_mode;
666         s64 diff;
667
668         while (cl) {
669                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
670                 if (cl->level >= level) {
671                         if (cl->level == level)
672                                 cl->xstats.lends++;
673                         htb_accnt_tokens(cl, bytes, diff);
674                 } else {
675                         cl->xstats.borrows++;
676                         cl->tokens += diff;     /* we moved t_c; update tokens */
677                 }
678                 htb_accnt_ctokens(cl, bytes, diff);
679                 cl->t_c = q->now;
680
681                 old_mode = cl->cmode;
682                 diff = 0;
683                 htb_change_class_mode(q, cl, &diff);
684                 if (old_mode != cl->cmode) {
685                         if (old_mode != HTB_CAN_SEND)
686                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
687                         if (cl->cmode != HTB_CAN_SEND)
688                                 htb_add_to_wait_tree(q, cl, diff);
689                 }
690
691                 /* update basic stats except for leaves which are already updated */
692                 if (cl->level)
693                         bstats_update(&cl->bstats, skb);
694
695                 cl = cl->parent;
696         }
697 }
698
699 /**
700  * htb_do_events - make mode changes to classes at the level
701  *
702  * Scans event queue for pending events and applies them. Returns time of
703  * next pending event (0 for no event in pq, q->now for too many events).
704  * Note: Only events with cl->pq_key <= q->now are applied.
705  */
706 static s64 htb_do_events(struct htb_sched *q, const int level,
707                          unsigned long start)
708 {
709         /* don't run for longer than 2 jiffies; 2 is used instead of
710          * 1 to simplify things when jiffy is going to be incremented
711          * too soon
712          */
713         unsigned long stop_at = start + 2;
714         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
715
716         while (time_before(jiffies, stop_at)) {
717                 struct htb_class *cl;
718                 s64 diff;
719                 struct rb_node *p = rb_first(wait_pq);
720
721                 if (!p)
722                         return 0;
723
724                 cl = rb_entry(p, struct htb_class, pq_node);
725                 if (cl->pq_key > q->now)
726                         return cl->pq_key;
727
728                 htb_safe_rb_erase(p, wait_pq);
729                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
730                 htb_change_class_mode(q, cl, &diff);
731                 if (cl->cmode != HTB_CAN_SEND)
732                         htb_add_to_wait_tree(q, cl, diff);
733         }
734
735         /* too much load - let's continue after a break for scheduling */
736         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
737                 pr_warn("htb: too many events!\n");
738                 q->warned |= HTB_WARN_TOOMANYEVENTS;
739         }
740
741         return q->now;
742 }
743
744 /* Returns class->node+prio from the id-tree where the class's id is >= id.
745  * NULL if no such one exists.
746  */
747 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
748                                               u32 id)
749 {
750         struct rb_node *r = NULL;
751         while (n) {
752                 struct htb_class *cl =
753                     rb_entry(n, struct htb_class, node[prio]);
754
755                 if (id > cl->common.classid) {
756                         n = n->rb_right;
757                 } else if (id < cl->common.classid) {
758                         r = n;
759                         n = n->rb_left;
760                 } else {
761                         return n;
762                 }
763         }
764         return r;
765 }
766
767 /**
768  * htb_lookup_leaf - returns next leaf class in DRR order
769  *
770  * Find the leaf that the current feed pointer points to.
771  */
772 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
773 {
774         int i;
775         struct {
776                 struct rb_node *root;
777                 struct rb_node **pptr;
778                 u32 *pid;
779         } stk[TC_HTB_MAXDEPTH], *sp = stk;
780
781         BUG_ON(!hprio->row.rb_node);
782         sp->root = hprio->row.rb_node;
783         sp->pptr = &hprio->ptr;
784         sp->pid = &hprio->last_ptr_id;
785
786         for (i = 0; i < 65535; i++) {
787                 if (!*sp->pptr && *sp->pid) {
788                         /* ptr was invalidated but id is valid - try to recover
789                          * the original or next ptr
790                          */
791                         *sp->pptr =
792                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
793                 }
794                 *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
795                                  * can become out of date quickly
796                                  */
797                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
798                         *sp->pptr = sp->root;
799                         while ((*sp->pptr)->rb_left)
800                                 *sp->pptr = (*sp->pptr)->rb_left;
801                         if (sp > stk) {
802                                 sp--;
803                                 if (!*sp->pptr) {
804                                         WARN_ON(1);
805                                         return NULL;
806                                 }
807                                 htb_next_rb_node(sp->pptr);
808                         }
809                 } else {
810                         struct htb_class *cl;
811                         struct htb_prio *clp;
812
813                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
814                         if (!cl->level)
815                                 return cl;
816                         clp = &cl->inner.clprio[prio];
817                         (++sp)->root = clp->feed.rb_node;
818                         sp->pptr = &clp->ptr;
819                         sp->pid = &clp->last_ptr_id;
820                 }
821         }
822         WARN_ON(1);
823         return NULL;
824 }
825
826 /* dequeues packet at given priority and level; call only if
827  * you are sure that there is an active class at prio/level
828  */
829 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
830                                         const int level)
831 {
832         struct sk_buff *skb = NULL;
833         struct htb_class *cl, *start;
834         struct htb_level *hlevel = &q->hlevel[level];
835         struct htb_prio *hprio = &hlevel->hprio[prio];
836
837         /* look initial class up in the row */
838         start = cl = htb_lookup_leaf(hprio, prio);
839
840         do {
841 next:
842                 if (unlikely(!cl))
843                         return NULL;
844
845                 /* class can be empty - it is unlikely but can be true if leaf
846                  * qdisc drops packets in enqueue routine or if someone used
847                  * graft operation on the leaf since last dequeue;
848                  * simply deactivate and skip such class
849                  */
850                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
851                         struct htb_class *next;
852                         htb_deactivate(q, cl);
853
854                         /* row/level might become empty */
855                         if ((q->row_mask[level] & (1 << prio)) == 0)
856                                 return NULL;
857
858                         next = htb_lookup_leaf(hprio, prio);
859
860                         if (cl == start)        /* fix start if we just deleted it */
861                                 start = next;
862                         cl = next;
863                         goto next;
864                 }
865
866                 skb = cl->leaf.q->dequeue(cl->leaf.q);
867                 if (likely(skb != NULL))
868                         break;
869
870                 qdisc_warn_nonwc("htb", cl->leaf.q);
871                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
872                                          &q->hlevel[0].hprio[prio].ptr);
873                 cl = htb_lookup_leaf(hprio, prio);
874
875         } while (cl != start);
876
877         if (likely(skb != NULL)) {
878                 bstats_update(&cl->bstats, skb);
879                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
880                 if (cl->leaf.deficit[level] < 0) {
881                         cl->leaf.deficit[level] += cl->quantum;
882                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
883                                                  &q->hlevel[0].hprio[prio].ptr);
884                 }
885                 /* this used to be after charge_class but this arrangement
886                  * gives us slightly better performance
887                  */
888                 if (!cl->leaf.q->q.qlen)
889                         htb_deactivate(q, cl);
890                 htb_charge_class(q, cl, level, skb);
891         }
892         return skb;
893 }
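
/* Illustrative stand-alone sketch (user space, not kernel code): the deficit
 * round-robin step applied after each successful dequeue above. A class keeps
 * the "turn" until it has sent roughly `quantum` bytes; then its deficit is
 * topped up and the row/feed pointer advances to the next class.
 */
#include <stdbool.h>
#include <stdio.h>

/* returns true when the round-robin pointer should move to the next class */
static bool toy_drr_charge(int *deficit, int quantum, int pkt_len)
{
	*deficit -= pkt_len;
	if (*deficit < 0) {
		*deficit += quantum;
		return true;
	}
	return false;
}

int main(void)
{
	int deficit = 0, quantum = 1514;

	/* first packet exhausts the (empty) deficit and yields the turn */
	printf("%d %d\n", toy_drr_charge(&deficit, quantum, 1000), deficit);
	/* refreshed deficit lets a small follow-up packet keep the turn */
	printf("%d %d\n", toy_drr_charge(&deficit, quantum, 200), deficit);
	return 0;
}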
894
895 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
896 {
897         struct sk_buff *skb;
898         struct htb_sched *q = qdisc_priv(sch);
899         int level;
900         s64 next_event;
901         unsigned long start_at;
902
903         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
904         skb = __qdisc_dequeue_head(&q->direct_queue);
905         if (skb != NULL) {
906 ok:
907                 qdisc_bstats_update(sch, skb);
908                 qdisc_qstats_backlog_dec(sch, skb);
909                 sch->q.qlen--;
910                 return skb;
911         }
912
913         if (!sch->q.qlen)
914                 goto fin;
915         q->now = ktime_get_ns();
916         start_at = jiffies;
917
918         next_event = q->now + 5LLU * NSEC_PER_SEC;
919
920         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
921                 /* common case optimization - skip event handler quickly */
922                 int m;
923                 s64 event = q->near_ev_cache[level];
924
925                 if (q->now >= event) {
926                         event = htb_do_events(q, level, start_at);
927                         if (!event)
928                                 event = q->now + NSEC_PER_SEC;
929                         q->near_ev_cache[level] = event;
930                 }
931
932                 if (next_event > event)
933                         next_event = event;
934
935                 m = ~q->row_mask[level];
936                 while (m != (int)(-1)) {
937                         int prio = ffz(m);
938
939                         m |= 1 << prio;
940                         skb = htb_dequeue_tree(q, prio, level);
941                         if (likely(skb != NULL))
942                                 goto ok;
943                 }
944         }
945         if (likely(next_event > q->now))
946                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
947         else
948                 schedule_work(&q->work);
949 fin:
950         return skb;
951 }
952
953 /* reset all classes */
954 /* always called under BH & queue lock */
955 static void htb_reset(struct Qdisc *sch)
956 {
957         struct htb_sched *q = qdisc_priv(sch);
958         struct htb_class *cl;
959         unsigned int i;
960
961         for (i = 0; i < q->clhash.hashsize; i++) {
962                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
963                         if (cl->level)
964                                 memset(&cl->inner, 0, sizeof(cl->inner));
965                         else {
966                                 if (cl->leaf.q && !q->offload)
967                                         qdisc_reset(cl->leaf.q);
968                         }
969                         cl->prio_activity = 0;
970                         cl->cmode = HTB_CAN_SEND;
971                 }
972         }
973         qdisc_watchdog_cancel(&q->watchdog);
974         __qdisc_reset_queue(&q->direct_queue);
975         sch->q.qlen = 0;
976         sch->qstats.backlog = 0;
977         memset(q->hlevel, 0, sizeof(q->hlevel));
978         memset(q->row_mask, 0, sizeof(q->row_mask));
979 }
980
981 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
982         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
983         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
984         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
985         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
986         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
987         [TCA_HTB_RATE64] = { .type = NLA_U64 },
988         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
989         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
990 };
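
/* Illustrative user-space sketch (not kernel code): roughly the payload a
 * "tc qdisc add ... htb default 20 r2q 10" style request carries in
 * TCA_HTB_INIT and TCA_HTB_DIRECT_QLEN; the netlink framing itself is
 * omitted. htb_init() below checks gopt->version against HTB_VER >> 16 and
 * copies rate2quantum and defcls into struct htb_sched. The values are
 * hypothetical.
 */
#include <linux/pkt_sched.h>
#include <string.h>

static void toy_fill_htb_glob(struct tc_htb_glob *gopt,
			      unsigned int *direct_qlen)
{
	memset(gopt, 0, sizeof(*gopt));
	gopt->version = TC_HTB_PROTOVER;  /* must equal HTB_VER >> 16 */
	gopt->rate2quantum = 10;          /* quantum = rate / r2q */
	gopt->defcls = 0x20;              /* unclassified traffic -> MAJOR:20 */
	*direct_qlen = 1000;              /* goes into TCA_HTB_DIRECT_QLEN */
}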
991
992 static void htb_work_func(struct work_struct *work)
993 {
994         struct htb_sched *q = container_of(work, struct htb_sched, work);
995         struct Qdisc *sch = q->watchdog.qdisc;
996
997         rcu_read_lock();
998         __netif_schedule(qdisc_root(sch));
999         rcu_read_unlock();
1000 }
1001
1002 static void htb_set_lockdep_class_child(struct Qdisc *q)
1003 {
1004         static struct lock_class_key child_key;
1005
1006         lockdep_set_class(qdisc_lock(q), &child_key);
1007 }
1008
1009 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1010 {
1011         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1012 }
1013
1014 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1015                     struct netlink_ext_ack *extack)
1016 {
1017         struct net_device *dev = qdisc_dev(sch);
1018         struct tc_htb_qopt_offload offload_opt;
1019         struct htb_sched *q = qdisc_priv(sch);
1020         struct nlattr *tb[TCA_HTB_MAX + 1];
1021         struct tc_htb_glob *gopt;
1022         unsigned int ntx;
1023         int err;
1024
1025         qdisc_watchdog_init(&q->watchdog, sch);
1026         INIT_WORK(&q->work, htb_work_func);
1027
1028         if (!opt)
1029                 return -EINVAL;
1030
1031         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1032         if (err)
1033                 return err;
1034
1035         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1036                                           NULL);
1037         if (err < 0)
1038                 return err;
1039
1040         if (!tb[TCA_HTB_INIT])
1041                 return -EINVAL;
1042
1043         gopt = nla_data(tb[TCA_HTB_INIT]);
1044         if (gopt->version != HTB_VER >> 16)
1045                 return -EINVAL;
1046
1047         q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1048
1049         if (q->offload) {
1050                 if (sch->parent != TC_H_ROOT)
1051                         return -EOPNOTSUPP;
1052
1053                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1054                         return -EOPNOTSUPP;
1055
1056                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1057                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1058                                            sizeof(*q->direct_qdiscs),
1059                                            GFP_KERNEL);
1060                 if (!q->direct_qdiscs)
1061                         return -ENOMEM;
1062         }
1063
1064         err = qdisc_class_hash_init(&q->clhash);
1065         if (err < 0)
1066                 goto err_free_direct_qdiscs;
1067
1068         qdisc_skb_head_init(&q->direct_queue);
1069
1070         if (tb[TCA_HTB_DIRECT_QLEN])
1071                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1072         else
1073                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1074
1075         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1076                 q->rate2quantum = 1;
1077         q->defcls = gopt->defcls;
1078
1079         if (!q->offload)
1080                 return 0;
1081
1082         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1083                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1084                 struct Qdisc *qdisc;
1085
1086                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1087                                           TC_H_MAKE(sch->handle, 0), extack);
1088                 if (!qdisc) {
1089                         err = -ENOMEM;
1090                         goto err_free_qdiscs;
1091                 }
1092
1093                 htb_set_lockdep_class_child(qdisc);
1094                 q->direct_qdiscs[ntx] = qdisc;
1095                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1096         }
1097
1098         sch->flags |= TCQ_F_MQROOT;
1099
1100         offload_opt = (struct tc_htb_qopt_offload) {
1101                 .command = TC_HTB_CREATE,
1102                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1103                 .classid = TC_H_MIN(q->defcls),
1104                 .extack = extack,
1105         };
1106         err = htb_offload(dev, &offload_opt);
1107         if (err)
1108                 goto err_free_qdiscs;
1109
1110         return 0;
1111
1112 err_free_qdiscs:
1113         /* TC_HTB_CREATE call failed, avoid any further calls to the driver. */
1114         q->offload = false;
1115
1116         for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1117              ntx++)
1118                 qdisc_put(q->direct_qdiscs[ntx]);
1119
1120         qdisc_class_hash_destroy(&q->clhash);
1121         /* Prevent use-after-free and double-free when htb_destroy gets called.
1122          */
1123         q->clhash.hash = NULL;
1124         q->clhash.hashsize = 0;
1125
1126 err_free_direct_qdiscs:
1127         kfree(q->direct_qdiscs);
1128         q->direct_qdiscs = NULL;
1129         return err;
1130 }
1131
1132 static void htb_attach_offload(struct Qdisc *sch)
1133 {
1134         struct net_device *dev = qdisc_dev(sch);
1135         struct htb_sched *q = qdisc_priv(sch);
1136         unsigned int ntx;
1137
1138         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1139                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1140
1141                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1142                 qdisc_put(old);
1143                 qdisc_hash_add(qdisc, false);
1144         }
1145         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1146                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1147                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1148
1149                 qdisc_put(old);
1150         }
1151
1152         kfree(q->direct_qdiscs);
1153         q->direct_qdiscs = NULL;
1154 }
1155
1156 static void htb_attach_software(struct Qdisc *sch)
1157 {
1158         struct net_device *dev = qdisc_dev(sch);
1159         unsigned int ntx;
1160
1161         /* Resemble qdisc_graft behavior. */
1162         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1163                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1164                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1165
1166                 qdisc_refcount_inc(sch);
1167
1168                 qdisc_put(old);
1169         }
1170 }
1171
1172 static void htb_attach(struct Qdisc *sch)
1173 {
1174         struct htb_sched *q = qdisc_priv(sch);
1175
1176         if (q->offload)
1177                 htb_attach_offload(sch);
1178         else
1179                 htb_attach_software(sch);
1180 }
1181
1182 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1183 {
1184         struct htb_sched *q = qdisc_priv(sch);
1185         struct nlattr *nest;
1186         struct tc_htb_glob gopt;
1187
1188         if (q->offload)
1189                 sch->flags |= TCQ_F_OFFLOADED;
1190         else
1191                 sch->flags &= ~TCQ_F_OFFLOADED;
1192
1193         sch->qstats.overlimits = q->overlimits;
1194         /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1195          * no change can happen on the qdisc parameters.
1196          */
1197
1198         gopt.direct_pkts = q->direct_pkts;
1199         gopt.version = HTB_VER;
1200         gopt.rate2quantum = q->rate2quantum;
1201         gopt.defcls = q->defcls;
1202         gopt.debug = 0;
1203
1204         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1205         if (nest == NULL)
1206                 goto nla_put_failure;
1207         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1208             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1209                 goto nla_put_failure;
1210         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1211                 goto nla_put_failure;
1212
1213         return nla_nest_end(skb, nest);
1214
1215 nla_put_failure:
1216         nla_nest_cancel(skb, nest);
1217         return -1;
1218 }
1219
1220 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1221                           struct sk_buff *skb, struct tcmsg *tcm)
1222 {
1223         struct htb_class *cl = (struct htb_class *)arg;
1224         struct htb_sched *q = qdisc_priv(sch);
1225         struct nlattr *nest;
1226         struct tc_htb_opt opt;
1227
1228         /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1229          * no change can happen on the class parameters.
1230          */
1231         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1232         tcm->tcm_handle = cl->common.classid;
1233         if (!cl->level && cl->leaf.q)
1234                 tcm->tcm_info = cl->leaf.q->handle;
1235
1236         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1237         if (nest == NULL)
1238                 goto nla_put_failure;
1239
1240         memset(&opt, 0, sizeof(opt));
1241
1242         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1243         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1244         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1245         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1246         opt.quantum = cl->quantum;
1247         opt.prio = cl->prio;
1248         opt.level = cl->level;
1249         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1250                 goto nla_put_failure;
1251         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1252                 goto nla_put_failure;
1253         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1254             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1255                               TCA_HTB_PAD))
1256                 goto nla_put_failure;
1257         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1258             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1259                               TCA_HTB_PAD))
1260                 goto nla_put_failure;
1261
1262         return nla_nest_end(skb, nest);
1263
1264 nla_put_failure:
1265         nla_nest_cancel(skb, nest);
1266         return -1;
1267 }
1268
1269 static void htb_offload_aggregate_stats(struct htb_sched *q,
1270                                         struct htb_class *cl)
1271 {
1272         struct htb_class *c;
1273         unsigned int i;
1274
1275         memset(&cl->bstats, 0, sizeof(cl->bstats));
1276
1277         for (i = 0; i < q->clhash.hashsize; i++) {
1278                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1279                         struct htb_class *p = c;
1280
1281                         while (p && p->level < cl->level)
1282                                 p = p->parent;
1283
1284                         if (p != cl)
1285                                 continue;
1286
1287                         cl->bstats.bytes += c->bstats_bias.bytes;
1288                         cl->bstats.packets += c->bstats_bias.packets;
1289                         if (c->level == 0) {
1290                                 cl->bstats.bytes += c->leaf.q->bstats.bytes;
1291                                 cl->bstats.packets += c->leaf.q->bstats.packets;
1292                         }
1293                 }
1294         }
1295 }
1296
1297 static int
1298 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1299 {
1300         struct htb_class *cl = (struct htb_class *)arg;
1301         struct htb_sched *q = qdisc_priv(sch);
1302         struct gnet_stats_queue qs = {
1303                 .drops = cl->drops,
1304                 .overlimits = cl->overlimits,
1305         };
1306         __u32 qlen = 0;
1307
1308         if (!cl->level && cl->leaf.q)
1309                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1310
1311         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1312                                     INT_MIN, INT_MAX);
1313         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1314                                      INT_MIN, INT_MAX);
1315
1316         if (q->offload) {
1317                 if (!cl->level) {
1318                         if (cl->leaf.q)
1319                                 cl->bstats = cl->leaf.q->bstats;
1320                         else
1321                                 memset(&cl->bstats, 0, sizeof(cl->bstats));
1322                         cl->bstats.bytes += cl->bstats_bias.bytes;
1323                         cl->bstats.packets += cl->bstats_bias.packets;
1324                 } else {
1325                         htb_offload_aggregate_stats(q, cl);
1326                 }
1327         }
1328
1329         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1330                                   d, NULL, &cl->bstats) < 0 ||
1331             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1332             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1333                 return -1;
1334
1335         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1336 }
1337
1338 static struct netdev_queue *
1339 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1340 {
1341         struct net_device *dev = qdisc_dev(sch);
1342         struct tc_htb_qopt_offload offload_opt;
        struct htb_sched *q = qdisc_priv(sch);
1343         int err;
1344
        /* only the offload mode pins leaf classes to specific TX queues;
         * in software mode use the qdisc's own queue and don't touch the
         * driver at all
         */
        if (!q->offload)
                return sch->dev_queue;

1345         offload_opt = (struct tc_htb_qopt_offload) {
1346                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1347                 .classid = TC_H_MIN(tcm->tcm_parent),
1348         };
1349         err = htb_offload(dev, &offload_opt);
1350         if (err || offload_opt.qid >= dev->num_tx_queues)
1351                 return NULL;
1352         return netdev_get_tx_queue(dev, offload_opt.qid);
1353 }
1354
1355 static struct Qdisc *
1356 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1357 {
1358         struct net_device *dev = dev_queue->dev;
1359         struct Qdisc *old_q;
1360
1361         if (dev->flags & IFF_UP)
1362                 dev_deactivate(dev);
1363         old_q = dev_graft_qdisc(dev_queue, new_q);
1364         if (new_q)
1365                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1366         if (dev->flags & IFF_UP)
1367                 dev_activate(dev);
1368
1369         return old_q;
1370 }
1371
1372 static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1373 {
1374         struct netdev_queue *queue_old, *queue_new;
1375         struct net_device *dev = qdisc_dev(sch);
1376         struct Qdisc *qdisc;
1377
1378         queue_old = netdev_get_tx_queue(dev, qid_old);
1379         queue_new = netdev_get_tx_queue(dev, qid_new);
1380
1381         if (dev->flags & IFF_UP)
1382                 dev_deactivate(dev);
1383         qdisc = dev_graft_qdisc(queue_old, NULL);
1384         qdisc->dev_queue = queue_new;
1385         qdisc = dev_graft_qdisc(queue_new, qdisc);
1386         if (dev->flags & IFF_UP)
1387                 dev_activate(dev);
1388
1389         WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1390 }
1391
1392 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1393                      struct Qdisc **old, struct netlink_ext_ack *extack)
1394 {
1395         struct netdev_queue *dev_queue = sch->dev_queue;
1396         struct htb_class *cl = (struct htb_class *)arg;
1397         struct htb_sched *q = qdisc_priv(sch);
1398         struct Qdisc *old_q;
1399
1400         if (cl->level)
1401                 return -EINVAL;
1402
1403         if (q->offload) {
                /* "new" may be NULL here (a default pfifo is created below),
                 * so take the queue from the existing leaf and only
                 * sanity-check "new" once it is known to be non-NULL
                 */
1404                 dev_queue = cl->leaf.q->dev_queue;
1405                 WARN_ON(new && new->dev_queue != dev_queue);
1406         }
1407
1408         if (!new) {
1409                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1410                                         cl->common.classid, extack);
1411                 if (!new)
1412                         return -ENOBUFS;
1413         }
1414
1415         if (q->offload) {
1416                 htb_set_lockdep_class_child(new);
1417                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1418                 qdisc_refcount_inc(new);
1419                 old_q = htb_graft_helper(dev_queue, new);
1420         }
1421
1422         *old = qdisc_replace(sch, new, &cl->leaf.q);
1423
1424         if (q->offload) {
1425                 WARN_ON(old_q != *old);
1426                 qdisc_put(old_q);
1427         }
1428
1429         return 0;
1430 }
1431
1432 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1433 {
1434         struct htb_class *cl = (struct htb_class *)arg;
1435         return !cl->level ? cl->leaf.q : NULL;
1436 }
1437
1438 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1439 {
1440         struct htb_class *cl = (struct htb_class *)arg;
1441
1442         htb_deactivate(qdisc_priv(sch), cl);
1443 }
1444
1445 static inline int htb_parent_last_child(struct htb_class *cl)
1446 {
1447         if (!cl->parent)
1448                 /* the root class */
1449                 return 0;
1450         if (cl->parent->children > 1)
1451                 /* not the last child */
1452                 return 0;
1453         return 1;
1454 }
1455
1456 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1457                                struct Qdisc *new_q)
1458 {
1459         struct htb_sched *q = qdisc_priv(sch);
1460         struct htb_class *parent = cl->parent;
1461
1462         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1463
1464         if (parent->cmode != HTB_CAN_SEND)
1465                 htb_safe_rb_erase(&parent->pq_node,
1466                                   &q->hlevel[parent->level].wait_pq);
1467
1468         parent->level = 0;
1469         memset(&parent->inner, 0, sizeof(parent->inner));
1470         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1471         parent->tokens = parent->buffer;
1472         parent->ctokens = parent->cbuffer;
1473         parent->t_c = ktime_get_ns();
1474         parent->cmode = HTB_CAN_SEND;
1475 }
1476
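/* Offload counterpart of htb_parent_to_leaf: graft new_q onto the TX queue
 * that the demoted parent will use as a leaf, taking the extra reference
 * that dev_queue->qdisc keeps. The queue is expected to hold only a builtin
 * qdisc at this point, hence the WARN_ON.
 */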
1477 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1478                                        struct netdev_queue *dev_queue,
1479                                        struct Qdisc *new_q)
1480 {
1481         struct Qdisc *old_q;
1482
1483         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1484         qdisc_refcount_inc(new_q);
1485         old_q = htb_graft_helper(dev_queue, new_q);
1486         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1487 }
1488
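/* Tear down the offloaded state of leaf class 'cl': un-graft its qdisc
 * (unless the whole qdisc is being destroyed), fold its byte/packet counts
 * into the parent's bstats_bias, and issue TC_HTB_LEAF_DEL /
 * TC_HTB_LEAF_DEL_LAST(_FORCE) to the driver. If the driver moved another
 * queue's qdisc as a side effect, relocate it with htb_offload_move_qdisc
 * (or just update q->dev_queue when destroying).
 */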
1489 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1490                                      bool last_child, bool destroying,
1491                                      struct netlink_ext_ack *extack)
1492 {
1493         struct tc_htb_qopt_offload offload_opt;
1494         struct Qdisc *q = cl->leaf.q;
1495         struct Qdisc *old = NULL;
1496         int err;
1497
1498         if (cl->level)
1499                 return -EINVAL;
1500
1501         WARN_ON(!q);
1502         if (!destroying) {
1503                 /* On destroy of the whole HTB this un-graft is skipped.
1504                  * Two cases are possible then:
1505                  * 1. q is a normal qdisc, but q->dev_queue has a noop qdisc;
1506                  * 2. q is a noop qdisc (for nodes that were inner), and
1507                  *    q->dev_queue is noop_netdev_queue.  */
1508                 old = htb_graft_helper(q->dev_queue, NULL);
1509                 WARN_ON(!old);
1510                 WARN_ON(old != q);
1511         }
1512
1513         if (cl->parent) {
1514                 cl->parent->bstats_bias.bytes += q->bstats.bytes;
1515                 cl->parent->bstats_bias.packets += q->bstats.packets;
1516         }
1517
1518         offload_opt = (struct tc_htb_qopt_offload) {
1519                 .command = !last_child ? TC_HTB_LEAF_DEL :
1520                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1521                            TC_HTB_LEAF_DEL_LAST,
1522                 .classid = cl->common.classid,
1523                 .extack = extack,
1524         };
1525         err = htb_offload(qdisc_dev(sch), &offload_opt);
1526
1527         if (!err || destroying)
1528                 qdisc_put(old);
1529         else
1530                 htb_graft_helper(q->dev_queue, old);
1531
1532         if (last_child)
1533                 return err;
1534
1535         if (!err && offload_opt.moved_qid != 0) {
1536                 if (destroying)
1537                         q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1538                                                            offload_opt.qid);
1539                 else
1540                         htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1541                                                offload_opt.qid);
1542         }
1543
1544         return err;
1545 }
1546
1547 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1548 {
1549         if (!cl->level) {
1550                 WARN_ON(!cl->leaf.q);
1551                 qdisc_put(cl->leaf.q);
1552         }
1553         gen_kill_estimator(&cl->rate_est);
1554         tcf_block_put(cl->block);
1555         kfree(cl);
1556 }
1557
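/* Qdisc_ops::destroy for HTB. Filters are dropped before the classes they
 * point to, then all classes are freed. With offload this is done bottom-up
 * (leaves first), so the class hash is walked repeatedly until no leaf is
 * left, and TC_HTB_DESTROY plus the direct qdiscs are cleaned up at the end.
 */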
1558 static void htb_destroy(struct Qdisc *sch)
1559 {
1560         struct net_device *dev = qdisc_dev(sch);
1561         struct tc_htb_qopt_offload offload_opt;
1562         struct htb_sched *q = qdisc_priv(sch);
1563         struct hlist_node *next;
1564         bool nonempty, changed;
1565         struct htb_class *cl;
1566         unsigned int i;
1567
1568         cancel_work_sync(&q->work);
1569         qdisc_watchdog_cancel(&q->watchdog);
1570         /* This line used to be after the htb_destroy_class call below,
1571          * and surprisingly it worked in 2.4. But it must precede it,
1572          * because filters need their target class alive to be able to call
1573          * unbind_filter on it (without an Oops).
1574          */
1575         tcf_block_put(q->block);
1576
1577         for (i = 0; i < q->clhash.hashsize; i++) {
1578                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1579                         tcf_block_put(cl->block);
1580                         cl->block = NULL;
1581                 }
1582         }
1583
1584         do {
1585                 nonempty = false;
1586                 changed = false;
1587                 for (i = 0; i < q->clhash.hashsize; i++) {
1588                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1589                                                   common.hnode) {
1590                                 bool last_child;
1591
1592                                 if (!q->offload) {
1593                                         htb_destroy_class(sch, cl);
1594                                         continue;
1595                                 }
1596
1597                                 nonempty = true;
1598
1599                                 if (cl->level)
1600                                         continue;
1601
1602                                 changed = true;
1603
1604                                 last_child = htb_parent_last_child(cl);
1605                                 htb_destroy_class_offload(sch, cl, last_child,
1606                                                           true, NULL);
1607                                 qdisc_class_hash_remove(&q->clhash,
1608                                                         &cl->common);
1609                                 if (cl->parent)
1610                                         cl->parent->children--;
1611                                 if (last_child)
1612                                         htb_parent_to_leaf(sch, cl, NULL);
1613                                 htb_destroy_class(sch, cl);
1614                         }
1615                 }
1616         } while (changed);
1617         WARN_ON(nonempty);
1618
1619         qdisc_class_hash_destroy(&q->clhash);
1620         __qdisc_reset_queue(&q->direct_queue);
1621
1622         if (!q->offload)
1623                 return;
1624
1625         offload_opt = (struct tc_htb_qopt_offload) {
1626                 .command = TC_HTB_DESTROY,
1627         };
1628         htb_offload(dev, &offload_opt);
1629
1630         if (!q->direct_qdiscs)
1631                 return;
1632         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1633                 qdisc_put(q->direct_qdiscs[i]);
1634         kfree(q->direct_qdiscs);
1635 }
1636
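/* Qdisc_class_ops::delete for HTB. A class can only be deleted when it has
 * no children and no filters referencing it. If it was its parent's last
 * child, the parent is turned back into a leaf with a fresh pfifo qdisc.
 */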
1637 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1638                       struct netlink_ext_ack *extack)
1639 {
1640         struct htb_sched *q = qdisc_priv(sch);
1641         struct htb_class *cl = (struct htb_class *)arg;
1642         struct Qdisc *new_q = NULL;
1643         int last_child = 0;
1644         int err;
1645
1646         /* TODO: why don't we allow deleting a whole subtree? References?
1647          * Does the tc subsystem guarantee that htb_destroy holds no class
1648          * refs, so that we could remove the children safely there?
1649          */
1650         if (cl->children || cl->filter_cnt)
1651                 return -EBUSY;
1652
1653         if (!cl->level && htb_parent_last_child(cl))
1654                 last_child = 1;
1655
1656         if (q->offload) {
1657                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1658                                                 extack);
1659                 if (err)
1660                         return err;
1661         }
1662
1663         if (last_child) {
1664                 struct netdev_queue *dev_queue;
1665
1666                 dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1667                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1668                                           cl->parent->common.classid,
1669                                           NULL);
1670                 if (q->offload) {
1671                         if (new_q)
1672                                 htb_set_lockdep_class_child(new_q);
1673                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1674                 }
1675         }
1676
1677         sch_tree_lock(sch);
1678
1679         if (!cl->level)
1680                 qdisc_purge_queue(cl->leaf.q);
1681
1682         /* delete from hash and active; remainder in destroy_class */
1683         qdisc_class_hash_remove(&q->clhash, &cl->common);
1684         if (cl->parent)
1685                 cl->parent->children--;
1686
1687         if (cl->prio_activity)
1688                 htb_deactivate(q, cl);
1689
1690         if (cl->cmode != HTB_CAN_SEND)
1691                 htb_safe_rb_erase(&cl->pq_node,
1692                                   &q->hlevel[cl->level].wait_pq);
1693
1694         if (last_child)
1695                 htb_parent_to_leaf(sch, cl, new_q);
1696
1697         sch_tree_unlock(sch);
1698
1699         htb_destroy_class(sch, cl);
1700         return 0;
1701 }
1702
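/* Qdisc_class_ops::change for HTB: create a new class or modify an existing
 * one from the TCA_HTB_PARMS/RATE64/CEIL64 attributes, e.g. (illustrative
 * iproute2 invocation):
 *
 *   tc class add dev eth0 parent 1: classid 1:10 htb rate 1mbit ceil 2mbit
 *
 * Creating the first child of a leaf turns that leaf into an inner class;
 * in offload mode this also means TC_HTB_LEAF_ALLOC_QUEUE or
 * TC_HTB_LEAF_TO_INNER calls into the driver.
 */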
1703 static int htb_change_class(struct Qdisc *sch, u32 classid,
1704                             u32 parentid, struct nlattr **tca,
1705                             unsigned long *arg, struct netlink_ext_ack *extack)
1706 {
1707         int err = -EINVAL;
1708         struct htb_sched *q = qdisc_priv(sch);
1709         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1710         struct tc_htb_qopt_offload offload_opt;
1711         struct nlattr *opt = tca[TCA_OPTIONS];
1712         struct nlattr *tb[TCA_HTB_MAX + 1];
1713         struct Qdisc *parent_qdisc = NULL;
1714         struct netdev_queue *dev_queue;
1715         struct tc_htb_opt *hopt;
1716         u64 rate64, ceil64;
1717         int warn = 0;
1718
1719         /* extract all subattrs from opt attr */
1720         if (!opt)
1721                 goto failure;
1722
1723         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1724                                           NULL);
1725         if (err < 0)
1726                 goto failure;
1727
1728         err = -EINVAL;
1729         if (tb[TCA_HTB_PARMS] == NULL)
1730                 goto failure;
1731
1732         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1733
1734         hopt = nla_data(tb[TCA_HTB_PARMS]);
1735         if (!hopt->rate.rate || !hopt->ceil.rate)
1736                 goto failure;
1737
1738         /* Keep backward compatibility with rate_table-based iproute2 tc */
1739         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1740                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1741                                               NULL));
1742
1743         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1744                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1745                                               NULL));
1746
1747         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1748         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1749
1750         if (!cl) {              /* new class */
1751                 struct net_device *dev = qdisc_dev(sch);
1752                 struct Qdisc *new_q, *old_q;
1753                 int prio;
1754                 struct {
1755                         struct nlattr           nla;
1756                         struct gnet_estimator   opt;
1757                 } est = {
1758                         .nla = {
1759                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1760                                 .nla_type       = TCA_RATE,
1761                         },
1762                         .opt = {
1763                                 /* 4s interval, 16s averaging constant */
1764                                 .interval       = 2,
1765                                 .ewma_log       = 2,
1766                         },
1767                 };
1768
1769                 /* check for valid classid */
1770                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1771                     htb_find(classid, sch))
1772                         goto failure;
1773
1774                 /* check maximal depth */
1775                 if (parent && parent->parent && parent->parent->level < 2) {
1776                         pr_err("htb: tree is too deep\n");
1777                         goto failure;
1778                 }
1779                 err = -ENOBUFS;
1780                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1781                 if (!cl)
1782                         goto failure;
1783
1784                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1785                 if (err) {
1786                         kfree(cl);
1787                         goto failure;
1788                 }
1789                 if (htb_rate_est || tca[TCA_RATE]) {
1790                         err = gen_new_estimator(&cl->bstats, NULL,
1791                                                 &cl->rate_est,
1792                                                 NULL,
1793                                                 qdisc_root_sleeping_running(sch),
1794                                                 tca[TCA_RATE] ? : &est.nla);
1795                         if (err)
1796                                 goto err_block_put;
1797                 }
1798
1799                 cl->children = 0;
1800                 RB_CLEAR_NODE(&cl->pq_node);
1801
1802                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1803                         RB_CLEAR_NODE(&cl->node[prio]);
1804
1805                 cl->common.classid = classid;
1806
1807                 /* Make sure nothing interrupts us between the two
1808                  * ndo_setup_tc calls.
1809                  */
1810                 ASSERT_RTNL();
1811
1812                 /* Create the leaf qdisc early, because it uses kmalloc(GFP_KERNEL),
1813                  * which can't be used inside sch_tree_lock
1814                  * -- thanks to Karlis Peisenieks
1815                  */
1816                 if (!q->offload) {
1817                         dev_queue = sch->dev_queue;
1818                 } else if (!(parent && !parent->level)) {
1819                         /* Assign a dev_queue to this classid. */
1820                         offload_opt = (struct tc_htb_qopt_offload) {
1821                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1822                                 .classid = cl->common.classid,
1823                                 .parent_classid = parent ?
1824                                         TC_H_MIN(parent->common.classid) :
1825                                         TC_HTB_CLASSID_ROOT,
1826                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1827                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1828                                 .extack = extack,
1829                         };
1830                         err = htb_offload(dev, &offload_opt);
1831                         if (err) {
1832                                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1833                                        err);
1834                                 goto err_kill_estimator;
1835                         }
1836                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1837                 } else { /* First child. */
1838                         dev_queue = parent->leaf.q->dev_queue;
1839                         old_q = htb_graft_helper(dev_queue, NULL);
1840                         WARN_ON(old_q != parent->leaf.q);
1841                         offload_opt = (struct tc_htb_qopt_offload) {
1842                                 .command = TC_HTB_LEAF_TO_INNER,
1843                                 .classid = cl->common.classid,
1844                                 .parent_classid =
1845                                         TC_H_MIN(parent->common.classid),
1846                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1847                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1848                                 .extack = extack,
1849                         };
1850                         err = htb_offload(dev, &offload_opt);
1851                         if (err) {
1852                                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1853                                        err);
1854                                 htb_graft_helper(dev_queue, old_q);
1855                                 goto err_kill_estimator;
1856                         }
1857                         parent->bstats_bias.bytes += old_q->bstats.bytes;
1858                         parent->bstats_bias.packets += old_q->bstats.packets;
1859                         qdisc_put(old_q);
1860                 }
1861                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1862                                           classid, NULL);
1863                 if (q->offload) {
1864                         if (new_q) {
1865                                 htb_set_lockdep_class_child(new_q);
1866                                 /* One ref for cl->leaf.q, the other for
1867                                  * dev_queue->qdisc.
1868                                  */
1869                                 qdisc_refcount_inc(new_q);
1870                         }
1871                         old_q = htb_graft_helper(dev_queue, new_q);
1872                         /* No qdisc_put needed. */
1873                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1874                 }
1875                 sch_tree_lock(sch);
1876                 if (parent && !parent->level) {
1877                         /* turn parent into inner node */
1878                         qdisc_purge_queue(parent->leaf.q);
1879                         parent_qdisc = parent->leaf.q;
1880                         if (parent->prio_activity)
1881                                 htb_deactivate(q, parent);
1882
1883                         /* remove from the event queue (wait_pq) because of the level change */
1884                         if (parent->cmode != HTB_CAN_SEND) {
1885                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1886                                 parent->cmode = HTB_CAN_SEND;
1887                         }
1888                         parent->level = (parent->parent ? parent->parent->level
1889                                          : TC_HTB_MAXDEPTH) - 1;
1890                         memset(&parent->inner, 0, sizeof(parent->inner));
1891                 }
1892
1893                 /* the leaf (this new class) needs an elementary qdisc */
1894                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1895
1896                 cl->parent = parent;
1897
1898                 /* set class to be in HTB_CAN_SEND state */
1899                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1900                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1901                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1902                 cl->t_c = ktime_get_ns();
1903                 cl->cmode = HTB_CAN_SEND;
1904
1905                 /* attach to the hash list and parent's family */
1906                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1907                 if (parent)
1908                         parent->children++;
1909                 if (cl->leaf.q != &noop_qdisc)
1910                         qdisc_hash_add(cl->leaf.q, true);
1911         } else {
1912                 if (tca[TCA_RATE]) {
1913                         err = gen_replace_estimator(&cl->bstats, NULL,
1914                                                     &cl->rate_est,
1915                                                     NULL,
1916                                                     qdisc_root_sleeping_running(sch),
1917                                                     tca[TCA_RATE]);
1918                         if (err)
1919                                 return err;
1920                 }
1921
1922                 if (q->offload) {
1923                         struct net_device *dev = qdisc_dev(sch);
1924
1925                         offload_opt = (struct tc_htb_qopt_offload) {
1926                                 .command = TC_HTB_NODE_MODIFY,
1927                                 .classid = cl->common.classid,
1928                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1929                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1930                                 .extack = extack,
1931                         };
1932                         err = htb_offload(dev, &offload_opt);
1933                         if (err)
1934                                 /* The estimator was replaced, and rollback may fail
1935                                  * as well, so we don't try to recover it; the
1936                                  * estimator won't work properly with the
1937                                  * offload anyway, because bstats are updated
1938                                  * only when the stats are queried.
1939                                  */
1940                                 return err;
1941                 }
1942
1943                 sch_tree_lock(sch);
1944         }
1945
1946         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1947         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1948
1949         /* There used to be a nasty bug here: we have to check that the node
1950          * is really a leaf before changing cl->leaf!
1951          */
1952         if (!cl->level) {
1953                 u64 quantum = cl->rate.rate_bytes_ps;
1954
1955                 do_div(quantum, q->rate2quantum);
1956                 cl->quantum = min_t(u64, quantum, INT_MAX);
1957
1958                 if (!hopt->quantum && cl->quantum < 1000) {
1959                         warn = -1;
1960                         cl->quantum = 1000;
1961                 }
1962                 if (!hopt->quantum && cl->quantum > 200000) {
1963                         warn = 1;
1964                         cl->quantum = 200000;
1965                 }
1966                 if (hopt->quantum)
1967                         cl->quantum = hopt->quantum;
1968                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1969                         cl->prio = TC_HTB_NUMPRIO - 1;
1970         }
1971
1972         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1973         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1974
1975         sch_tree_unlock(sch);
1976         qdisc_put(parent_qdisc);
1977
1978         if (warn)
1979                 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
1980                             cl->common.classid, (warn == -1 ? "small" : "big"));
1981
1982         qdisc_class_hash_grow(sch, &q->clhash);
1983
1984         *arg = (unsigned long)cl;
1985         return 0;
1986
1987 err_kill_estimator:
1988         gen_kill_estimator(&cl->rate_est);
1989 err_block_put:
1990         tcf_block_put(cl->block);
1991         kfree(cl);
1992 failure:
1993         return err;
1994 }
1995
1996 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
1997                                        struct netlink_ext_ack *extack)
1998 {
1999         struct htb_sched *q = qdisc_priv(sch);
2000         struct htb_class *cl = (struct htb_class *)arg;
2001
2002         return cl ? cl->block : q->block;
2003 }
2004
2005 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2006                                      u32 classid)
2007 {
2008         struct htb_class *cl = htb_find(classid, sch);
2009
2010         /*if (cl && !cl->level) return 0;
2011          * The line above used to be there to prevent attaching filters to
2012          * leaves. But at least the tc_index filter uses this just to get the
2013          * class for other reasons, so we have to allow it.
2014          * ----
2015          * 19.6.2002 As Werner explained, it is OK - binding a filter is just
2016          * another way to "lock" the class - unlike "get", this lock can
2017          * be broken by the class during destroy, IIUC.
2018          */
2019         if (cl)
2020                 cl->filter_cnt++;
2021         return (unsigned long)cl;
2022 }
2023
2024 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2025 {
2026         struct htb_class *cl = (struct htb_class *)arg;
2027
2028         if (cl)
2029                 cl->filter_cnt--;
2030 }
2031
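/* Walk all classes for dumps: iterate the class hash and invoke arg->fn on
 * each class, honouring the skip/count/stop protocol of qdisc_walker.
 */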
2032 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2033 {
2034         struct htb_sched *q = qdisc_priv(sch);
2035         struct htb_class *cl;
2036         unsigned int i;
2037
2038         if (arg->stop)
2039                 return;
2040
2041         for (i = 0; i < q->clhash.hashsize; i++) {
2042                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2043                         if (arg->count < arg->skip) {
2044                                 arg->count++;
2045                                 continue;
2046                         }
2047                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2048                                 arg->stop = 1;
2049                                 return;
2050                         }
2051                         arg->count++;
2052                 }
2053         }
2054 }
2055
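/* Class operations exposed to the tc core: class creation/changes go through
 * htb_change_class, deletion through htb_delete, per-class qdiscs through
 * htb_graft/htb_leaf, and filter blocks through htb_tcf_block and the
 * bind/unbind helpers.
 */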
2056 static const struct Qdisc_class_ops htb_class_ops = {
2057         .select_queue   =       htb_select_queue,
2058         .graft          =       htb_graft,
2059         .leaf           =       htb_leaf,
2060         .qlen_notify    =       htb_qlen_notify,
2061         .find           =       htb_search,
2062         .change         =       htb_change_class,
2063         .delete         =       htb_delete,
2064         .walk           =       htb_walk,
2065         .tcf_block      =       htb_tcf_block,
2066         .bind_tcf       =       htb_bind_filter,
2067         .unbind_tcf     =       htb_unbind_filter,
2068         .dump           =       htb_dump_class,
2069         .dump_stats     =       htb_dump_class_stats,
2070 };
2071
2072 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2073         .cl_ops         =       &htb_class_ops,
2074         .id             =       "htb",
2075         .priv_size      =       sizeof(struct htb_sched),
2076         .enqueue        =       htb_enqueue,
2077         .dequeue        =       htb_dequeue,
2078         .peek           =       qdisc_peek_dequeued,
2079         .init           =       htb_init,
2080         .attach         =       htb_attach,
2081         .reset          =       htb_reset,
2082         .destroy        =       htb_destroy,
2083         .dump           =       htb_dump,
2084         .owner          =       THIS_MODULE,
2085 };
2086
2087 static int __init htb_module_init(void)
2088 {
2089         return register_qdisc(&htb_qdisc_ops);
2090 }
2091 static void __exit htb_module_exit(void)
2092 {
2093         unregister_qdisc(&htb_qdisc_ops);
2094 }
2095
2096 module_init(htb_module_init)
2097 module_exit(htb_module_exit)
2098 MODULE_LICENSE("GPL");