// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:	ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size)
	Really, this limit will never be reached
	if RED works correctly.
 */
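
/* Illustrative userspace configuration (example values only, not a
 * recommendation), using the iproute2 tc front end for this qdisc:
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 probability 0.02 bandwidth 10mbit ecn
 *
 * tc converts min/max/avpkt/burst/bandwidth/probability into the qth_min,
 * qth_max, Wlog, Plog and Scell_log fields of struct tc_red_qopt plus the
 * TCA_RED_STAB table that __red_change() below validates with
 * red_check_params() and installs with red_set_parms().
 */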

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

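/* Enqueue path: refresh the EWMA queue average from the child qdisc's
 * backlog, let red_action() classify the packet (no action, probabilistic
 * mark or hard mark), then either ECN-mark it, pass it through the mark /
 * early_drop qevent blocks, drop it, or enqueue it into the child qdisc.
 */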
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

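/* Push the current RED configuration to (or remove it from) the underlying
 * device via ndo_setup_tc(TC_SETUP_QDISC_RED), so capable NICs can run
 * RED/ECN in hardware; -EOPNOTSUPP simply leaves the qdisc in software.
 */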
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

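/* Shared by red_init() and red_change(): validate the netlink parameters,
 * optionally build a new bfifo child sized to ctl->limit, then swap in the
 * new flags, limit, parms and child under sch_tree_lock() before re-arming
 * the adaptative timer and refreshing the hardware offload.
 */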
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

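/* With TC_RED_ADAPTATIVE set, this timer fires roughly twice a second and
 * lets red_adaptative_algo() re-tune max_P from the measured average queue,
 * holding the root qdisc lock while the parameters are updated.
 */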
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

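/* When the qdisc is offloaded, ask the driver to fold its hardware byte,
 * packet and queue counters into sch->bstats and sch->qstats before they
 * are dumped to userspace.
 */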
static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");