// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
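
/*
 * Illustrative user-space configuration (parameter names per tc-red(8);
 * the example is not part of the original source):
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 ecn adaptive bandwidth 10Mbit
 *
 * "limit" is the hard byte limit described above; "min"/"max" become
 * qth_min/qth_max, which are stored internally in Wlog fixed-point.
 */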

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

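/*
 * Enqueue path.  qavg is an exponentially weighted moving average of the
 * child qdisc's byte backlog.  red_action() picks one of three verdicts:
 * RED_DONT_MARK below qth_min, RED_PROB_MARK between the thresholds (with
 * a probability growing toward max_P), and RED_HARD_MARK above qth_max.
 * With ECN enabled, ECT packets are CE-marked instead of dropped; in
 * nodrop mode, non-ECT packets are queued rather than dropped.  The
 * qe_mark/qe_early_drop qevents let attached tc filters observe, and
 * possibly consume, the skb at those decision points.
 */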
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

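/*
 * Dequeue from the child qdisc.  When the child runs empty, record the
 * start of an idle period so the next enqueue can account for the time
 * the link sat idle when decaying qavg (the 990814 fix noted in the
 * header).
 */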
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	red_restart(&q->vars);
}

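/*
 * Install (TC_RED_REPLACE) or remove (TC_RED_DESTROY) the configuration
 * in hardware via ndo_setup_tc.  qth_min/qth_max are kept in Wlog
 * fixed-point internally, so they are shifted back to byte values here.
 * Callers treat offload as best effort and ignore the return value.
 */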
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

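/*
 * Netlink policy: TCA_RED_FLAGS and later attributes postdate the
 * original ABI, so strict validation starts there (strict_start_type).
 */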
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

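/*
 * Common configuration path shared by red_init() and red_change().
 * TCA_RED_PARMS and TCA_RED_STAB are mandatory.  A bfifo child sized to
 * the hard limit is created outside the tree lock; parameters, flags and
 * the child are switched under sch_tree_lock(), and a replaced child is
 * released only after the lock is dropped.
 */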
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

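/*
 * Adaptive RED: armed when TC_RED_ADAPTATIVE is set, this timer fires
 * every 500 ms and lets red_adaptative_algo() adjust max_P so that the
 * average queue size settles between the configured thresholds.  The
 * root qdisc lock serializes it against the datapath.
 */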
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

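/*
 * Initial setup: start with a noop child, apply the configuration via
 * __red_change(), then bind the optional early_drop and mark qevent
 * blocks.  red_change() additionally verifies that an existing qevent
 * binding is not being altered.
 */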
static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

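/* If offloaded, fold hardware counters into bstats/qstats before dumping. */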
static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

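/*
 * xstats: "early" counts RED drops (probabilistic + forced), "pdrop"
 * counts drops at the hard limit, and "marked" counts ECN marks.  For an
 * offloaded qdisc, the driver first folds hardware counters into q->stats.
 */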
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

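/*
 * RED is classful with exactly one class (minor 1) holding the child
 * qdisc; grafting a new child also notifies an offloading driver.
 */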
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop)
		tc_qdisc_stats_dump(sch, 1, walker);
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");