/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816: ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on queue length, should be chosen > qth_max
        to allow packet bursts. This parameter does not
        affect the algorithm's behaviour and can be chosen
        arbitrarily high (well, less than the RAM size).
        Really, this limit will never be reached
        if RED works correctly.
 */
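
/* For illustration only: these parameters are normally supplied from
 * user space with iproute2, roughly like
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *        avpkt 1000 burst 55 probability 0.02 bandwidth 10mbit ecn
 *
 * where "limit" maps to q->limit below, "min"/"max" to qth_min/qth_max
 * and "probability" to max_P.  Exact option names and defaults depend
 * on the iproute2 version in use.
 */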

struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */
        unsigned char           flags;
        struct timer_list       adapt_timer;
        struct Qdisc            *sch;
        struct red_parms        parms;
        struct red_vars         vars;
        struct red_stats        stats;
        struct Qdisc            *qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

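/* Enqueue: refresh the EWMA average queue size from the child qdisc's
 * backlog, then let red_action() decide between "don't mark",
 * probabilistic mark and hard mark.  Marked packets are ECN-marked when
 * TC_RED_ECN is set and the packet is ECN-capable, otherwise they are
 * dropped as congestion drops; everything else is queued to the child
 * (bfifo) qdisc.
 */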
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        int ret;

        q->vars.qavg = red_calc_qavg(&q->parms,
                                     &q->vars,
                                     child->qstats.backlog);

        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
                if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                q->stats.prob_mark++;
                break;

        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
                if (red_use_harddrop(q) || !red_use_ecn(q) ||
                    !INET_ECN_set_ce(skb)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                q->stats.forced_mark++;
                break;
        }

        ret = qdisc_enqueue(skb, child, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                qdisc_qstats_backlog_inc(sch, skb);
                sch->q.qlen++;
        } else if (net_xmit_drop_count(ret)) {
                q->stats.pdrop++;
                qdisc_qstats_drop(sch);
        }
        return ret;

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

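/* Dequeue pulls from the child qdisc.  When the child runs empty, an
 * idle period is started so that the average queue size can be aged
 * correctly over the time the link sits idle.
 */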
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb) {
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
        } else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }
        return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        red_restart(&q->vars);
}

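/* Mirror the software configuration to capable hardware through
 * ndo_setup_tc(TC_SETUP_QDISC_RED): TC_RED_REPLACE installs or updates
 * the offloaded qdisc, TC_RED_DESTROY removes it.  qth_min/qth_max are
 * stored left-shifted by Wlog internally, hence the shift back before
 * they are handed to the driver.
 */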
static int red_offload(struct Qdisc *sch, bool enable)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload opt = {
                .handle = sch->handle,
                .parent = sch->parent,
        };

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        if (enable) {
                opt.command = TC_RED_REPLACE;
                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
                opt.set.probability = q->parms.max_P;
                opt.set.limit = q->limit;
                opt.set.is_ecn = red_use_ecn(q);
                opt.set.is_harddrop = red_use_harddrop(q);
                opt.set.qstats = &sch->qstats;
        } else {
                opt.command = TC_RED_DESTROY;
        }

        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        del_timer_sync(&q->adapt_timer);
        red_offload(sch, false);
        qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
};

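/* Handles both initial configuration and later changes: parse
 * TCA_RED_PARMS/TCA_RED_STAB/TCA_RED_MAX_P, validate the thresholds and,
 * if a limit is given, allocate a new bfifo child sized to it.  The
 * child swap and parameter update happen under the qdisc tree lock; the
 * old child is released only after the lock is dropped.
 */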
static int red_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        struct Qdisc *old_child = NULL, *child = NULL;
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        struct tc_red_qopt *ctl;
        int err;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_RED_PARMS]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
                return -EINVAL;

        if (ctl->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
                                         extack);
                if (IS_ERR(child))
                        return PTR_ERR(child);

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->limit = ctl->limit;
        if (child) {
                qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
                                          q->qdisc->qstats.backlog);
                old_child = q->qdisc;
                q->qdisc = child;
        }

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_RED_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        del_timer(&q->adapt_timer);
        if (ctl->flags & TC_RED_ADAPTATIVE)
                mod_timer(&q->adapt_timer, jiffies + HZ/2);

        if (!q->qdisc->q.qlen)
                red_start_of_idle_period(&q->vars);

        sch_tree_unlock(sch);

        red_offload(sch, true);

        if (old_child)
                qdisc_put(old_child);
        return 0;
}

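/* Adaptive RED: when TC_RED_ADAPTATIVE is set, this timer fires every
 * 500 ms (HZ/2) and lets red_adaptative_algo() adjust max_P so that the
 * average queue size stays within the configured threshold band.
 */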
static inline void red_adaptative_timer(struct timer_list *t)
{
        struct red_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        red_adaptative_algo(&q->parms, &q->vars);
        mod_timer(&q->adapt_timer, jiffies + HZ/2);
        spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        q->qdisc = &noop_qdisc;
        q->sch = sch;
        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
        return red_change(sch, opt, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
        struct tc_red_qopt_offload hw_stats = {
                .command = TC_RED_STATS,
                .handle = sch->handle,
                .parent = sch->parent,
                {
                        .stats.bstats = &sch->bstats,
                        .stats.qstats = &sch->qstats,
                },
        };

        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

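/* Dump the current configuration back to user space.  The thresholds
 * are shifted right by Wlog so user space sees the byte values it
 * originally configured; offloading drivers get a chance to refresh
 * bstats/qstats first via red_dump_offload_stats().
 */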
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };
        int err;

        err = red_dump_offload_stats(sch);
        if (err)
                goto nla_put_failure;

        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_xstats st = {0};

        if (sch->flags & TCQ_F_OFFLOADED) {
                struct tc_red_qopt_offload hw_stats_request = {
                        .command = TC_RED_XSTATS,
                        .handle = sch->handle,
                        .parent = sch->parent,
                        {
                                .xstats = &q->stats,
                        },
                };
                dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                              &hw_stats_request);
        }
        st.early = q->stats.prob_drop + q->stats.forced_drop;
        st.pdrop = q->stats.pdrop;
        st.other = q->stats.other;
        st.marked = q->stats.prob_mark + q->stats.forced_mark;

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

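/* RED exposes exactly one class, the child qdisc, so the class ops are
 * mostly trivial: grafting replaces the child (and notifies offloading
 * drivers), red_find() always reports class 1 and red_walk() visits
 * that single class.
 */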
static void red_graft_offload(struct Qdisc *sch,
                              struct Qdisc *new, struct Qdisc *old,
                              struct netlink_ext_ack *extack)
{
        struct tc_red_qopt_offload graft_offload = {
                .handle         = sch->handle,
                .parent         = sch->parent,
                .child_handle   = new->handle,
                .command        = TC_RED_GRAFT,
        };

        qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
                                   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);

        red_graft_offload(sch, new, *old, extack);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");