2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All the packets with IPv6 extension headers (except AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"'s seem to be redundant; RFC 2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 E.g. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as a result, despite its simplicity, we get a pretty
66 powerful classification engine. */
73 struct rsvp_session __rcu *ht[256];
78 struct rsvp_session __rcu *next;
79 __be32 dst[RSVP_DST_LEN];
80 struct tc_rsvp_gpi dpi;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu *ht[16 + 1];
90 struct rsvp_filter __rcu *next;
91 __be32 src[RSVP_DST_LEN];
92 struct tc_rsvp_gpi spi;
95 struct tcf_result res;
99 struct rsvp_session *sess;
101 struct work_struct work;
106 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
108 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
112 return (h ^ protocol ^ tunnelid) & 0xFF;
115 static inline unsigned int hash_src(__be32 *src)
117 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
125 #define RSVP_APPLY_RESULT() \
127 int r = tcf_exts_exec(skb, &f->exts, res); \
134 static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
135 struct tcf_result *res)
137 struct rsvp_head *head = rcu_dereference_bh(tp->root);
138 struct rsvp_session *s;
139 struct rsvp_filter *f;
145 #if RSVP_DST_LEN == 4
146 struct ipv6hdr *nhptr;
148 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
150 nhptr = ipv6_hdr(skb);
154 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
160 #if RSVP_DST_LEN == 4
161 src = &nhptr->saddr.s6_addr32[0];
162 dst = &nhptr->daddr.s6_addr32[0];
163 protocol = nhptr->nexthdr;
164 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
168 protocol = nhptr->protocol;
169 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
170 if (ip_is_fragment(nhptr))
174 h1 = hash_dst(dst, protocol, tunnelid);
177 for (s = rcu_dereference_bh(head->ht[h1]); s;
178 s = rcu_dereference_bh(s->next)) {
179 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
180 protocol == s->protocol &&
182 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
183 #if RSVP_DST_LEN == 4
184 dst[0] == s->dst[0] &&
185 dst[1] == s->dst[1] &&
186 dst[2] == s->dst[2] &&
188 tunnelid == s->tunnelid) {
190 for (f = rcu_dereference_bh(s->ht[h2]); f;
191 f = rcu_dereference_bh(f->next)) {
192 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
193 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
194 #if RSVP_DST_LEN == 4
196 src[0] == f->src[0] &&
197 src[1] == f->src[1] &&
205 if (f->tunnelhdr == 0)
208 tunnelid = f->res.classid;
209 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
214 /* And wildcard bucket... */
215 for (f = rcu_dereference_bh(s->ht[16]); f;
216 f = rcu_dereference_bh(f->next)) {
227 static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
229 struct rsvp_head *head = rtnl_dereference(tp->root);
230 struct rsvp_session *s;
231 struct rsvp_filter __rcu **ins;
232 struct rsvp_filter *pins;
233 unsigned int h1 = h & 0xFF;
234 unsigned int h2 = (h >> 8) & 0xFF;
236 for (s = rtnl_dereference(head->ht[h1]); s;
237 s = rtnl_dereference(s->next)) {
238 for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
239 ins = &pins->next, pins = rtnl_dereference(*ins)) {
240 if (pins->handle == h) {
241 RCU_INIT_POINTER(n->next, pins->next);
242 rcu_assign_pointer(*ins, n);
248 /* Something went wrong if we are trying to replace a non-existent
249 * node. Might as well halt instead of silently failing.
254 static void *rsvp_get(struct tcf_proto *tp, u32 handle)
256 struct rsvp_head *head = rtnl_dereference(tp->root);
257 struct rsvp_session *s;
258 struct rsvp_filter *f;
259 unsigned int h1 = handle & 0xFF;
260 unsigned int h2 = (handle >> 8) & 0xFF;
265 for (s = rtnl_dereference(head->ht[h1]); s;
266 s = rtnl_dereference(s->next)) {
267 for (f = rtnl_dereference(s->ht[h2]); f;
268 f = rtnl_dereference(f->next)) {
269 if (f->handle == handle)
276 static int rsvp_init(struct tcf_proto *tp)
278 struct rsvp_head *data;
280 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
282 rcu_assign_pointer(tp->root, data);
288 static void __rsvp_delete_filter(struct rsvp_filter *f)
290 tcf_exts_destroy(&f->exts);
291 tcf_exts_put_net(&f->exts);
295 static void rsvp_delete_filter_work(struct work_struct *work)
297 struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
300 __rsvp_delete_filter(f);
304 static void rsvp_delete_filter_rcu(struct rcu_head *head)
306 struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
308 INIT_WORK(&f->work, rsvp_delete_filter_work);
309 tcf_queue_work(&f->work);
312 static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
314 tcf_unbind_filter(tp, &f->res);
315 /* all classifiers are required to call tcf_exts_destroy() after rcu
316 * grace period, since converted-to-rcu actions are relying on that
317 * in cleanup() callback
319 if (tcf_exts_get_net(&f->exts))
320 call_rcu(&f->rcu, rsvp_delete_filter_rcu);
322 __rsvp_delete_filter(f);
325 static void rsvp_destroy(struct tcf_proto *tp)
327 struct rsvp_head *data = rtnl_dereference(tp->root);
333 for (h1 = 0; h1 < 256; h1++) {
334 struct rsvp_session *s;
336 while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
337 RCU_INIT_POINTER(data->ht[h1], s->next);
339 for (h2 = 0; h2 <= 16; h2++) {
340 struct rsvp_filter *f;
342 while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
343 rcu_assign_pointer(s->ht[h2], f->next);
344 rsvp_delete_filter(tp, f);
350 kfree_rcu(data, rcu);
353 static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last)
355 struct rsvp_head *head = rtnl_dereference(tp->root);
356 struct rsvp_filter *nfp, *f = arg;
357 struct rsvp_filter __rcu **fp;
358 unsigned int h = f->handle;
359 struct rsvp_session __rcu **sp;
360 struct rsvp_session *nsp, *s = f->sess;
363 fp = &s->ht[(h >> 8) & 0xFF];
364 for (nfp = rtnl_dereference(*fp); nfp;
365 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
367 RCU_INIT_POINTER(*fp, f->next);
368 rsvp_delete_filter(tp, f);
372 for (i = 0; i <= 16; i++)
376 /* OK, session has no flows */
377 sp = &head->ht[h & 0xFF];
378 for (nsp = rtnl_dereference(*sp); nsp;
379 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
381 RCU_INIT_POINTER(*sp, s->next);
393 for (h1 = 0; h1 < 256; h1++) {
394 if (rcu_access_pointer(head->ht[h1])) {
403 static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
405 struct rsvp_head *data = rtnl_dereference(tp->root);
411 if ((data->hgenerator += 0x10000) == 0)
412 data->hgenerator = 0x10000;
413 h = data->hgenerator|salt;
414 if (!rsvp_get(tp, h))
420 static int tunnel_bts(struct rsvp_head *data)
422 int n = data->tgenerator >> 5;
423 u32 b = 1 << (data->tgenerator & 0x1F);
425 if (data->tmap[n] & b)
431 static void tunnel_recycle(struct rsvp_head *data)
433 struct rsvp_session __rcu **sht = data->ht;
437 memset(tmap, 0, sizeof(tmap));
439 for (h1 = 0; h1 < 256; h1++) {
440 struct rsvp_session *s;
441 for (s = rtnl_dereference(sht[h1]); s;
442 s = rtnl_dereference(s->next)) {
443 for (h2 = 0; h2 <= 16; h2++) {
444 struct rsvp_filter *f;
446 for (f = rtnl_dereference(s->ht[h2]); f;
447 f = rtnl_dereference(f->next)) {
448 if (f->tunnelhdr == 0)
450 data->tgenerator = f->res.classid;
457 memcpy(data->tmap, tmap, sizeof(tmap));
460 static u32 gen_tunnel(struct rsvp_head *data)
464 for (k = 0; k < 2; k++) {
465 for (i = 255; i > 0; i--) {
466 if (++data->tgenerator == 0)
467 data->tgenerator = 1;
468 if (tunnel_bts(data))
469 return data->tgenerator;
471 tunnel_recycle(data);
476 static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
477 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
478 [TCA_RSVP_DST] = { .type = NLA_BINARY,
479 .len = RSVP_DST_LEN * sizeof(u32) },
480 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
481 .len = RSVP_DST_LEN * sizeof(u32) },
482 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
485 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
486 struct tcf_proto *tp, unsigned long base,
489 void **arg, bool ovr)
491 struct rsvp_head *data = rtnl_dereference(tp->root);
492 struct rsvp_filter *f, *nfp;
493 struct rsvp_filter __rcu **fp;
494 struct rsvp_session *nsp, *s;
495 struct rsvp_session __rcu **sp;
496 struct tc_rsvp_pinfo *pinfo = NULL;
497 struct nlattr *opt = tca[TCA_OPTIONS];
498 struct nlattr *tb[TCA_RSVP_MAX + 1];
505 return handle ? -EINVAL : 0;
507 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
511 err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
514 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
520 /* Node exists: adjust only classid */
521 struct rsvp_filter *n;
523 if (f->handle != handle && handle)
526 n = kmemdup(f, sizeof(*f), GFP_KERNEL);
532 err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
538 if (tb[TCA_RSVP_CLASSID]) {
539 n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
540 tcf_bind_filter(tp, &n->res, base);
543 tcf_exts_change(&n->exts, &e);
544 rsvp_replace(tp, n, handle);
548 /* Now more serious part... */
552 if (tb[TCA_RSVP_DST] == NULL)
556 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
560 err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
564 if (tb[TCA_RSVP_SRC]) {
565 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
566 h2 = hash_src(f->src);
568 if (tb[TCA_RSVP_PINFO]) {
569 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
571 f->tunnelhdr = pinfo->tunnelhdr;
573 if (tb[TCA_RSVP_CLASSID])
574 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
576 dst = nla_data(tb[TCA_RSVP_DST]);
577 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
580 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
585 if (f->res.classid > 255)
589 if (f->res.classid == 0 &&
590 (f->res.classid = gen_tunnel(data)) == 0)
594 for (sp = &data->ht[h1];
595 (s = rtnl_dereference(*sp)) != NULL;
597 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
598 pinfo && pinfo->protocol == s->protocol &&
599 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
600 #if RSVP_DST_LEN == 4
601 dst[0] == s->dst[0] &&
602 dst[1] == s->dst[1] &&
603 dst[2] == s->dst[2] &&
605 pinfo->tunnelid == s->tunnelid) {
608 /* OK, we found appropriate session */
613 if (f->tunnelhdr == 0)
614 tcf_bind_filter(tp, &f->res, base);
616 tcf_exts_change(&f->exts, &e);
619 for (nfp = rtnl_dereference(*fp); nfp;
620 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
621 __u32 mask = nfp->spi.mask & f->spi.mask;
623 if (mask != f->spi.mask)
626 RCU_INIT_POINTER(f->next, nfp);
627 rcu_assign_pointer(*fp, f);
634 /* No session found. Create new one. */
637 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
640 memcpy(s->dst, dst, sizeof(s->dst));
644 s->protocol = pinfo->protocol;
645 s->tunnelid = pinfo->tunnelid;
648 for (nsp = rtnl_dereference(*sp); nsp;
649 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
650 if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
653 RCU_INIT_POINTER(s->next, nsp);
654 rcu_assign_pointer(*sp, s);
659 tcf_exts_destroy(&f->exts);
662 tcf_exts_destroy(&e);
666 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
668 struct rsvp_head *head = rtnl_dereference(tp->root);
674 for (h = 0; h < 256; h++) {
675 struct rsvp_session *s;
677 for (s = rtnl_dereference(head->ht[h]); s;
678 s = rtnl_dereference(s->next)) {
679 for (h1 = 0; h1 <= 16; h1++) {
680 struct rsvp_filter *f;
682 for (f = rtnl_dereference(s->ht[h1]); f;
683 f = rtnl_dereference(f->next)) {
684 if (arg->count < arg->skip) {
688 if (arg->fn(tp, f, arg) < 0) {
699 static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
700 struct sk_buff *skb, struct tcmsg *t)
702 struct rsvp_filter *f = fh;
703 struct rsvp_session *s;
705 struct tc_rsvp_pinfo pinfo;
711 t->tcm_handle = f->handle;
713 nest = nla_nest_start(skb, TCA_OPTIONS);
715 goto nla_put_failure;
717 if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
718 goto nla_put_failure;
721 pinfo.protocol = s->protocol;
722 pinfo.tunnelid = s->tunnelid;
723 pinfo.tunnelhdr = f->tunnelhdr;
725 if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
726 goto nla_put_failure;
727 if (f->res.classid &&
728 nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
729 goto nla_put_failure;
730 if (((f->handle >> 8) & 0xFF) != 16 &&
731 nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
732 goto nla_put_failure;
734 if (tcf_exts_dump(skb, &f->exts) < 0)
735 goto nla_put_failure;
737 nla_nest_end(skb, nest);
739 if (tcf_exts_dump_stats(skb, &f->exts) < 0)
740 goto nla_put_failure;
744 nla_nest_cancel(skb, nest);
748 static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
750 struct rsvp_filter *f = fh;
752 if (f && f->res.classid == classid)
756 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
758 .classify = rsvp_classify,
760 .destroy = rsvp_destroy,
762 .change = rsvp_change,
763 .delete = rsvp_delete,
766 .bind_class = rsvp_bind_class,
767 .owner = THIS_MODULE,
770 static int __init init_rsvp(void)
772 return register_tcf_proto_ops(&RSVP_OPS);
775 static void __exit exit_rsvp(void)
777 unregister_tcf_proto_ops(&RSVP_OPS);
780 module_init(init_rsvp)
781 module_exit(exit_rsvp)