2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as a result, despite its simplicity, we get a pretty
66 powerful classification engine. */
/*
 * NOTE(review): the struct declarations enclosing these members are not
 * visible in this excerpt; the comments below are derived from how the
 * members are used elsewhere in the file.
 */
/* Top-level table of session chains; 256 slots, indexed by hash_dst(). */
73 struct rsvp_session __rcu *ht[256];
/* Next session in the same top-level bucket. */
78 struct rsvp_session __rcu *next;
/* Destination address and destination port ID compared by rsvp_classify(). */
79 __be32 dst[RSVP_DST_LEN];
80 struct tc_rsvp_gpi dpi;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu *ht[16 + 1];
/* Next filter in the same slot. */
90 struct rsvp_filter __rcu *next;
/* Source address and source port ID compared by rsvp_classify(). */
91 __be32 src[RSVP_DST_LEN];
92 struct tc_rsvp_gpi spi;
/* Classification result; rsvp_classify() also reads res.classid as a tunnel ID. */
95 struct tcf_result res;
/* Session this filter hangs off (read by rsvp_delete()). */
99 struct rsvp_session *sess;
/* Work item queued by rsvp_delete_filter() for deferred teardown. */
100 struct rcu_work rwork;
/*
 * Hash (destination address, protocol, tunnel id) to a top-level bucket.
 * The hash is seeded from the last 32-bit word of the destination address
 * and the result is masked to 0..255.
 * NOTE(review): intermediate mixing steps on h are not visible in this
 * excerpt.
 */
103 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
105 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
109 return (h ^ protocol ^ tunnelid) & 0xFF;
/*
 * Hash a source address; the hash is seeded from the last 32-bit word of
 * the address.
 * NOTE(review): the folding steps and the return statement are not visible
 * in this excerpt; callers index a 16-slot table with the result, so the
 * value is presumably reduced to 0..15 — confirm.
 */
112 static inline unsigned int hash_src(__be32 *src)
114 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
/*
 * Helper macro used on a filter match: runs the filter's extensions through
 * tcf_exts_exec() with the caller's skb, f and res.
 * NOTE(review): how the return value r is handled is not visible in this
 * excerpt.
 */
122 #define RSVP_APPLY_RESULT() \
124 int r = tcf_exts_exec(skb, &f->exts, res); \
/*
 * Classify a packet against the RSVP session/filter tables.
 *
 * The session chain selected by hash_dst() is searched for a matching
 * destination, protocol, DPI and tunnel id; within a matching session the
 * filter slot h2 and then the wildcard slot (index 16) are searched for a
 * matching source and SPI.
 *
 * NOTE(review): several lines of this function (declarations, return
 * statements, #else/#endif, labels) are not visible in this excerpt.
 */
131 static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
132 struct tcf_result *res)
134 struct rsvp_head *head = rcu_dereference_bh(tp->root);
135 struct rsvp_session *s;
136 struct rsvp_filter *f;
/* RSVP_DST_LEN == 4 selects the IPv6 header layout. */
142 #if RSVP_DST_LEN == 4
143 struct ipv6hdr *nhptr;
/* Make sure the whole network header is available before reading it. */
145 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
147 nhptr = ipv6_hdr(skb);
151 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
157 #if RSVP_DST_LEN == 4
158 src = &nhptr->saddr.s6_addr32[0];
159 dst = &nhptr->daddr.s6_addr32[0];
160 protocol = nhptr->nexthdr;
/* Transport header starts right after the fixed IPv6 header. */
161 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
165 protocol = nhptr->protocol;
/* IPv4: ihl counts 32-bit words, so shift by 2 for a byte offset. */
166 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
167 if (ip_is_fragment(nhptr))
171 h1 = hash_dst(dst, protocol, tunnelid);
/*
 * Walk the session chain for this bucket.
 * NOTE(review): the DPI/SPI tests below read a u32 at xprt + offset; no
 * bounds or alignment check on that offset is visible in this excerpt —
 * confirm the offsets are validated elsewhere.
 */
174 for (s = rcu_dereference_bh(head->ht[h1]); s;
175 s = rcu_dereference_bh(s->next)) {
176 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
177 protocol == s->protocol &&
179 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
180 #if RSVP_DST_LEN == 4
181 dst[0] == s->dst[0] &&
182 dst[1] == s->dst[1] &&
183 dst[2] == s->dst[2] &&
185 tunnelid == s->tunnelid) {
187 for (f = rcu_dereference_bh(s->ht[h2]); f;
188 f = rcu_dereference_bh(f->next)) {
189 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
190 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
191 #if RSVP_DST_LEN == 4
193 src[0] == f->src[0] &&
194 src[1] == f->src[1] &&
202 if (f->tunnelhdr == 0)
/*
 * Tunnel session: the classid carries the tunnel id, and the header
 * pointer is repositioned tunnelhdr bytes past xprt for the next lookup.
 */
205 tunnelid = f->res.classid;
206 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
211 /* And wildcard bucket... */
212 for (f = rcu_dereference_bh(s->ht[16]); f;
213 f = rcu_dereference_bh(f->next)) {
/*
 * Replace the filter whose handle is h with the new filter n.
 *
 * The low byte of h selects the session bucket and the next byte selects
 * the filter slot inside each session of that bucket.
 */
224 static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
226 struct rsvp_head *head = rtnl_dereference(tp->root);
227 struct rsvp_session *s;
228 struct rsvp_filter __rcu **ins;
229 struct rsvp_filter *pins;
230 unsigned int h1 = h & 0xFF;
231 unsigned int h2 = (h >> 8) & 0xFF;
233 for (s = rtnl_dereference(head->ht[h1]); s;
234 s = rtnl_dereference(s->next)) {
/*
 * NOTE(review): this inner loop has no termination condition in its
 * header and pins is dereferenced without a visible NULL check — confirm
 * that the handle is guaranteed to be present in the first session
 * visited.
 */
235 for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
236 ins = &pins->next, pins = rtnl_dereference(*ins)) {
237 if (pins->handle == h) {
/* n takes over the old node's successor, then is published in its place. */
238 RCU_INIT_POINTER(n->next, pins->next);
239 rcu_assign_pointer(*ins, n);
245 /* Something went wrong if we are trying to replace a non-existent
246 * node. Might as well halt instead of silently failing.
// Look up a filter by handle.
// The low byte of the handle selects the session bucket and the next byte
// selects the filter slot; every session in the bucket is searched.
// NOTE(review): the return statements are not visible in this excerpt.
251 static void *rsvp_get(struct tcf_proto *tp, u32 handle)
253 struct rsvp_head *head = rtnl_dereference(tp->root);
254 struct rsvp_session *s;
255 struct rsvp_filter *f;
256 unsigned int h1 = handle & 0xFF;
257 unsigned int h2 = (handle >> 8) & 0xFF;
262 for (s = rtnl_dereference(head->ht[h1]); s;
263 s = rtnl_dereference(s->next)) {
264 for (f = rtnl_dereference(s->ht[h2]); f;
265 f = rtnl_dereference(f->next)) {
266 if (f->handle == handle)
// Allocate a zeroed rsvp_head and publish it as tp->root.
// NOTE(review): the allocation-failure check and the return statements are
// not visible in this excerpt.
273 static int rsvp_init(struct tcf_proto *tp)
275 struct rsvp_head *data;
277 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
279 rcu_assign_pointer(tp->root, data);
// Tear down a filter's extensions and drop the reference taken on them via
// tcf_exts_get_net().
// NOTE(review): the release of f itself is not visible in this excerpt.
285 static void __rsvp_delete_filter(struct rsvp_filter *f)
287 tcf_exts_destroy(&f->exts);
288 tcf_exts_put_net(&f->exts);
// Work callback: recover the owning rsvp_filter from its embedded work item
// and finish the deletion with __rsvp_delete_filter().
292 static void rsvp_delete_filter_work(struct work_struct *work)
294 struct rsvp_filter *f = container_of(to_rcu_work(work),
298 __rsvp_delete_filter(f);
// Unbind a filter from its class and dispose of it.
// When tcf_exts_get_net() succeeds the teardown is queued as deferred work;
// the direct __rsvp_delete_filter() call is presumably the fallback branch
// (the "else" line is not visible in this excerpt).
302 static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
304 tcf_unbind_filter(tp, &f->res);
305 /* all classifiers are required to call tcf_exts_destroy() after rcu
306 * grace period, since converted-to-rcu actions are relying on that
307 * in cleanup() callback
309 if (tcf_exts_get_net(&f->exts))
310 tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
312 __rsvp_delete_filter(f);
// Destroy the whole classifier instance: every session in each of the 256
// buckets is unlinked, every filter in each of its 17 slots is unlinked and
// handed to rsvp_delete_filter(), and finally the head is released with
// kfree_rcu().
// NOTE(review): the release of each session is not visible in this excerpt.
315 static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
316 struct netlink_ext_ack *extack)
318 struct rsvp_head *data = rtnl_dereference(tp->root);
324 for (h1 = 0; h1 < 256; h1++) {
325 struct rsvp_session *s;
// Pop sessions off the front of the bucket until it is empty.
327 while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
328 RCU_INIT_POINTER(data->ht[h1], s->next);
330 for (h2 = 0; h2 <= 16; h2++) {
331 struct rsvp_filter *f;
// Pop filters off the front of the slot until it is empty.
333 while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
334 rcu_assign_pointer(s->ht[h2], f->next);
335 rsvp_delete_filter(tp, f);
341 kfree_rcu(data, rcu);
// Delete one filter (arg) and, per the existing "session has no flows"
// comment, unlink its session when it has no filters left.
// Bits 8..15 of the handle select the filter slot within the session; the
// low byte selects the session bucket in the head.
// NOTE(review): the comparisons inside the loops, the session release and
// the assignment to *last are not visible in this excerpt.
344 static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
345 bool rtnl_held, struct netlink_ext_ack *extack)
347 struct rsvp_head *head = rtnl_dereference(tp->root);
348 struct rsvp_filter *nfp, *f = arg;
349 struct rsvp_filter __rcu **fp;
350 unsigned int h = f->handle;
351 struct rsvp_session __rcu **sp;
352 struct rsvp_session *nsp, *s = f->sess;
// Walk the slot's chain keeping a pointer to the link so f can be unlinked.
355 fp = &s->ht[(h >> 8) & 0xFF];
356 for (nfp = rtnl_dereference(*fp); nfp;
357 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
359 RCU_INIT_POINTER(*fp, f->next);
360 rsvp_delete_filter(tp, f);
// Scan all 17 slots of the session.
364 for (i = 0; i <= 16; i++)
368 /* OK, session has no flows */
369 sp = &head->ht[h & 0xFF];
370 for (nsp = rtnl_dereference(*sp); nsp;
371 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
373 RCU_INIT_POINTER(*sp, s->next);
// Scan every bucket of the head for a remaining session.
385 for (h1 = 0; h1 < 256; h1++) {
386 if (rcu_access_pointer(head->ht[h1])) {
/*
 * Generate a filter handle that is not yet in use.
 * The per-head generator is advanced in steps of 0x10000 (skipping the
 * value 0 on wrap-around), OR-ed with the caller's salt, and the candidate
 * is checked with rsvp_get().
 * NOTE(review): the retry loop bounds and return statements are not visible
 * in this excerpt.
 */
395 static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
397 struct rsvp_head *data = rtnl_dereference(tp->root);
403 if ((data->hgenerator += 0x10000) == 0)
404 data->hgenerator = 0x10000;
405 h = data->hgenerator|salt;
406 if (!rsvp_get(tp, h))
/*
 * Test the bit for the current tunnel generator value in the tunnel bitmap.
 * n is the index of the 32-bit word and b the bit within it.
 * NOTE(review): "1 << 31" on a signed int shifts into the sign bit when the
 * low five bits of tgenerator are all set — consider 1U. The set step and
 * the return statements are not visible in this excerpt.
 */
412 static int tunnel_bts(struct rsvp_head *data)
414 int n = data->tgenerator >> 5;
415 u32 b = 1 << (data->tgenerator & 0x1F);
417 if (data->tmap[n] & b)
/*
 * Rebuild the tunnel bitmap by walking every filter of every session.
 * A local tmap is zeroed up front and copied into data->tmap at the end.
 * NOTE(review): what happens for filters with tunnelhdr == 0, and what
 * follows the tgenerator assignment, is not visible in this excerpt.
 */
423 static void tunnel_recycle(struct rsvp_head *data)
425 struct rsvp_session __rcu **sht = data->ht;
429 memset(tmap, 0, sizeof(tmap));
431 for (h1 = 0; h1 < 256; h1++) {
432 struct rsvp_session *s;
433 for (s = rtnl_dereference(sht[h1]); s;
434 s = rtnl_dereference(s->next)) {
435 for (h2 = 0; h2 <= 16; h2++) {
436 struct rsvp_filter *f;
438 for (f = rtnl_dereference(s->ht[h2]); f;
439 f = rtnl_dereference(f->next)) {
440 if (f->tunnelhdr == 0)
/* For tunnel filters the classid holds the tunnel id. */
442 data->tgenerator = f->res.classid;
449 memcpy(data->tmap, tmap, sizeof(tmap));
/*
 * Allocate a tunnel id.
 * Makes up to two passes; each pass advances tgenerator up to 255 times
 * (skipping 0) and returns the first value accepted by tunnel_bts().
 * tunnel_recycle() is called after a pass that found nothing.
 * NOTE(review): the final return for the failure case is not visible in
 * this excerpt.
 */
452 static u32 gen_tunnel(struct rsvp_head *data)
456 for (k = 0; k < 2; k++) {
457 for (i = 255; i > 0; i--) {
458 if (++data->tgenerator == 0)
459 data->tgenerator = 1;
460 if (tunnel_bts(data))
461 return data->tgenerator;
463 tunnel_recycle(data);
/*
 * Netlink attribute policy for the TCA_RSVP_* options: the class id is a
 * u32, the addresses are binary blobs of RSVP_DST_LEN 32-bit words, and
 * the pinfo attribute is sized as struct tc_rsvp_pinfo.
 */
468 static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
469 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
470 [TCA_RSVP_DST] = { .type = NLA_BINARY,
471 .len = RSVP_DST_LEN * sizeof(u32) },
472 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
473 .len = RSVP_DST_LEN * sizeof(u32) },
474 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
/*
 * Create a new filter or update an existing one from netlink attributes.
 *
 * Existing filter: a copy is made with kmemdup(), its classid and
 * extensions are updated, and it is swapped in with rsvp_replace().
 * New filter: the filter is built from the SRC/PINFO/CLASSID attributes, a
 * handle is generated, and it is inserted into a matching session (a new
 * session is allocated when none matches).
 *
 * NOTE(review): many lines (error paths, labels, returns, some parameters)
 * are not visible in this excerpt.
 */
477 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
478 struct tcf_proto *tp, unsigned long base,
481 void **arg, bool ovr, bool rtnl_held,
482 struct netlink_ext_ack *extack)
484 struct rsvp_head *data = rtnl_dereference(tp->root);
485 struct rsvp_filter *f, *nfp;
486 struct rsvp_filter __rcu **fp;
487 struct rsvp_session *nsp, *s;
488 struct rsvp_session __rcu **sp;
489 struct tc_rsvp_pinfo *pinfo = NULL;
490 struct nlattr *opt = tca[TCA_OPTIONS];
491 struct nlattr *tb[TCA_RSVP_MAX + 1];
498 return handle ? -EINVAL : 0;
/* Parse the nested options against rsvp_policy. */
500 err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy,
505 err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
508 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
515 /* Node exists: adjust only classid */
516 struct rsvp_filter *n;
518 if (f->handle != handle && handle)
/* Work on a copy so the update can be swapped in by rsvp_replace(). */
521 n = kmemdup(f, sizeof(*f), GFP_KERNEL);
527 err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
534 if (tb[TCA_RSVP_CLASSID]) {
535 n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
536 tcf_bind_filter(tp, &n->res, base);
539 tcf_exts_change(&n->exts, &e);
540 rsvp_replace(tp, n, handle);
544 /* Now more serious part... */
548 if (tb[TCA_RSVP_DST] == NULL)
552 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
556 err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
560 if (tb[TCA_RSVP_SRC]) {
561 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
562 h2 = hash_src(f->src);
/*
 * NOTE(review): no validation of the pinfo contents is visible in this
 * excerpt — confirm they are checked before being used by the classifier.
 */
564 if (tb[TCA_RSVP_PINFO]) {
565 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
567 f->tunnelhdr = pinfo->tunnelhdr;
569 if (tb[TCA_RSVP_CLASSID])
570 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
572 dst = nla_data(tb[TCA_RSVP_DST]);
/* Without pinfo, protocol and tunnel id both hash as 0. */
573 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
/* The handle's salt carries h1 in the low byte and h2 in the next byte. */
576 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
581 if (f->res.classid > 255)
/* A zero classid asks for an automatically generated tunnel id. */
585 if (f->res.classid == 0 &&
586 (f->res.classid = gen_tunnel(data)) == 0)
/* Look for an existing session matching dst, protocol, DPI and tunnel id. */
590 for (sp = &data->ht[h1];
591 (s = rtnl_dereference(*sp)) != NULL;
593 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
594 pinfo && pinfo->protocol == s->protocol &&
595 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
596 #if RSVP_DST_LEN == 4
597 dst[0] == s->dst[0] &&
598 dst[1] == s->dst[1] &&
599 dst[2] == s->dst[2] &&
601 pinfo->tunnelid == s->tunnelid) {
604 /* OK, we found appropriate session */
609 if (f->tunnelhdr == 0)
610 tcf_bind_filter(tp, &f->res, base);
612 tcf_exts_change(&f->exts, &e);
/* Find the insertion point in the slot's chain by comparing SPI masks. */
615 for (nfp = rtnl_dereference(*fp); nfp;
616 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
617 __u32 mask = nfp->spi.mask & f->spi.mask;
619 if (mask != f->spi.mask)
/* Link f in front of nfp and publish it. */
622 RCU_INIT_POINTER(f->next, nfp);
623 rcu_assign_pointer(*fp, f);
630 /* No session found. Create new one. */
633 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
636 memcpy(s->dst, dst, sizeof(s->dst));
640 s->protocol = pinfo->protocol;
641 s->tunnelid = pinfo->tunnelid;
/* Find the insertion point in the bucket by comparing DPI masks. */
644 for (nsp = rtnl_dereference(*sp); nsp;
645 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
646 if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
649 RCU_INIT_POINTER(s->next, nsp);
650 rcu_assign_pointer(*sp, s);
/* Cleanup calls; the labels that lead here are not visible in this excerpt. */
655 tcf_exts_destroy(&f->exts);
658 tcf_exts_destroy(&e);
/*
 * Visit every filter of every session, calling arg->fn() on each one once
 * arg->count has reached arg->skip.
 * NOTE(review): the counter updates and the handling of a negative return
 * from arg->fn() are not visible in this excerpt.
 */
662 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
665 struct rsvp_head *head = rtnl_dereference(tp->root);
671 for (h = 0; h < 256; h++) {
672 struct rsvp_session *s;
674 for (s = rtnl_dereference(head->ht[h]); s;
675 s = rtnl_dereference(s->next)) {
676 for (h1 = 0; h1 <= 16; h1++) {
677 struct rsvp_filter *f;
679 for (f = rtnl_dereference(s->ht[h1]); f;
680 f = rtnl_dereference(f->next)) {
681 if (arg->count < arg->skip) {
685 if (arg->fn(tp, f, arg) < 0) {
/*
 * Dump one filter (fh) into a netlink message.
 * Emits the handle, then a TCA_OPTIONS nest holding the session's
 * destination, the pinfo block, the classid (when non-zero), the source
 * address (unless the filter sits in the wildcard slot 16) and the
 * extensions; extension stats are added after the nest is closed.
 * Any failed put jumps to nla_put_failure, where the nest is cancelled.
 * NOTE(review): the initialisation of s and the return statements are not
 * visible in this excerpt.
 */
696 static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
697 struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
699 struct rsvp_filter *f = fh;
700 struct rsvp_session *s;
702 struct tc_rsvp_pinfo pinfo;
708 t->tcm_handle = f->handle;
710 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
712 goto nla_put_failure;
714 if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
715 goto nla_put_failure;
718 pinfo.protocol = s->protocol;
719 pinfo.tunnelid = s->tunnelid;
720 pinfo.tunnelhdr = f->tunnelhdr;
722 if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
723 goto nla_put_failure;
724 if (f->res.classid &&
725 nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
726 goto nla_put_failure;
/* Bits 8..15 of the handle hold the slot index; 16 is the wildcard slot. */
727 if (((f->handle >> 8) & 0xFF) != 16 &&
728 nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
729 goto nla_put_failure;
731 if (tcf_exts_dump(skb, &f->exts) < 0)
732 goto nla_put_failure;
734 nla_nest_end(skb, nest);
736 if (tcf_exts_dump_stats(skb, &f->exts) < 0)
737 goto nla_put_failure;
741 nla_nest_cancel(skb, nest);
/*
 * Class-binding hook: acts only when fh is non-NULL and the filter's
 * classid equals classid.
 * NOTE(review): the action taken on a match is not visible in this excerpt.
 */
745 static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
747 struct rsvp_filter *f = fh;
749 if (f && f->res.classid == classid)
/*
 * Classifier operations table wiring this file's callbacks into the tc
 * filter framework; registered and unregistered by init_rsvp()/exit_rsvp().
 * NOTE(review): some members are not visible in this excerpt.
 */
753 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
755 .classify = rsvp_classify,
757 .destroy = rsvp_destroy,
759 .change = rsvp_change,
760 .delete = rsvp_delete,
763 .bind_class = rsvp_bind_class,
764 .owner = THIS_MODULE,
/* Module init: register the classifier ops and return the registration status. */
767 static int __init init_rsvp(void)
769 return register_tcf_proto_ops(&RSVP_OPS);
/* Module exit: unregister the classifier ops. */
772 static void __exit exit_rsvp(void)
774 unregister_tcf_proto_ops(&RSVP_OPS);
/* Hook the init/exit routines into the module loader. */
777 module_init(init_rsvp)
778 module_exit(exit_rsvp)