2 * net/sched/cls_flower.c Flower classifier
4 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/rhashtable.h>
17 #include <linux/if_ether.h>
18 #include <linux/in6.h>
21 #include <net/sch_generic.h>
22 #include <net/pkt_cls.h>
24 #include <net/flow_dissector.h>
/* Match/mask key laid out for flow_dissector extraction.
 * NOTE(review): the "struct fl_flow_key {" opening line and the
 * indev_ifindex member (referenced by fl_classify/fl_set_key/fl_dump
 * below) are not visible in this chunk -- struct appears truncated here.
 */
struct flow_dissector_key_control control;
struct flow_dissector_key_basic basic;		/* n_proto + ip_proto */
struct flow_dissector_key_eth_addrs eth;	/* src/dst MAC */
struct flow_dissector_key_addrs ipv4;		/* NOTE(review): member name/type look mismatched (key_addrs vs ipv4) -- verify against upstream */
struct flow_dissector_key_ipv6_addrs ipv6;
struct flow_dissector_key_ports tp;		/* L4 src/dst ports */
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
/* Byte window [start, end) of fl_flow_key actually covered by the mask;
 * maintained long-aligned by fl_mask_update_range(). */
struct fl_flow_mask_range {
	unsigned short int start;
	unsigned short int end;
/* NOTE(review): closing "};" of fl_flow_mask_range and the opening of
 * "struct fl_flow_mask {" are not visible in this chunk. */
	struct fl_flow_key key;		/* the mask bytes themselves */
	struct fl_flow_mask_range range;
/* NOTE(review): the opening of "struct cls_fl_head {" (and its ht /
 * mask_assigned / hgen members used below) is not visible here. */
	struct fl_flow_mask mask;	/* single shared mask for all filters */
	struct flow_dissector dissector;
	struct list_head filters;	/* all cls_fl_filter instances, RCU list */
	struct rhashtable_params ht_params;	/* keyed on masked-key window */
/* One flower filter instance.
 * NOTE(review): handle, exts and rcu members referenced elsewhere in this
 * file are not visible in this chunk -- struct appears truncated. */
struct cls_fl_filter {
	struct rhash_head ht_node;	/* linkage in cls_fl_head.ht */
	struct fl_flow_key mkey;	/* key & mask, the hashtable key */
	struct tcf_result res;
	struct fl_flow_key key;		/* unmasked key as configured */
	struct list_head list;		/* linkage in cls_fl_head.filters */
/* Length in bytes of the used (masked) window of the flow key.
 * NOTE(review): the function's braces are not visible in this chunk. */
static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
	return mask->range.end - mask->range.start;
/* Shrink mask->range to the smallest long-aligned window covering all
 * non-zero mask bytes, so masked compares/copies can run a long at a time.
 * NOTE(review): the scan-loop body that updates first/last is not visible
 * in this chunk -- presumably it records the first and last non-zero byte
 * offsets in bytes[]; confirm against upstream.
 */
static void fl_mask_update_range(struct fl_flow_mask *mask)
	const u8 *bytes = (const u8 *) &mask->key;
	size_t size = sizeof(mask->key);
	size_t i, first = 0, last = size - 1;

	for (i = 0; i < sizeof(mask->key); i++) {

	/* round out so the window is walkable in units of sizeof(long) */
	mask->range.start = rounddown(first, sizeof(long));
	mask->range.end = roundup(last + 1, sizeof(long));
/* Pointer to the first byte of @key inside the mask's used window. */
static void *fl_key_get_start(struct fl_flow_key *key,
			      const struct fl_flow_mask *mask)
	return (u8 *) key + mask->range.start;
/* mkey = key & mask, computed a long at a time over the mask's used
 * window only; bytes outside the window are not written.
 * NOTE(review): the declaration of loop variable i is not visible in this
 * chunk. */
static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
			      struct fl_flow_mask *mask)
	const long *lkey = fl_key_get_start(key, mask);
	const long *lmask = fl_key_get_start(&mask->key, mask);
	long *lmkey = fl_key_get_start(mkey, mask);

	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
		*lmkey++ = *lkey++ & *lmask++;
/* Zero the masked window of @key so untouched bytes compare equal. */
static void fl_clear_masked_range(struct fl_flow_key *key,
				  struct fl_flow_mask *mask)
	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
/* Fast-path classify: dissect the skb into a flow key, mask it, and look
 * the masked key up in the rhashtable.
 * NOTE(review): the "if (f)" guard before dereferencing the lookup result
 * and the miss-path return are not visible in this chunk.
 */
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		       struct tcf_result *res)
	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
	struct cls_fl_filter *f;
	struct fl_flow_key skb_key;
	struct fl_flow_key skb_mkey;

	/* only the masked window is compared, so only it needs clearing */
	fl_clear_masked_range(&skb_key, &head->mask);
	skb_key.indev_ifindex = skb->skb_iif;
	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
	 * so do it rather here.
	 */
	skb_key.basic.n_proto = skb->protocol;
	skb_flow_dissect(skb, &head->dissector, &skb_key);

	fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);

	f = rhashtable_lookup_fast(&head->ht,
				   fl_key_get_start(&skb_mkey, &head->mask),
	return tcf_exts_exec(skb, &f->exts, res);
/* Allocate and publish the per-tp head.
 * NOTE(review): the allocation-failure check and return statement are not
 * visible in this chunk. */
static int fl_init(struct tcf_proto *tp)
	struct cls_fl_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);

	INIT_LIST_HEAD_RCU(&head->filters);
	rcu_assign_pointer(tp->root, head);
/* RCU callback: free a filter after readers are done with it. */
static void fl_destroy_filter(struct rcu_head *head)
	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);

	tcf_exts_destroy(&f->exts);
	/* NOTE(review): the kfree(f) expected after tcf_exts_destroy() is not
	 * visible in this chunk. */
/* Destroy the classifier instance; refuses when !force and filters remain.
 * NOTE(review): the "return false"/"return true" statements of this bool
 * function are not visible in this chunk. */
static bool fl_destroy(struct tcf_proto *tp, bool force)
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f, *next;

	if (!force && !list_empty(&head->filters))

	list_for_each_entry_safe(f, next, &head->filters, list) {
		list_del_rcu(&f->list);
		call_rcu(&f->rcu, fl_destroy_filter);
	RCU_INIT_POINTER(tp->root, NULL);
	/* hashtable only exists once a mask was assigned */
	if (head->mask_assigned)
		rhashtable_destroy(&head->ht);
	kfree_rcu(head, rcu);
/* Linear lookup of a filter by its 32-bit handle; 0 when not found.
 * NOTE(review): the trailing "return 0" is not visible in this chunk. */
static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f;

	list_for_each_entry(f, &head->filters, list)
		if (f->handle == handle)
			return (unsigned long) f;
197 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
198 [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC },
199 [TCA_FLOWER_CLASSID] = { .type = NLA_U32 },
200 [TCA_FLOWER_INDEV] = { .type = NLA_STRING,
202 [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN },
203 [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN },
204 [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN },
205 [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN },
206 [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 },
207 [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 },
208 [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 },
209 [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 },
210 [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 },
211 [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 },
212 [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
213 [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
214 [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) },
215 [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
216 [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
217 [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
218 [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
219 [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
/* Copy one attribute value into @val and fill @mask: all-ones when no
 * mask attribute exists/was supplied (exact match), else the given mask.
 * NOTE(review): the "if (!tb[val_type]) return;" guard and the "else"
 * before the final memcpy are not visible in this chunk -- as shown, both
 * memset and memcpy on @mask would run; confirm against upstream. */
static void fl_set_key_val(struct nlattr **tb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
	memcpy(val, nla_data(tb[val_type]), len);
	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
		memset(mask, 0xff, len);
	memcpy(mask, nla_data(tb[mask_type]), len);
/* Parse TCA_FLOWER_* attributes into @key and matching @mask.
 * NOTE(review): several lines are not visible in this chunk: the error
 * check on tcf_change_indev(), the closing brace and #endif of the
 * CONFIG_NET_CLS_IND section, the closing braces of the n_proto/ip_proto
 * branches, and the final "return 0". */
static int fl_set_key(struct net *net, struct nlattr **tb,
		      struct fl_flow_key *key, struct fl_flow_key *mask)
#ifdef CONFIG_NET_CLS_IND
	if (tb[TCA_FLOWER_INDEV]) {
		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
		/* tcf_change_indev() yields the ifindex on success */
		key->indev_ifindex = err;
		mask->indev_ifindex = 0xffffffff;

	/* L2 match: MAC addresses and ethertype (ethertype is exact-only,
	 * hence TCA_FLOWER_UNSPEC for its mask). */
	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
		       sizeof(key->eth.dst));
	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
		       sizeof(key->eth.src));
	fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
		       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
		       sizeof(key->basic.n_proto));
	/* L3 proto only meaningful for IPv4/IPv6 frames */
	if (key->basic.n_proto == htons(ETH_P_IP) ||
	    key->basic.n_proto == htons(ETH_P_IPV6)) {
		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
			       sizeof(key->basic.ip_proto));
	if (key->basic.n_proto == htons(ETH_P_IP)) {
		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			       sizeof(key->ipv4.src));
		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
			       sizeof(key->ipv4.dst));
	} else if (key->basic.n_proto == htons(ETH_P_IPV6)) {
		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(key->ipv6.src));
		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(key->ipv6.dst));
	/* L4 ports: same key storage (tp), attribute chosen by ip_proto */
	if (key->basic.ip_proto == IPPROTO_TCP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
			       &mask->tp.src, TCA_FLOWER_UNSPEC,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
			       sizeof(key->tp.dst));
	} else if (key->basic.ip_proto == IPPROTO_UDP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
			       &mask->tp.src, TCA_FLOWER_UNSPEC,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
			       sizeof(key->tp.dst));
/* Two masks are equal iff their ranges match and the mask bytes within
 * the (identical) used window match. */
static bool fl_mask_eq(struct fl_flow_mask *mask1,
		       struct fl_flow_mask *mask2)
	const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
	const long *lmask2 = fl_key_get_start(&mask2->key, mask2);

	return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
	       !memcmp(lmask1, lmask2, fl_mask_range(mask1));
/* Template rhashtable parameters; key_offset/key_len are adjusted per
 * mask in fl_init_hashtable(). */
static const struct rhashtable_params fl_ht_params = {
	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
	.head_offset = offsetof(struct cls_fl_filter, ht_node),
	.automatic_shrinking = true,
/* Set up head->ht so its key is exactly the mask's used window inside
 * cls_fl_filter.mkey (base offset + range start, range length). */
static int fl_init_hashtable(struct cls_fl_head *head,
			     struct fl_flow_mask *mask)
	head->ht_params = fl_ht_params;
	head->ht_params.key_len = fl_mask_range(mask);
	head->ht_params.key_offset += mask->range.start;

	return rhashtable_init(&head->ht, &head->ht_params);
/* Offset/size helpers for fl_flow_key members. */
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
#define FL_KEY_MEMBER_END_OFFSET(member) \
	(FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))

/* True when @member overlaps the mask's used byte window. */
#define FL_KEY_IN_RANGE(mask, member)						\
	(FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&			\
	 FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)

/* Append one flow_dissector_key entry to keys[].
 * NOTE(review): the do { ... } while(0) wrapper lines and the cnt++
 * statement are not visible in this chunk. */
#define FL_KEY_SET(keys, cnt, id, member)					\
	keys[cnt].key_id = id;							\
	keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member);			\

/* Conditionally append an entry when the member is covered by the mask. */
#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)			\
	if (FL_KEY_IN_RANGE(mask, member))					\
		FL_KEY_SET(keys, cnt, id, member);				\
/* Build the flow_dissector key list from the mask: control and basic are
 * always dissected; the rest only when the mask touches them.
 * NOTE(review): the declaration/initialization of cnt is not visible in
 * this chunk. */
static void fl_init_dissector(struct cls_fl_head *head,
			      struct fl_flow_mask *mask)
	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];

	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
			       FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
			       FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
			       FLOW_DISSECTOR_KEY_PORTS, tp);

	skb_flow_dissector_init(&head->dissector, keys, cnt);
/* All filters on one tp share a single mask: accept a new filter only if
 * its mask equals the assigned one, or assign it on first use.
 * NOTE(review): the -EINVAL return for a mismatching mask, the error check
 * after fl_init_hashtable(), and the final "return 0" are not visible in
 * this chunk. */
static int fl_check_assign_mask(struct cls_fl_head *head,
				struct fl_flow_mask *mask)
	if (head->mask_assigned) {
		if (!fl_mask_eq(&head->mask, mask))

	/* Mask is not assigned yet. So assign it and init hashtable
	err = fl_init_hashtable(head, mask);
	memcpy(&head->mask, mask, sizeof(head->mask));
	head->mask_assigned = true;

	fl_init_dissector(head, mask);
/* Validate actions, bind classid, parse the key and precompute the masked
 * key for @f.
 * NOTE(review): the error-path gotos after tcf_exts_validate()/fl_set_key()
 * and the "return 0"/errout label structure are not fully visible in this
 * chunk -- tcf_exts_destroy(&e) at the end is the error-path cleanup. */
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
			struct cls_fl_filter *f, struct fl_flow_mask *mask,
			unsigned long base, struct nlattr **tb,
			struct nlattr *est, bool ovr)
	tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);

	if (tb[TCA_FLOWER_CLASSID]) {
		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
		tcf_bind_filter(tp, &f->res, base);

	err = fl_set_key(net, tb, &f->key, &mask->key);

	fl_mask_update_range(mask);
	fl_set_masked_key(&f->mkey, &f->key, mask);

	/* commit validated actions into the filter */
	tcf_exts_change(tp, &f->exts, &e);

	tcf_exts_destroy(&e);
/* Allocate an unused handle by probing fl_get(); gives up (returns 0)
 * after 0x80000000 attempts.
 * NOTE(review): the do-loop opening, the hgen reset branch, and the return
 * statements are not visible in this chunk. */
static u32 fl_grab_new_handle(struct tcf_proto *tp,
			      struct cls_fl_head *head)
	unsigned int i = 0x80000000;

		if (++head->hgen == 0x7FFFFFFF)
	} while (--i > 0 && fl_get(tp, head->hgen));

	if (unlikely(i == 0)) {
		pr_err("Insufficient number of handles\n");
/* Create or replace a filter: parse attributes, assign/verify the shared
 * mask, insert into the hashtable, then either replace @fold via RCU or
 * append the new filter to the list.
 * NOTE(review): many error-path returns/gotos (OPTIONS missing, parse
 * failure, handle clash, alloc failure, insert failure) and the "if (fold)"
 * / "else" branch structure around the replace-vs-add tail are not visible
 * in this chunk. */
static int fl_change(struct net *net, struct sk_buff *in_skb,
		     struct tcf_proto *tp, unsigned long base,
		     u32 handle, struct nlattr **tca,
		     unsigned long *arg, bool ovr)
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
	struct cls_fl_filter *fnew;
	struct nlattr *tb[TCA_FLOWER_MAX + 1];
	struct fl_flow_mask mask = {};

	if (!tca[TCA_OPTIONS])

	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);

	if (fold && handle && fold->handle != handle)

	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);

	tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);

	handle = fl_grab_new_handle(tp, head);
	fnew->handle = handle;

	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);

	err = fl_check_assign_mask(head, &mask);

	err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,

	/* on replace, drop the old filter from the hashtable first */
	rhashtable_remove_fast(&head->ht, &fold->ht_node,

	*arg = (unsigned long) fnew;

	list_replace_rcu(&fnew->list, &fold->list);
	tcf_unbind_filter(tp, &fold->res);
	call_rcu(&fold->rcu, fl_destroy_filter);

	list_add_tail_rcu(&fnew->list, &head->filters);
/* Remove one filter: unhash, unlink (RCU), unbind, free after grace
 * period.
 * NOTE(review): the ht_params argument of rhashtable_remove_fast() and the
 * final "return 0" are not visible in this chunk. */
static int fl_delete(struct tcf_proto *tp, unsigned long arg)
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;

	rhashtable_remove_fast(&head->ht, &f->ht_node,
	list_del_rcu(&f->list);
	tcf_unbind_filter(tp, &f->res);
	call_rcu(&f->rcu, fl_destroy_filter);
/* Iterate all filters for dump/walk callbacks, honoring arg->skip and
 * stopping when the callback returns negative.
 * NOTE(review): the "goto skip"/count bookkeeping and arg->stop handling
 * lines are not visible in this chunk. */
static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f;

	list_for_each_entry_rcu(f, &head->filters, list) {
		if (arg->count < arg->skip)
		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
/* Emit one key/mask attribute pair; skipped entirely when the mask is
 * all-zero (field not matched on).
 * NOTE(review): the early "return 0", the error checks after each
 * nla_put(), and the final return are not visible in this chunk. */
static int fl_dump_key_val(struct sk_buff *skb,
			   void *val, int val_type,
			   void *mask, int mask_type, int len)
	if (!memchr_inv(mask, 0, len))
	err = nla_put(skb, val_type, len, val);
	/* masks with a dedicated attribute type are dumped too */
	if (mask_type != TCA_FLOWER_UNSPEC) {
		err = nla_put(skb, mask_type, len, mask);
/* Dump one filter back to userspace as TCA_FLOWER_* attributes, mirroring
 * fl_set_key()'s parse order.
 * NOTE(review): several lines are not visible in this chunk: the "nest"
 * declaration, the "if (!f) return skb->len;" guard, "key = &f->key;",
 * the "return skb->len" success path, and the "nla_put_failure:" label
 * preceding nla_nest_cancel(). */
static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
		   struct sk_buff *skb, struct tcmsg *t)
	struct cls_fl_head *head = rtnl_dereference(tp->root);
	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
	struct fl_flow_key *key, *mask;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
		goto nla_put_failure;

	if (f->res.classid &&
	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
		goto nla_put_failure;

	/* the mask lives on the head (shared), the key on the filter */
	mask = &head->mask.key;

	if (mask->indev_ifindex) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, key->indev_ifindex);
		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
			goto nla_put_failure;

	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
			    sizeof(key->eth.dst)) ||
	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
			    sizeof(key->eth.src)) ||
	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
			    sizeof(key->basic.n_proto)))
		goto nla_put_failure;
	if ((key->basic.n_proto == htons(ETH_P_IP) ||
	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
			    sizeof(key->basic.ip_proto)))
		goto nla_put_failure;

	if (key->basic.n_proto == htons(ETH_P_IP) &&
	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			     sizeof(key->ipv4.src)) ||
	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
			     sizeof(key->ipv4.dst))))
		goto nla_put_failure;
	else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
				  sizeof(key->ipv6.src)) ||
		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
				  sizeof(key->ipv6.dst))))
		goto nla_put_failure;

	if (key->basic.ip_proto == IPPROTO_TCP &&
	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
			     &mask->tp.src, TCA_FLOWER_UNSPEC,
			     sizeof(key->tp.src)) ||
	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
			     &mask->tp.dst, TCA_FLOWER_UNSPEC,
			     sizeof(key->tp.dst))))
		goto nla_put_failure;
	else if (key->basic.ip_proto == IPPROTO_UDP &&
		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
				  &mask->tp.src, TCA_FLOWER_UNSPEC,
				  sizeof(key->tp.src)) ||
		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
				  &mask->tp.dst, TCA_FLOWER_UNSPEC,
				  sizeof(key->tp.dst))))
		goto nla_put_failure;

	if (tcf_exts_dump(skb, &f->exts))
		goto nla_put_failure;

	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
		goto nla_put_failure;

	nla_nest_cancel(skb, nest);
/* Classifier ops registered with the tc core.
 * NOTE(review): several callback assignments (.kind, .init, .get, .change,
 * .delete, .walk, .dump) are not visible in this chunk. */
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
	.classify	= fl_classify,
	.destroy	= fl_destroy,
	.owner		= THIS_MODULE,
/* Module entry: register the flower classifier with the tc core. */
static int __init cls_fl_init(void)
	return register_tcf_proto_ops(&cls_fl_ops);
/* Module exit: unregister the classifier. */
static void __exit cls_fl_exit(void)
	unregister_tcf_proto_ops(&cls_fl_ops);
685 module_init(cls_fl_init);
686 module_exit(cls_fl_exit);
688 MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
689 MODULE_DESCRIPTION("Flower classifier");
690 MODULE_LICENSE("GPL v2");