2 /* net/sched/cls_api.c Packet classifier API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support */
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/err.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/kmod.h>
26 #include <linux/slab.h>
27 #include <linux/idr.h>
28 #include <net/net_namespace.h>
30 #include <net/netlink.h>
31 #include <net/pkt_sched.h>
32 #include <net/pkt_cls.h>
34 /* All registered classifier types; entries are struct tcf_proto_ops linked through ->head. */
35 static LIST_HEAD(tcf_proto_base);
37 /* Protects tcf_proto_base. It is a pure SMP lock: taken for reading by name lookups and for writing by register/unregister. */
38 static DEFINE_RWLOCK(cls_mod_lock);
40 /* Find classifier type by string name.
 *
 * Walks tcf_proto_base under the cls_mod_lock read lock and compares @kind
 * with each entry's ->kind using strcmp(). On a name match a module
 * reference is attempted with try_module_get().
 * NOTE(review): the statements that record and return the result are not
 * visible in this listing; presumably the matching ops is returned only when
 * the module reference was obtained and NULL otherwise - confirm against the
 * full source.
 */
42 static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
44 const struct tcf_proto_ops *t, *res = NULL;
47 read_lock(&cls_mod_lock);
48 list_for_each_entry(t, &tcf_proto_base, head) {
49 if (strcmp(kind, t->kind) == 0) {
50 if (try_module_get(t->owner))
55 read_unlock(&cls_mod_lock);
60 /* Register(unregister) new classifier type.
 *
 * register_tcf_proto_ops() takes cls_mod_lock for writing, scans
 * tcf_proto_base for an entry whose ->kind equals ops->kind, and appends
 * @ops to the tail of the list.
 * NOTE(review): the return statements and the action taken when a duplicate
 * name is found are not visible in this listing; presumably a duplicate is
 * rejected with an error instead of being added - confirm.
 */
62 int register_tcf_proto_ops(struct tcf_proto_ops *ops)
64 struct tcf_proto_ops *t;
67 write_lock(&cls_mod_lock);
68 list_for_each_entry(t, &tcf_proto_base, head)
69 if (!strcmp(ops->kind, t->kind))
72 list_add_tail(&ops->head, &tcf_proto_base);
75 write_unlock(&cls_mod_lock);
78 EXPORT_SYMBOL(register_tcf_proto_ops);
/* Workqueue on which tcf_queue_work() queues RCU work items; it is flushed
 * by unregister_tcf_proto_ops(). Its allocation is not visible in this
 * listing.
 */
80 static struct workqueue_struct *tc_filter_wq;
/* Unregister a classifier type.
 *
 * Flushes tc_filter_wq first so that work queued by a destroy() handler has
 * run, then walks tcf_proto_base under the cls_mod_lock write lock.
 * NOTE(review): the loop body and the return value are not visible in this
 * listing; presumably @ops is unlinked from the list when found and an
 * error is returned otherwise - confirm.
 */
82 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
84 struct tcf_proto_ops *t;
87 /* Wait for outstanding call_rcu()s, if any, from a
88 * tcf_proto_ops's destroy() handler.
91 flush_workqueue(tc_filter_wq);
93 write_lock(&cls_mod_lock);
94 list_for_each_entry(t, &tcf_proto_base, head) {
101 write_unlock(&cls_mod_lock);
104 EXPORT_SYMBOL(unregister_tcf_proto_ops);
/* Initialise @rwork to run @func and queue it on tc_filter_wq through
 * queue_rcu_work(). Returns whatever queue_rcu_work() returns.
 */
106 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
108 INIT_RCU_WORK(rwork, func);
109 return queue_rcu_work(tc_filter_wq, rwork);
111 EXPORT_SYMBOL(tcf_queue_work);
113 /* Select new prio value from the range, managed by kernel.
 *
 * The default starting point is TC_H_MAKE(0xC0000000U, 0U). The visible
 * "tp->prio - 1" assignment instead picks a value just below @tp's
 * priority. The result is masked with TC_H_MAJ() before being returned.
 * NOTE(review): the condition guarding the assignment is not visible in
 * this listing (presumably it applies only when @tp is non-NULL) - confirm.
 */
115 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
117 u32 first = TC_H_MAKE(0xC0000000U, 0U);
120 first = tp->prio - 1;
122 return TC_H_MAJ(first);
/* Allocate and initialise a new tcf_proto of classifier type @kind.
 *
 * The ops are looked up by name. Under CONFIG_MODULES a "cls_%s" module load
 * is requested and the lookup is retried; per the in-function comment, a
 * successful load still requires the request to be replayed (-EAGAIN), so
 * the module reference just obtained is dropped again. When no ops are
 * found, extack is set to "TC classifier not found". Otherwise the classify
 * hook and @protocol are copied into the new object and ops->init() runs;
 * the visible error path releases the module reference.
 * ERR_PTR(-ENOBUFS) is returned right after the allocation, presumably when
 * it fails.
 * NOTE(review): NULL checks, the remaining field assignments (e.g. @prio,
 * @chain) and the final return are not visible in this listing.
 */
125 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
126 u32 prio, struct tcf_chain *chain,
127 struct netlink_ext_ack *extack)
129 struct tcf_proto *tp;
132 tp = kzalloc(sizeof(*tp), GFP_KERNEL);
134 return ERR_PTR(-ENOBUFS);
137 tp->ops = tcf_proto_lookup_ops(kind);
139 #ifdef CONFIG_MODULES
141 request_module("cls_%s", kind);
143 tp->ops = tcf_proto_lookup_ops(kind);
144 /* We dropped the RTNL semaphore in order to perform
145 * the module load. So, even if we succeeded in loading
146 * the module we have to replay the request. We indicate
147 * this using -EAGAIN.
150 module_put(tp->ops->owner);
153 NL_SET_ERR_MSG(extack, "TC classifier not found");
159 tp->classify = tp->ops->classify;
160 tp->protocol = protocol;
164 err = tp->ops->init(tp);
166 module_put(tp->ops->owner);
/* Tear down @tp: invoke the classifier's destroy() hook (passing @extack
 * through), then drop the module reference held on its ops.
 * NOTE(review): the release of @tp's own memory is not visible in this
 * listing.
 */
176 static void tcf_proto_destroy(struct tcf_proto *tp,
177 struct netlink_ext_ack *extack)
179 tp->ops->destroy(tp, extack);
180 module_put(tp->ops->owner);
/* One registered "chain head changed" callback, kept on a chain's
 * filter_chain_list.
 */
184 struct tcf_filter_chain_list_item {
185 struct list_head list; /* node in chain->filter_chain_list */
186 tcf_chain_head_change_t *chain_head_change; /* callback; may be NULL */
187 void *chain_head_change_priv; /* opaque argument handed to the callback */
/* Allocate a zeroed chain for @block, initialise its callback list, append
 * it to the block's chain_list and record the owning block and chain index.
 * NOTE(review): the second parameter line, allocation-failure handling, the
 * initial reference count and the return statement are not visible in this
 * listing.
 */
190 static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
193 struct tcf_chain *chain;
195 chain = kzalloc(sizeof(*chain), GFP_KERNEL);
198 INIT_LIST_HEAD(&chain->filter_chain_list);
199 list_add_tail(&chain->list, &block->chain_list);
200 chain->block = block;
201 chain->index = chain_index;
/* Invoke @item's chain_head_change callback with @tp_head and the item's
 * private pointer. Items that carry no callback are silently skipped.
 */
206 static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
207 struct tcf_proto *tp_head)
209 if (item->chain_head_change)
210 item->chain_head_change(tp_head, item->chain_head_change_priv);
/* Tell every callback registered on @chain that the head filter is now
 * @tp_head (which may be NULL).
 */
212 static void tcf_chain_head_change(struct tcf_chain *chain,
213 struct tcf_proto *tp_head)
215 struct tcf_filter_chain_list_item *item;
217 list_for_each_entry(item, &chain->filter_chain_list, list)
218 tcf_chain_head_change_item(item, tp_head);
/* Remove the filters attached to @chain.
 *
 * Registered callbacks are first told that the head is NULL. The visible
 * statements then unlink the current head from filter_chain, destroy it,
 * reload the head pointer and drop one chain reference.
 * Uses rtnl_dereference(), so it is expected to run with RTNL held.
 * NOTE(review): the enclosing loop construct is not visible in this listing;
 * presumably these steps repeat until the chain is empty, with one
 * tcf_chain_put() per destroyed filter - confirm.
 */
221 static void tcf_chain_flush(struct tcf_chain *chain)
223 struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
225 tcf_chain_head_change(chain, NULL);
227 RCU_INIT_POINTER(chain->filter_chain, tp->next);
228 tcf_proto_destroy(tp, NULL);
229 tp = rtnl_dereference(chain->filter_chain);
230 tcf_chain_put(chain);
/* Unlink @chain from its block's chain_list and then test whether the block
 * has no chains left.
 * NOTE(review): the freeing of the chain and the action taken when the
 * block's chain_list is empty are not visible in this listing.
 */
234 static void tcf_chain_destroy(struct tcf_chain *chain)
236 struct tcf_block *block = chain->block;
238 list_del(&chain->list);
240 if (list_empty(&block->chain_list))
/* Take a reference on @chain.
 * NOTE(review): the body is not visible in this listing; presumably it
 * increments chain->refcnt, mirroring the decrement in tcf_chain_put() -
 * confirm.
 */
244 static void tcf_chain_hold(struct tcf_chain *chain)
/* Look up the chain with index @chain_index in @block.
 *
 * A matching chain gets an extra reference through tcf_chain_hold(). When no
 * chain matches, a new one is created if "create" is true; otherwise NULL is
 * returned.
 * NOTE(review): the third parameter line and the return on a match are not
 * visible in this listing.
 */
249 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
252 struct tcf_chain *chain;
254 list_for_each_entry(chain, &block->chain_list, list) {
255 if (chain->index == chain_index) {
256 tcf_chain_hold(chain);
261 return create ? tcf_chain_create(block, chain_index) : NULL;
263 EXPORT_SYMBOL(tcf_chain_get);
/* Drop one reference on @chain and destroy it when the count reaches zero.
 * The counter is a plain (non-atomic) integer.
 * NOTE(review): presumably callers are serialised by RTNL - confirm.
 */
265 void tcf_chain_put(struct tcf_chain *chain)
267 if (--chain->refcnt == 0)
268 tcf_chain_destroy(chain);
270 EXPORT_SYMBOL(tcf_chain_put);
/* Return true when @block's offloadcnt is non-zero, i.e. the block already
 * has offloaded filters.
 */
272 static bool tcf_block_offload_in_use(struct tcf_block *block)
274 return block->offloadcnt;
/* Build a zero-initialised tc_block_offload request carrying @command and
 * @ei's binder type, and hand it to @dev's ndo_setup_tc() as TC_SETUP_BLOCK.
 * Returns the driver's result unchanged. The visible callers check that
 * ndo_setup_tc is non-NULL before calling.
 * NOTE(review): further field assignments on the request may be missing
 * from this listing.
 */
277 static int tcf_block_offload_cmd(struct tcf_block *block,
278 struct net_device *dev,
279 struct tcf_block_ext_info *ei,
280 enum tc_block_command command)
282 struct tc_block_offload bo = {};
284 bo.command = command;
285 bo.binder_type = ei->binder_type;
287 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
/* Bind @block to the device behind @q for hardware offload.
 *
 * Devices without ndo_setup_tc, and drivers that answer -EOPNOTSUPP to
 * TC_BLOCK_BIND, branch to the no_offload_dev_inc path; the visible
 * nooffloaddevcnt increment presumably belongs to that path.
 * Per the in-function comment, binding is refused when offload is disabled
 * on the device but the block already has offloaded filters.
 * NOTE(review): the labels, return values and the outcome of the second
 * tcf_block_offload_in_use() check are not visible in this listing.
 */
290 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
291 struct tcf_block_ext_info *ei)
293 struct net_device *dev = q->dev_queue->dev;
296 if (!dev->netdev_ops->ndo_setup_tc)
297 goto no_offload_dev_inc;
299 /* If tc offload feature is disabled and the block we try to bind
300 * to already has some offloaded filters, forbid to bind.
302 if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
305 err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
306 if (err == -EOPNOTSUPP)
307 goto no_offload_dev_inc;
311 if (tcf_block_offload_in_use(block))
313 block->nooffloaddevcnt++;
/* Undo tcf_block_offload_bind() for the device behind @q.
 *
 * Devices without ndo_setup_tc, and drivers that answer -EOPNOTSUPP to
 * TC_BLOCK_UNBIND, branch to the no_offload_dev_dec path. The visible
 * WARN_ON fires if nooffloaddevcnt was already zero before being
 * decremented.
 * NOTE(review): the label and any early return are not visible in this
 * listing.
 */
317 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
318 struct tcf_block_ext_info *ei)
320 struct net_device *dev = q->dev_queue->dev;
323 if (!dev->netdev_ops->ndo_setup_tc)
324 goto no_offload_dev_dec;
325 err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
326 if (err == -EOPNOTSUPP)
327 goto no_offload_dev_dec;
331 WARN_ON(block->nooffloaddevcnt-- == 0);
/* Register @ei's chain-head-change callback on @chain.
 *
 * A list item is allocated and filled from @ei. If the chain already has a
 * head filter the new callback is invoked with it immediately, then the
 * item is added to the chain's filter_chain_list.
 * NOTE(review): the return type line, the allocation-failure check and the
 * return values are not visible in this listing; the extack message shows
 * that allocation failure is reported to the caller.
 */
335 tcf_chain_head_change_cb_add(struct tcf_chain *chain,
336 struct tcf_block_ext_info *ei,
337 struct netlink_ext_ack *extack)
339 struct tcf_filter_chain_list_item *item;
341 item = kmalloc(sizeof(*item), GFP_KERNEL);
343 NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
346 item->chain_head_change = ei->chain_head_change;
347 item->chain_head_change_priv = ei->chain_head_change_priv;
348 if (chain->filter_chain)
349 tcf_chain_head_change_item(item, chain->filter_chain);
350 list_add(&item->list, &chain->filter_chain_list);
/* Unregister a chain-head-change callback from @chain.
 *
 * An item matches when both its callback and private pointer equal those in
 * @ei, or unconditionally when @ei carries neither. The matching item is
 * notified with a NULL head and unlinked from the list.
 * NOTE(review): the return type line, the freeing of the item and the exit
 * from the loop are not visible in this listing.
 */
355 tcf_chain_head_change_cb_del(struct tcf_chain *chain,
356 struct tcf_block_ext_info *ei)
358 struct tcf_filter_chain_list_item *item;
360 list_for_each_entry(item, &chain->filter_chain_list, list) {
361 if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
362 (item->chain_head_change == ei->chain_head_change &&
363 item->chain_head_change_priv == ei->chain_head_change_priv)) {
364 tcf_chain_head_change_item(item, NULL);
365 list_del(&item->list);
/* Id passed to net_generic() to obtain this subsystem's per-netns
 * struct tcf_net.
 */
377 static unsigned int tcf_net_id;
/* Register @block in the per-netns IDR of @net.
 *
 * idr_alloc_u32() is given block->index both as the starting id (written
 * back through the pointer) and as the maximum, so only that exact index
 * can be allocated. Returns idr_alloc_u32()'s result.
 * NOTE(review): the final argument line (allocation flags) is not visible
 * in this listing.
 */
379 static int tcf_block_insert(struct tcf_block *block, struct net *net,
380 struct netlink_ext_ack *extack)
382 struct tcf_net *tn = net_generic(net, tcf_net_id);
384 return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
/* Remove @block's index from the per-netns IDR of @net. */
388 static void tcf_block_remove(struct tcf_block *block, struct net *net)
390 struct tcf_net *tn = net_generic(net, tcf_net_id);
392 idr_remove(&tn->idr, block->index);
/* Allocate a new block.
 *
 * The block is zero-allocated, its chain, callback and owner lists are
 * initialised, chain 0 is created, and the block index is recorded. Per the
 * in-function comment, the qdisc pointer is stored only for blocks that are
 * not shared. ERR_PTR(-ENOMEM) with an extack message is returned right
 * after the allocation, presumably when it fails; a chain-creation failure
 * jumps to err_chain_create.
 * NOTE(review): one parameter line (block_index), the NULL checks, the
 * error label body and the final return are not visible in this listing.
 */
395 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
397 struct netlink_ext_ack *extack)
399 struct tcf_block *block;
400 struct tcf_chain *chain;
403 block = kzalloc(sizeof(*block), GFP_KERNEL);
405 NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
406 return ERR_PTR(-ENOMEM);
408 INIT_LIST_HEAD(&block->chain_list);
409 INIT_LIST_HEAD(&block->cb_list);
410 INIT_LIST_HEAD(&block->owner_list);
412 /* Create chain 0 by default, it has to be always present. */
413 chain = tcf_chain_create(block, 0);
415 NL_SET_ERR_MSG(extack, "Failed to create new tcf chain");
417 goto err_chain_create;
421 block->index = block_index;
423 /* Don't store q pointer for blocks which are shared */
424 if (!tcf_block_shared(block))
/* Return the block registered under @block_index in @net's IDR, or whatever
 * idr_find() yields when nothing is registered there.
 */
433 static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
435 struct tcf_net *tn = net_generic(net, tcf_net_id);
437 return idr_find(&tn->idr, block_index);
441 /* Set q, parent, cl when appropriate. */
/* Resolve the block addressed by a filter request.
 *
 * With @ifindex == TCM_IFINDEX_MAGIC_BLOCK the block is looked up directly
 * by @block_index. Otherwise the device is found by @ifindex, the parent
 * qdisc is located (updating *q and *parent), the qdisc is checked to be
 * classful and to support blocks, an optional class is resolved into *cl
 * when the minor part of *parent is set, and the block is obtained from
 * cops->tcf_block(). Shared blocks are refused on this path because they
 * must be addressed by block index.
 *
 * Returns the block, or an ERR_PTR carrying one of the codes at the visible
 * return sites: -EINVAL, -ENODEV, -ENOENT, -EOPNOTSUPP. Most failures also
 * set a message in @extack.
 * NOTE(review): the individual failure conditions and the success return
 * are not visible in this listing.
 */
444 static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
445 u32 *parent, unsigned long *cl,
446 int ifindex, u32 block_index,
447 struct netlink_ext_ack *extack)
449 struct tcf_block *block;
451 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
452 block = tcf_block_lookup(net, block_index);
454 NL_SET_ERR_MSG(extack, "Block of given index was not found");
455 return ERR_PTR(-EINVAL);
458 const struct Qdisc_class_ops *cops;
459 struct net_device *dev;
462 dev = __dev_get_by_index(net, ifindex);
464 return ERR_PTR(-ENODEV);
469 *parent = (*q)->handle;
471 *q = qdisc_lookup(dev, TC_H_MAJ(*parent));
473 NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
474 return ERR_PTR(-EINVAL);
478 /* Is it classful? */
479 cops = (*q)->ops->cl_ops;
481 NL_SET_ERR_MSG(extack, "Qdisc not classful");
482 return ERR_PTR(-EINVAL);
485 if (!cops->tcf_block) {
486 NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
487 return ERR_PTR(-EOPNOTSUPP);
490 /* Do we search for filter, attached to class? */
491 if (TC_H_MIN(*parent)) {
492 *cl = cops->find(*q, *parent);
494 NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
495 return ERR_PTR(-ENOENT);
499 /* And the last stroke */
500 block = cops->tcf_block(*q, *cl, extack);
502 return ERR_PTR(-EINVAL);
503 if (tcf_block_shared(block)) {
504 NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
505 return ERR_PTR(-EOPNOTSUPP);
/* Return the first entry of @block's chain_list. tcf_block_create() creates
 * chain 0 before any other chain and chains are appended at the tail, so
 * the first entry is chain 0 for as long as it exists.
 */
512 static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block)
514 return list_first_entry(&block->chain_list, struct tcf_chain, list);
/* Records one (qdisc, binder type) pair that owns a block; kept on
 * block->owner_list.
 * NOTE(review): the qdisc member ("q", used by tcf_block_owner_del()) is not
 * visible in this listing.
 */
517 struct tcf_block_owner_item {
518 struct list_head list; /* node in block->owner_list */
520 enum tcf_block_binder_type binder_type; /* how the owner is bound to the block */
/* Call netif_keep_dst() on the qdisc's device when @block has keep_dst set
 * and the binder type is neither clsact ingress nor clsact egress.
 * NOTE(review): the return type line and the qdisc parameter line are not
 * visible in this listing.
 */
524 tcf_block_owner_netif_keep_dst(struct tcf_block *block,
526 enum tcf_block_binder_type binder_type)
528 if (block->keep_dst &&
529 binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
530 binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
531 netif_keep_dst(qdisc_dev(q));
/* Mark @block as needing dst to be kept and apply that to every current
 * owner through tcf_block_owner_netif_keep_dst(). Owners added later pick
 * the flag up in tcf_block_get_ext().
 */
534 void tcf_block_netif_keep_dst(struct tcf_block *block)
536 struct tcf_block_owner_item *item;
538 block->keep_dst = true;
539 list_for_each_entry(item, &block->owner_list, list)
540 tcf_block_owner_netif_keep_dst(block, item->q,
543 EXPORT_SYMBOL(tcf_block_netif_keep_dst);
/* Record a new owner of @block: allocate an owner item, store the binder
 * type and add it to block->owner_list.
 * NOTE(review): the qdisc parameter line, the allocation-failure check, the
 * assignment of the qdisc into the item and the return values are not
 * visible in this listing.
 */
545 static int tcf_block_owner_add(struct tcf_block *block,
547 enum tcf_block_binder_type binder_type)
549 struct tcf_block_owner_item *item;
551 item = kmalloc(sizeof(*item), GFP_KERNEL);
555 item->binder_type = binder_type;
556 list_add(&item->list, &block->owner_list);
/* Remove the owner item of @block that matches both the given qdisc and
 * @binder_type by unlinking it from block->owner_list.
 * NOTE(review): the qdisc parameter line, the freeing of the item and the
 * exit from the loop are not visible in this listing.
 */
560 static void tcf_block_owner_del(struct tcf_block *block,
562 enum tcf_block_binder_type binder_type)
564 struct tcf_block_owner_item *item;
566 list_for_each_entry(item, &block->owner_list, list) {
567 if (item->q == q && item->binder_type == binder_type) {
568 list_del(&item->list);
/* Obtain a block for qdisc @q as described by @ei.
 *
 * A non-zero ei->block_index requests a shared block, which is looked up
 * first; a block is created via tcf_block_create() when needed, and a
 * shared block is inserted into the per-netns IDR. The qdisc is then
 * recorded as an owner, keep_dst is propagated, the chain-head-change
 * callback is registered on chain 0 and the block is bound for offload.
 *
 * On failure the visible labels unwind in reverse order: callback removal,
 * owner removal, IDR removal for shared blocks, and freeing of chain 0.
 * Returns PTR_ERR() of the block when creation fails.
 * NOTE(review): reference counting, the "created" handling, the store to
 * *p_block and the success return are not visible in this listing.
 */
576 int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
577 struct tcf_block_ext_info *ei,
578 struct netlink_ext_ack *extack)
580 struct net *net = qdisc_net(q);
581 struct tcf_block *block = NULL;
582 bool created = false;
585 if (ei->block_index) {
586 /* block_index not 0 means the shared block is requested */
587 block = tcf_block_lookup(net, ei->block_index);
593 block = tcf_block_create(net, q, ei->block_index, extack);
595 return PTR_ERR(block);
597 if (tcf_block_shared(block)) {
598 err = tcf_block_insert(block, net, extack);
600 goto err_block_insert;
604 err = tcf_block_owner_add(block, q, ei->binder_type);
606 goto err_block_owner_add;
608 tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
610 err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block),
613 goto err_chain_head_change_cb_add;
615 err = tcf_block_offload_bind(block, q, ei);
617 goto err_block_offload_bind;
622 err_block_offload_bind:
623 tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
624 err_chain_head_change_cb_add:
625 tcf_block_owner_del(block, q, ei->binder_type);
628 if (tcf_block_shared(block))
629 tcf_block_remove(block, net);
631 kfree(tcf_block_chain_zero(block));
638 EXPORT_SYMBOL(tcf_block_get_ext);
/* Default chain-head-change callback: @priv is the address of the caller's
 * RCU-protected filter chain pointer, which is updated to @tp_head with
 * rcu_assign_pointer().
 */
640 static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
642 struct tcf_proto __rcu **p_filter_chain = priv;
644 rcu_assign_pointer(*p_filter_chain, tp_head);
/* Convenience wrapper around tcf_block_get_ext() that installs
 * tcf_chain_head_change_dflt() as the callback, with @p_filter_chain as its
 * private data. Warns if @p_filter_chain is NULL. Returns
 * tcf_block_get_ext()'s result.
 */
647 int tcf_block_get(struct tcf_block **p_block,
648 struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
649 struct netlink_ext_ack *extack)
651 struct tcf_block_ext_info ei = {
652 .chain_head_change = tcf_chain_head_change_dflt,
653 .chain_head_change_priv = p_filter_chain,
656 WARN_ON(!p_filter_chain);
657 return tcf_block_get_ext(p_block, q, &ei, extack);
659 EXPORT_SYMBOL(tcf_block_get);
661 /* XXX: Standalone actions are not allowed to jump to any chain, and bound
662 * actions should all be removed after flushing. */
/* Release qdisc @q's use of @block.
 *
 * The chain-0 callback described by @ei and the owner record are removed.
 * When this drops the last block reference, a shared block is taken out of
 * the IDR, every chain is held and then flushed. The offload binding is
 * undone in all cases. Finally, if no references remain, the holds taken
 * above are dropped and chain 0 is put so the block can be freed.
 * NOTE(review): any early return (e.g. for a NULL block) is not visible in
 * this listing.
 */
664 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
665 struct tcf_block_ext_info *ei)
667 struct tcf_chain *chain, *tmp;
671 tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
672 tcf_block_owner_del(block, q, ei->binder_type);
674 if (--block->refcnt == 0) {
675 if (tcf_block_shared(block))
676 tcf_block_remove(block, block->net);
678 /* Hold a refcnt for all chains, so that they don't disappear
679 * while we are iterating.
681 list_for_each_entry(chain, &block->chain_list, list)
682 tcf_chain_hold(chain);
684 list_for_each_entry(chain, &block->chain_list, list)
685 tcf_chain_flush(chain);
688 tcf_block_offload_unbind(block, q, ei);
690 if (block->refcnt == 0) {
691 /* At this point, all the chains should have refcnt >= 1. */
692 list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
693 tcf_chain_put(chain);
695 /* Finally, put chain 0 and allow block to be freed. */
696 tcf_chain_put(tcf_block_chain_zero(block));
699 EXPORT_SYMBOL(tcf_block_put_ext);
/* Convenience wrapper around tcf_block_put_ext() for blocks obtained with
 * tcf_block_get(): uses the qdisc stored in the block and a zeroed ext info.
 * With neither callback nor priv set, tcf_chain_head_change_cb_del()
 * matches the first registered callback item.
 * NOTE(review): a NULL-block guard, if any, is not visible in this listing.
 */
701 void tcf_block_put(struct tcf_block *block)
703 struct tcf_block_ext_info ei = {0, };
707 tcf_block_put_ext(block, block->q, &ei);
710 EXPORT_SYMBOL(tcf_block_put);
/* One offload callback registered on a block; kept on block->cb_list.
 * NOTE(review): the remaining members are not visible in this listing;
 * other functions in this file access cb, cb_ident, cb_priv and refcnt.
 */
712 struct tcf_block_cb {
713 struct list_head list; /* node in block->cb_list */
/* Return the private pointer stored in @block_cb at registration time. */
720 void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
722 return block_cb->cb_priv;
724 EXPORT_SYMBOL(tcf_block_cb_priv);
/* Search @block's cb_list for an entry whose callback equals @cb and whose
 * identity equals @cb_ident.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching entry is returned, or NULL when there is none -
 * confirm.
 */
726 struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
727 tc_setup_cb_t *cb, void *cb_ident)
728 { struct tcf_block_cb *block_cb;
730 list_for_each_entry(block_cb, &block->cb_list, list)
731 if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
735 EXPORT_SYMBOL(tcf_block_cb_lookup);
/* Take a reference on @block_cb.
 * NOTE(review): the body is not visible in this listing; presumably it
 * increments block_cb->refcnt, mirroring tcf_block_cb_decref() - confirm.
 */
737 void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
741 EXPORT_SYMBOL(tcf_block_cb_incref);
/* Drop one reference on @block_cb and return the remaining count. The entry
 * is not freed here; the caller decides what to do when zero is returned.
 */
743 unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
745 return --block_cb->refcnt;
747 EXPORT_SYMBOL(tcf_block_cb_decref);
/* Register an offload callback on @block and return the new entry.
 *
 * Registration is refused with ERR_PTR(-EOPNOTSUPP) when the block already
 * has offloaded filters, because earlier callback calls cannot be replayed.
 * Otherwise a zeroed entry is allocated, filled in and added to
 * block->cb_list. ERR_PTR(-ENOMEM) is returned right after the allocation,
 * presumably when it fails.
 * NOTE(review): the cb_priv parameter line, the assignment of @cb and the
 * success return are not visible in this listing.
 */
749 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
750 tc_setup_cb_t *cb, void *cb_ident,
753 struct tcf_block_cb *block_cb;
755 /* At this point, playback of previous block cb calls is not supported,
756 * so forbid to register to block which already has some offloaded
759 if (tcf_block_offload_in_use(block))
760 return ERR_PTR(-EOPNOTSUPP);
762 block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
764 return ERR_PTR(-ENOMEM);
766 block_cb->cb_ident = cb_ident;
767 block_cb->cb_priv = cb_priv;
768 list_add(&block_cb->list, &block->cb_list);
771 EXPORT_SYMBOL(__tcf_block_cb_register);
/* Register an offload callback on @block, discarding the entry pointer.
 * Returns 0 on success or the negative errno encoded by
 * __tcf_block_cb_register().
 */
773 int tcf_block_cb_register(struct tcf_block *block,
774 tc_setup_cb_t *cb, void *cb_ident,
777 struct tcf_block_cb *block_cb;
779 block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
780 return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
782 EXPORT_SYMBOL(tcf_block_cb_register);
/* Unlink @block_cb from its block's cb_list.
 * NOTE(review): the freeing of the entry is not visible in this listing.
 */
784 void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
786 list_del(&block_cb->list);
789 EXPORT_SYMBOL(__tcf_block_cb_unregister);
/* Look up the callback entry identified by (@cb, @cb_ident) on @block and
 * unregister it.
 * NOTE(review): the handling of a failed lookup is not visible in this
 * listing.
 */
791 void tcf_block_cb_unregister(struct tcf_block *block,
792 tc_setup_cb_t *cb, void *cb_ident)
794 struct tcf_block_cb *block_cb;
796 block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
799 __tcf_block_cb_unregister(block_cb);
801 EXPORT_SYMBOL(tcf_block_cb_unregister);
/* Invoke every callback registered on @block with (@type, @type_data) and
 * the callback's own private pointer.
 * NOTE(review): how each callback's result is accumulated, the role of
 * @err_stop and the return value are not visible in this listing.
 */
803 static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
804 void *type_data, bool err_stop)
806 struct tcf_block_cb *block_cb;
810 list_for_each_entry(block_cb, &block->cb_list, list) {
811 err = block_cb->cb(type, type_data, block_cb->cb_priv);
822 /* Main classifier routine: scans classifier chain attached
823 * to this qdisc, (optionally) tests for protocol and asks
824 * specific classifiers. */
/* Classify @skb against the filter list starting at @tp, storing the
 * outcome in @res.
 *
 * The list is walked with rcu_dereference_bh(). Each filter's protocol is
 * compared with the skb protocol and with ETH_P_ALL before its classify()
 * hook is called. TC_ACT_UNSPEC is returned to signal "continue lookup".
 *
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY verdict (unless
 * @compat_mode) or a goto-chain verdict leads to the reset path, which is
 * bounded by max_reclassify_loop (4); exceeding it is reported through
 * net_notice_ratelimited(). The skb protocol is re-read before restarting.
 * NOTE(review): the continue/return statements, the labels and the exact
 * restart target are not visible in this listing.
 */
826 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
827 struct tcf_result *res, bool compat_mode)
829 __be16 protocol = tc_skb_protocol(skb);
830 #ifdef CONFIG_NET_CLS_ACT
831 const int max_reclassify_loop = 4;
832 const struct tcf_proto *orig_tp = tp;
833 const struct tcf_proto *first_tp;
838 for (; tp; tp = rcu_dereference_bh(tp->next)) {
841 if (tp->protocol != protocol &&
842 tp->protocol != htons(ETH_P_ALL))
845 err = tp->classify(skb, tp, res);
846 #ifdef CONFIG_NET_CLS_ACT
847 if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
850 } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
851 first_tp = res->goto_tp;
859 return TC_ACT_UNSPEC; /* signal: continue lookup */
860 #ifdef CONFIG_NET_CLS_ACT
862 if (unlikely(limit++ >= max_reclassify_loop)) {
863 net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
864 tp->chain->block->index,
866 ntohs(tp->protocol));
871 protocol = tc_skb_protocol(skb);
875 EXPORT_SYMBOL(tcf_classify);
/* Position of a filter within a chain, as computed by tcf_chain_tp_find()
 * and consumed by tcf_chain_tp_insert()/tcf_chain_tp_remove().
 */
877 struct tcf_chain_info {
878 struct tcf_proto __rcu **pprev; /* slot holding (or that would hold) the filter */
879 struct tcf_proto __rcu *next; /* successor of the found filter, NULL if none was found */
/* Return the tcf_proto currently stored in the slot that
 * chain_info->pprev points at. Uses rtnl_dereference(), so RTNL is
 * expected to be held.
 */
882 static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
884 return rtnl_dereference(*chain_info->pprev);
/* Link @tp into @chain at the position described by @chain_info.
 *
 * If the slot's current occupant is the chain head, registered callbacks
 * are told that @tp is the new head. @tp->next is set to the current
 * occupant before @tp is published into the slot with
 * rcu_assign_pointer(). A chain reference is taken for the inserted
 * filter.
 */
887 static void tcf_chain_tp_insert(struct tcf_chain *chain,
888 struct tcf_chain_info *chain_info,
889 struct tcf_proto *tp)
891 if (*chain_info->pprev == chain->filter_chain)
892 tcf_chain_head_change(chain, tp);
893 RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
894 rcu_assign_pointer(*chain_info->pprev, tp);
895 tcf_chain_hold(chain);
/* Unlink @tp from @chain using the position described by @chain_info.
 *
 * If @tp is the chain head, registered callbacks are told that its
 * successor is the new head. The slot is then pointed at the successor and
 * the chain reference taken at insertion time is dropped. @tp itself is not
 * destroyed here.
 */
898 static void tcf_chain_tp_remove(struct tcf_chain *chain,
899 struct tcf_chain_info *chain_info,
900 struct tcf_proto *tp)
902 struct tcf_proto *next = rtnl_dereference(chain_info->next);
904 if (tp == chain->filter_chain)
905 tcf_chain_head_change(chain, next);
906 RCU_INIT_POINTER(*chain_info->pprev, next);
907 tcf_chain_put(chain);
/* Locate the filter with priority @prio in @chain and fill @chain_info.
 *
 * The walk stops at the first filter whose prio is >= @prio. For an exact
 * priority match, ERR_PTR(-EINVAL) is returned when a non-zero @protocol
 * differs from the filter's protocol (a further condition on the preceding
 * line is not visible in this listing). @chain_info receives the slot where
 * the walk stopped and the successor of the filter found there, or NULL.
 * NOTE(review): the stop condition suggests the chain is kept sorted by
 * ascending prio - confirm. The last parameter line, the loop exit and the
 * final return are not visible in this listing.
 */
910 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
911 struct tcf_chain_info *chain_info,
912 u32 protocol, u32 prio,
915 struct tcf_proto **pprev;
916 struct tcf_proto *tp;
918 /* Check the chain for existence of proto-tcf with this priority */
919 for (pprev = &chain->filter_chain;
920 (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
921 if (tp->prio >= prio) {
922 if (tp->prio == prio) {
924 (tp->protocol != protocol && protocol))
925 return ERR_PTR(-EINVAL);
932 chain_info->pprev = pprev;
933 chain_info->next = tp ? tp->next : NULL;
/* Append one filter message describing (@tp, @fh) to @skb.
 *
 * A netlink header of type @event with a struct tcmsg payload is added.
 * The message is addressed either by device ifindex and @parent (qdisc
 * case) or by TCM_IFINDEX_MAGIC_BLOCK and the block index. tcm_info packs
 * the filter's prio and protocol. TCA_KIND and TCA_CHAIN attributes follow,
 * then the classifier's dump() hook, if it has one, adds its own data.
 * nlmsg_len is finally set from the distance the skb tail has moved.
 * Callers in this file treat a result <= 0 as failure.
 * NOTE(review): the condition selecting the addressing form, the failure
 * labels and the return values are not visible in this listing.
 */
937 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
938 struct tcf_proto *tp, struct tcf_block *block,
939 struct Qdisc *q, u32 parent, void *fh,
940 u32 portid, u32 seq, u16 flags, int event)
943 struct nlmsghdr *nlh;
944 unsigned char *b = skb_tail_pointer(skb);
946 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
949 tcm = nlmsg_data(nlh);
950 tcm->tcm_family = AF_UNSPEC;
954 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
955 tcm->tcm_parent = parent;
957 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
958 tcm->tcm_block_index = block->index;
960 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
961 if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
962 goto nla_put_failure;
963 if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
964 goto nla_put_failure;
968 if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
969 goto nla_put_failure;
971 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
/* Build a filter notification of type @event for (@tp, @fh) and send it.
 *
 * The message goes either to the requester alone through netlink_unicast()
 * (non-blocking), or to the RTNLGRP_TC group through rtnetlink_send(),
 * echoing to the requester when NLM_F_ECHO is set. The port id comes from
 * @oskb, or is 0 when @oskb is NULL.
 * NOTE(review): the allocation/fill failure handling and the test on
 * @unicast are not visible in this listing.
 */
980 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
981 struct nlmsghdr *n, struct tcf_proto *tp,
982 struct tcf_block *block, struct Qdisc *q,
983 u32 parent, void *fh, int event, bool unicast)
986 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
988 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
992 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
993 n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
999 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1001 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1002 n->nlmsg_flags & NLM_F_ECHO);
/* Delete filter handle @fh from @tp and notify listeners.
 *
 * The RTM_DELTFILTER message is built before the classifier's delete() hook
 * runs, so it still describes the filter being removed. @last is passed
 * through to delete(). The message is then sent by unicast or to
 * RTNLGRP_TC, as in tfilter_notify(); a failure to build or send it is
 * reported through @extack.
 * NOTE(review): the error checks, skb cleanup and return values are not
 * visible in this listing.
 */
1005 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
1006 struct nlmsghdr *n, struct tcf_proto *tp,
1007 struct tcf_block *block, struct Qdisc *q,
1008 u32 parent, void *fh, bool unicast, bool *last,
1009 struct netlink_ext_ack *extack)
1011 struct sk_buff *skb;
1012 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1015 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1019 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1020 n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
1021 NL_SET_ERR_MSG(extack, "Failed to build del event notification");
1026 err = tp->ops->delete(tp, fh, last, extack);
1033 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1035 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1036 n->nlmsg_flags & NLM_F_ECHO);
1038 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
/* Send one multicast notification of type @event for every filter on
 * @chain, each with a zero filter handle. Results of the individual
 * notifications are ignored.
 */
1042 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
1043 struct tcf_block *block, struct Qdisc *q,
1044 u32 parent, struct nlmsghdr *n,
1045 struct tcf_chain *chain, int event)
1047 struct tcf_proto *tp;
1049 for (tp = rtnl_dereference(chain->filter_chain);
1050 tp; tp = rtnl_dereference(tp->next))
1051 tfilter_notify(net, oskb, n, tp, block,
1052 q, parent, 0, event, false);
/* Netlink handler that creates or changes a filter.
 *
 * Visible steps: require CAP_NET_ADMIN in the netns' user namespace; parse
 * the attributes; split tcm_info into protocol and priority (a kernel-chosen
 * priority is used when none is given and NLM_F_CREATE is set); resolve the
 * block; validate the chain index against TC_ACT_EXT_VAL_MASK and get the
 * chain, creating it if needed; find the tcf_proto for the
 * priority/protocol. When none exists a new one is created, which requires
 * TCA_KIND, a protocol and NLM_F_CREATE. When one exists, a given TCA_KIND
 * must match it. The handle is then looked up, NLM_F_CREATE/NLM_F_EXCL are
 * enforced, and the classifier's change() hook is called. Afterwards a
 * newly created tcf_proto is inserted into the chain, RTM_NEWTFILTER is
 * sent, or the new tcf_proto is destroyed on failure. The chain reference
 * is dropped before returning, and the request may be replayed.
 * NOTE(review): most conditions, error codes, labels and the replay test
 * are not visible in this listing.
 */
1055 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1056 struct netlink_ext_ack *extack)
1058 struct net *net = sock_net(skb->sk);
1059 struct nlattr *tca[TCA_MAX + 1];
1066 struct Qdisc *q = NULL;
1067 struct tcf_chain_info chain_info;
1068 struct tcf_chain *chain = NULL;
1069 struct tcf_block *block;
1070 struct tcf_proto *tp;
1076 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1082 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
1087 protocol = TC_H_MIN(t->tcm_info);
1088 prio = TC_H_MAJ(t->tcm_info);
1089 prio_allocate = false;
1090 parent = t->tcm_parent;
1094 /* If no priority is provided by the user,
1097 if (n->nlmsg_flags & NLM_F_CREATE) {
1098 prio = TC_H_MAKE(0x80000000U, 0U);
1099 prio_allocate = true;
1101 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
1106 /* Find head of filter chain. */
1108 block = tcf_block_find(net, &q, &parent, &cl,
1109 t->tcm_ifindex, t->tcm_block_index, extack);
1110 if (IS_ERR(block)) {
1111 err = PTR_ERR(block);
1115 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
1116 if (chain_index > TC_ACT_EXT_VAL_MASK) {
1117 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
1121 chain = tcf_chain_get(block, chain_index, true);
1123 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
1128 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
1129 prio, prio_allocate);
1131 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
1137 /* Proto-tcf does not exist, create new one */
1139 if (tca[TCA_KIND] == NULL || !protocol) {
1140 NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
1145 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1146 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
1152 prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
1154 tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
1155 protocol, prio, chain, extack);
1161 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
1162 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
1167 fh = tp->ops->get(tp, t->tcm_handle);
1170 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1171 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
1175 } else if (n->nlmsg_flags & NLM_F_EXCL) {
1176 NL_SET_ERR_MSG(extack, "Filter already exists");
1181 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
1182 n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
1186 tcf_chain_tp_insert(chain, &chain_info, tp);
1187 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1188 RTM_NEWTFILTER, false);
1191 tcf_proto_destroy(tp, NULL);
1196 tcf_chain_put(chain);
1198 /* Replay the request. */
/* Netlink handler that deletes a filter, a whole tcf_proto, or flushes a
 * chain.
 *
 * Visible steps: require CAP_NET_ADMIN; parse the attributes; reject a zero
 * priority combined with a protocol, handle or kind (a zero priority alone
 * means "flush"); resolve the block and the existing chain (no chain is
 * created here). In the flush case every filter is announced with
 * RTM_DELTFILTER and the chain is flushed. Otherwise the tcf_proto is
 * located and a given kind must match. With a zero handle the whole
 * tcf_proto is unlinked, announced and destroyed; with a non-zero handle
 * that single filter is deleted through tfilter_del_notify(), and the
 * tcf_proto is unlinked and destroyed as well - presumably only when that
 * was its last filter. The chain reference is dropped before returning.
 * NOTE(review): most conditions, error codes and labels are not visible in
 * this listing.
 */
1203 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1204 struct netlink_ext_ack *extack)
1206 struct net *net = sock_net(skb->sk);
1207 struct nlattr *tca[TCA_MAX + 1];
1213 struct Qdisc *q = NULL;
1214 struct tcf_chain_info chain_info;
1215 struct tcf_chain *chain = NULL;
1216 struct tcf_block *block;
1217 struct tcf_proto *tp = NULL;
1218 unsigned long cl = 0;
1222 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1225 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
1230 protocol = TC_H_MIN(t->tcm_info);
1231 prio = TC_H_MAJ(t->tcm_info);
1232 parent = t->tcm_parent;
1234 if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
1235 NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
1239 /* Find head of filter chain. */
1241 block = tcf_block_find(net, &q, &parent, &cl,
1242 t->tcm_ifindex, t->tcm_block_index, extack);
1243 if (IS_ERR(block)) {
1244 err = PTR_ERR(block);
1248 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
1249 if (chain_index > TC_ACT_EXT_VAL_MASK) {
1250 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
1254 chain = tcf_chain_get(block, chain_index, false);
1256 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
1262 tfilter_notify_chain(net, skb, block, q, parent, n,
1263 chain, RTM_DELTFILTER);
1264 tcf_chain_flush(chain);
1269 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
1271 if (!tp || IS_ERR(tp)) {
1272 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
1273 err = tp ? PTR_ERR(tp) : -ENOENT;
1275 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
1276 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
1281 fh = tp->ops->get(tp, t->tcm_handle);
1284 if (t->tcm_handle == 0) {
1285 tcf_chain_tp_remove(chain, &chain_info, tp);
1286 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1287 RTM_DELTFILTER, false);
1288 tcf_proto_destroy(tp, extack);
1291 NL_SET_ERR_MSG(extack, "Specified filter handle not found");
1297 err = tfilter_del_notify(net, skb, n, tp, block,
1298 q, parent, fh, false, &last,
1303 tcf_chain_tp_remove(chain, &chain_info, tp);
1304 tcf_proto_destroy(tp, extack);
1310 tcf_chain_put(chain);
/* Netlink handler that looks up one filter and reports it to the requester.
 *
 * Visible steps: parse the attributes; reject a zero priority; resolve the
 * block and the existing chain (no chain is created here); locate the
 * tcf_proto and verify a given kind; look up the handle; on success send a
 * unicast RTM_NEWTFILTER describing the filter. The chain reference is
 * dropped before returning. No capability check is visible on this path.
 * NOTE(review): most conditions, error codes and labels are not visible in
 * this listing.
 */
1314 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1315 struct netlink_ext_ack *extack)
1317 struct net *net = sock_net(skb->sk);
1318 struct nlattr *tca[TCA_MAX + 1];
1324 struct Qdisc *q = NULL;
1325 struct tcf_chain_info chain_info;
1326 struct tcf_chain *chain = NULL;
1327 struct tcf_block *block;
1328 struct tcf_proto *tp = NULL;
1329 unsigned long cl = 0;
1333 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
1338 protocol = TC_H_MIN(t->tcm_info);
1339 prio = TC_H_MAJ(t->tcm_info);
1340 parent = t->tcm_parent;
1343 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
1347 /* Find head of filter chain. */
1349 block = tcf_block_find(net, &q, &parent, &cl,
1350 t->tcm_ifindex, t->tcm_block_index, extack);
1351 if (IS_ERR(block)) {
1352 err = PTR_ERR(block);
1356 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
1357 if (chain_index > TC_ACT_EXT_VAL_MASK) {
1358 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
1362 chain = tcf_chain_get(block, chain_index, false);
1364 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
1369 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
1371 if (!tp || IS_ERR(tp)) {
1372 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
1373 err = tp ? PTR_ERR(tp) : -ENOENT;
1375 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
1376 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
1381 fh = tp->ops->get(tp, t->tcm_handle);
1384 NL_SET_ERR_MSG(extack, "Specified filter handle not found");
1387 err = tfilter_notify(net, skb, n, tp, block, q, parent,
1388 fh, RTM_NEWTFILTER, true);
1390 NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
1395 tcf_chain_put(chain);
/* Walker state used while dumping a classifier's filters. The walker is the
 * first member, so tcf_node_dump() can cast the tcf_walker pointer it
 * receives back to this structure.
 * NOTE(review): the q and parent members used by tcf_node_dump() are not
 * visible in this listing.
 */
1399 struct tcf_dump_args {
1400 struct tcf_walker w; /* must stay first; see the cast in tcf_node_dump() */
1401 struct sk_buff *skb; /* dump output buffer */
1402 struct netlink_callback *cb; /* netlink dump context */
1403 struct tcf_block *block; /* block being dumped */
/* Walker callback: emit one NLM_F_MULTI filter message for handle @n of @tp
 * into the dump skb, using the requester's port id and sequence number
 * taken from the dump context. Returns tcf_fill_node()'s result.
 * NOTE(review): the final argument line (event type) is not visible in this
 * listing.
 */
1408 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1410 struct tcf_dump_args *a = (void *)arg;
1411 struct net *net = sock_net(a->skb->sk);
1413 return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
1414 n, NETLINK_CB(a->cb->skb).portid,
1415 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
/* Dump the filters of one chain into @skb as part of a netlink dump.
 *
 * *p_index counts tcf_protos across chains; those below @index_start are
 * tested first (presumably skipped because an earlier pass dumped them).
 * A non-zero priority or protocol in the request's tcm_info acts as a
 * filter. cb->args[1] tracks progress inside the current tcf_proto: 0 means
 * its header message has not been emitted yet; afterwards it holds the
 * walker count + 1 and is used (minus 1) as the number of entries to skip.
 * It is cleared, along with the following args, when moving past
 * @index_start.
 * NOTE(review): the continue/return statements and the meaning of the
 * boolean result are not visible in this listing.
 */
1419 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
1420 struct sk_buff *skb, struct netlink_callback *cb,
1421 long index_start, long *p_index)
1423 struct net *net = sock_net(skb->sk);
1424 struct tcf_block *block = chain->block;
1425 struct tcmsg *tcm = nlmsg_data(cb->nlh);
1426 struct tcf_dump_args arg;
1427 struct tcf_proto *tp;
1429 for (tp = rtnl_dereference(chain->filter_chain);
1430 tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
1431 if (*p_index < index_start)
1433 if (TC_H_MAJ(tcm->tcm_info) &&
1434 TC_H_MAJ(tcm->tcm_info) != tp->prio)
1436 if (TC_H_MIN(tcm->tcm_info) &&
1437 TC_H_MIN(tcm->tcm_info) != tp->protocol)
1439 if (*p_index > index_start)
1440 memset(&cb->args[1], 0,
1441 sizeof(cb->args) - sizeof(cb->args[0]));
1442 if (cb->args[1] == 0) {
1443 if (tcf_fill_node(net, skb, tp, block, q, parent, 0,
1444 NETLINK_CB(cb->skb).portid,
1445 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1446 RTM_NEWTFILTER) <= 0)
1453 arg.w.fn = tcf_node_dump;
1458 arg.parent = parent;
1460 arg.w.skip = cb->args[1] - 1;
1462 tp->ops->walk(tp, &arg.w);
1463 cb->args[1] = arg.w.count + 1;
1470 /* called with RTNL
 *
 * Netlink dump handler for filters. The request must carry at least a
 * struct tcmsg. The block is found either by block index
 * (TCM_IFINDEX_MAGIC_BLOCK) or through device, parent qdisc and optional
 * class, mirroring tcf_block_find(). Every chain of the block is dumped
 * via tcf_chain_dump(), restricted to one chain when TCA_CHAIN is given.
 * cb->args[0] carries the resume index between successive calls.
 * NOTE(review): the early-return conditions and return values are not
 * visible in this listing.
 */
1471 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
1473 struct net *net = sock_net(skb->sk);
1474 struct nlattr *tca[TCA_MAX + 1];
1475 struct Qdisc *q = NULL;
1476 struct tcf_block *block;
1477 struct tcf_chain *chain;
1478 struct tcmsg *tcm = nlmsg_data(cb->nlh);
1484 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1487 err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
1491 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1492 block = tcf_block_lookup(net, tcm->tcm_block_index);
1495 /* If we work with block index, q is NULL and parent value
1496 * will never be used in the following code. The check
1497 * in tcf_fill_node prevents it. However, compiler does not
1498 * see that far, so set parent to zero to silence the warning
1499 * about parent being uninitialized.
1503 const struct Qdisc_class_ops *cops;
1504 struct net_device *dev;
1505 unsigned long cl = 0;
1507 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1511 parent = tcm->tcm_parent;
1516 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1520 cops = q->ops->cl_ops;
1523 if (!cops->tcf_block)
1525 if (TC_H_MIN(tcm->tcm_parent)) {
1526 cl = cops->find(q, tcm->tcm_parent);
1530 block = cops->tcf_block(q, cl, NULL);
1533 if (tcf_block_shared(block))
1537 index_start = cb->args[0];
1540 list_for_each_entry(chain, &block->chain_list, list) {
1541 if (tca[TCA_CHAIN] &&
1542 nla_get_u32(tca[TCA_CHAIN]) != chain->index)
1544 if (!tcf_chain_dump(chain, q, parent, skb, cb,
1545 index_start, &index)) {
1551 cb->args[0] = index;
1554 /* If we did no progress, the error (EMSGSIZE) is real */
1555 if (skb->len == 0 && err)
/*
 * Release the actions attached to @exts.
 *
 * With CONFIG_NET_CLS_ACT the actions are collected on a temporary list,
 * destroyed with TCA_ACT_UNBIND, the action array is freed and the action
 * count reset. Without it this is a no-op. Asserts that RTNL is held.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(actions);

	ASSERT_RTNL();
	tcf_exts_to_list(exts, &actions);
	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
1574 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
1575 struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
1576 struct netlink_ext_ack *extack)
1578 #ifdef CONFIG_NET_CLS_ACT
1580 struct tc_action *act;
1581 size_t attr_size = 0;
1583 if (exts->police && tb[exts->police]) {
1584 act = tcf_action_init_1(net, tp, tb[exts->police],
1585 rate_tlv, "police", ovr,
1586 TCA_ACT_BIND, extack);
1588 return PTR_ERR(act);
1590 act->type = exts->type = TCA_OLD_COMPAT;
1591 exts->actions[0] = act;
1592 exts->nr_actions = 1;
1593 } else if (exts->action && tb[exts->action]) {
1597 err = tcf_action_init(net, tp, tb[exts->action],
1598 rate_tlv, NULL, ovr, TCA_ACT_BIND,
1599 &actions, &attr_size, extack);
1602 list_for_each_entry(act, &actions, list)
1603 exts->actions[i++] = act;
1604 exts->nr_actions = i;
1609 if ((exts->action && tb[exts->action]) ||
1610 (exts->police && tb[exts->police])) {
1611 NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
1618 EXPORT_SYMBOL(tcf_exts_validate);
/*
 * Replace the extensions in @dst with those in @src.
 *
 * The previous contents of @dst are saved in a local copy, @dst is
 * overwritten by structure assignment, and the saved copy is then
 * destroyed. No-op without CONFIG_NET_CLS_ACT.
 */
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
#ifdef CONFIG_NET_CLS_ACT
/* Return the first action stored in @exts, or NULL if it holds none. */
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif
/*
 * Dump the actions of @exts into @skb as a nested attribute.
 *
 * New-style extensions are emitted under the exts->action attribute via
 * tcf_action_dump(); TCA_OLD_COMPAT extensions are emitted under the
 * exts->police attribute via tcf_action_dump_old().
 *
 * Returns 0 on success (always 0 without CONFIG_NET_CLS_ACT), or -1 after
 * cancelling the nest when any step fails.
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			LIST_HEAD(actions);

			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			tcf_exts_to_list(exts, &actions);
			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
/*
 * Copy the statistics of the first action of @exts into @skb.
 *
 * Returns -1 if tcf_action_copy_stats() fails, 0 otherwise (including
 * when there is no action or CONFIG_NET_CLS_ACT is disabled).
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
1696 static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
1697 enum tc_setup_type type,
1698 void *type_data, bool err_stop)
1701 #ifdef CONFIG_NET_CLS_ACT
1702 const struct tc_action *a;
1703 struct net_device *dev;
1706 if (!tcf_exts_has_actions(exts))
1709 for (i = 0; i < exts->nr_actions; i++) {
1710 a = exts->actions[i];
1711 if (!a->ops->get_dev)
1713 dev = a->ops->get_dev(a);
1716 ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
1725 int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
1726 enum tc_setup_type type, void *type_data, bool err_stop)
1731 if (!block->nooffloaddevcnt) {
1732 ret = tcf_block_cb_call(block, type, type_data, err_stop);
1738 if (!exts || ok_count)
1741 ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
1747 /* if one of the netdevs sharing this block are not offload-capable
1748 * make sure we succeeded in egress instead.
1750 if (block->nooffloaddevcnt && !ok_count && err_stop)
1755 EXPORT_SYMBOL(tc_setup_cb_call);
1757 static __net_init int tcf_net_init(struct net *net)
1759 struct tcf_net *tn = net_generic(net, tcf_net_id);
1765 static void __net_exit tcf_net_exit(struct net *net)
1767 struct tcf_net *tn = net_generic(net, tcf_net_id);
1769 idr_destroy(&tn->idr);
1772 static struct pernet_operations tcf_net_ops = {
1773 .init = tcf_net_init,
1774 .exit = tcf_net_exit,
1776 .size = sizeof(struct tcf_net),
1779 static int __init tc_filter_init(void)
1783 tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
1787 err = register_pernet_subsys(&tcf_net_ops);
1789 goto err_register_pernet_subsys;
1791 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
1792 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
1793 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
1794 tc_dump_tfilter, 0);
1798 err_register_pernet_subsys:
1799 destroy_workqueue(tc_filter_wq);
1803 subsys_initcall(tc_filter_init);