X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=net%2Fnetfilter%2Fnf_tables_api.c;h=0a6eafa498795d309df0a28fd69ce299804309ed;hb=371ebcbb9ee62fb46a0a27f358941588f7048678;hp=91e80aa852d630b01ceede24f79dedbeca166ece;hpb=4f6b15c3a604c0addf1607c5482c46a0a5123066;p=linux-2.6-microblaze.git diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 91e80aa852d6..0a6eafa49879 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -28,6 +28,28 @@ static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static u64 table_handle; +enum { + NFT_VALIDATE_SKIP = 0, + NFT_VALIDATE_NEED, + NFT_VALIDATE_DO, +}; + +static void nft_validate_state_update(struct net *net, u8 new_validate_state) +{ + switch (net->nft.validate_state) { + case NFT_VALIDATE_SKIP: + WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); + break; + case NFT_VALIDATE_NEED: + break; + case NFT_VALIDATE_DO: + if (new_validate_state == NFT_VALIDATE_NEED) + return; + } + + net->nft.validate_state = new_validate_state; +} + static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, @@ -74,88 +96,43 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -/* removal requests are queued in the commit_list, but not acted upon - * until after all new rules are in place. - * - * Therefore, nf_register_net_hook(net, &nat_hook) runs before pending - * nf_unregister_net_hook(). - * - * nf_register_net_hook thus fails if a nat hook is already in place - * even if the conflicting hook is about to be removed. - * - * If collision is detected, search commit_log for DELCHAIN matching - * the new nat hooknum; if we find one collision is temporary: - * - * Either transaction is aborted (new/colliding hook is removed), or - * transaction is committed (old hook is removed). - */ -static bool nf_tables_allow_nat_conflict(const struct net *net, - const struct nf_hook_ops *ops) -{ - const struct nft_trans *trans; - bool ret = false; - - if (!ops->nat_hook) - return false; - - list_for_each_entry(trans, &net->nft.commit_list, list) { - const struct nf_hook_ops *pending_ops; - const struct nft_chain *pending; - - if (trans->msg_type != NFT_MSG_NEWCHAIN && - trans->msg_type != NFT_MSG_DELCHAIN) - continue; - - pending = trans->ctx.chain; - if (!nft_is_base_chain(pending)) - continue; - - pending_ops = &nft_base_chain(pending)->ops; - if (pending_ops->nat_hook && - pending_ops->pf == ops->pf && - pending_ops->hooknum == ops->hooknum) { - /* other hook registration already pending? */ - if (trans->msg_type == NFT_MSG_NEWCHAIN) - return false; - - ret = true; - } - } - - return ret; -} - static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) { - struct nf_hook_ops *ops; - int ret; + const struct nft_base_chain *basechain; + const struct nf_hook_ops *ops; if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return 0; - ops = &nft_base_chain(chain)->ops; - ret = nf_register_net_hook(net, ops); - if (ret == -EBUSY && nf_tables_allow_nat_conflict(net, ops)) { - ops->nat_hook = false; - ret = nf_register_net_hook(net, ops); - ops->nat_hook = true; - } + basechain = nft_base_chain(chain); + ops = &basechain->ops; - return ret; + if (basechain->type->ops_register) + return basechain->type->ops_register(net, ops); + + return nf_register_net_hook(net, ops); } static void nf_tables_unregister_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) { + const struct nft_base_chain *basechain; + const struct nf_hook_ops *ops; + if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return; + basechain = nft_base_chain(chain); + ops = &basechain->ops; - nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); + if (basechain->type->ops_unregister) + return basechain->type->ops_unregister(net, ops); + + nf_unregister_net_hook(net, ops); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -415,13 +392,17 @@ static struct nft_table *nft_table_lookup(const struct net *net, { struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) { + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry_rcu(table, &net->nft.tables, list) { if (!nla_strcmp(nla, table->name) && table->family == family && nft_active_genmask(table, genmask)) return table; } - return NULL; + + return ERR_PTR(-ENOENT); } static struct nft_table *nft_table_lookup_byhandle(const struct net *net, @@ -435,37 +416,6 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, nft_active_genmask(table, genmask)) return table; } - return NULL; -} - -static struct nft_table *nf_tables_table_lookup(const struct net *net, - const struct nlattr *nla, - u8 family, u8 genmask) -{ - struct nft_table *table; - - if (nla == NULL) - return ERR_PTR(-EINVAL); - - table = nft_table_lookup(net, nla, family, genmask); - if (table != NULL) - return table; - - return ERR_PTR(-ENOENT); -} - -static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net, - const struct nlattr *nla, - u8 genmask) -{ - struct nft_table *table; - - if (nla == NULL) - return ERR_PTR(-EINVAL); - - table = nft_table_lookup_byhandle(net, nla, genmask); - if (table != NULL) - return table; return ERR_PTR(-ENOENT); } @@ -618,6 +568,24 @@ done: return skb->len; } +static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *c) +{ + int err; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + rcu_read_unlock(); + err = netlink_dump_start(nlsk, skb, nlh, c); + rcu_read_lock(); + module_put(THIS_MODULE); + + return err; +} + +/* called with rcu_read_lock held */ static int nf_tables_gettable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -633,16 +601,19 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_tables, + .module = THIS_MODULE, }; - return netlink_dump_start(nlsk, skb, nlh, &c); + + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } - table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]); return PTR_ERR(table); + } - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; @@ -756,21 +727,23 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - const struct nlattr *name; - struct nft_table *table; int family = nfmsg->nfgen_family; + const struct nlattr *attr; + struct nft_table *table; u32 flags = 0; struct nft_ctx ctx; int err; - name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(net, name, family, genmask); + attr = nla[NFTA_TABLE_NAME]; + table = nft_table_lookup(net, attr, family, genmask); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); } else { - if (nlh->nlmsg_flags & NLM_F_EXCL) + if (nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, attr); return -EEXIST; + } if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; @@ -789,7 +762,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (table == NULL) goto err_kzalloc; - table->name = nla_strdup(name, GFP_KERNEL); + table->name = nla_strdup(attr, GFP_KERNEL); if (table->name == NULL) goto err_strdup; @@ -912,8 +885,9 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - struct nft_table *table; int family = nfmsg->nfgen_family; + const struct nlattr *attr; + struct nft_table *table; struct nft_ctx ctx; nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla); @@ -921,16 +895,18 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE])) return nft_flush(&ctx, family); - if (nla[NFTA_TABLE_HANDLE]) - table = nf_tables_table_lookup_byhandle(net, - nla[NFTA_TABLE_HANDLE], - genmask); - else - table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], - family, genmask); + if (nla[NFTA_TABLE_HANDLE]) { + attr = nla[NFTA_TABLE_HANDLE]; + table = nft_table_lookup_byhandle(net, attr, genmask); + } else { + attr = nla[NFTA_TABLE_NAME]; + table = nft_table_lookup(net, attr, family, genmask); + } - if (IS_ERR(table)) + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(table); + } if (nlh->nlmsg_flags & NLM_F_NONREC && table->use > 0) @@ -978,8 +954,7 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type); */ static struct nft_chain * -nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle, - u8 genmask) +nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) { struct nft_chain *chain; @@ -992,16 +967,15 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle, return ERR_PTR(-ENOENT); } -static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, - const struct nlattr *nla, - u8 genmask) +static struct nft_chain *nft_chain_lookup(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) { struct nft_chain *chain; if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry_rcu(chain, &table->chains, list) { if (!nla_strcmp(nla, chain->name) && nft_active_genmask(chain, genmask)) return chain; @@ -1203,6 +1177,7 @@ done: return skb->len; } +/* called with rcu_read_lock held */ static int nf_tables_getchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -1219,20 +1194,25 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_chains, + .module = THIS_MODULE, }; - return netlink_dump_start(nlsk, skb, nlh, &c); + + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } - table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]); return PTR_ERR(table); + } - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); - if (IS_ERR(chain)) + chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); + if (IS_ERR(chain)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); return PTR_ERR(chain); + } - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; @@ -1302,17 +1282,32 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain, rcu_assign_pointer(chain->stats, newstats); } +static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) +{ + struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0); + struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1); + + if (g0 != g1) + kvfree(g1); + kvfree(g0); + + /* should be NULL either via abort or via successful commit */ + WARN_ON_ONCE(chain->rules_next); + kvfree(chain->rules_next); +} + static void nf_tables_chain_destroy(struct nft_ctx *ctx) { struct nft_chain *chain = ctx->chain; BUG_ON(chain->use > 0); + /* no concurrent access possible anymore */ + nf_tables_chain_free_chain_rules(chain); + if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); - if (basechain->type->free) - basechain->type->free(ctx); module_put(basechain->type->owner); free_percpu(basechain->stats); if (basechain->stats) @@ -1402,6 +1397,27 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook) module_put(hook->type->owner); } +struct nft_rules_old { + struct rcu_head h; + struct nft_rule **start; +}; + +static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain, + unsigned int alloc) +{ + if (alloc > INT_MAX) + return NULL; + + alloc += 1; /* NULL, ends rules */ + if (sizeof(struct nft_rule *) > INT_MAX / alloc) + return NULL; + + alloc *= sizeof(struct nft_rule *); + alloc += sizeof(struct nft_rules_old); + + return kvmalloc(alloc, GFP_KERNEL); +} + static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, u8 policy, bool create) { @@ -1411,6 +1427,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats; struct net *net = ctx->net; struct nft_chain *chain; + struct nft_rule **rules; int err; if (table->use == UINT_MAX) @@ -1445,9 +1462,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, } basechain->type = hook.type; - if (basechain->type->init) - basechain->type->init(ctx); - chain = &basechain->chain; ops = &basechain->ops; @@ -1458,9 +1472,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, ops->hook = hook.type->hooks[ops->hooknum]; ops->dev = hook.dev; - if (basechain->type->type == NFT_CHAIN_T_NAT) - ops->nat_hook = true; - chain->flags |= NFT_BASE_CHAIN; basechain->policy = policy; } else { @@ -1479,6 +1490,16 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err1; } + rules = nf_tables_chain_alloc_rules(chain, 0); + if (!rules) { + err = -ENOMEM; + goto err1; + } + + *rules = NULL; + rcu_assign_pointer(chain->rules_gen_0, rules); + rcu_assign_pointer(chain->rules_gen_1, rules); + err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err1; @@ -1542,8 +1563,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nla[NFTA_CHAIN_NAME]) { struct nft_chain *chain2; - chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], - genmask); + chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); if (!IS_ERR(chain2)) return -EEXIST; } @@ -1593,9 +1613,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nlattr * uninitialized_var(name); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; + const struct nlattr *attr; struct nft_table *table; struct nft_chain *chain; u8 policy = NF_ACCEPT; @@ -1605,36 +1625,46 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]); return PTR_ERR(table); + } chain = NULL; - name = nla[NFTA_CHAIN_NAME]; + attr = nla[NFTA_CHAIN_NAME]; if (nla[NFTA_CHAIN_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); - chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); - if (IS_ERR(chain)) + chain = nft_chain_lookup_byhandle(table, handle, genmask); + if (IS_ERR(chain)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]); return PTR_ERR(chain); + } + attr = nla[NFTA_CHAIN_HANDLE]; } else { - chain = nf_tables_chain_lookup(table, name, genmask); + chain = nft_chain_lookup(table, attr, genmask); if (IS_ERR(chain)) { - if (PTR_ERR(chain) != -ENOENT) + if (PTR_ERR(chain) != -ENOENT) { + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(chain); + } chain = NULL; } } if (nla[NFTA_CHAIN_POLICY]) { if (chain != NULL && - !nft_is_base_chain(chain)) + !nft_is_base_chain(chain)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]); return -EOPNOTSUPP; + } if (chain == NULL && - nla[NFTA_CHAIN_HOOK] == NULL) + nla[NFTA_CHAIN_HOOK] == NULL) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]); return -EOPNOTSUPP; + } policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY])); switch (policy) { @@ -1649,8 +1679,10 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); if (chain != NULL) { - if (nlh->nlmsg_flags & NLM_F_EXCL) + if (nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, attr); return -EEXIST; + } if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; @@ -1667,28 +1699,34 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + const struct nlattr *attr; struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule; - int family = nfmsg->nfgen_family; struct nft_ctx ctx; u64 handle; u32 use; int err; - table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]); return PTR_ERR(table); + } if (nla[NFTA_CHAIN_HANDLE]) { - handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); - chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); + attr = nla[NFTA_CHAIN_HANDLE]; + handle = be64_to_cpu(nla_get_be64(attr)); + chain = nft_chain_lookup_byhandle(table, handle, genmask); } else { - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); + attr = nla[NFTA_CHAIN_NAME]; + chain = nft_chain_lookup(table, attr, genmask); } - if (IS_ERR(chain)) + if (IS_ERR(chain)) { + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(chain); + } if (nlh->nlmsg_flags & NLM_F_NONREC && chain->use > 0) @@ -1710,8 +1748,10 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, /* There are rules and elements that are still holding references to us, * we cannot do a recursive removal in this case. */ - if (use > 0) + if (use > 0) { + NL_SET_BAD_ATTR(extack, attr); return -EBUSY; + } return nft_delchain(&ctx); } @@ -1903,19 +1943,7 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, goto err1; } - if (ops->validate) { - const struct nft_data *data = NULL; - - err = ops->validate(ctx, expr, &data); - if (err < 0) - goto err2; - } - return 0; - -err2: - if (ops->destroy) - ops->destroy(ctx, expr); err1: expr->ops = NULL; return err; @@ -1968,13 +1996,13 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) * Rules */ -static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain, - u64 handle) +static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain, + u64 handle) { struct nft_rule *rule; // FIXME: this sucks - list_for_each_entry(rule, &chain->rules, list) { + list_for_each_entry_rcu(rule, &chain->rules, list) { if (handle == rule->handle) return rule; } @@ -1982,13 +2010,13 @@ static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain, return ERR_PTR(-ENOENT); } -static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain, - const struct nlattr *nla) +static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain, + const struct nlattr *nla) { if (nla == NULL) return ERR_PTR(-EINVAL); - return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla))); + return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla))); } static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { @@ -2170,6 +2198,7 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb) return 0; } +/* called with rcu_read_lock held */ static int nf_tables_getrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -2188,18 +2217,19 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, struct netlink_dump_control c = { .dump = nf_tables_dump_rules, .done = nf_tables_dump_rules_done, + .module = THIS_MODULE, }; if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { struct nft_rule_dump_ctx *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); if (!ctx) return -ENOMEM; if (nla[NFTA_RULE_TABLE]) { ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], - GFP_KERNEL); + GFP_ATOMIC); if (!ctx->table) { kfree(ctx); return -ENOMEM; @@ -2207,7 +2237,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, } if (nla[NFTA_RULE_CHAIN]) { ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], - GFP_KERNEL); + GFP_ATOMIC); if (!ctx->chain) { kfree(ctx->table); kfree(ctx); @@ -2217,23 +2247,28 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, c.data = ctx; } - return netlink_dump_start(nlsk, skb, nlh, &c); + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } - table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]); return PTR_ERR(table); + } - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); - if (IS_ERR(chain)) + chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); + if (IS_ERR(chain)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); + } - rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); - if (IS_ERR(rule)) + rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return PTR_ERR(rule); + } - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; @@ -2274,6 +2309,53 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, nf_tables_rule_destroy(ctx, rule); } +int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) +{ + struct nft_expr *expr, *last; + const struct nft_data *data; + struct nft_rule *rule; + int err; + + list_for_each_entry(rule, &chain->rules, list) { + if (!nft_is_active_next(ctx->net, rule)) + continue; + + nft_rule_for_each_expr(expr, last, rule) { + if (!expr->ops->validate) + continue; + + err = expr->ops->validate(ctx, expr, &data); + if (err < 0) + return err; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(nft_chain_validate); + +static int nft_table_validate(struct net *net, const struct nft_table *table) +{ + struct nft_chain *chain; + struct nft_ctx ctx = { + .net = net, + .family = table->family, + }; + int err; + + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_base_chain(chain)) + continue; + + ctx.chain = chain; + err = nft_chain_validate(&ctx, chain); + if (err < 0) + return err; + } + + return 0; +} + #define NFT_RULE_MAXEXPRS 128 static struct nft_expr_info *info; @@ -2301,23 +2383,30 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]); return PTR_ERR(table); + } - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); - if (IS_ERR(chain)) + chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); + if (IS_ERR(chain)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); + } if (nla[NFTA_RULE_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); - rule = __nf_tables_rule_lookup(chain, handle); - if (IS_ERR(rule)) + rule = __nft_rule_lookup(chain, handle); + if (IS_ERR(rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return PTR_ERR(rule); + } - if (nlh->nlmsg_flags & NLM_F_EXCL) + if (nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return -EEXIST; + } if (nlh->nlmsg_flags & NLM_F_REPLACE) old_rule = rule; else @@ -2336,9 +2425,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return -EOPNOTSUPP; pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); - old_rule = __nf_tables_rule_lookup(chain, pos_handle); - if (IS_ERR(old_rule)) + old_rule = __nft_rule_lookup(chain, pos_handle); + if (IS_ERR(old_rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); return PTR_ERR(old_rule); + } } nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); @@ -2392,6 +2483,10 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, err = nf_tables_newexpr(&ctx, &info[i], expr); if (err < 0) goto err2; + + if (info[i].ops->validate) + nft_validate_state_update(net, NFT_VALIDATE_NEED); + info[i].ops = NULL; expr = nft_expr_next(expr); } @@ -2435,8 +2530,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } } chain->use++; - return 0; + if (net->nft.validate_state == NFT_VALIDATE_DO) + return nft_table_validate(net, table); + + return 0; err2: nf_tables_rule_release(&ctx, rule); err1: @@ -2476,32 +2574,37 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, int family = nfmsg->nfgen_family, err = 0; struct nft_ctx ctx; - table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]); return PTR_ERR(table); + } if (nla[NFTA_RULE_CHAIN]) { - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], - genmask); - if (IS_ERR(chain)) + chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); + if (IS_ERR(chain)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); + } } nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); if (chain) { if (nla[NFTA_RULE_HANDLE]) { - rule = nf_tables_rule_lookup(chain, - nla[NFTA_RULE_HANDLE]); - if (IS_ERR(rule)) + rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return PTR_ERR(rule); + } err = nft_delrule(&ctx, rule); } else if (nla[NFTA_RULE_ID]) { rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]); - if (IS_ERR(rule)) + if (IS_ERR(rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]); return PTR_ERR(rule); + } err = nft_delrule(&ctx, rule); } else { @@ -2546,14 +2649,12 @@ void nft_unregister_set(struct nft_set_type *type) EXPORT_SYMBOL_GPL(nft_unregister_set); #define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ - NFT_SET_TIMEOUT | NFT_SET_OBJECT) + NFT_SET_TIMEOUT | NFT_SET_OBJECT | \ + NFT_SET_EVAL) -static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags) +static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) { - if ((flags & NFT_SET_EVAL) && !ops->update) - return false; - - return (flags & ops->features) == (flags & NFT_SET_FEATURES); + return (flags & type->features) == (flags & NFT_SET_FEATURES); } /* @@ -2590,14 +2691,9 @@ nft_select_set_ops(const struct nft_ctx *ctx, best.space = ~0; list_for_each_entry(type, &nf_tables_set_types, list) { - if (!type->select_ops) - ops = type->ops; - else - ops = type->select_ops(ctx, desc, flags); - if (!ops) - continue; + ops = &type->ops; - if (!nft_set_ops_candidate(ops, flags)) + if (!nft_set_ops_candidate(type, flags)) continue; if (!ops->estimate(desc, flags, &est)) continue; @@ -2628,7 +2724,7 @@ nft_select_set_ops(const struct nft_ctx *ctx, if (!try_module_get(type->owner)) continue; if (bops != NULL) - module_put(bops->type->owner); + module_put(to_set_type(bops)->owner); bops = ops; best = est; @@ -2669,6 +2765,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], + struct netlink_ext_ack *extack, u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -2676,25 +2773,27 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, struct nft_table *table = NULL; if (nla[NFTA_SET_TABLE] != NULL) { - table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], - family, genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, + genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); return PTR_ERR(table); + } } nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); return 0; } -static struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla, u8 genmask) +static struct nft_set *nft_set_lookup(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) { struct nft_set *set; if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry(set, &table->sets, list) { + list_for_each_entry_rcu(set, &table->sets, list) { if (!nla_strcmp(nla, set->name) && nft_active_genmask(set, genmask)) return set; @@ -2702,14 +2801,12 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-ENOENT); } -static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table, - const struct nlattr *nla, u8 genmask) +static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask) { struct nft_set *set; - if (nla == NULL) - return ERR_PTR(-EINVAL); - list_for_each_entry(set, &table->sets, list) { if (be64_to_cpu(nla_get_be64(nla)) == set->handle && nft_active_genmask(set, genmask)) @@ -2718,9 +2815,8 @@ static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *tab return ERR_PTR(-ENOENT); } -static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla, - u8 genmask) +static struct nft_set *nft_set_lookup_byid(const struct net *net, + const struct nlattr *nla, u8 genmask) { struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); @@ -2744,12 +2840,12 @@ struct nft_set *nft_set_lookup_global(const struct net *net, { struct nft_set *set; - set = nf_tables_set_lookup(table, nla_set_name, genmask); + set = nft_set_lookup(table, nla_set_name, genmask); if (IS_ERR(set)) { if (!nla_set_id) return set; - set = nf_tables_set_lookup_byid(net, nla_set_id, genmask); + set = nft_set_lookup_byid(net, nla_set_id, genmask); } return set; } @@ -2809,6 +2905,27 @@ cont: return 0; } +static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) +{ + u64 ms = be64_to_cpu(nla_get_be64(nla)); + u64 max = (u64)(~((u64)0)); + + max = div_u64(max, NSEC_PER_MSEC); + if (ms >= max) + return -ERANGE; + + ms *= NSEC_PER_MSEC; + *result = nsecs_to_jiffies64(ms); + return 0; +} + +static __be64 nf_jiffies64_to_msecs(u64 input) +{ + u64 ms = jiffies64_to_nsecs(input); + + return cpu_to_be64(div_u64(ms, NSEC_PER_MSEC)); +} + static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { @@ -2856,7 +2973,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (set->timeout && nla_put_be64(skb, NFTA_SET_TIMEOUT, - cpu_to_be64(jiffies_to_msecs(set->timeout)), + nf_jiffies64_to_msecs(set->timeout), NFTA_SET_PAD)) goto nla_put_failure; if (set->gc_int && @@ -2981,6 +3098,7 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb) return 0; } +/* called with rcu_read_lock held */ static int nf_tables_getset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -2994,7 +3112,8 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, int err; /* Verify existence before starting dump */ - err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack, + genmask); if (err < 0) return err; @@ -3002,17 +3121,18 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, struct netlink_dump_control c = { .dump = nf_tables_dump_sets, .done = nf_tables_dump_sets_done, + .module = THIS_MODULE, }; struct nft_ctx *ctx_dump; - ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_KERNEL); + ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_ATOMIC); if (ctx_dump == NULL) return -ENOMEM; *ctx_dump = ctx; c.data = ctx_dump; - return netlink_dump_start(nlsk, skb, nlh, &c); + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } /* Only accept unspec with dump */ @@ -3021,11 +3141,11 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, if (!nla[NFTA_SET_TABLE]) return -EINVAL; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); + set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb2 == NULL) return -ENOMEM; @@ -3151,8 +3271,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( - nla[NFTA_SET_TIMEOUT]))); + + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); + if (err) + return err; } gc_int = 0; if (nla[NFTA_SET_GC_INTERVAL] != NULL) { @@ -3173,22 +3295,28 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); return PTR_ERR(table); + } nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); - set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); + set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { - if (PTR_ERR(set) != -ENOENT) + if (PTR_ERR(set) != -ENOENT) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return PTR_ERR(set); + } } else { - if (nlh->nlmsg_flags & NLM_F_EXCL) + if (nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return -EEXIST; + } if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + return 0; } @@ -3231,6 +3359,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } INIT_LIST_HEAD(&set->bindings); + set->table = table; + write_pnet(&set->net, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -3265,14 +3395,14 @@ err3: err2: kvfree(set); err1: - module_put(ops->type->owner); + module_put(to_set_type(ops)->owner); return err; } static void nft_set_destroy(struct nft_set *set) { set->ops->destroy(set); - module_put(set->ops->type->owner); + module_put(to_set_type(set->ops)->owner); kfree(set->name); kvfree(set); } @@ -3291,6 +3421,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); + const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; int err; @@ -3300,20 +3431,28 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_TABLE] == NULL) return -EINVAL; - err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack, + genmask); if (err < 0) return err; - if (nla[NFTA_SET_HANDLE]) - set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask); - else - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); - if (IS_ERR(set)) - return PTR_ERR(set); + if (nla[NFTA_SET_HANDLE]) { + attr = nla[NFTA_SET_HANDLE]; + set = nft_set_lookup_byhandle(ctx.table, attr, genmask); + } else { + attr = nla[NFTA_SET_NAME]; + set = nft_set_lookup(ctx.table, attr, genmask); + } + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, attr); + return PTR_ERR(set); + } if (!list_empty(&set->bindings) || - (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) + (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) { + NL_SET_BAD_ATTR(extack, attr); return -EBUSY; + } return nft_delset(&ctx, set); } @@ -3403,8 +3542,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = { .align = __alignof__(u64), }, [NFT_SET_EXT_EXPIRATION] = { - .len = sizeof(unsigned long), - .align = __alignof__(unsigned long), + .len = sizeof(u64), + .align = __alignof__(u64), }, [NFT_SET_EXT_USERDATA] = { .len = sizeof(struct nft_userdata), @@ -3441,16 +3580,19 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], + struct netlink_ext_ack *extack, u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); int family = nfmsg->nfgen_family; struct nft_table *table; - table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], - family, genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, + genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); return PTR_ERR(table); + } nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); return 0; @@ -3494,22 +3636,21 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - cpu_to_be64(jiffies_to_msecs( - *nft_set_ext_timeout(ext))), + nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)), NFTA_SET_ELEM_PAD)) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { - unsigned long expires, now = jiffies; + u64 expires, now = get_jiffies_64(); expires = *nft_set_ext_expiration(ext); - if (time_before(now, expires)) + if (time_before64(now, expires)) expires -= now; else expires = 0; if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, - cpu_to_be64(jiffies_to_msecs(expires)), + nf_jiffies64_to_msecs(expires), NFTA_SET_ELEM_PAD)) goto nla_put_failure; } @@ -3747,7 +3888,7 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, ext = nft_set_elem_ext(set, &elem); err = -ENOMEM; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb == NULL) goto err1; @@ -3769,6 +3910,7 @@ err1: return err == -EAGAIN ? -ENOBUFS : err; } +/* called with rcu_read_lock held */ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -3780,12 +3922,12 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct nft_ctx ctx; int rem, err = 0; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack, + genmask); if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); + set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); @@ -3793,10 +3935,11 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct netlink_dump_control c = { .dump = nf_tables_dump_set, .done = nf_tables_dump_set_done, + .module = THIS_MODULE, }; struct nft_set_dump_ctx *dump_ctx; - dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_ATOMIC); if (!dump_ctx) return -ENOMEM; @@ -3804,7 +3947,7 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, dump_ctx->ctx = ctx; c.data = dump_ctx; - return netlink_dump_start(nlsk, skb, nlh, &c); + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) @@ -3884,7 +4027,7 @@ void *nft_set_elem_init(const struct nft_set *set, memcpy(nft_set_ext_data(ext), data, set->dlen); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) *nft_set_ext_expiration(ext) = - jiffies + timeout; + get_jiffies_64() + timeout; if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) *nft_set_ext_timeout(ext) = timeout; @@ -3895,12 +4038,24 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + struct nft_ctx ctx = { + .net = read_pnet(&set->net), + .family = set->table->family, + }; nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); - if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) - nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) { + struct nft_expr *expr = nft_set_ext_expr(ext); + + if (expr->ops->destroy_clone) { + expr->ops->destroy_clone(&ctx, expr); + module_put(expr->ops->type->owner); + } else { + nf_tables_expr_destroy(&ctx, expr); + } + } if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) (*nft_set_ext_obj(ext))->use--; kfree(elem); @@ -3910,12 +4065,13 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy); /* Only called from commit path, nft_set_elem_deactivate() already deals with * the refcounting from the preparation phase. */ -static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) +static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) - nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext)); kfree(elem); } @@ -3971,8 +4127,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( - nla[NFTA_SET_ELEM_TIMEOUT]))); + err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT], + &timeout); + if (err) + return err; } else if (set->flags & NFT_SET_TIMEOUT) { timeout = set->timeout; } @@ -3997,8 +4155,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EINVAL; goto err2; } - obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], - set->objtype, genmask); + obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], + set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err2; @@ -4033,6 +4191,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, d2.type, d2.len); if (err < 0) goto err3; + + if (d2.type == NFT_DATA_VERDICT && + (data.verdict.code == NFT_GOTO || + data.verdict.code == NFT_JUMP)) + nft_validate_state_update(ctx->net, + NFT_VALIDATE_NEED); } nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len); @@ -4132,12 +4296,13 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; - int rem, err = 0; + int rem, err; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack, + genmask); if (err < 0) return err; @@ -4152,9 +4317,13 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); if (err < 0) - break; + return err; } - return err; + + if (net->nft.validate_state == NFT_VALIDATE_DO) + return nft_table_validate(net, ctx.table); + + return 0; } /** @@ -4325,12 +4494,12 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_ctx ctx; int rem, err = 0; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack, + genmask); if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); + set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) @@ -4418,13 +4587,13 @@ void nft_unregister_obj(struct nft_object_type *obj_type) } EXPORT_SYMBOL_GPL(nft_unregister_obj); -struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, - const struct nlattr *nla, - u32 objtype, u8 genmask) +struct nft_object *nft_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, u32 objtype, + u8 genmask) { struct nft_object *obj; - list_for_each_entry(obj, &table->objects, list) { + list_for_each_entry_rcu(obj, &table->objects, list) { if (!nla_strcmp(nla, obj->name) && objtype == obj->ops->type->type && nft_active_genmask(obj, genmask)) @@ -4432,11 +4601,11 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, } return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); +EXPORT_SYMBOL_GPL(nft_obj_lookup); -static struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table, - const struct nlattr *nla, - u32 objtype, u8 genmask) +static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, + u32 objtype, u8 genmask) { struct nft_object *obj; @@ -4580,22 +4749,25 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, !nla[NFTA_OBJ_DATA]) return -EINVAL; - table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); return PTR_ERR(table); + } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); - if (err != -ENOENT) + if (err != -ENOENT) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return err; - + } } else { - if (nlh->nlmsg_flags & NLM_F_EXCL) + if (nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return -EEXIST; - + } return 0; } @@ -4630,7 +4802,7 @@ err3: kfree(obj->name); err2: if (obj->ops->destroy) - obj->ops->destroy(obj); + obj->ops->destroy(&ctx, obj); kfree(obj); err1: module_put(type->owner); @@ -4751,12 +4923,12 @@ nft_obj_filter_alloc(const struct nlattr * const nla[]) { struct nft_obj_filter *filter; - filter = kzalloc(sizeof(*filter), GFP_KERNEL); + filter = kzalloc(sizeof(*filter), GFP_ATOMIC); if (!filter) return ERR_PTR(-ENOMEM); if (nla[NFTA_OBJ_TABLE]) { - filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL); + filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); if (!filter->table) { kfree(filter); return ERR_PTR(-ENOMEM); @@ -4768,6 +4940,7 @@ nft_obj_filter_alloc(const struct nlattr * const nla[]) return filter; } +/* called with rcu_read_lock held */ static int nf_tables_getobj(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -4787,6 +4960,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, struct netlink_dump_control c = { .dump = nf_tables_dump_obj, .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, }; if (nla[NFTA_OBJ_TABLE] || @@ -4799,24 +4973,27 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, c.data = filter; } - return netlink_dump_start(nlsk, skb, nlh, &c); + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } if (!nla[NFTA_OBJ_NAME] || !nla[NFTA_OBJ_TYPE]) return -EINVAL; - table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); return PTR_ERR(table); + } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); - if (IS_ERR(obj)) + obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return PTR_ERR(obj); + } - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; @@ -4835,10 +5012,10 @@ err: return err; } -static void nft_obj_destroy(struct nft_object *obj) +static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { if (obj->ops->destroy) - obj->ops->destroy(obj); + obj->ops->destroy(ctx, obj); module_put(obj->ops->type->owner); kfree(obj->name); @@ -4853,6 +5030,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; + const struct nlattr *attr; struct nft_table *table; struct nft_object *obj; struct nft_ctx ctx; @@ -4862,22 +5040,29 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE])) return -EINVAL; - table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, - genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); return PTR_ERR(table); + } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - if (nla[NFTA_OBJ_HANDLE]) - obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE], - objtype, genmask); - else - obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], - objtype, genmask); - if (IS_ERR(obj)) + if (nla[NFTA_OBJ_HANDLE]) { + attr = nla[NFTA_OBJ_HANDLE]; + obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask); + } else { + attr = nla[NFTA_OBJ_NAME]; + obj = nft_obj_lookup(table, attr, objtype, genmask); + } + + if (IS_ERR(obj)) { + NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(obj); - if (obj->use > 0) + } + if (obj->use > 0) { + NL_SET_BAD_ATTR(extack, attr); return -EBUSY; + } nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); @@ -4948,24 +5133,23 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = { [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 }, }; -struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, - const struct nlattr *nla, - u8 genmask) +struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) { struct nft_flowtable *flowtable; - list_for_each_entry(flowtable, &table->flowtables, list) { + list_for_each_entry_rcu(flowtable, &table->flowtables, list) { if (!nla_strcmp(nla, flowtable->name) && nft_active_genmask(flowtable, genmask)) return flowtable; } return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup); +EXPORT_SYMBOL_GPL(nft_flowtable_lookup); static struct nft_flowtable * -nf_tables_flowtable_lookup_byhandle(const struct nft_table *table, - const struct nlattr *nla, u8 genmask) +nft_flowtable_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) { struct nft_flowtable *flowtable; @@ -5064,7 +5248,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx, flowtable->ops[i].pf = NFPROTO_NETDEV; flowtable->ops[i].hooknum = hooknum; flowtable->ops[i].priority = priority; - flowtable->ops[i].priv = &flowtable->data.rhashtable; + flowtable->ops[i].priv = &flowtable->data; flowtable->ops[i].hook = flowtable->data.type->hook; flowtable->ops[i].dev = dev_array[i]; flowtable->dev_name[i] = kstrdup(dev_array[i]->name, @@ -5105,23 +5289,6 @@ static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family) return ERR_PTR(-ENOENT); } -void nft_flow_table_iterate(struct net *net, - void (*iter)(struct nf_flowtable *flowtable, void *data), - void *data) -{ - struct nft_flowtable *flowtable; - const struct nft_table *table; - - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_for_each_entry(table, &net->nft.tables, list) { - list_for_each_entry(flowtable, &table->flowtables, list) { - iter(&flowtable->data, data); - } - } - nfnl_unlock(NFNL_SUBSYS_NFTABLES); -} -EXPORT_SYMBOL_GPL(nft_flow_table_iterate); - static void nft_unregister_flowtable_net_hooks(struct net *net, struct nft_flowtable *flowtable) { @@ -5155,20 +5322,26 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, !nla[NFTA_FLOWTABLE_HOOK]) return -EINVAL; - table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], - family, genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, + genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]); return PTR_ERR(table); + } - flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], - genmask); + flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + genmask); if (IS_ERR(flowtable)) { err = PTR_ERR(flowtable); - if (err != -ENOENT) + if (err != -ENOENT) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return err; + } } else { - if (nlh->nlmsg_flags & NLM_F_EXCL) + if (nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return -EEXIST; + } return 0; } @@ -5195,14 +5368,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, } flowtable->data.type = type; - err = rhashtable_init(&flowtable->data.rhashtable, type->params); + err = type->init(&flowtable->data); if (err < 0) goto err3; err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], flowtable); if (err < 0) - goto err3; + goto err4; for (i = 0; i < flowtable->ops_len; i++) { if (!flowtable->ops[i].dev) @@ -5216,37 +5389,35 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, if (flowtable->ops[i].dev == ft->ops[k].dev && flowtable->ops[i].pf == ft->ops[k].pf) { err = -EBUSY; - goto err4; + goto err5; } } } err = nf_register_net_hook(net, &flowtable->ops[i]); if (err < 0) - goto err4; + goto err5; } err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; - - INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc); - queue_delayed_work(system_power_efficient_wq, - &flowtable->data.gc_work, HZ); + goto err6; list_add_tail_rcu(&flowtable->list, &table->flowtables); table->use++; return 0; -err5: +err6: i = flowtable->ops_len; -err4: +err5: for (k = i - 1; k >= 0; k--) { kfree(flowtable->dev_name[k]); nf_unregister_net_hook(net, &flowtable->ops[k]); } kfree(flowtable->ops); +err4: + flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); err2: @@ -5266,6 +5437,7 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk, u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; + const struct nlattr *attr; struct nft_table *table; struct nft_ctx ctx; @@ -5274,23 +5446,29 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk, !nla[NFTA_FLOWTABLE_HANDLE])) return -EINVAL; - table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], - family, genmask); - if (IS_ERR(table)) + table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, + genmask); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]); return PTR_ERR(table); + } - if (nla[NFTA_FLOWTABLE_HANDLE]) - flowtable = nf_tables_flowtable_lookup_byhandle(table, - nla[NFTA_FLOWTABLE_HANDLE], - genmask); - else - flowtable = nf_tables_flowtable_lookup(table, - nla[NFTA_FLOWTABLE_NAME], - genmask); - if (IS_ERR(flowtable)) - return PTR_ERR(flowtable); - if (flowtable->use > 0) + if (nla[NFTA_FLOWTABLE_HANDLE]) { + attr = nla[NFTA_FLOWTABLE_HANDLE]; + flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask); + } else { + attr = nla[NFTA_FLOWTABLE_NAME]; + flowtable = nft_flowtable_lookup(table, attr, genmask); + } + + if (IS_ERR(flowtable)) { + NL_SET_BAD_ATTR(extack, attr); + return PTR_ERR(flowtable); + } + if (flowtable->use > 0) { + NL_SET_BAD_ATTR(extack, attr); return -EBUSY; + } nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); @@ -5421,13 +5599,13 @@ nft_flowtable_filter_alloc(const struct nlattr * const nla[]) { struct nft_flowtable_filter *filter; - filter = kzalloc(sizeof(*filter), GFP_KERNEL); + filter = kzalloc(sizeof(*filter), GFP_ATOMIC); if (!filter) return ERR_PTR(-ENOMEM); if (nla[NFTA_FLOWTABLE_TABLE]) { filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE], - GFP_KERNEL); + GFP_ATOMIC); if (!filter->table) { kfree(filter); return ERR_PTR(-ENOMEM); @@ -5436,6 +5614,7 @@ nft_flowtable_filter_alloc(const struct nlattr * const nla[]) return filter; } +/* called with rcu_read_lock held */ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -5454,6 +5633,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, struct netlink_dump_control c = { .dump = nf_tables_dump_flowtable, .done = nf_tables_dump_flowtable_done, + .module = THIS_MODULE, }; if (nla[NFTA_FLOWTABLE_TABLE]) { @@ -5465,23 +5645,23 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, c.data = filter; } - return netlink_dump_start(nlsk, skb, nlh, &c); + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } if (!nla[NFTA_FLOWTABLE_NAME]) return -EINVAL; - table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], - family, genmask); + table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); - flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], - genmask); + flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + genmask); if (IS_ERR(flowtable)) return PTR_ERR(flowtable); - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; @@ -5530,11 +5710,9 @@ err: static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { - cancel_delayed_work_sync(&flowtable->data.gc_work); kfree(flowtable->ops); kfree(flowtable->name); flowtable->data.type->free(&flowtable->data); - rhashtable_destroy(&flowtable->data.rhashtable); module_put(flowtable->data.type->owner); } @@ -5647,7 +5825,7 @@ static int nf_tables_getgen(struct net *net, struct sock *nlsk, struct sk_buff *skb2; int err; - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb2 == NULL) return -ENOMEM; @@ -5669,7 +5847,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_table_policy, }, [NFT_MSG_GETTABLE] = { - .call = nf_tables_gettable, + .call_rcu = nf_tables_gettable, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, @@ -5684,7 +5862,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_chain_policy, }, [NFT_MSG_GETCHAIN] = { - .call = nf_tables_getchain, + .call_rcu = nf_tables_getchain, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, @@ -5699,7 +5877,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_GETRULE] = { - .call = nf_tables_getrule, + .call_rcu = nf_tables_getrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -5714,7 +5892,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_policy, }, [NFT_MSG_GETSET] = { - .call = nf_tables_getset, + .call_rcu = nf_tables_getset, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, @@ -5729,7 +5907,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETSETELEM] = { - .call = nf_tables_getsetelem, + .call_rcu = nf_tables_getsetelem, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, @@ -5739,7 +5917,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETGEN] = { - .call = nf_tables_getgen, + .call_rcu = nf_tables_getgen, }, [NFT_MSG_NEWOBJ] = { .call_batch = nf_tables_newobj, @@ -5747,7 +5925,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ] = { - .call = nf_tables_getobj, + .call_rcu = nf_tables_getobj, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, @@ -5757,7 +5935,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ_RESET] = { - .call = nf_tables_getobj, + .call_rcu = nf_tables_getobj, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, @@ -5767,7 +5945,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_flowtable_policy, }, [NFT_MSG_GETFLOWTABLE] = { - .call = nf_tables_getflowtable, + .call_rcu = nf_tables_getflowtable, .attr_count = NFTA_FLOWTABLE_MAX, .policy = nft_flowtable_policy, }, @@ -5778,6 +5956,27 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { }, }; +static int nf_tables_validate(struct net *net) +{ + struct nft_table *table; + + switch (net->nft.validate_state) { + case NFT_VALIDATE_SKIP: + break; + case NFT_VALIDATE_NEED: + nft_validate_state_update(net, NFT_VALIDATE_DO); + /* fall through */ + case NFT_VALIDATE_DO: + list_for_each_entry(table, &net->nft.tables, list) { + if (nft_table_validate(net, table) < 0) + return -EAGAIN; + } + break; + } + + return 0; +} + static void nft_chain_commit_update(struct nft_trans *trans) { struct nft_base_chain *basechain; @@ -5815,11 +6014,12 @@ static void nft_commit_release(struct nft_trans *trans) nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: - nf_tables_set_elem_destroy(nft_trans_elem_set(trans), + nf_tables_set_elem_destroy(&trans->ctx, + nft_trans_elem_set(trans), nft_trans_elem(trans).priv); break; case NFT_MSG_DELOBJ: - nft_obj_destroy(nft_trans_obj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); @@ -5843,21 +6043,166 @@ static void nf_tables_commit_release(struct net *net) } } +static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) +{ + struct nft_rule *rule; + unsigned int alloc = 0; + int i; + + /* already handled or inactive chain? */ + if (chain->rules_next || !nft_is_active_next(net, chain)) + return 0; + + rule = list_entry(&chain->rules, struct nft_rule, list); + i = 0; + + list_for_each_entry_continue(rule, &chain->rules, list) { + if (nft_is_active_next(net, rule)) + alloc++; + } + + chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc); + if (!chain->rules_next) + return -ENOMEM; + + list_for_each_entry_continue(rule, &chain->rules, list) { + if (nft_is_active_next(net, rule)) + chain->rules_next[i++] = rule; + } + + chain->rules_next[i] = NULL; + return 0; +} + +static void nf_tables_commit_chain_prepare_cancel(struct net *net) +{ + struct nft_trans *trans, *next; + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + struct nft_chain *chain = trans->ctx.chain; + + if (trans->msg_type == NFT_MSG_NEWRULE || + trans->msg_type == NFT_MSG_DELRULE) { + kvfree(chain->rules_next); + chain->rules_next = NULL; + } + } +} + +static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h) +{ + struct nft_rules_old *o = container_of(h, struct nft_rules_old, h); + + kvfree(o->start); +} + +static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules) +{ + struct nft_rule **r = rules; + struct nft_rules_old *old; + + while (*r) + r++; + + r++; /* rcu_head is after end marker */ + old = (void *) r; + old->start = rules; + + call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old); +} + +static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain) +{ + struct nft_rule **g0, **g1; + bool next_genbit; + + next_genbit = nft_gencursor_next(net); + + g0 = rcu_dereference_protected(chain->rules_gen_0, + lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); + g1 = rcu_dereference_protected(chain->rules_gen_1, + lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); + + /* No changes to this chain? */ + if (chain->rules_next == NULL) { + /* chain had no change in last or next generation */ + if (g0 == g1) + return; + /* + * chain had no change in this generation; make sure next + * one uses same rules as current generation. + */ + if (next_genbit) { + rcu_assign_pointer(chain->rules_gen_1, g0); + nf_tables_commit_chain_free_rules_old(g1); + } else { + rcu_assign_pointer(chain->rules_gen_0, g1); + nf_tables_commit_chain_free_rules_old(g0); + } + + return; + } + + if (next_genbit) + rcu_assign_pointer(chain->rules_gen_1, chain->rules_next); + else + rcu_assign_pointer(chain->rules_gen_0, chain->rules_next); + + chain->rules_next = NULL; + + if (g0 == g1) + return; + + if (next_genbit) + nf_tables_commit_chain_free_rules_old(g1); + else + nf_tables_commit_chain_free_rules_old(g0); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; struct nft_trans_elem *te; + struct nft_chain *chain; + struct nft_table *table; - /* Bump generation counter, invalidate any dump in progress */ - while (++net->nft.base_seq == 0); + /* 0. Validate ruleset, otherwise roll back for error reporting. */ + if (nf_tables_validate(net) < 0) + return -EAGAIN; - /* A new generation has just started */ - net->nft.gencursor = nft_gencursor_next(net); + /* 1. Allocate space for next generation rules_gen_X[] */ + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + int ret; + + if (trans->msg_type == NFT_MSG_NEWRULE || + trans->msg_type == NFT_MSG_DELRULE) { + chain = trans->ctx.chain; + + ret = nf_tables_commit_chain_prepare(net, chain); + if (ret < 0) { + nf_tables_commit_chain_prepare_cancel(net); + return ret; + } + } + } + + /* step 2. Make rules_gen_X visible to packet path */ + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; + nf_tables_commit_chain_active(net, chain); + } + } - /* Make sure all packets have left the previous generation before - * purging old rules. + /* + * Bump generation counter, invalidate any dump in progress. + * Cannot fail after this point. */ - synchronize_rcu(); + while (++net->nft.base_seq == 0); + + /* step 3. Start new generation, rules_gen_X now in use. */ + net->nft.gencursor = nft_gencursor_next(net); list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -5999,7 +6344,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) nft_trans_elem(trans).priv, true); break; case NFT_MSG_NEWOBJ: - nft_obj_destroy(nft_trans_obj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); break; case NFT_MSG_NEWFLOWTABLE: nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); @@ -6119,6 +6464,11 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) return 0; } +static void nf_tables_cleanup(struct net *net) +{ + nft_validate_state_update(net, NFT_VALIDATE_SKIP); +} + static bool nf_tables_valid_genid(struct net *net, u32 genid) { return net->nft.base_seq == genid; @@ -6131,6 +6481,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, + .cleanup = nf_tables_cleanup, .valid_genid = nf_tables_valid_genid, }; @@ -6214,19 +6565,18 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, list_for_each_entry(rule, &chain->rules, list) { nft_rule_for_each_expr(expr, last, rule) { - const struct nft_data *data = NULL; + struct nft_immediate_expr *priv; + const struct nft_data *data; int err; - if (!expr->ops->validate) + if (strcmp(expr->ops->type->name, "immediate")) continue; - err = expr->ops->validate(ctx, expr, &data); - if (err < 0) - return err; - - if (data == NULL) + priv = nft_expr_priv(expr); + if (priv->dreg != NFT_REG_VERDICT) continue; + data = &priv->data; switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: @@ -6459,8 +6809,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, case NFT_GOTO: if (!tb[NFTA_VERDICT_CHAIN]) return -EINVAL; - chain = nf_tables_chain_lookup(ctx->table, - tb[NFTA_VERDICT_CHAIN], genmask); + chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN], + genmask); if (IS_ERR(chain)) return PTR_ERR(chain); if (nft_is_base_chain(chain)) @@ -6688,7 +7038,7 @@ static void __nft_release_tables(struct net *net) list_for_each_entry_safe(obj, ne, &table->objects, list) { list_del(&obj->list); table->use--; - nft_obj_destroy(obj); + nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { ctx.chain = chain; @@ -6706,6 +7056,8 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); net->nft.base_seq = 1; + net->nft.validate_state = NFT_VALIDATE_SKIP; + return 0; }