Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
author Jakub Kicinski <kuba@kernel.org>
Sun, 1 Nov 2020 00:28:17 +0000 (17:28 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Sun, 1 Nov 2020 00:34:19 +0000 (17:34 -0700)
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Incorrect netlink report logic in flowtable and genID.

2) Add a selftest to check that wireguard passes the right sk
   to ip_route_me_harder, from Jason A. Donenfeld.

3) Pass the actual sk to ip_route_me_harder(), also from Jason.

4) Missing expression validation of updates via nft --check.

5) Update byte and packet counters regardless of whether they
   match, from Stefano Brivio.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
18 files changed:
include/linux/netfilter/nfnetlink.h
include/linux/netfilter_ipv4.h
include/linux/netfilter_ipv6.h
net/ipv4/netfilter.c
net/ipv4/netfilter/iptable_mangle.c
net/ipv4/netfilter/nf_reject_ipv4.c
net/ipv6/netfilter.c
net/ipv6/netfilter/ip6table_mangle.c
net/netfilter/ipset/ip_set_core.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/nf_nat_proto.c
net/netfilter/nf_synproxy_core.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink.c
net/netfilter/nft_chain_route.c
net/netfilter/utils.c
tools/testing/selftests/wireguard/netns.sh
tools/testing/selftests/wireguard/qemu/kernel.config

index 89016d0..f6267e2 100644 (file)
@@ -24,6 +24,12 @@ struct nfnl_callback {
        const u_int16_t attr_count;             /* number of nlattr's */
 };
 
+enum nfnl_abort_action {
+       NFNL_ABORT_NONE         = 0,
+       NFNL_ABORT_AUTOLOAD,
+       NFNL_ABORT_VALIDATE,
+};
+
 struct nfnetlink_subsystem {
        const char *name;
        __u8 subsys_id;                 /* nfnetlink subsystem ID */
@@ -31,7 +37,8 @@ struct nfnetlink_subsystem {
        const struct nfnl_callback *cb; /* callback for individual types */
        struct module *owner;
        int (*commit)(struct net *net, struct sk_buff *skb);
-       int (*abort)(struct net *net, struct sk_buff *skb, bool autoload);
+       int (*abort)(struct net *net, struct sk_buff *skb,
+                    enum nfnl_abort_action action);
        void (*cleanup)(struct net *net);
        bool (*valid_genid)(struct net *net, u32 genid);
 };
index 082e2c4..5b70ca8 100644 (file)
@@ -16,7 +16,7 @@ struct ip_rt_info {
        u_int32_t mark;
 };
 
-int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
+int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type);
 
 struct nf_queue_entry;
 
index 9b67394..48314ad 100644 (file)
@@ -42,7 +42,7 @@ struct nf_ipv6_ops {
 #if IS_MODULE(CONFIG_IPV6)
        int (*chk_addr)(struct net *net, const struct in6_addr *addr,
                        const struct net_device *dev, int strict);
-       int (*route_me_harder)(struct net *net, struct sk_buff *skb);
+       int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb);
        int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
                       const struct in6_addr *daddr, unsigned int srcprefs,
                       struct in6_addr *saddr);
@@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
 #endif
 }
 
-int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
+int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb);
 
-static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
+static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 #if IS_MODULE(CONFIG_IPV6)
        const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
@@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
        if (!v6_ops)
                return -EHOSTUNREACH;
 
-       return v6_ops->route_me_harder(net, skb);
+       return v6_ops->route_me_harder(net, sk, skb);
 #elif IS_BUILTIN(CONFIG_IPV6)
-       return ip6_route_me_harder(net, skb);
+       return ip6_route_me_harder(net, sk, skb);
 #else
        return -EHOSTUNREACH;
 #endif
index a058213..7c84103 100644 (file)
 #include <net/netfilter/nf_queue.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type)
+int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
 {
        const struct iphdr *iph = ip_hdr(skb);
        struct rtable *rt;
        struct flowi4 fl4 = {};
        __be32 saddr = iph->saddr;
-       const struct sock *sk = skb_to_full_sk(skb);
-       __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
+       __u8 flags;
        struct net_device *dev = skb_dst(skb)->dev;
        unsigned int hh_len;
 
+       sk = sk_to_full_sk(sk);
+       flags = sk ? inet_sk_flowi_flags(sk) : 0;
+
        if (addr_type == RTN_UNSPEC)
                addr_type = inet_addr_type_dev_table(net, dev, saddr);
        if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
index f703a71..8330795 100644 (file)
@@ -62,7 +62,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
                    iph->daddr != daddr ||
                    skb->mark != mark ||
                    iph->tos != tos) {
-                       err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+                       err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
                        if (err < 0)
                                ret = NF_DROP_ERR(err);
                }
index 9dcfa4e..93b0773 100644 (file)
@@ -145,7 +145,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
                                   ip4_dst_hoplimit(skb_dst(nskb)));
        nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
 
-       if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
+       if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
                goto free_nskb;
 
        niph = ip_hdr(nskb);
index 6d0e942..ab9a279 100644 (file)
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include "../bridge/br_private.h"
 
-int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
+int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
 {
        const struct ipv6hdr *iph = ipv6_hdr(skb);
-       struct sock *sk = sk_to_full_sk(skb->sk);
+       struct sock *sk = sk_to_full_sk(sk_partial);
        unsigned int hh_len;
        struct dst_entry *dst;
        int strict = (ipv6_addr_type(&iph->daddr) &
@@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
                if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
                    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
                    skb->mark != rt_info->mark)
-                       return ip6_route_me_harder(entry->state.net, skb);
+                       return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
        }
        return 0;
 }
index 1a27486..cee7480 100644 (file)
@@ -57,7 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
             skb->mark != mark ||
             ipv6_hdr(skb)->hop_limit != hop_limit ||
             flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
-               err = ip6_route_me_harder(state->net, skb);
+               err = ip6_route_me_harder(state->net, state->sk, skb);
                if (err < 0)
                        ret = NF_DROP_ERR(err);
        }
index 6f35832..7cff6e5 100644 (file)
@@ -637,13 +637,14 @@ ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
        if (SET_WITH_COUNTER(set)) {
                struct ip_set_counter *counter = ext_counter(data, set);
 
+               ip_set_update_counter(counter, ext, flags);
+
                if (flags & IPSET_FLAG_MATCH_COUNTERS &&
                    !(ip_set_match_counter(ip_set_get_packets(counter),
                                mext->packets, mext->packets_op) &&
                      ip_set_match_counter(ip_set_get_bytes(counter),
                                mext->bytes, mext->bytes_op)))
                        return false;
-               ip_set_update_counter(counter, ext, flags);
        }
        if (SET_WITH_SKBINFO(set))
                ip_set_get_skbinfo(ext_skbinfo(data, set),
index cc3c275..c0b8215 100644 (file)
@@ -742,12 +742,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
                struct dst_entry *dst = skb_dst(skb);
 
                if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
-                   ip6_route_me_harder(ipvs->net, skb) != 0)
+                   ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0)
                        return 1;
        } else
 #endif
                if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
-                   ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
+                   ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0)
                        return 1;
 
        return 0;
index 59151dc..e87b6bd 100644 (file)
@@ -715,7 +715,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
 
                if (ct->tuplehash[dir].tuple.dst.u3.ip !=
                    ct->tuplehash[!dir].tuple.src.u3.ip) {
-                       err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+                       err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
                        if (err < 0)
                                ret = NF_DROP_ERR(err);
                }
@@ -953,7 +953,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
 
                if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
                                      &ct->tuplehash[!dir].tuple.src.u3)) {
-                       err = nf_ip6_route_me_harder(state->net, skb);
+                       err = nf_ip6_route_me_harder(state->net, state->sk, skb);
                        if (err < 0)
                                ret = NF_DROP_ERR(err);
                }
index 9cca35d..d7d34a6 100644 (file)
@@ -446,7 +446,7 @@ synproxy_send_tcp(struct net *net,
 
        skb_dst_set_noref(nskb, skb_dst(skb));
        nskb->protocol = htons(ETH_P_IP);
-       if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
+       if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
                goto free_nskb;
 
        if (nfct) {
index 65cb8e3..0f58e98 100644 (file)
@@ -7137,7 +7137,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
                        GFP_KERNEL);
        kfree(buf);
 
-       if (ctx->report &&
+       if (!ctx->report &&
            !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
                return;
 
@@ -7259,7 +7259,7 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
        audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq,
                        AUDIT_NFT_OP_GEN_REGISTER, GFP_KERNEL);
 
-       if (nlmsg_report(nlh) &&
+       if (!nlmsg_report(nlh) &&
            !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
                return;
 
@@ -8053,12 +8053,16 @@ static void nf_tables_abort_release(struct nft_trans *trans)
        kfree(trans);
 }
 
-static int __nf_tables_abort(struct net *net, bool autoload)
+static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 {
        struct nft_trans *trans, *next;
        struct nft_trans_elem *te;
        struct nft_hook *hook;
 
+       if (action == NFNL_ABORT_VALIDATE &&
+           nf_tables_validate(net) < 0)
+               return -EAGAIN;
+
        list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
                                         list) {
                switch (trans->msg_type) {
@@ -8190,7 +8194,7 @@ static int __nf_tables_abort(struct net *net, bool autoload)
                nf_tables_abort_release(trans);
        }
 
-       if (autoload)
+       if (action == NFNL_ABORT_AUTOLOAD)
                nf_tables_module_autoload(net);
        else
                nf_tables_module_autoload_cleanup(net);
@@ -8203,9 +8207,10 @@ static void nf_tables_cleanup(struct net *net)
        nft_validate_state_update(net, NFT_VALIDATE_SKIP);
 }
 
-static int nf_tables_abort(struct net *net, struct sk_buff *skb, bool autoload)
+static int nf_tables_abort(struct net *net, struct sk_buff *skb,
+                          enum nfnl_abort_action action)
 {
-       int ret = __nf_tables_abort(net, autoload);
+       int ret = __nf_tables_abort(net, action);
 
        mutex_unlock(&net->nft.commit_mutex);
 
@@ -8836,7 +8841,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
 {
        mutex_lock(&net->nft.commit_mutex);
        if (!list_empty(&net->nft.commit_list))
-               __nf_tables_abort(net, false);
+               __nf_tables_abort(net, NFNL_ABORT_NONE);
        __nft_release_tables(net);
        mutex_unlock(&net->nft.commit_mutex);
        WARN_ON_ONCE(!list_empty(&net->nft.tables));
index 2daa1f6..d3df66a 100644 (file)
@@ -333,7 +333,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
                return netlink_ack(skb, nlh, -EINVAL, NULL);
 replay:
        status = 0;
-
+replay_abort:
        skb = netlink_skb_clone(oskb, GFP_KERNEL);
        if (!skb)
                return netlink_ack(oskb, nlh, -ENOMEM, NULL);
@@ -499,7 +499,7 @@ ack:
        }
 done:
        if (status & NFNL_BATCH_REPLAY) {
-               ss->abort(net, oskb, true);
+               ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD);
                nfnl_err_reset(&err_list);
                kfree_skb(skb);
                module_put(ss->owner);
@@ -510,11 +510,25 @@ done:
                        status |= NFNL_BATCH_REPLAY;
                        goto done;
                } else if (err) {
-                       ss->abort(net, oskb, false);
+                       ss->abort(net, oskb, NFNL_ABORT_NONE);
                        netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
                }
        } else {
-               ss->abort(net, oskb, false);
+               enum nfnl_abort_action abort_action;
+
+               if (status & NFNL_BATCH_FAILURE)
+                       abort_action = NFNL_ABORT_NONE;
+               else
+                       abort_action = NFNL_ABORT_VALIDATE;
+
+               err = ss->abort(net, oskb, abort_action);
+               if (err == -EAGAIN) {
+                       nfnl_err_reset(&err_list);
+                       kfree_skb(skb);
+                       module_put(ss->owner);
+                       status |= NFNL_BATCH_FAILURE;
+                       goto replay_abort;
+               }
        }
        if (ss->cleanup)
                ss->cleanup(net);
index 8826bbe..edd02cd 100644 (file)
@@ -42,7 +42,7 @@ static unsigned int nf_route_table_hook4(void *priv,
                    iph->daddr != daddr ||
                    skb->mark != mark ||
                    iph->tos != tos) {
-                       err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+                       err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
                        if (err < 0)
                                ret = NF_DROP_ERR(err);
                }
@@ -92,7 +92,7 @@ static unsigned int nf_route_table_hook6(void *priv,
             skb->mark != mark ||
             ipv6_hdr(skb)->hop_limit != hop_limit ||
             flowlabel != *((u32 *)ipv6_hdr(skb)))) {
-               err = nf_ip6_route_me_harder(state->net, skb);
+               err = nf_ip6_route_me_harder(state->net, state->sk, skb);
                if (err < 0)
                        ret = NF_DROP_ERR(err);
        }
index cedf47a..2182d36 100644 (file)
@@ -191,8 +191,8 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry
                      skb->mark == rt_info->mark &&
                      iph->daddr == rt_info->daddr &&
                      iph->saddr == rt_info->saddr))
-                       return ip_route_me_harder(entry->state.net, skb,
-                                                 RTN_UNSPEC);
+                       return ip_route_me_harder(entry->state.net, entry->state.sk,
+                                                 skb, RTN_UNSPEC);
        }
 #endif
        return 0;
index d77f482..74c69b7 100755 (executable)
@@ -316,6 +316,14 @@ pp sleep 3
 n2 ping -W 1 -c 1 192.168.241.1
 n1 wg set wg0 peer "$pub2" persistent-keepalive 0
 
+# Test that sk_bound_dev_if works
+n1 ping -I wg0 -c 1 -W 1 192.168.241.2
+# What about when the mark changes and the packet must be rerouted?
+n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1
+n1 ping -c 1 -W 1 192.168.241.2 # First the boring case
+n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case
+n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1
+
 # Test that onion routing works, even when it loops
 n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
 ip1 addr add 192.168.242.1/24 dev wg0
index d531de1..4eecb43 100644 (file)
@@ -18,10 +18,12 @@ CONFIG_NF_NAT=y
 CONFIG_NETFILTER_XTABLES=y
 CONFIG_NETFILTER_XT_NAT=y
 CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MARK=y
 CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_NF_NAT_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_MANGLE=y
 CONFIG_IP_NF_NAT=y
 CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_IP_MULTIPLE_TABLES=y