Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

author Jakub Kicinski <kuba@kernel.org>
Thu, 18 Aug 2022 03:17:44 +0000 (20:17 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 18 Aug 2022 03:17:45 +0000 (20:17 -0700)
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h

index 0677cd3..c396a38 100644 (file)
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -95,7 +95,7 @@ struct nf_ip_net {
  
  struct netns_ct {
  #ifdef CONFIG_NF_CONNTRACK_EVENTS
-       bool ctnetlink_has_listener;
+       u8 ctnetlink_has_listener;
         bool ecache_dwork_pending;
  #endif
         u8                      sysctl_log_invalid; /* Log invalid packets */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig

index 22f15eb..4b8d046 100644 (file)
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -144,7 +144,6 @@ config NF_CONNTRACK_ZONES
  
  config NF_CONNTRACK_PROCFS
         bool "Supply CT list in procfs (OBSOLETE)"
-       default y
         depends on PROC_FS
         help
         This option enables for the list of known conntrack entries
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c

index a414274..0d9332e 100644 (file)
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -34,11 +34,6 @@ MODULE_DESCRIPTION("ftp connection tracking helper");
  MODULE_ALIAS("ip_conntrack_ftp");
  MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
  
-/* This is slow, but it's simple. --RR */
-static char *ftp_buffer;
-
-static DEFINE_SPINLOCK(nf_ftp_lock);
-
  #define MAX_PORTS 8
  static u_int16_t ports[MAX_PORTS];
  static unsigned int ports_c;
@@ -398,6 +393,9 @@ static int help(struct sk_buff *skb,
                 return NF_ACCEPT;
         }
  
+       if (unlikely(skb_linearize(skb)))
+               return NF_DROP;
+
         th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
         if (th == NULL)
                 return NF_ACCEPT;
@@ -411,12 +409,8 @@ static int help(struct sk_buff *skb,
         }
         datalen = skb->len - dataoff;
  
-       spin_lock_bh(&nf_ftp_lock);
-       fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
-       if (!fb_ptr) {
-               spin_unlock_bh(&nf_ftp_lock);
-               return NF_ACCEPT;
-       }
+       spin_lock_bh(&ct->lock);
+       fb_ptr = skb->data + dataoff;
  
         ends_in_nl = (fb_ptr[datalen - 1] == '\n');
         seq = ntohl(th->seq) + datalen;
@@ -544,7 +538,7 @@ out_update_nl:
         if (ends_in_nl)
                 update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
   out:
-       spin_unlock_bh(&nf_ftp_lock);
+       spin_unlock_bh(&ct->lock);
         return ret;
  }
  
@@ -571,7 +565,6 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
  static void __exit nf_conntrack_ftp_fini(void)
  {
         nf_conntrack_helpers_unregister(ftp, ports_c * 2);
-       kfree(ftp_buffer);
  }
  
  static int __init nf_conntrack_ftp_init(void)
@@ -580,10 +573,6 @@ static int __init nf_conntrack_ftp_init(void)
  
         NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master));
  
-       ftp_buffer = kmalloc(65536, GFP_KERNEL);
-       if (!ftp_buffer)
-               return -ENOMEM;
-
         if (ports_c == 0)
                 ports[ports_c++] = FTP_PORT;
  
@@ -603,7 +592,6 @@ static int __init nf_conntrack_ftp_init(void)
         ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
         if (ret < 0) {
                 pr_err("failed to register helpers\n");
-               kfree(ftp_buffer);
                 return ret;
         }
  
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c

index bb76305..5a9bce2 100644 (file)
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -34,6 +34,8 @@
  #include <net/netfilter/nf_conntrack_zones.h>
  #include <linux/netfilter/nf_conntrack_h323.h>
  
+#define H323_MAX_SIZE 65535
+
  /* Parameters */
  static unsigned int default_rrq_ttl __read_mostly = 300;
  module_param(default_rrq_ttl, uint, 0600);
@@ -86,6 +88,9 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
         if (tcpdatalen <= 0)    /* No TCP data */
                 goto clear_out;
  
+       if (tcpdatalen > H323_MAX_SIZE)
+               tcpdatalen = H323_MAX_SIZE;
+
         if (*data == NULL) {    /* first TPKT */
                 /* Get first TPKT pointer */
                 tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
@@ -1169,6 +1174,9 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
         if (dataoff >= skb->len)
                 return NULL;
         *datalen = skb->len - dataoff;
+       if (*datalen > H323_MAX_SIZE)
+               *datalen = H323_MAX_SIZE;
+
         return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
  }
  
@@ -1770,7 +1778,7 @@ static int __init nf_conntrack_h323_init(void)
  
         NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master));
  
-       h323_buffer = kmalloc(65536, GFP_KERNEL);
+       h323_buffer = kmalloc(H323_MAX_SIZE + 1, GFP_KERNEL);
         if (!h323_buffer)
                 return -ENOMEM;
         ret = h323_helper_init();
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c

index 08ee4e7..1796c45 100644 (file)
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -39,6 +39,7 @@ unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
  EXPORT_SYMBOL_GPL(nf_nat_irc_hook);
  
  #define HELPER_NAME "irc"
+#define MAX_SEARCH_SIZE        4095
  
  MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
  MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
@@ -121,6 +122,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
         int i, ret = NF_ACCEPT;
         char *addr_beg_p, *addr_end_p;
         typeof(nf_nat_irc_hook) nf_nat_irc;
+       unsigned int datalen;
  
         /* If packet is coming from IRC server */
         if (dir == IP_CT_DIR_REPLY)
@@ -140,8 +142,12 @@ static int help(struct sk_buff *skb, unsigned int protoff,
         if (dataoff >= skb->len)
                 return NF_ACCEPT;
  
+       datalen = skb->len - dataoff;
+       if (datalen > MAX_SEARCH_SIZE)
+               datalen = MAX_SEARCH_SIZE;
+
         spin_lock_bh(&irc_buffer_lock);
-       ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
+       ib_ptr = skb_header_pointer(skb, dataoff, datalen,
                                     irc_buffer);
         if (!ib_ptr) {
                 spin_unlock_bh(&irc_buffer_lock);
@@ -149,7 +155,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
         }
  
         data = ib_ptr;
-       data_limit = ib_ptr + skb->len - dataoff;
+       data_limit = ib_ptr + datalen;
  
         /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
          * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
@@ -251,7 +257,7 @@ static int __init nf_conntrack_irc_init(void)
         irc_exp_policy.max_expected = max_dcc_channels;
         irc_exp_policy.timeout = dcc_timeout;
  
-       irc_buffer = kmalloc(65536, GFP_KERNEL);
+       irc_buffer = kmalloc(MAX_SEARCH_SIZE + 1, GFP_KERNEL);
         if (!irc_buffer)
                 return -ENOMEM;
  
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c

index fcb33b1..13dc421 100644 (file)
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -34,10 +34,6 @@ MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
  MODULE_DESCRIPTION("SANE connection tracking helper");
  MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
  
-static char *sane_buffer;
-
-static DEFINE_SPINLOCK(nf_sane_lock);
-
  #define MAX_PORTS 8
  static u_int16_t ports[MAX_PORTS];
  static unsigned int ports_c;
@@ -67,14 +63,16 @@ static int help(struct sk_buff *skb,
         unsigned int dataoff, datalen;
         const struct tcphdr *th;
         struct tcphdr _tcph;
-       void *sb_ptr;
         int ret = NF_ACCEPT;
         int dir = CTINFO2DIR(ctinfo);
         struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct);
         struct nf_conntrack_expect *exp;
         struct nf_conntrack_tuple *tuple;
-       struct sane_request *req;
         struct sane_reply_net_start *reply;
+       union {
+               struct sane_request req;
+               struct sane_reply_net_start repl;
+       } buf;
  
         /* Until there's been traffic both ways, don't look in packets. */
         if (ctinfo != IP_CT_ESTABLISHED &&
@@ -92,59 +90,62 @@ static int help(struct sk_buff *skb,
                 return NF_ACCEPT;
  
         datalen = skb->len - dataoff;
-
-       spin_lock_bh(&nf_sane_lock);
-       sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
-       if (!sb_ptr) {
-               spin_unlock_bh(&nf_sane_lock);
-               return NF_ACCEPT;
-       }
-
         if (dir == IP_CT_DIR_ORIGINAL) {
+               const struct sane_request *req;
+
                 if (datalen != sizeof(struct sane_request))
-                       goto out;
+                       return NF_ACCEPT;
+
+               req = skb_header_pointer(skb, dataoff, datalen, &buf.req);
+               if (!req)
+                       return NF_ACCEPT;
  
-               req = sb_ptr;
                 if (req->RPC_code != htonl(SANE_NET_START)) {
                         /* Not an interesting command */
-                       ct_sane_info->state = SANE_STATE_NORMAL;
-                       goto out;
+                       WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL);
+                       return NF_ACCEPT;
                 }
  
                 /* We're interested in the next reply */
-               ct_sane_info->state = SANE_STATE_START_REQUESTED;
-               goto out;
+               WRITE_ONCE(ct_sane_info->state, SANE_STATE_START_REQUESTED);
+               return NF_ACCEPT;
         }
  
+       /* IP_CT_DIR_REPLY */
+
         /* Is it a reply to an uninteresting command? */
-       if (ct_sane_info->state != SANE_STATE_START_REQUESTED)
-               goto out;
+       if (READ_ONCE(ct_sane_info->state) != SANE_STATE_START_REQUESTED)
+               return NF_ACCEPT;
  
         /* It's a reply to SANE_NET_START. */
-       ct_sane_info->state = SANE_STATE_NORMAL;
+       WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL);
  
         if (datalen < sizeof(struct sane_reply_net_start)) {
                 pr_debug("NET_START reply too short\n");
-               goto out;
+               return NF_ACCEPT;
         }
  
-       reply = sb_ptr;
+       datalen = sizeof(struct sane_reply_net_start);
+
+       reply = skb_header_pointer(skb, dataoff, datalen, &buf.repl);
+       if (!reply)
+               return NF_ACCEPT;
+
         if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
                 /* saned refused the command */
                 pr_debug("unsuccessful SANE_STATUS = %u\n",
                          ntohl(reply->status));
-               goto out;
+               return NF_ACCEPT;
         }
  
         /* Invalid saned reply? Ignore it. */
         if (reply->zero != 0)
-               goto out;
+               return NF_ACCEPT;
  
         exp = nf_ct_expect_alloc(ct);
         if (exp == NULL) {
                 nf_ct_helper_log(skb, ct, "cannot alloc expectation");
-               ret = NF_DROP;
-               goto out;
+               return NF_DROP;
         }
  
         tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
@@ -162,9 +163,6 @@ static int help(struct sk_buff *skb,
         }
  
         nf_ct_expect_put(exp);
-
-out:
-       spin_unlock_bh(&nf_sane_lock);
         return ret;
  }
  
@@ -178,7 +176,6 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
  static void __exit nf_conntrack_sane_fini(void)
  {
         nf_conntrack_helpers_unregister(sane, ports_c * 2);
-       kfree(sane_buffer);
  }
  
  static int __init nf_conntrack_sane_init(void)
@@ -187,10 +184,6 @@ static int __init nf_conntrack_sane_init(void)
  
         NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master));
  
-       sane_buffer = kmalloc(65536, GFP_KERNEL);
-       if (!sane_buffer)
-               return -ENOMEM;
-
         if (ports_c == 0)
                 ports[ports_c++] = SANE_PORT;
  
@@ -210,7 +203,6 @@ static int __init nf_conntrack_sane_init(void)
         ret = nf_conntrack_helpers_register(sane, ports_c * 2);
         if (ret < 0) {
                 pr_err("failed to register helpers\n");
-               kfree(sane_buffer);
                 return ret;
         }
  
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c

index 3cc8899..62cfb0e 100644 (file)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -889,7 +889,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
  
         rcu_read_lock();
         nft_net = nft_pernet(net);
-       cb->seq = nft_net->base_seq;
+       cb->seq = READ_ONCE(nft_net->base_seq);
  
         list_for_each_entry_rcu(table, &nft_net->tables, list) {
                 if (family != NFPROTO_UNSPEC && family != table->family)
@@ -1705,7 +1705,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
  
         rcu_read_lock();
         nft_net = nft_pernet(net);
-       cb->seq = nft_net->base_seq;
+       cb->seq = READ_ONCE(nft_net->base_seq);
  
         list_for_each_entry_rcu(table, &nft_net->tables, list) {
                 if (family != NFPROTO_UNSPEC && family != table->family)
@@ -3149,7 +3149,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
  
         rcu_read_lock();
         nft_net = nft_pernet(net);
-       cb->seq = nft_net->base_seq;
+       cb->seq = READ_ONCE(nft_net->base_seq);
  
         list_for_each_entry_rcu(table, &nft_net->tables, list) {
                 if (family != NFPROTO_UNSPEC && family != table->family)
@@ -3907,7 +3907,7 @@ cont:
                 list_for_each_entry(i, &ctx->table->sets, list) {
                         int tmp;
  
-                       if (!nft_is_active_next(ctx->net, set))
+                       if (!nft_is_active_next(ctx->net, i))
                                 continue;
                         if (!sscanf(i->name, name, &tmp))
                                 continue;
@@ -4133,7 +4133,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
  
         rcu_read_lock();
         nft_net = nft_pernet(net);
-       cb->seq = nft_net->base_seq;
+       cb->seq = READ_ONCE(nft_net->base_seq);
  
         list_for_each_entry_rcu(table, &nft_net->tables, list) {
                 if (ctx->family != NFPROTO_UNSPEC &&
@@ -4451,6 +4451,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
                 err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
                 if (err < 0)
                         return err;
+
+               if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT))
+                       return -EINVAL;
+       } else if (flags & NFT_SET_CONCAT) {
+               return -EINVAL;
         }
  
         if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS])
@@ -5061,6 +5066,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
  
         rcu_read_lock();
         nft_net = nft_pernet(net);
+       cb->seq = READ_ONCE(nft_net->base_seq);
+
         list_for_each_entry_rcu(table, &nft_net->tables, list) {
                 if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
                     dump_ctx->ctx.family != table->family)
@@ -5196,6 +5203,9 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
         if (!(set->flags & NFT_SET_INTERVAL) &&
             *flags & NFT_SET_ELEM_INTERVAL_END)
                 return -EINVAL;
+       if ((*flags & (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) ==
+           (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL))
+               return -EINVAL;
  
         return 0;
  }
@@ -5599,7 +5609,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
  
                 err = nft_expr_clone(expr, set->exprs[i]);
                 if (err < 0) {
-                       nft_expr_destroy(ctx, expr);
+                       kfree(expr);
                         goto err_expr;
                 }
                 expr_array[i] = expr;
@@ -5842,6 +5852,24 @@ static void nft_setelem_remove(const struct net *net,
                 set->ops->remove(net, set, elem);
  }
  
+static bool nft_setelem_valid_key_end(const struct nft_set *set,
+                                     struct nlattr **nla, u32 flags)
+{
+       if ((set->flags & (NFT_SET_CONCAT | NFT_SET_INTERVAL)) ==
+                         (NFT_SET_CONCAT | NFT_SET_INTERVAL)) {
+               if (flags & NFT_SET_ELEM_INTERVAL_END)
+                       return false;
+               if (!nla[NFTA_SET_ELEM_KEY_END] &&
+                   !(flags & NFT_SET_ELEM_CATCHALL))
+                       return false;
+       } else {
+               if (nla[NFTA_SET_ELEM_KEY_END])
+                       return false;
+       }
+
+       return true;
+}
+
  static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                             const struct nlattr *attr, u32 nlmsg_flags)
  {
@@ -5892,6 +5920,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                         return -EINVAL;
         }
  
+       if (set->flags & NFT_SET_OBJECT) {
+               if (!nla[NFTA_SET_ELEM_OBJREF] &&
+                   !(flags & NFT_SET_ELEM_INTERVAL_END))
+                       return -EINVAL;
+       } else {
+               if (nla[NFTA_SET_ELEM_OBJREF])
+                       return -EINVAL;
+       }
+
+       if (!nft_setelem_valid_key_end(set, nla, flags))
+               return -EINVAL;
+
         if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
              (nla[NFTA_SET_ELEM_DATA] ||
               nla[NFTA_SET_ELEM_OBJREF] ||
@@ -5899,6 +5939,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
               nla[NFTA_SET_ELEM_EXPIRATION] ||
               nla[NFTA_SET_ELEM_USERDATA] ||
               nla[NFTA_SET_ELEM_EXPR] ||
+             nla[NFTA_SET_ELEM_KEY_END] ||
               nla[NFTA_SET_ELEM_EXPRESSIONS]))
                 return -EINVAL;
  
@@ -6029,10 +6070,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
         }
  
         if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
-               if (!(set->flags & NFT_SET_OBJECT)) {
-                       err = -EINVAL;
-                       goto err_parse_key_end;
-               }
                 obj = nft_obj_lookup(ctx->net, ctx->table,
                                      nla[NFTA_SET_ELEM_OBJREF],
                                      set->objtype, genmask);
@@ -6325,6 +6362,9 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
         if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
                 return -EINVAL;
  
+       if (!nft_setelem_valid_key_end(set, nla, flags))
+               return -EINVAL;
+
         nft_set_ext_prepare(&tmpl);
  
         if (flags != 0) {
@@ -6941,7 +6981,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
  
         rcu_read_lock();
         nft_net = nft_pernet(net);
-       cb->seq = nft_net->base_seq;
+       cb->seq = READ_ONCE(nft_net->base_seq);
  
         list_for_each_entry_rcu(table, &nft_net->tables, list) {
                 if (family != NFPROTO_UNSPEC && family != table->family)
@@ -7873,7 +7913,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
  
         rcu_read_lock();
         nft_net = nft_pernet(net);
-       cb->seq = nft_net->base_seq;
+       cb->seq = READ_ONCE(nft_net->base_seq);
  
         list_for_each_entry_rcu(table, &nft_net->tables, list) {
                 if (family != NFPROTO_UNSPEC && family != table->family)
@@ -8806,6 +8846,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
         struct nft_trans_elem *te;
         struct nft_chain *chain;
         struct nft_table *table;
+       unsigned int base_seq;
         LIST_HEAD(adl);
         int err;
  
@@ -8855,9 +8896,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
          * Bump generation counter, invalidate any dump in progress.
          * Cannot fail after this point.
          */
-       while (++nft_net->base_seq == 0)
+       base_seq = READ_ONCE(nft_net->base_seq);
+       while (++base_seq == 0)
                 ;
  
+       WRITE_ONCE(nft_net->base_seq, base_seq);
+
         /* step 3. Start new generation, rules_gen_X now in use. */
         net->nft.gencursor = nft_gencursor_next(net);
  
@@ -9419,13 +9463,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
                                 break;
                         }
                 }
-
-               cond_resched();
         }
  
         list_for_each_entry(set, &ctx->table->sets, list) {
-               cond_resched();
-
                 if (!nft_is_active_next(ctx->net, set))
                         continue;
                 if (!(set->flags & NFT_SET_MAP) ||
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c

index c24b124..9c44518 100644 (file)
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");
  
  static unsigned int nfnetlink_pernet_id __read_mostly;
  
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static DEFINE_SPINLOCK(nfnl_grp_active_lock);
+#endif
+
  struct nfnl_net {
         struct sock *nfnl;
  };
@@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb)
                 netlink_rcv_skb(skb, nfnetlink_rcv_msg);
  }
  
+static void nfnetlink_bind_event(struct net *net, unsigned int group)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+       int type, group_bit;
+       u8 v;
+
+       /* All NFNLGRP_CONNTRACK_* group bits fit into u8.
+        * The other groups are not relevant and can be ignored.
+        */
+       if (group >= 8)
+               return;
+
+       type = nfnl_group2type[group];
+
+       switch (type) {
+       case NFNL_SUBSYS_CTNETLINK:
+               break;
+       case NFNL_SUBSYS_CTNETLINK_EXP:
+               break;
+       default:
+               return;
+       }
+
+       group_bit = (1 << group);
+
+       spin_lock(&nfnl_grp_active_lock);
+       v = READ_ONCE(net->ct.ctnetlink_has_listener);
+       if ((v & group_bit) == 0) {
+               v |= group_bit;
+
+               /* read concurrently without nfnl_grp_active_lock held. */
+               WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
+       }
+
+       spin_unlock(&nfnl_grp_active_lock);
+#endif
+}
+
  static int nfnetlink_bind(struct net *net, int group)
  {
         const struct nfnetlink_subsystem *ss;
@@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group)
         if (!ss)
                 request_module_nowait("nfnetlink-subsys-%d", type);
  
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-       if (type == NFNL_SUBSYS_CTNETLINK) {
-               nfnl_lock(NFNL_SUBSYS_CTNETLINK);
-               WRITE_ONCE(net->ct.ctnetlink_has_listener, true);
-               nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
-       }
-#endif
+       nfnetlink_bind_event(net, group);
         return 0;
  }
  
  static void nfnetlink_unbind(struct net *net, int group)
  {
  #ifdef CONFIG_NF_CONNTRACK_EVENTS
+       int type, group_bit;
+
         if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
                 return;
  
-       if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) {
-               nfnl_lock(NFNL_SUBSYS_CTNETLINK);
-               if (!nfnetlink_has_listeners(net, group))
-                       WRITE_ONCE(net->ct.ctnetlink_has_listener, false);
-               nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
+       type = nfnl_group2type[group];
+
+       switch (type) {
+       case NFNL_SUBSYS_CTNETLINK:
+               break;
+       case NFNL_SUBSYS_CTNETLINK_EXP:
+               break;
+       default:
+               return;
+       }
+
+       /* ctnetlink_has_listener is u8 */
+       if (group >= 8)
+               return;
+
+       group_bit = (1 << group);
+
+       spin_lock(&nfnl_grp_active_lock);
+       if (!nfnetlink_has_listeners(net, group)) {
+               u8 v = READ_ONCE(net->ct.ctnetlink_has_listener);
+
+               v &= ~group_bit;
+
+               /* read concurrently without nfnl_grp_active_lock held. */
+               WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
         }
+       spin_unlock(&nfnl_grp_active_lock);
  #endif
  }
  
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh

index d4ffebb..7060bae 100755 (executable)
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -14,13 +14,17 @@
  # nft_flowtable.sh -o8000 -l1500 -r2000
  #
  
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsr1="nsr1-$sfx"
+nsr2="nsr2-$sfx"
  
  # Kselftest framework requirement - SKIP code is 4.
  ksft_skip=4
  ret=0
  
-ns1in=""
-ns2in=""
+nsin=""
  ns1out=""
  ns2out=""
  
@@ -36,21 +40,19 @@ checktool (){
  checktool "nft --version" "run test without nft tool"
  checktool "ip -Version" "run test without ip tool"
  checktool "which nc" "run test without nc (netcat)"
-checktool "ip netns add nsr1" "create net namespace"
+checktool "ip netns add $nsr1" "create net namespace $nsr1"
  
-ip netns add ns1
-ip netns add ns2
-
-ip netns add nsr2
+ip netns add $ns1
+ip netns add $ns2
+ip netns add $nsr2
  
  cleanup() {
-       for i in 1 2; do
-               ip netns del ns$i
-               ip netns del nsr$i
-       done
+       ip netns del $ns1
+       ip netns del $ns2
+       ip netns del $nsr1
+       ip netns del $nsr2
  
-       rm -f "$ns1in" "$ns1out"
-       rm -f "$ns2in" "$ns2out"
+       rm -f "$nsin" "$ns1out" "$ns2out"
  
         [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
  }
@@ -59,22 +61,21 @@ trap cleanup EXIT
  
  sysctl -q net.netfilter.nf_log_all_netns=1
  
-ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
-ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
+ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1
+ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
  
-ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
+ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
  
  for dev in lo veth0 veth1; do
-  for i in 1 2; do
-    ip -net nsr$i link set $dev up
-  done
+    ip -net $nsr1 link set $dev up
+    ip -net $nsr2 link set $dev up
  done
  
-ip -net nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net nsr1 addr add dead:1::1/64 dev veth0
+ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net $nsr1 addr add dead:1::1/64 dev veth0
  
-ip -net nsr2 addr add 10.0.2.1/24 dev veth1
-ip -net nsr2 addr add dead:2::1/64 dev veth1
+ip -net $nsr2 addr add 10.0.2.1/24 dev veth1
+ip -net $nsr2 addr add dead:2::1/64 dev veth1
  
  # set different MTUs so we need to push packets coming from ns1 (large MTU)
  # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
@@ -106,85 +107,76 @@ do
         esac
  done
  
-if ! ip -net nsr1 link set veth0 mtu $omtu; then
+if ! ip -net $nsr1 link set veth0 mtu $omtu; then
         exit 1
  fi
  
-ip -net ns1 link set eth0 mtu $omtu
+ip -net $ns1 link set eth0 mtu $omtu
  
-if ! ip -net nsr2 link set veth1 mtu $rmtu; then
+if ! ip -net $nsr2 link set veth1 mtu $rmtu; then
         exit 1
  fi
  
-ip -net ns2 link set eth0 mtu $rmtu
+ip -net $ns2 link set eth0 mtu $rmtu
  
  # transfer-net between nsr1 and nsr2.
  # these addresses are not used for connections.
-ip -net nsr1 addr add 192.168.10.1/24 dev veth1
-ip -net nsr1 addr add fee1:2::1/64 dev veth1
-
-ip -net nsr2 addr add 192.168.10.2/24 dev veth0
-ip -net nsr2 addr add fee1:2::2/64 dev veth0
-
-for i in 1 2; do
-  ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
-  ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-
-  ip -net ns$i link set lo up
-  ip -net ns$i link set eth0 up
-  ip -net ns$i addr add 10.0.$i.99/24 dev eth0
-  ip -net ns$i route add default via 10.0.$i.1
-  ip -net ns$i addr add dead:$i::99/64 dev eth0
-  ip -net ns$i route add default via dead:$i::1
-  if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ip -net $nsr1 addr add 192.168.10.1/24 dev veth1
+ip -net $nsr1 addr add fee1:2::1/64 dev veth1
+
+ip -net $nsr2 addr add 192.168.10.2/24 dev veth0
+ip -net $nsr2 addr add fee1:2::2/64 dev veth0
+
+for i in 0 1; do
+  ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+  ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+done
+
+for ns in $ns1 $ns2;do
+  ip -net $ns link set lo up
+  ip -net $ns link set eth0 up
+
+  if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
         echo "ERROR: Check Originator/Responder values (problem during address addition)"
         exit 1
    fi
-
    # don't set ip DF bit for first two tests
-  ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+  ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
  done
  
-ip -net nsr1 route add default via 192.168.10.2
-ip -net nsr2 route add default via 192.168.10.1
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns2 addr add 10.0.2.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns2 route add default via 10.0.2.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0
+ip -net $ns2 addr add dead:2::99/64 dev eth0
+ip -net $ns1 route add default via dead:1::1
+ip -net $ns2 route add default via dead:2::1
+
+ip -net $nsr1 route add default via 192.168.10.2
+ip -net $nsr2 route add default via 192.168.10.1
  
-ip netns exec nsr1 nft -f - <<EOF
+ip netns exec $nsr1 nft -f - <<EOF
  table inet filter {
    flowtable f1 {
       hook ingress priority 0
       devices = { veth0, veth1 }
     }
  
+   counter routed_orig { }
+   counter routed_repl { }
+
     chain forward {
        type filter hook forward priority 0; policy drop;
  
        # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
-      meta oif "veth1" tcp dport 12345 flow offload @f1 counter
-
-      # use packet size to trigger 'should be offloaded by now'.
-      # otherwise, if 'flow offload' expression never offloads, the
-      # test will pass.
-      tcp dport 12345 meta length gt 200 ct mark set 1 counter
+      meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
  
-      # this turns off flow offloading internally, so expect packets again
-      tcp flags fin,rst ct mark set 0 accept
-
-      # this allows large packets from responder, we need this as long
-      # as PMTUd is off.
-      # This rule is deleted for the last test, when we expect PMTUd
-      # to kick in and ensure all packets meet mtu requirements.
-      meta length gt $lmtu accept comment something-to-grep-for
-
-      # next line blocks connection w.o. working offload.
-      # we only do this for reverse dir, because we expect packets to
-      # enter slow path due to MTU mismatch of veth0 and veth1.
-      tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
+      # count packets supposedly offloaded as per direction.
+      ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
  
        ct state established,related accept
  
-      # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
-      meta length lt 200 oif "veth1" tcp dport 12345 counter accept
-
        meta nfproto ipv4 meta l4proto icmp accept
        meta nfproto ipv6 meta l4proto icmpv6 accept
     }
@@ -197,30 +189,30 @@ if [ $? -ne 0 ]; then
  fi
  
  # test basic connectivity
-if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
-  echo "ERROR: ns1 cannot reach ns2" 1>&2
+if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+  echo "ERROR: $ns1 cannot reach ns2" 1>&2
    exit 1
  fi
  
-if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
-  echo "ERROR: ns2 cannot reach ns1" 1>&2
+if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+  echo "ERROR: $ns2 cannot reach $ns1" 1>&2
    exit 1
  fi
  
  if [ $ret -eq 0 ];then
-       echo "PASS: netns routing/connectivity: ns1 can reach ns2"
+       echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
  fi
  
-ns1in=$(mktemp)
+nsin=$(mktemp)
  ns1out=$(mktemp)
-ns2in=$(mktemp)
  ns2out=$(mktemp)
  
  make_file()
  {
         name=$1
  
-       SIZE=$((RANDOM % (1024 * 8)))
+       SIZE=$((RANDOM % (1024 * 128)))
+       SIZE=$((SIZE + (1024 * 8)))
         TSIZE=$((SIZE * 1024))
  
         dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
@@ -231,6 +223,38 @@ make_file()
         dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
  }
  
+check_counters()
+{
+       local what=$1
+       local ok=1
+
+       local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets)
+       local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets)
+
+       local orig_cnt=${orig#*bytes}
+       local repl_cnt=${repl#*bytes}
+
+       local fs=$(du -sb $nsin)
+       local max_orig=${fs%%/*}
+       local max_repl=$((max_orig/4))
+
+       if [ $orig_cnt -gt $max_orig ];then
+               echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
+               ret=1
+               ok=0
+       fi
+
+       if [ $repl_cnt -gt $max_repl ];then
+               echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
+               ret=1
+               ok=0
+       fi
+
+       if [ $ok -eq 1 ]; then
+               echo "PASS: $what"
+       fi
+}
+
  check_transfer()
  {
         in=$1
@@ -255,11 +279,11 @@ test_tcp_forwarding_ip()
         local dstport=$4
         local lret=0
  
-       ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
+       ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" &
         lpid=$!
  
         sleep 1
-       ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
+       ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" &
         cpid=$!
  
         sleep 3
@@ -274,11 +298,11 @@ test_tcp_forwarding_ip()
  
         wait
  
-       if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
+       if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
                 lret=1
         fi
  
-       if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
+       if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
                 lret=1
         fi
  
@@ -295,41 +319,59 @@ test_tcp_forwarding()
  test_tcp_forwarding_nat()
  {
         local lret
+       local pmtu
  
         test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
         lret=$?
  
+       pmtu=$3
+       what=$4
+
         if [ $lret -eq 0 ] ; then
+               if [ $pmtu -eq 1 ] ;then
+                       check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+               else
+                       echo "PASS: flow offload for ns1/ns2 with masquerade $what"
+               fi
+
                 test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
                 lret=$?
+               if [ $pmtu -eq 1 ] ;then
+                       check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+               elif [ $lret -eq 0 ] ; then
+                       echo "PASS: flow offload for ns1/ns2 with dnat $what"
+               fi
         fi
  
         return $lret
  }
  
-make_file "$ns1in"
-make_file "$ns2in"
+make_file "$nsin"
  
  # First test:
  # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
-if test_tcp_forwarding ns1 ns2; then
+# Due to MTU mismatch in both directions, all packets (except small packets like pure
+# acks) have to be handled by normal forwarding path.  Therefore, packet counters
+# are not checked.
+if test_tcp_forwarding $ns1 $ns2; then
         echo "PASS: flow offloaded for ns1/ns2"
  else
         echo "FAIL: flow offload for ns1/ns2:" 1>&2
-       ip netns exec nsr1 nft list ruleset
+       ip netns exec $nsr1 nft list ruleset
         ret=1
  fi
  
  # delete default route, i.e. ns2 won't be able to reach ns1 and
  # will depend on ns1 being masqueraded in nsr1.
  # expect ns1 has nsr1 address.
-ip -net ns2 route del default via 10.0.2.1
-ip -net ns2 route del default via dead:2::1
-ip -net ns2 route add 192.168.10.1 via 10.0.2.1
+ip -net $ns2 route del default via 10.0.2.1
+ip -net $ns2 route del default via dead:2::1
+ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
  
  # Second test:
-# Same, but with NAT enabled.
-ip netns exec nsr1 nft -f - <<EOF
+# Same, but with NAT enabled.  Same as in first test: we expect normal forward path
+# to handle most packets.
+ip netns exec $nsr1 nft -f - <<EOF
  table ip nat {
     chain prerouting {
        type nat hook prerouting priority 0; policy accept;
@@ -343,47 +385,45 @@ table ip nat {
  }
  EOF
  
-if test_tcp_forwarding_nat ns1 ns2; then
-       echo "PASS: flow offloaded for ns1/ns2 with NAT"
-else
+if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then
         echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
-       ip netns exec nsr1 nft list ruleset
+       ip netns exec $nsr1 nft list ruleset
         ret=1
  fi
  
  # Third test:
-# Same as second test, but with PMTU discovery enabled.
-handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
-
-if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
-       echo "FAIL: Could not delete large-packet accept rule"
-       exit 1
-fi
-
-ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-
-if test_tcp_forwarding_nat ns1 ns2; then
-       echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
-else
+# Same as second test, but with PMTU discovery enabled. This
+# means that we expect the fastpath to handle packets as soon
+# as the endpoints adjust the packet size.
+ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+# reset counters.
+# With pmtu in-place we'll also check that nft counters
+# are lower than file size and packets were forwarded via flowtable layer.
+# For earlier tests (large mtus), packets cannot be handled via flowtable
+# (except pure acks and other small packets).
+ip netns exec $nsr1 nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then
         echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
-       ip netns exec nsr1 nft list ruleset
+       ip netns exec $nsr1 nft list ruleset
  fi
  
  # Another test:
  # Add bridge interface br0 to Router1, with NAT enabled.
-ip -net nsr1 link add name br0 type bridge
-ip -net nsr1 addr flush dev veth0
-ip -net nsr1 link set up dev veth0
-ip -net nsr1 link set veth0 master br0
-ip -net nsr1 addr add 10.0.1.1/24 dev br0
-ip -net nsr1 addr add dead:1::1/64 dev br0
-ip -net nsr1 link set up dev br0
+ip -net $nsr1 link add name br0 type bridge
+ip -net $nsr1 addr flush dev veth0
+ip -net $nsr1 link set up dev veth0
+ip -net $nsr1 link set veth0 master br0
+ip -net $nsr1 addr add 10.0.1.1/24 dev br0
+ip -net $nsr1 addr add dead:1::1/64 dev br0
+ip -net $nsr1 link set up dev br0
  
-ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
  
  # br0 with NAT enabled.
-ip netns exec nsr1 nft -f - <<EOF
+ip netns exec $nsr1 nft -f - <<EOF
  flush table ip nat
  table ip nat {
     chain prerouting {
@@ -398,59 +438,56 @@ table ip nat {
  }
  EOF
  
-if test_tcp_forwarding_nat ns1 ns2; then
-       echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
-else
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then
         echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
-       ip netns exec nsr1 nft list ruleset
+       ip netns exec $nsr1 nft list ruleset
         ret=1
  fi
  
+
  # Another test:
  # Add bridge interface br0 to Router1, with NAT and VLAN.
-ip -net nsr1 link set veth0 nomaster
-ip -net nsr1 link set down dev veth0
-ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
-ip -net nsr1 link set up dev veth0
-ip -net nsr1 link set up dev veth0.10
-ip -net nsr1 link set veth0.10 master br0
-
-ip -net ns1 addr flush dev eth0
-ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
-ip -net ns1 link set eth0 up
-ip -net ns1 link set eth0.10 up
-ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
-ip -net ns1 route add default via 10.0.1.1
-ip -net ns1 addr add dead:1::99/64 dev eth0.10
-
-if test_tcp_forwarding_nat ns1 ns2; then
-       echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
-else
+ip -net $nsr1 link set veth0 nomaster
+ip -net $nsr1 link set down dev veth0
+ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10
+ip -net $nsr1 link set up dev veth0
+ip -net $nsr1 link set up dev veth0.10
+ip -net $nsr1 link set veth0.10 master br0
+
+ip -net $ns1 addr flush dev eth0
+ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10
+ip -net $ns1 link set eth0 up
+ip -net $ns1 link set eth0.10 up
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0.10
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then
         echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
-       ip netns exec nsr1 nft list ruleset
+       ip netns exec $nsr1 nft list ruleset
         ret=1
  fi
  
  # restore test topology (remove bridge and VLAN)
-ip -net nsr1 link set veth0 nomaster
-ip -net nsr1 link set veth0 down
-ip -net nsr1 link set veth0.10 down
-ip -net nsr1 link delete veth0.10 type vlan
-ip -net nsr1 link delete br0 type bridge
-ip -net ns1 addr flush dev eth0.10
-ip -net ns1 link set eth0.10 down
-ip -net ns1 link set eth0 down
-ip -net ns1 link delete eth0.10 type vlan
+ip -net $nsr1 link set veth0 nomaster
+ip -net $nsr1 link set veth0 down
+ip -net $nsr1 link set veth0.10 down
+ip -net $nsr1 link delete veth0.10 type vlan
+ip -net $nsr1 link delete br0 type bridge
+ip -net $ns1 addr flush dev eth0.10
+ip -net $ns1 link set eth0.10 down
+ip -net $ns1 link set eth0 down
+ip -net $ns1 link delete eth0.10 type vlan
  
  # restore address in ns1 and nsr1
-ip -net ns1 link set eth0 up
-ip -net ns1 addr add 10.0.1.99/24 dev eth0
-ip -net ns1 route add default via 10.0.1.1
-ip -net ns1 addr add dead:1::99/64 dev eth0
-ip -net ns1 route add default via dead:1::1
-ip -net nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net nsr1 addr add dead:1::1/64 dev veth0
-ip -net nsr1 link set up dev veth0
+ip -net $ns1 link set eth0 up
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0
+ip -net $ns1 route add default via dead:1::1
+ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net $nsr1 addr add dead:1::1/64 dev veth0
+ip -net $nsr1 link set up dev veth0
  
  KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
  KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
@@ -480,23 +517,23 @@ do_esp() {
  
  }
  
-do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
  
-do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
  
-ip netns exec nsr1 nft delete table ip nat
+ip netns exec $nsr1 nft delete table ip nat
  
  # restore default routes
-ip -net ns2 route del 192.168.10.1 via 10.0.2.1
-ip -net ns2 route add default via 10.0.2.1
-ip -net ns2 route add default via dead:2::1
+ip -net $ns2 route del 192.168.10.1 via 10.0.2.1
+ip -net $ns2 route add default via 10.0.2.1
+ip -net $ns2 route add default via dead:2::1
  
-if test_tcp_forwarding ns1 ns2; then
-       echo "PASS: ipsec tunnel mode for ns1/ns2"
+if test_tcp_forwarding $ns1 $ns2; then
+       check_counters "ipsec tunnel mode for ns1/ns2"
  else
         echo "FAIL: ipsec tunnel mode for ns1/ns2"
-       ip netns exec nsr1 nft list ruleset 1>&2
-       ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
+       ip netns exec $nsr1 nft list ruleset 1>&2
+       ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2
  fi
  
  exit $ret
author	Jakub Kicinski <kuba@kernel.org>
	Thu, 18 Aug 2022 03:17:44 +0000 (20:17 -0700)
committer	Jakub Kicinski <kuba@kernel.org>
	Thu, 18 Aug 2022 03:17:45 +0000 (20:17 -0700)
include/net/netns/conntrack.h		patch \| blob \| history
net/netfilter/Kconfig		patch \| blob \| history
net/netfilter/nf_conntrack_ftp.c		patch \| blob \| history
net/netfilter/nf_conntrack_h323_main.c		patch \| blob \| history
net/netfilter/nf_conntrack_irc.c		patch \| blob \| history
net/netfilter/nf_conntrack_sane.c		patch \| blob \| history
net/netfilter/nf_tables_api.c		patch \| blob \| history
net/netfilter/nfnetlink.c		patch \| blob \| history
tools/testing/selftests/netfilter/nft_flowtable.sh		patch \| blob \| history