* allows us to return 0 for single core systems without forcing
* callers to deal with SMP vs. NONSMP issues.
*/
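+/* Caller sketch (assumed usage, mirroring find_check_entry() below):
+ * keep the result in an unsigned long, test it with IS_ERR_VALUE(),
+ * and only then assign it to the u64 counters.pcnt field so the error
+ * check also works on 32-bit builds.
+ */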
-static inline u64 xt_percpu_counter_alloc(void)
+static inline unsigned long xt_percpu_counter_alloc(void)
{
if (nr_cpu_ids > 1) {
void __percpu *res = __alloc_percpu(sizeof(struct xt_counters),
sizeof(struct xt_counters));
if (res == NULL)
- return (u64) -ENOMEM;
+ return -ENOMEM;
- return (u64) (__force unsigned long) res;
+ return (__force unsigned long) res;
}
return 0;
u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
bool inverse);
int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
+ /* create connections for real-server outgoing packets */
+ struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ __be16 dport, __be16 cport);
};
/* The application module object (a.k.a. app incarnation) */
/* Service counters */
atomic_t ftpsvc_counter;
atomic_t nullsvc_counter;
+ atomic_t conn_out_counter;
#ifdef CONFIG_SYSCTL
/* 1/rate drop and drop-entry variables */
*/
const char *ip_vs_proto_name(unsigned int proto);
void ip_vs_init_hash_table(struct list_head *table, int rows);
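+/* create a connection for a real-server-initiated (outgoing) packet */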
+struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ __be16 dport,
+ __be16 cport);
#define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t)))
#define IP_VS_APP_TYPE_FTP 1
bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);
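+/* find real service by <proto,addr,port>; call under RCU read lock */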
+struct ip_vs_dest *
+ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport);
+
int ip_vs_use_count_inc(void);
void ip_vs_use_count_dec(void);
int ip_vs_register_nl_ioctl(void);
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
extern unsigned int nf_conntrack_htable_size;
extern unsigned int nf_conntrack_max;
-extern unsigned int nf_conntrack_hash_rnd;
-void init_nf_conntrack_hash_rnd(void);
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
#define CONNTRACK_LOCKS 1024
+extern struct hlist_nulls_head *nf_conntrack_hash;
extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
void nf_conntrack_lock(spinlock_t *lock);
extern unsigned int nf_ct_expect_hsize;
extern unsigned int nf_ct_expect_max;
+extern struct hlist_head *nf_ct_expect_hash;
struct nf_conntrack_expect {
/* Conntrack expectation list member */
/* L4 Protocol number. */
u_int8_t l4proto;
+ /* Resolve clashes on insertion races. */
+ bool allow_clash;
+
/* Try to fill in the third arg: dataoff is offset past network protocol
hdr. Return true if possible. */
bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,
struct nft_set {
struct list_head list;
struct list_head bindings;
- char name[IFNAMSIZ];
+ char name[NFT_SET_MAXNAMELEN];
u32 ktype;
u32 dtype;
u32 size;
struct ctl_table_header *event_sysctl_header;
struct ctl_table_header *helper_sysctl_header;
#endif
- char *slabname;
unsigned int sysctl_log_invalid; /* Log invalid packets */
int sysctl_events;
int sysctl_acct;
int sysctl_tstamp;
int sysctl_checksum;
- unsigned int htable_size;
- seqcount_t generation;
- struct kmem_cache *nf_conntrack_cachep;
- struct hlist_nulls_head *hash;
- struct hlist_head *expect_hash;
struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
unsigned int labels_used;
u8 label_words;
#endif
-#ifdef CONFIG_NF_NAT_NEEDED
- struct hlist_head *nat_bysource;
- unsigned int nat_htable_size;
-#endif
};
#endif
#define NFT_TABLE_MAXNAMELEN 32
#define NFT_CHAIN_MAXNAMELEN 32
+#define NFT_SET_MAXNAMELEN 32
#define NFT_USERDATA_MAXLEN 256
/**
MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
MODULE_DESCRIPTION("arptables core");
-/*#define DEBUG_ARP_TABLES*/
-/*#define DEBUG_ARP_TABLES_USER*/
-
-#ifdef DEBUG_ARP_TABLES
-#define dprintf(format, args...) pr_debug(format, ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_ARP_TABLES_USER
-#define duprintf(format, args...) pr_debug(format, ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-#ifdef CONFIG_NETFILTER_DEBUG
-#define ARP_NF_ASSERT(x) WARN_ON(!(x))
-#else
-#define ARP_NF_ASSERT(x)
-#endif
-
void *arpt_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(arpt, ARPT);
#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
- ARPT_INV_ARPOP)) {
- dprintf("ARP operation field mismatch.\n");
- dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
- arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
+ ARPT_INV_ARPOP))
return 0;
- }
if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
- ARPT_INV_ARPHRD)) {
- dprintf("ARP hardware address format mismatch.\n");
- dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
- arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
+ ARPT_INV_ARPHRD))
return 0;
- }
if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
- ARPT_INV_ARPPRO)) {
- dprintf("ARP protocol address format mismatch.\n");
- dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
- arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
+ ARPT_INV_ARPPRO))
return 0;
- }
if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
- ARPT_INV_ARPHLN)) {
- dprintf("ARP hardware address length mismatch.\n");
- dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
- arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
+ ARPT_INV_ARPHLN))
return 0;
- }
src_devaddr = arpptr;
arpptr += dev->addr_len;
if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
ARPT_INV_SRCDEVADDR) ||
FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
- ARPT_INV_TGTDEVADDR)) {
- dprintf("Source or target device address mismatch.\n");
-
+ ARPT_INV_TGTDEVADDR))
return 0;
- }
if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
ARPT_INV_SRCIP) ||
FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
- ARPT_INV_TGTIP)) {
- dprintf("Source or target IP address mismatch.\n");
-
- dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
- &src_ipaddr,
- &arpinfo->smsk.s_addr,
- &arpinfo->src.s_addr,
- arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
- dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n",
- &tgt_ipaddr,
- &arpinfo->tmsk.s_addr,
- &arpinfo->tgt.s_addr,
- arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
+ ARPT_INV_TGTIP))
return 0;
- }
/* Look for ifname matches. */
ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
- if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
- dprintf("VIA in mismatch (%s vs %s).%s\n",
- indev, arpinfo->iniface,
- arpinfo->invflags & ARPT_INV_VIA_IN ? " (INV)" : "");
+ if (FWINV(ret != 0, ARPT_INV_VIA_IN))
return 0;
- }
ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
- if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
- dprintf("VIA out mismatch (%s vs %s).%s\n",
- outdev, arpinfo->outiface,
- arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : "");
+ if (FWINV(ret != 0, ARPT_INV_VIA_OUT))
return 0;
- }
return 1;
#undef FWINV
static inline int arp_checkentry(const struct arpt_arp *arp)
{
- if (arp->flags & ~ARPT_F_MASK) {
- duprintf("Unknown flag bits set: %08X\n",
- arp->flags & ~ARPT_F_MASK);
+ if (arp->flags & ~ARPT_F_MASK)
return 0;
- }
- if (arp->invflags & ~ARPT_INV_MASK) {
- duprintf("Unknown invflag bits set: %08X\n",
- arp->invflags & ~ARPT_INV_MASK);
+ if (arp->invflags & ~ARPT_INV_MASK)
return 0;
- }
return 1;
}
= (void *)arpt_get_target_c(e);
int visited = e->comefrom & (1 << hook);
- if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
- pr_notice("arptables: loop hook %u pos %u %08X.\n",
- hook, pos, e->comefrom);
+ if (e->comefrom & (1 << NF_ARP_NUMHOOKS))
return 0;
- }
+
e->comefrom
|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
if ((strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("mark_source_chains: bad "
- "negative verdict (%i)\n",
- t->verdict);
+ t->verdict < -NF_MAX_VERDICT - 1)
return 0;
- }
/* Return: backtrack through the last
* big jump.
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
e = (struct arpt_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
pos = newpos;
}
}
-next:
- duprintf("Finished chain %u\n", hook);
+next: ;
}
return 1;
}
static inline int check_target(struct arpt_entry *e, const char *name)
{
struct xt_entry_target *t = arpt_get_target(e);
- int ret;
struct xt_tgchk_param par = {
.table = name,
.entryinfo = e,
.family = NFPROTO_ARP,
};
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
- if (ret < 0) {
- duprintf("arp_tables: check failed for `%s'.\n",
- t->u.kernel.target->name);
- return ret;
- }
- return 0;
+ return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
}
static inline int
{
struct xt_entry_target *t;
struct xt_target *target;
+ unsigned long pcnt;
int ret;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
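+ /* test in an unsigned long first; the pcnt field itself may be u64 */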
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
ret = PTR_ERR(target);
goto out;
}
if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
(unsigned char *)e + sizeof(struct arpt_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p\n", e);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset
- < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target))
return -EINVAL;
- }
if (!arp_checkentry(&e->arp))
return -EINVAL;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
- if (!check_underflow(e)) {
- pr_debug("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ if (!check_underflow(e))
return -EINVAL;
- }
+
newinfo->underflow[h] = underflows[h];
}
}
newinfo->underflow[i] = 0xFFFFFFFF;
}
- duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
- duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
return ret;
- if (i != repl->num_entries) {
- duprintf("translate_table: %u not %u entries\n",
- i, repl->num_entries);
+ if (i != repl->num_entries)
return -EINVAL;
- }
/* Check hooks all assigned */
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
/* Only hooks which are valid */
if (!(repl->valid_hooks & (1 << i)))
continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, repl->hook_entry[i]);
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
- }
- if (newinfo->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, repl->underflow[i]);
+ if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
- }
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
struct xt_table *t;
int ret;
- if (*len != sizeof(struct arpt_getinfo)) {
- duprintf("length %u != %Zu\n", *len,
- sizeof(struct arpt_getinfo));
+ if (*len != sizeof(struct arpt_getinfo))
return -EINVAL;
- }
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
struct arpt_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct arpt_get_entries) + get.size) {
- duprintf("get_entries: %u != %Zu\n", *len,
- sizeof(struct arpt_get_entries) + get.size);
+ if (*len != sizeof(struct arpt_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n",
- private->number);
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
- else {
- duprintf("get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else
ret = -EAGAIN;
- }
+
module_put(t->me);
xt_table_unlock(t);
} else
/* You lied! */
if (valid_hooks != t->valid_hooks) {
- duprintf("Valid hook crap: %08X vs %08X\n",
- valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
goto put_module;
/* Update module usage count based on number of rules */
- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
- oldinfo->number, oldinfo->initial_entries, newinfo->number);
if ((oldinfo->number > oldinfo->initial_entries) ||
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
if (ret != 0)
goto free_newinfo;
- duprintf("arp_tables: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
unsigned int entry_offset;
int ret, off;
- duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
(unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p, limit = %p\n", e, limit);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset < sizeof(struct compat_arpt_entry) +
- sizeof(struct compat_xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ sizeof(struct compat_xt_entry_target))
return -EINVAL;
- }
if (!arp_checkentry(&e->arp))
return -EINVAL;
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
- t->u.user.name);
ret = PTR_ERR(target);
goto out;
}
size = compatr->size;
info->number = compatr->num_entries;
- duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(NFPROTO_ARP);
xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
}
ret = -EINVAL;
- if (j != compatr->num_entries) {
- duprintf("translate_compat_table: %u not %u entries\n",
- j, compatr->num_entries);
+ if (j != compatr->num_entries)
goto out_unlock;
- }
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (ret != 0)
goto free_newinfo;
- duprintf("compat_do_replace: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
break;
default:
- duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
struct compat_arpt_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
- duprintf("compat_get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct compat_arpt_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
xt_compat_lock(NFPROTO_ARP);
const struct xt_table_info *private = t->private;
struct xt_table_info info;
- duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
if (!ret && get.size == info.size) {
ret = compat_copy_entries_to_user(private->size,
t, uptr->entrytable);
- } else if (!ret) {
- duprintf("compat_get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ } else if (!ret)
ret = -EAGAIN;
- }
+
xt_compat_flush_offsets(NFPROTO_ARP);
module_put(t->me);
xt_table_unlock(t);
break;
default:
- duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
}
default:
- duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(newinfo, loc_cpu_entry, repl);
- duprintf("arpt_register_table: translate table gives %d\n", ret);
if (ret != 0)
goto out_free;
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");
-/*#define DEBUG_IP_FIREWALL*/
-/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
-/*#define DEBUG_IP_FIREWALL_USER*/
-
-#ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...) pr_info(format , ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) pr_info(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define IP_NF_ASSERT(x)
#endif
-#if 0
-/* All the better to debug you with... */
-#define static
-#define inline
-#endif
-
void *ipt_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ipt, IPT);
if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
IPT_INV_SRCIP) ||
FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
- IPT_INV_DSTIP)) {
- dprintf("Source or dest mismatch.\n");
-
- dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
- &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
- ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
- dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
- &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
- ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
+ IPT_INV_DSTIP))
return false;
- }
ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
- if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
- dprintf("VIA in mismatch (%s vs %s).%s\n",
- indev, ipinfo->iniface,
- ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
+ if (FWINV(ret != 0, IPT_INV_VIA_IN))
return false;
- }
ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
- if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
- dprintf("VIA out mismatch (%s vs %s).%s\n",
- outdev, ipinfo->outiface,
- ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
+ if (FWINV(ret != 0, IPT_INV_VIA_OUT))
return false;
- }
/* Check specific protocol */
if (ipinfo->proto &&
- FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
- dprintf("Packet protocol %hi does not match %hi.%s\n",
- ip->protocol, ipinfo->proto,
- ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
+ FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO))
return false;
- }
/* If we have a fragment rule but the packet is not a fragment
* then we return zero */
- if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
- dprintf("Fragment rule but not fragment.%s\n",
- ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
+ if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG))
return false;
- }
return true;
}
static bool
ip_checkentry(const struct ipt_ip *ip)
{
- if (ip->flags & ~IPT_F_MASK) {
- duprintf("Unknown flag bits set: %08X\n",
- ip->flags & ~IPT_F_MASK);
+ if (ip->flags & ~IPT_F_MASK)
return false;
- }
- if (ip->invflags & ~IPT_INV_MASK) {
- duprintf("Unknown invflag bits set: %08X\n",
- ip->invflags & ~IPT_INV_MASK);
+ if (ip->invflags & ~IPT_INV_MASK)
return false;
- }
return true;
}
e = get_entry(table_base, private->hook_entry[hook]);
- pr_debug("Entering %s(hook %u), UF %p\n",
- table->name, hook,
- get_entry(table_base, private->underflow[hook]));
-
do {
const struct xt_entry_target *t;
const struct xt_entry_match *ematch;
if (stackidx == 0) {
e = get_entry(table_base,
private->underflow[hook]);
- pr_debug("Underflow (this is normal) "
- "to %p\n", e);
} else {
e = jumpstack[--stackidx];
- pr_debug("Pulled %p out from pos %u\n",
- e, stackidx);
e = ipt_next_entry(e);
}
continue;
}
if (table_base + v != ipt_next_entry(e) &&
- !(e->ip.flags & IPT_F_GOTO)) {
+ !(e->ip.flags & IPT_F_GOTO))
jumpstack[stackidx++] = e;
- pr_debug("Pushed %p into pos %u\n",
- e, stackidx - 1);
- }
e = get_entry(table_base, v);
continue;
/* Verdict */
break;
} while (!acpar.hotdrop);
- pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
xt_write_recseq_end(addend);
local_bh_enable();
-#ifdef DEBUG_ALLOW_ALL
- return NF_ACCEPT;
-#else
if (acpar.hotdrop)
return NF_DROP;
else return verdict;
-#endif
}
static bool find_jump_target(const struct xt_table_info *t,
= (void *)ipt_get_target_c(e);
int visited = e->comefrom & (1 << hook);
- if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
- pr_err("iptables: loop hook %u pos %u %08X.\n",
- hook, pos, e->comefrom);
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS))
return 0;
- }
+
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
if ((strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("mark_source_chains: bad "
- "negative verdict (%i)\n",
- t->verdict);
+ t->verdict < -NF_MAX_VERDICT - 1)
return 0;
- }
/* Return: backtrack through the last
big jump. */
do {
e->comefrom ^= (1<<NF_INET_NUMHOOKS);
-#ifdef DEBUG_IP_FIREWALL_USER
- if (e->comefrom
- & (1 << NF_INET_NUMHOOKS)) {
- duprintf("Back unset "
- "on hook %u "
- "rule %u\n",
- hook, pos);
- }
-#endif
oldpos = pos;
pos = e->counters.pcnt;
e->counters.pcnt = 0;
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
e = (struct ipt_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
pos = newpos;
}
}
-next:
- duprintf("Finished chain %u\n", hook);
+next: ;
}
return 1;
}
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ipt_ip *ip = par->entryinfo;
- int ret;
par->match = m->u.kernel.match;
par->matchinfo = m->data;
- ret = xt_check_match(par, m->u.match_size - sizeof(*m),
- ip->proto, ip->invflags & IPT_INV_PROTO);
- if (ret < 0) {
- duprintf("check failed for `%s'.\n", par->match->name);
- return ret;
- }
- return 0;
+ return xt_check_match(par, m->u.match_size - sizeof(*m),
+ ip->proto, ip->invflags & IPT_INV_PROTO);
}
static int
match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
m->u.kernel.match = match;
ret = check_match(m, par);
.hook_mask = e->comefrom,
.family = NFPROTO_IPV4,
};
- int ret;
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
- e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
- if (ret < 0) {
- duprintf("check failed for `%s'.\n",
- t->u.kernel.target->name);
- return ret;
- }
- return 0;
+ return xt_check_target(&par, t->u.target_size - sizeof(*t),
+ e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
}
static int
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ unsigned long pcnt;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
ret = PTR_ERR(target);
goto cleanup_matches;
}
if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
(unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p\n", e);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset
- < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
return -EINVAL;
- }
if (!ip_checkentry(&e->ip))
return -EINVAL;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
- if (!check_underflow(e)) {
- pr_debug("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ if (!check_underflow(e))
return -EINVAL;
- }
+
newinfo->underflow[h] = underflows[h];
}
}
newinfo->underflow[i] = 0xFFFFFFFF;
}
- duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
++newinfo->stacksize;
}
- if (i != repl->num_entries) {
- duprintf("translate_table: %u not %u entries\n",
- i, repl->num_entries);
+ if (i != repl->num_entries)
return -EINVAL;
- }
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
/* Only hooks which are valid */
if (!(repl->valid_hooks & (1 << i)))
continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, repl->hook_entry[i]);
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
- }
- if (newinfo->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, repl->underflow[i]);
+ if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
- }
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
struct xt_table *t;
int ret;
- if (*len != sizeof(struct ipt_getinfo)) {
- duprintf("length %u != %zu\n", *len,
- sizeof(struct ipt_getinfo));
+ if (*len != sizeof(struct ipt_getinfo))
return -EINVAL;
- }
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
struct ipt_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct ipt_get_entries) + get.size) {
- duprintf("get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct ipt_get_entries) + get.size)
return -EINVAL;
- }
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET, get.name);
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
- else {
- duprintf("get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else
ret = -EAGAIN;
- }
+
module_put(t->me);
xt_table_unlock(t);
} else
/* You lied! */
if (valid_hooks != t->valid_hooks) {
- duprintf("Valid hook crap: %08X vs %08X\n",
- valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
goto put_module;
/* Update module usage count based on number of rules */
- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
- oldinfo->number, oldinfo->initial_entries, newinfo->number);
if ((oldinfo->number > oldinfo->initial_entries) ||
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
if (ret != 0)
goto free_newinfo;
- duprintf("Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("compat_check_calc_match: `%s' not found\n",
- m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
+
m->u.kernel.match = match;
*size += xt_compat_match_offset(match);
return 0;
unsigned int j;
int ret, off;
- duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
(unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p, limit = %p\n", e, limit);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset < sizeof(struct compat_ipt_entry) +
- sizeof(struct compat_xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ sizeof(struct compat_xt_entry_target))
return -EINVAL;
- }
if (!ip_checkentry(&e->ip))
return -EINVAL;
target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
- t->u.user.name);
ret = PTR_ERR(target);
goto release_matches;
}
size = compatr->size;
info->number = compatr->num_entries;
- duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET);
xt_compat_init_offsets(AF_INET, compatr->num_entries);
}
ret = -EINVAL;
- if (j != compatr->num_entries) {
- duprintf("translate_compat_table: %u not %u entries\n",
- j, compatr->num_entries);
+ if (j != compatr->num_entries)
goto out_unlock;
- }
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (ret != 0)
goto free_newinfo;
- duprintf("compat_do_replace: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
break;
default:
- duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
struct compat_ipt_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
- duprintf("compat_get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct compat_ipt_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
xt_compat_lock(AF_INET);
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
- duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
- if (!ret && get.size == info.size) {
+ if (!ret && get.size == info.size)
ret = compat_copy_entries_to_user(private->size,
t, uptr->entrytable);
- } else if (!ret) {
- duprintf("compat_get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else if (!ret)
ret = -EAGAIN;
- }
+
xt_compat_flush_offsets(AF_INET);
module_put(t->me);
xt_table_unlock(t);
break;
default:
- duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
}
default:
- duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
- duprintf("Dropping evil ICMP tinygram.\n");
par->hotdrop = true;
return false;
}
in->ctl_table[0].data = &nf_conntrack_max;
in->ctl_table[1].data = &net->ct.count;
- in->ctl_table[2].data = &net->ct.htable_size;
+ in->ctl_table[2].data = &nf_conntrack_htable_size;
in->ctl_table[3].data = &net->ct.sysctl_checksum;
in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
#endif
static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
{
- struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
struct hlist_nulls_node *n;
for (st->bucket = 0;
- st->bucket < net->ct.htable_size;
+ st->bucket < nf_conntrack_htable_size;
st->bucket++) {
n = rcu_dereference(
- hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
struct hlist_nulls_node *head)
{
- struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
- if (++st->bucket >= net->ct.htable_size)
+ if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
}
head = rcu_dereference(
- hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
}
return head;
}
}
#endif
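+/* shared by ct_seq_show(): evaluated once before taking a reference
+ * and again afterwards to detect a race with object reuse
+ */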
+static bool ct_seq_should_skip(const struct nf_conn *ct,
+ const struct net *net,
+ const struct nf_conntrack_tuple_hash *hash)
+{
+ /* we only want to print DIR_ORIGINAL */
+ if (NF_CT_DIRECTION(hash))
+ return true;
+
+ if (nf_ct_l3num(ct) != AF_INET)
+ return true;
+
+ if (!net_eq(nf_ct_net(ct), net))
+ return true;
+
+ return false;
+}
+
static int ct_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_tuple_hash *hash = v;
int ret = 0;
NF_CT_ASSERT(ct);
- if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ if (ct_seq_should_skip(ct, seq_file_net(s), hash))
return 0;
+ if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ return 0;
- /* we only want to print DIR_ORIGINAL */
- if (NF_CT_DIRECTION(hash))
- goto release;
- if (nf_ct_l3num(ct) != AF_INET)
+ /* check if we raced with object reuse */
+ if (!nf_ct_is_confirmed(ct) ||
+ ct_seq_should_skip(ct, seq_file_net(s), hash))
goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
- struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
n = rcu_dereference(
- hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
if (n)
return n;
}
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
- struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(hlist_next_rcu(head));
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
head = rcu_dereference(
- hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
}
return head;
}
exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
+ if (!net_eq(nf_ct_net(exp->master), seq_file_net(s)))
+ return 0;
+
if (exp->tuple.src.l3num != AF_INET)
return 0;
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv6 packet filter");
-/*#define DEBUG_IP_FIREWALL*/
-/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
-/*#define DEBUG_IP_FIREWALL_USER*/
-
-#ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...) pr_info(format , ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) pr_info(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define IP_NF_ASSERT(x)
#endif
-#if 0
-/* All the better to debug you with... */
-#define static
-#define inline
-#endif
-
void *ip6t_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ip6t, IP6T);
if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
&ip6info->src), IP6T_INV_SRCIP) ||
FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
- &ip6info->dst), IP6T_INV_DSTIP)) {
- dprintf("Source or dest mismatch.\n");
-/*
- dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
- ipinfo->smsk.s_addr, ipinfo->src.s_addr,
- ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
- dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
- ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
- ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
+ &ip6info->dst), IP6T_INV_DSTIP))
return false;
- }
ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
- dprintf("VIA in mismatch (%s vs %s).%s\n",
- indev, ip6info->iniface,
- ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
+ if (FWINV(ret != 0, IP6T_INV_VIA_IN))
return false;
- }
ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
- dprintf("VIA out mismatch (%s vs %s).%s\n",
- outdev, ip6info->outiface,
- ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
+ if (FWINV(ret != 0, IP6T_INV_VIA_OUT))
return false;
- }
/* ... might want to do something with class and flowlabel here ... */
}
*fragoff = _frag_off;
- dprintf("Packet protocol %hi ?= %s%hi.\n",
- protohdr,
- ip6info->invflags & IP6T_INV_PROTO ? "!":"",
- ip6info->proto);
-
if (ip6info->proto == protohdr) {
if (ip6info->invflags & IP6T_INV_PROTO)
return false;
static bool
ip6_checkentry(const struct ip6t_ip6 *ipv6)
{
- if (ipv6->flags & ~IP6T_F_MASK) {
- duprintf("Unknown flag bits set: %08X\n",
- ipv6->flags & ~IP6T_F_MASK);
+ if (ipv6->flags & ~IP6T_F_MASK)
return false;
- }
- if (ipv6->invflags & ~IP6T_INV_MASK) {
- duprintf("Unknown invflag bits set: %08X\n",
- ipv6->invflags & ~IP6T_INV_MASK);
+ if (ipv6->invflags & ~IP6T_INV_MASK)
return false;
- }
+
return true;
}
xt_write_recseq_end(addend);
local_bh_enable();
-#ifdef DEBUG_ALLOW_ALL
- return NF_ACCEPT;
-#else
if (acpar.hotdrop)
return NF_DROP;
else return verdict;
-#endif
}
static bool find_jump_target(const struct xt_table_info *t,
= (void *)ip6t_get_target_c(e);
int visited = e->comefrom & (1 << hook);
- if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
- pr_err("iptables: loop hook %u pos %u %08X.\n",
- hook, pos, e->comefrom);
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS))
return 0;
- }
+
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
if ((strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("mark_source_chains: bad "
- "negative verdict (%i)\n",
- t->verdict);
+ t->verdict < -NF_MAX_VERDICT - 1)
return 0;
- }
/* Return: backtrack through the last
big jump. */
do {
e->comefrom ^= (1<<NF_INET_NUMHOOKS);
-#ifdef DEBUG_IP_FIREWALL_USER
- if (e->comefrom
- & (1 << NF_INET_NUMHOOKS)) {
- duprintf("Back unset "
- "on hook %u "
- "rule %u\n",
- hook, pos);
- }
-#endif
oldpos = pos;
pos = e->counters.pcnt;
e->counters.pcnt = 0;
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
e = (struct ip6t_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
pos = newpos;
}
}
-next:
- duprintf("Finished chain %u\n", hook);
+next: ;
}
return 1;
}
static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ip6t_ip6 *ipv6 = par->entryinfo;
- int ret;
par->match = m->u.kernel.match;
par->matchinfo = m->data;
- ret = xt_check_match(par, m->u.match_size - sizeof(*m),
- ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
- if (ret < 0) {
- duprintf("ip_tables: check failed for `%s'.\n",
- par.match->name);
- return ret;
- }
- return 0;
+ return xt_check_match(par, m->u.match_size - sizeof(*m),
+ ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
}
static int
match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
+
m->u.kernel.match = match;
ret = check_match(m, par);
.hook_mask = e->comefrom,
.family = NFPROTO_IPV6,
};
- int ret;
t = ip6t_get_target(e);
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
- e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO);
- if (ret < 0) {
- duprintf("ip_tables: check failed for `%s'.\n",
- t->u.kernel.target->name);
- return ret;
- }
- return 0;
+ return xt_check_target(&par, t->u.target_size - sizeof(*t),
+ e->ipv6.proto,
+ e->ipv6.invflags & IP6T_INV_PROTO);
}
static int
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ unsigned long pcnt;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
ret = PTR_ERR(target);
goto cleanup_matches;
}
if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
(unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p\n", e);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset
- < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target))
return -EINVAL;
- }
if (!ip6_checkentry(&e->ipv6))
return -EINVAL;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
- if (!check_underflow(e)) {
- pr_debug("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ if (!check_underflow(e))
return -EINVAL;
- }
+
newinfo->underflow[h] = underflows[h];
}
}
newinfo->underflow[i] = 0xFFFFFFFF;
}
- duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
++newinfo->stacksize;
}
- if (i != repl->num_entries) {
- duprintf("translate_table: %u not %u entries\n",
- i, repl->num_entries);
+ if (i != repl->num_entries)
return -EINVAL;
- }
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
/* Only hooks which are valid */
if (!(repl->valid_hooks & (1 << i)))
continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, repl->hook_entry[i]);
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
- }
- if (newinfo->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, repl->underflow[i]);
+ if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
- }
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
struct xt_table *t;
int ret;
- if (*len != sizeof(struct ip6t_getinfo)) {
- duprintf("length %u != %zu\n", *len,
- sizeof(struct ip6t_getinfo));
+ if (*len != sizeof(struct ip6t_getinfo))
return -EINVAL;
- }
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
struct ip6t_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct ip6t_get_entries) + get.size) {
- duprintf("get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct ip6t_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET6, get.name);
if (!IS_ERR_OR_NULL(t)) {
struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
- else {
- duprintf("get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else
ret = -EAGAIN;
- }
+
module_put(t->me);
xt_table_unlock(t);
} else
/* You lied! */
if (valid_hooks != t->valid_hooks) {
- duprintf("Valid hook crap: %08X vs %08X\n",
- valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
goto put_module;
/* Update module usage count based on number of rules */
- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
- oldinfo->number, oldinfo->initial_entries, newinfo->number);
if ((oldinfo->number > oldinfo->initial_entries) ||
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
if (ret != 0)
goto free_newinfo;
- duprintf("ip_tables: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("compat_check_calc_match: `%s' not found\n",
- m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
+
m->u.kernel.match = match;
*size += xt_compat_match_offset(match);
return 0;
unsigned int j;
int ret, off;
- duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
(unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p, limit = %p\n", e, limit);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset < sizeof(struct compat_ip6t_entry) +
- sizeof(struct compat_xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ sizeof(struct compat_xt_entry_target))
return -EINVAL;
- }
if (!ip6_checkentry(&e->ipv6))
return -EINVAL;
target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
- t->u.user.name);
ret = PTR_ERR(target);
goto release_matches;
}
size = compatr->size;
info->number = compatr->num_entries;
- duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
xt_compat_init_offsets(AF_INET6, compatr->num_entries);
}
ret = -EINVAL;
- if (j != compatr->num_entries) {
- duprintf("translate_compat_table: %u not %u entries\n",
- j, compatr->num_entries);
+ if (j != compatr->num_entries)
goto out_unlock;
- }
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (ret != 0)
goto free_newinfo;
- duprintf("compat_do_replace: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
break;
default:
- duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
struct compat_ip6t_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
- duprintf("compat_get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct compat_ip6t_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
xt_compat_lock(AF_INET6);
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
- duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
- if (!ret && get.size == info.size) {
+ if (!ret && get.size == info.size)
ret = compat_copy_entries_to_user(private->size,
t, uptr->entrytable);
- } else if (!ret) {
- duprintf("compat_get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else if (!ret)
ret = -EAGAIN;
- }
+
xt_compat_flush_offsets(AF_INET6);
module_put(t->me);
xt_table_unlock(t);
break;
default:
- duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
}
default:
- duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
- duprintf("Dropping evil ICMP tinygram.\n");
par->hotdrop = true;
return false;
}
fl6.fl6_dport = nth->dest;
security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
- if (dst == NULL || dst->error) {
+ if (dst->error) {
dst_release(dst);
goto free_nskb;
}
spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
+static void ip_vs_conn_expire(unsigned long data);
/*
* Returns hash value for IPVS connection entry
}
EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
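+/* put back the conn without rearming its timer: drop the reference
+ * and expire the connection immediately
+ */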
+static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
+{
+ __ip_vs_conn_put(cp);
+ ip_vs_conn_expire((unsigned long)cp);
+}
+
/*
* Put back the conn and restart its timer with its timeout
*/
-void ip_vs_conn_put(struct ip_vs_conn *cp)
+static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
{
unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
0 : cp->timeout;
__ip_vs_conn_put(cp);
}
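+/* one-packet (OPS) conns holding the last reference with no pending
+ * timer are expired at once; all others restart the timeout as before
+ */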
+void ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+ if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
+ (atomic_read(&cp->refcnt) == 1) &&
+ !timer_pending(&cp->timer))
+ /* expire connection immediately */
+ __ip_vs_conn_put_notimer(cp);
+ else
+ __ip_vs_conn_put_timer(cp);
+}
/*
* Fill a no_client_port connection with a client port number
if (cp->control)
ip_vs_control_del(cp);
- if (cp->flags & IP_VS_CONN_F_NFCT) {
+ if ((cp->flags & IP_VS_CONN_F_NFCT) &&
+ !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
/* Do not access conntracks during subsys cleanup
* because nf_conntrack_find_get can not be used after
* conntrack cleanup for the net.
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt);
- call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ ip_vs_conn_rcu_free(&cp->rcu_head);
+ else
+ call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
atomic_dec(&ipvs->conn_count);
return;
}
if (ipvs->sync_state & IP_VS_STATE_MASTER)
ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
- ip_vs_conn_put(cp);
+ __ip_vs_conn_put_timer(cp);
}
/* Modify timer, so that it expires as soon as possible.
return 1;
}
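+/* true if cp's destination belongs to a one-packet scheduling service */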
+static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
+{
+ struct ip_vs_service *svc;
+
+ if (!cp->dest)
+ return false;
+ svc = rcu_dereference(cp->dest->svc);
+ return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET);
+}
+
/* Called from keventd and must protect itself from softirqs */
void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
{
unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask;
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
- if (cp->flags & IP_VS_CONN_F_TEMPLATE)
- /* connection template */
- continue;
if (cp->ipvs != ipvs)
continue;
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+ if (atomic_read(&cp->n_control) ||
+ !ip_vs_conn_ops_mode(cp))
+ continue;
+ else
+ /* connection template of OPS */
+ goto try_drop;
+ }
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
case IP_VS_TCP_S_SYN_RECV:
continue;
}
} else {
+try_drop:
if (!todrop_entry(cp))
continue;
}
#ifdef CONFIG_IP_VS_DEBUG
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
+EXPORT_SYMBOL(ip_vs_new_conn_out);
static int ip_vs_net_id __read_mostly;
/* netns cnt used for uniqueness */
ret = cp->packet_xmit(skb, cp, pd->pp, iph);
/* do not touch skb anymore */
- atomic_inc(&cp->in_pkts);
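+ /* OPS conns expire right after use: account the packet on the
+ * controlling template instead
+ */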
+ if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
+ atomic_inc(&cp->control->in_pkts);
+ else
+ atomic_inc(&cp->in_pkts);
ip_vs_conn_put(cp);
return ret;
}
}
}
+/* Generic function to create new connections for outgoing RS packets
+ *
+ * Pre-requisites for successful connection creation:
+ * 1) Virtual Service is NOT fwmark based:
+ * In a fwmark-based VS the actual vaddr and vport are unknown to IPVS
+ * 2) Neither the Real Server nor the Virtual Service is port-less:
+ * This allows matching different VSs to the same RS ip-addr
+ */
+struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ __be16 dport,
+ __be16 cport)
+{
+ struct ip_vs_conn_param param;
+ struct ip_vs_conn *ct = NULL, *cp = NULL;
+ const union nf_inet_addr *vaddr, *daddr, *caddr;
+ union nf_inet_addr snet;
+ __be16 vport;
+ unsigned int flags;
+
+ EnterFunction(12);
+ vaddr = &svc->addr;
+ vport = svc->port;
+ daddr = &iph->saddr;
+ caddr = &iph->daddr;
+
+ /* check pre-requisites are satisfied */
+ if (svc->fwmark)
+ return NULL;
+ if (!vport || !dport)
+ return NULL;
+
+ /* for persistent service first create connection template */
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
+ /* apply netmask the same way ingress-side does */
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ ipv6_addr_prefix(&snet.in6, &caddr->in6,
+ (__force __u32)svc->netmask);
+ else
+#endif
+ snet.ip = caddr->ip & svc->netmask;
+ /* fill params and create template if not existent */
+ if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol,
+ &snet, 0, vaddr,
+ vport, &param) < 0)
+ return NULL;
+ ct = ip_vs_ct_in_get(&param);
+ if (!ct) {
+ ct = ip_vs_conn_new(&param, dest->af, daddr, dport,
+ IP_VS_CONN_F_TEMPLATE, dest, 0);
+ if (!ct) {
+ kfree(param.pe_data);
+ return NULL;
+ }
+ ct->timeout = svc->timeout;
+ } else {
+ kfree(param.pe_data);
+ }
+ }
+
+ /* connection flags */
+ flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) &&
+ iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0;
+ /* create connection */
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+ caddr, cport, vaddr, vport, &param);
+ cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0);
+ if (!cp) {
+ if (ct)
+ ip_vs_conn_put(ct);
+ return NULL;
+ }
+ if (ct) {
+ ip_vs_control_add(cp, ct);
+ ip_vs_conn_put(ct);
+ }
+ ip_vs_conn_stats(cp, svc);
+
+ /* return connection (will be used to handle outgoing packet) */
+ IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u "
+ "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+ ip_vs_fwd_tag(cp),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ cp->flags, atomic_read(&cp->refcnt));
+ LeaveFunction(12);
+ return cp;
+}
+
+/* Handle outgoing packets which are considered requests initiated by
+ * real servers, so that subsequent responses from external client can be
+ * routed to the right real server.
+ * Used also for outgoing responses in OPS mode.
+ *
+ * Connection management is handled by persistent-engine specific callback.
+ */
+static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
+ struct netns_ipvs *ipvs,
+ int af, struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_conn *cp = NULL;
+ __be16 _ports[2], *pptr;
+
+ if (hooknum == NF_INET_LOCAL_IN)
+ return NULL;
+
+ pptr = frag_safe_skb_hp(skb, iph->len,
+ sizeof(_ports), _ports, iph);
+ if (!pptr)
+ return NULL;
+
+ rcu_read_lock();
+ dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
+ &iph->saddr, pptr[0]);
+ if (dest) {
+ struct ip_vs_service *svc;
+ struct ip_vs_pe *pe;
+
+ svc = rcu_dereference(dest->svc);
+ if (svc) {
+ pe = rcu_dereference(svc->pe);
+ if (pe && pe->conn_out)
+ cp = pe->conn_out(svc, dest, skb, iph,
+ pptr[0], pptr[1]);
+ }
+ }
+ rcu_read_unlock();
+
+ return cp;
+}
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
if (likely(cp))
return handle_response(af, skb, pd, cp, &iph, hooknum);
+
+ /* Check for real-server-started requests */
+ if (atomic_read(&ipvs->conn_out_counter)) {
+ /* Currently only for UDP:
+ * connection oriented protocols typically use
+ * ephemeral ports for outgoing connections, so
+ * related incoming responses would not match any VS
+ */
+ if (pp->protocol == IPPROTO_UDP) {
+ cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
+ if (likely(cp))
+ return handle_response(af, skb, pd, cp, &iph,
+ hooknum);
+ }
+ }
+
if (sysctl_nat_icmp_send(ipvs) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
if (ipvs->sync_state & IP_VS_STATE_MASTER)
ip_vs_sync_conn(ipvs, cp, pkts);
+ else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
+ /* increment is done inside ip_vs_sync_conn too */
+ atomic_inc(&cp->control->in_pkts);
ip_vs_conn_put(cp);
return ret;
return false;
}
+/* Find real service record by <proto,addr,port>.
+ * In case of multiple records with the same <proto,addr,port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
+ __u16 protocol,
+ const union nf_inet_addr *daddr,
+ __be16 dport)
+{
+ unsigned int hash;
+ struct ip_vs_dest *dest;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_rs_hashkey(af, daddr, dport);
+
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->port == dport &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ (dest->protocol == protocol || dest->vfwmark)) {
+ /* HIT */
+ return dest;
+ }
+ }
+
+ return NULL;
+}
+
/* Lookup destination by {addr,port} in the given service
* Called under RCU lock.
*/
atomic_inc(&ipvs->ftpsvc_counter);
else if (svc->port == 0)
atomic_inc(&ipvs->nullsvc_counter);
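+ /* conn_out_counter lets the output path skip real-server lookups
+ * when no service provides a conn_out callback
+ */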
+ if (svc->pe && svc->pe->conn_out)
+ atomic_inc(&ipvs->conn_out_counter);
ip_vs_start_estimator(ipvs, &svc->stats);
struct ip_vs_scheduler *sched = NULL, *old_sched;
struct ip_vs_pe *pe = NULL, *old_pe = NULL;
int ret = 0;
+ bool new_pe_conn_out, old_pe_conn_out;
/*
* Lookup the scheduler, by 'u->sched_name'
svc->netmask = u->netmask;
old_pe = rcu_dereference_protected(svc->pe, 1);
- if (pe != old_pe)
+ if (pe != old_pe) {
rcu_assign_pointer(svc->pe, pe);
+ /* check for optional methods in new pe */
+ new_pe_conn_out = pe && pe->conn_out;
+ old_pe_conn_out = old_pe && old_pe->conn_out;
+ if (new_pe_conn_out && !old_pe_conn_out)
+ atomic_inc(&svc->ipvs->conn_out_counter);
+ if (old_pe_conn_out && !new_pe_conn_out)
+ atomic_dec(&svc->ipvs->conn_out_counter);
+ }
out:
ip_vs_scheduler_put(old_sched);
/* Unbind persistence engine, keep svc->pe */
old_pe = rcu_dereference_protected(svc->pe, 1);
+ if (old_pe && old_pe->conn_out)
+ atomic_dec(&ipvs->conn_out_counter);
ip_vs_pe_put(old_pe);
/*
(unsigned long) ipvs);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
+ atomic_set(&ipvs->conn_out_counter, 0);
/* procfs stats */
ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
return;
+ /* Never alter conntrack for OPS conns (no reply is expected) */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return;
+
/* Alter reply only in original direction */
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
return;
return cp->pe_data_len;
}
+static struct ip_vs_conn *
+ip_vs_sip_conn_out(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
+ struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph,
+ __be16 dport,
+ __be16 cport)
+{
+ if (likely(iph->protocol == IPPROTO_UDP))
+ return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport);
+ /* currently there is no need to handle anything other than UDP */
+ return NULL;
+}
+
static struct ip_vs_pe ip_vs_sip_pe =
{
.name = "sip",
.ct_match = ip_vs_sip_ct_match,
.hashkey_raw = ip_vs_sip_hashkey_raw,
.show_pe_data = ip_vs_sip_show_pe_data,
+ .conn_out = ip_vs_sip_conn_out,
};
static int __init ip_vs_sip_init(void)
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
+#include <net/netns/hash.h>
#define NF_CONNTRACK_VERSION "0.5.0"
__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+
+static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly seqcount_t nf_conntrack_generation;
static __read_mostly bool nf_conntrack_locks_all;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
spin_lock_nested(&nf_conntrack_locks[h1],
SINGLE_DEPTH_NESTING);
}
- if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
nf_conntrack_double_unlock(h1, h2);
return true;
}
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
-unsigned int nf_conntrack_hash_rnd __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
+static unsigned int nf_conntrack_hash_rnd __read_mostly;
-static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+ const struct net *net)
{
unsigned int n;
+ u32 seed;
+
+ get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
/* The direction must be ignored, so we hash everything up to the
* destination ports (which is a multiple of 4) and treat the last
* three bytes manually.
*/
+ seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
- return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
+ return jhash2((u32 *)tuple, n, seed ^
(((__force __u16)tuple->dst.u.all << 16) |
tuple->dst.protonum));
}
-static u32 __hash_bucket(u32 hash, unsigned int size)
-{
- return reciprocal_scale(hash, size);
-}
-
-static u32 hash_bucket(u32 hash, const struct net *net)
+static u32 scale_hash(u32 hash)
{
- return __hash_bucket(hash, net->ct.htable_size);
+ return reciprocal_scale(hash, nf_conntrack_htable_size);
}
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- unsigned int size)
+static u32 __hash_conntrack(const struct net *net,
+ const struct nf_conntrack_tuple *tuple,
+ unsigned int size)
{
- return __hash_bucket(hash_conntrack_raw(tuple), size);
+ return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
}
-static inline u_int32_t hash_conntrack(const struct net *net,
- const struct nf_conntrack_tuple *tuple)
+static u32 hash_conntrack(const struct net *net,
+ const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, net->ct.htable_size);
+ return scale_hash(hash_conntrack_raw(tuple, net));
}
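scale_hash() maps a full 32-bit hash onto the table size with reciprocal_scale(), a multiply-and-shift that avoids a division and works for non-power-of-two sizes. A standalone userspace illustration of the same arithmetic (none of this is kernel code):

	#include <stdint.h>
	#include <stdio.h>

	/* Same math as the kernel's reciprocal_scale(): map val from
	 * [0, 2^32) onto [0, ep_ro) with one multiply and one shift.
	 */
	static uint32_t scale(uint32_t val, uint32_t ep_ro)
	{
		return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
	}

	int main(void)
	{
		printf("%u\n", scale(0xdeadbeefu, 16384u)); /* bucket index */
		return 0;
	}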
bool
}
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- if (l4proto && l4proto->destroy)
+ if (l4proto->destroy)
l4proto->destroy(ct);
rcu_read_unlock();
local_bh_disable();
do {
- sequence = read_seqcount_begin(&net->ct.generation);
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
reply_hash = hash_conntrack(net,
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
+ const struct nf_conntrack_zone *zone,
+ const struct net *net)
{
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
*/
return nf_ct_tuple_equal(tuple, &h->tuple) &&
nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
- nf_ct_is_confirmed(ct);
+ nf_ct_is_confirmed(ct) &&
+ net_eq(net, nf_ct_net(ct));
}
/*
const struct nf_conntrack_tuple *tuple, u32 hash)
{
struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int bucket = hash_bucket(hash, net);
+ unsigned int bucket, sequence;
- /* Disable BHs the entire time since we normally need to disable them
- * at least once for the stats anyway.
- */
- local_bh_disable();
begin:
- hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
- if (nf_ct_key_equal(h, tuple, zone)) {
- NF_CT_STAT_INC(net, found);
- local_bh_enable();
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ bucket = scale_hash(hash);
+ ct_hash = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ if (nf_ct_key_equal(h, tuple, zone, net)) {
+ NF_CT_STAT_INC_ATOMIC(net, found);
return h;
}
- NF_CT_STAT_INC(net, searched);
+ NF_CT_STAT_INC_ATOMIC(net, searched);
}
/*
* if the nulls value we got at the end of this lookup is
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(n) != bucket) {
- NF_CT_STAT_INC(net, search_restart);
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
goto begin;
}
- local_bh_enable();
return NULL;
}
!atomic_inc_not_zero(&ct->ct_general.use)))
h = NULL;
else {
- if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
+ if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
nf_ct_put(ct);
goto begin;
}
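The re-check after atomic_inc_not_zero() is the usual discipline for SLAB_DESTROY_BY_RCU caches: slab pages outlive a grace period, but individual objects can be recycled at any moment, so a pinned object must be validated against the key again. In outline:

	/* Generic SLAB_DESTROY_BY_RCU lookup discipline (sketch):
	 *  1. rcu_read_lock(); find a candidate in the hash
	 *  2. atomic_inc_not_zero(&obj->refcnt) to pin it (fails if the
	 *     object is already being freed)
	 *  3. re-compare the key: the object may have been freed and
	 *     reused for a different tuple between steps 1 and 2
	 *  4. on mismatch, drop the reference and restart the lookup
	 */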
const struct nf_conntrack_tuple *tuple)
{
return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple));
+ hash_conntrack_raw(tuple, net));
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
unsigned int hash,
unsigned int reply_hash)
{
- struct net *net = nf_ct_net(ct);
-
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.hash[hash]);
+ &nf_conntrack_hash[hash]);
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
- &net->ct.hash[reply_hash]);
+ &nf_conntrack_hash[reply_hash]);
}
int
local_bh_disable();
do {
- sequence = read_seqcount_begin(&net->ct.generation);
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
reply_hash = hash_conntrack(net,
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* See if there's one in the list already, including reverse */
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
- if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &h->tuple) &&
- nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
- NF_CT_DIRECTION(h)))
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ zone, net))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
- if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- &h->tuple) &&
- nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
- NF_CT_DIRECTION(h)))
+
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
goto out;
add_timer(&ct->timeout);
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
+static inline void nf_ct_acct_update(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int len)
+{
+ struct nf_conn_acct *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct) {
+ struct nf_conn_counter *counter = acct->counter;
+
+ atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+ atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
+ }
+}
+
+static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ const struct nf_conn *loser_ct)
+{
+ struct nf_conn_acct *acct;
+
+ acct = nf_conn_acct_find(loser_ct);
+ if (acct) {
+ struct nf_conn_counter *counter = acct->counter;
+ unsigned int bytes;
+
+ /* u32 should be fine since we must have seen one packet. */
+ bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
+ nf_ct_acct_update(ct, ctinfo, bytes);
+ }
+}
+
+/* Resolve a race on insertion if this protocol allows it. */
+static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_tuple_hash *h)
+{
+ /* This is the conntrack entry already in hashes that won the race. */
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ struct nf_conntrack_l4proto *l4proto;
+
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->allow_clash &&
+ !nf_ct_is_dying(ct) &&
+ atomic_inc_not_zero(&ct->ct_general.use)) {
+ nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
+ nf_conntrack_put(skb->nfct);
+ /* Assign conntrack already in hashes to this skbuff. Don't
+ * modify skb->nfctinfo to ensure consistent stateful filtering.
+ */
+ skb->nfct = &ct->ct_general;
+ return NF_ACCEPT;
+ }
+ NF_CT_STAT_INC(net, drop);
+ return NF_DROP;
+}
+
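Only trackers that set .allow_clash in their nf_conntrack_l4proto ever reach the merge path; the UDP and UDPLITE hunks later in this patch opt in. An illustrative (deliberately incomplete) initializer, showing only the fields relevant here:

	static struct nf_conntrack_l4proto demo_udp_proto = {
		.l3proto	= PF_INET,
		.l4proto	= IPPROTO_UDP,
		.name		= "udp",
		.allow_clash	= true,	/* loser's skb keeps the winning ct */
	};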
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
enum ip_conntrack_info ctinfo;
struct net *net;
unsigned int sequence;
+ int ret = NF_DROP;
ct = nf_ct_get(skb, &ctinfo);
net = nf_ct_net(ct);
local_bh_disable();
do {
- sequence = read_seqcount_begin(&net->ct.generation);
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
/* reuse the hash saved before */
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
- hash = hash_bucket(hash, net);
+ hash = scale_hash(hash);
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
*/
nf_ct_del_from_dying_or_unconfirmed_list(ct);
- if (unlikely(nf_ct_is_dying(ct)))
- goto out;
+ if (unlikely(nf_ct_is_dying(ct))) {
+ nf_ct_add_to_dying_list(ct);
+ goto dying;
+ }
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost the race. */
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
- if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &h->tuple) &&
- nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
- NF_CT_DIRECTION(h)))
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ zone, net))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
- if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- &h->tuple) &&
- nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
- NF_CT_DIRECTION(h)))
+
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+ if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ zone, net))
goto out;
/* Timer relative to confirmation time, not original
out:
nf_ct_add_to_dying_list(ct);
+ ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
+dying:
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
local_bh_enable();
- return NF_DROP;
+ return ret;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
struct net *net = nf_ct_net(ignored_conntrack);
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ unsigned int hash, sequence;
struct hlist_nulls_node *n;
struct nf_conn *ct;
- unsigned int hash;
zone = nf_ct_zone(ignored_conntrack);
- hash = hash_conntrack(net, tuple);
- /* Disable BHs the entire time since we need to disable them at
- * least once for the stats anyway.
- */
- rcu_read_lock_bh();
- hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+ rcu_read_lock();
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hash = hash_conntrack(net, tuple);
+ ct_hash = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (ct != ignored_conntrack &&
- nf_ct_tuple_equal(tuple, &h->tuple) &&
- nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
- NF_CT_STAT_INC(net, found);
- rcu_read_unlock_bh();
+ nf_ct_key_equal(h, tuple, zone, net)) {
+ NF_CT_STAT_INC_ATOMIC(net, found);
+ rcu_read_unlock();
return 1;
}
- NF_CT_STAT_INC(net, searched);
+ NF_CT_STAT_INC_ATOMIC(net, searched);
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return 0;
}
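The switch from NF_CT_STAT_INC to NF_CT_STAT_INC_ATOMIC pairs with replacing rcu_read_lock_bh() by plain rcu_read_lock(): with BHs no longer disabled, the per-cpu increment must itself be safe against interruption. For reference, the two flavors (as defined in nf_conntrack.h around this series) differ only in the this_cpu primitive used:

	#define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
	#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)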
{
/* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
- struct nf_conn *ct = NULL, *tmp;
+ struct nf_conn *tmp;
struct hlist_nulls_node *n;
- unsigned int i = 0, cnt = 0;
- int dropped = 0;
- unsigned int hash, sequence;
+ unsigned int i, hash, sequence;
+ struct nf_conn *ct = NULL;
spinlock_t *lockp;
+ bool ret = false;
+
+ i = 0;
local_bh_disable();
restart:
- sequence = read_seqcount_begin(&net->ct.generation);
- hash = hash_bucket(_hash, net);
- for (; i < net->ct.htable_size; i++) {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ for (; i < NF_CT_EVICTION_RANGE; i++) {
+ hash = scale_hash(_hash++);
lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
- if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
spin_unlock(lockp);
goto restart;
}
- hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
- hnnode) {
+ hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
+ hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
- !nf_ct_is_dying(tmp) &&
- atomic_inc_not_zero(&tmp->ct_general.use)) {
+
+ if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+ !net_eq(nf_ct_net(tmp), net) ||
+ nf_ct_is_dying(tmp))
+ continue;
+
+ if (atomic_inc_not_zero(&tmp->ct_general.use)) {
ct = tmp;
break;
}
- cnt++;
}
- hash = (hash + 1) % net->ct.htable_size;
spin_unlock(lockp);
-
- if (ct || cnt >= NF_CT_EVICTION_RANGE)
+ if (ct)
break;
-
}
+
local_bh_enable();
if (!ct)
- return dropped;
+ return false;
- if (del_timer(&ct->timeout)) {
+ /* kill only if in same netns -- might have moved due to
+ * SLAB_DESTROY_BY_RCU rules
+ */
+ if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
if (nf_ct_delete(ct, 0, 0)) {
- dropped = 1;
NF_CT_STAT_INC_ATOMIC(net, early_drop);
+ ret = true;
}
}
- nf_ct_put(ct);
- return dropped;
-}
-
-void init_nf_conntrack_hash_rnd(void)
-{
- unsigned int rand;
- /*
- * Why not initialize nf_conntrack_rnd in a "init()" function ?
- * Because there isn't enough entropy when system initializing,
- * and we initialize it as late as possible.
- */
- do {
- get_random_bytes(&rand, sizeof(rand));
- } while (!rand);
- cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+ nf_ct_put(ct);
+ return ret;
}
static struct nf_conn *
{
struct nf_conn *ct;
- if (unlikely(!nf_conntrack_hash_rnd)) {
- init_nf_conntrack_hash_rnd();
- /* recompute the hash as nf_conntrack_hash_rnd is initialized */
- hash = hash_conntrack_raw(orig);
- }
-
/* We don't want any race condition at early drop stage */
atomic_inc(&net->ct.count);
* Do not use kmem_cache_zalloc(), as this cache uses
* SLAB_DESTROY_BY_RCU.
*/
- ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
+ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
if (ct == NULL)
goto out;
atomic_set(&ct->ct_general.use, 0);
return ct;
out_free:
- kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+ kmem_cache_free(nf_conntrack_cachep, ct);
out:
atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
- kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+ kmem_cache_free(nf_conntrack_cachep, ct);
smp_mb__before_atomic();
atomic_dec(&net->ct.count);
}
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
- hash = hash_conntrack_raw(&tuple);
+ hash = hash_conntrack_raw(&tuple, net);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
}
acct:
- if (do_acct) {
- struct nf_conn_acct *acct;
-
- acct = nf_conn_acct_find(ct);
- if (acct) {
- struct nf_conn_counter *counter = acct->counter;
-
- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
- atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
- }
- }
+ if (do_acct)
+ nf_ct_acct_update(ct, ctinfo, skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
const struct sk_buff *skb,
int do_acct)
{
- if (do_acct) {
- struct nf_conn_acct *acct;
-
- acct = nf_conn_acct_find(ct);
- if (acct) {
- struct nf_conn_counter *counter = acct->counter;
-
- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
- atomic64_add(skb->len - skb_network_offset(skb),
- &counter[CTINFO2DIR(ctinfo)].bytes);
- }
- }
+ if (do_acct)
+ nf_ct_acct_update(ct, ctinfo, skb->len);
if (del_timer(&ct->timeout)) {
ct->timeout.function((unsigned long)ct);
int cpu;
spinlock_t *lockp;
- for (; *bucket < net->ct.htable_size; (*bucket)++) {
+ for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
nf_conntrack_lock(lockp);
- if (*bucket < net->ct.htable_size) {
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+ if (*bucket < nf_conntrack_htable_size) {
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
+ if (net_eq(nf_ct_net(ct), net) &&
+ iter(ct, data))
goto found;
}
}
might_sleep();
+ if (atomic_read(&net->ct.count) == 0)
+ return;
+
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daisies... */
if (del_timer(&ct->timeout))
while (untrack_refs() > 0)
schedule();
+ nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
nf_ct_extend_unregister(&nf_ct_zone_extend);
#endif
}
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_proto_pernet_fini(net);
nf_conntrack_helper_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_tstamp_pernet_fini(net);
nf_conntrack_acct_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
- kmem_cache_destroy(net->ct.nf_conntrack_cachep);
- kfree(net->ct.slabname);
free_percpu(net->ct.stat);
free_percpu(net->ct.pcpu_lists);
}
local_bh_disable();
nf_conntrack_all_lock();
- write_seqcount_begin(&init_net.ct.generation);
+ write_seqcount_begin(&nf_conntrack_generation);
/* Lookups in the old hash might happen in parallel, which means we
* might get false negatives during connection lookup. New connections
* created because of a false negative won't make it into the hash
* though since that required taking the locks.
*/
- for (i = 0; i < init_net.ct.htable_size; i++) {
- while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
- h = hlist_nulls_entry(init_net.ct.hash[i].first,
- struct nf_conntrack_tuple_hash, hnnode);
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
+ while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+ h = hlist_nulls_entry(nf_conntrack_hash[i].first,
+ struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
- bucket = __hash_conntrack(&h->tuple, hashsize);
+ bucket = __hash_conntrack(nf_ct_net(ct),
+ &h->tuple, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
- old_size = init_net.ct.htable_size;
- old_hash = init_net.ct.hash;
+ old_size = nf_conntrack_htable_size;
+ old_hash = nf_conntrack_hash;
- init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
- init_net.ct.hash = hash;
+ nf_conntrack_hash = hash;
+ nf_conntrack_htable_size = hashsize;
- write_seqcount_end(&init_net.ct.generation);
+ write_seqcount_end(&nf_conntrack_generation);
nf_conntrack_all_unlock();
local_bh_enable();
+ synchronize_net();
nf_ct_free_hashtable(old_hash, old_size);
return 0;
}
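The added synchronize_net() is what makes freeing the old table safe: lookups sample nf_conntrack_hash under RCU, so the old array must stay around until every reader that might still hold the old pointer has finished. The resize is still triggered through the existing hashsize module parameter:

	/* From userspace, e.g.:
	 *
	 *   echo 131072 > /sys/module/nf_conntrack/parameters/hashsize
	 *
	 * which is handled by nf_conntrack_set_hashsize().
	 */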
int nf_conntrack_init_start(void)
{
int max_factor = 8;
- int i, ret, cpu;
+ int ret = -ENOMEM;
+ int i, cpu;
+
+ seqcount_init(&nf_conntrack_generation);
for (i = 0; i < CONNTRACK_LOCKS; i++)
spin_lock_init(&nf_conntrack_locks[i]);
* entries. */
max_factor = 4;
}
+
+ nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
+ if (!nf_conntrack_hash)
+ return -ENOMEM;
+
nf_conntrack_max = max_factor * nf_conntrack_htable_size;
+ nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+ sizeof(struct nf_conn), 0,
+ SLAB_DESTROY_BY_RCU, NULL);
+ if (!nf_conntrack_cachep)
+ goto err_cachep;
+
printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
nf_conntrack_max);
err_acct:
nf_conntrack_expect_fini();
err_expect:
+ kmem_cache_destroy(nf_conntrack_cachep);
+err_cachep:
+ nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
return ret;
}
int cpu;
atomic_set(&net->ct.count, 0);
- seqcount_init(&net->ct.generation);
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
if (!net->ct.pcpu_lists)
if (!net->ct.stat)
goto err_pcpu_lists;
- net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
- if (!net->ct.slabname)
- goto err_slabname;
-
- net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
- sizeof(struct nf_conn), 0,
- SLAB_DESTROY_BY_RCU, NULL);
- if (!net->ct.nf_conntrack_cachep) {
- printk(KERN_ERR "Unable to create nf_conn slab cache\n");
- goto err_cache;
- }
-
- net->ct.htable_size = nf_conntrack_htable_size;
- net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
- if (!net->ct.hash) {
- printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
- goto err_hash;
- }
ret = nf_conntrack_expect_pernet_init(net);
if (ret < 0)
goto err_expect;
err_acct:
nf_conntrack_expect_pernet_fini(net);
err_expect:
- nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-err_hash:
- kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-err_cache:
- kfree(net->ct.slabname);
-err_slabname:
free_percpu(net->ct.stat);
err_pcpu_lists:
free_percpu(net->ct.pcpu_lists);
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>
+#include <net/netns/hash.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
+struct hlist_head *nf_ct_expect_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
+
unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
+static unsigned int nf_ct_expect_hashrnd __read_mostly;
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
nf_ct_expect_put(exp);
}
-static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
+static unsigned int nf_ct_expect_dst_hash(const struct net *n,
+ const struct nf_conntrack_tuple *tuple)
{
- unsigned int hash;
+ unsigned int hash, seed;
- if (unlikely(!nf_conntrack_hash_rnd)) {
- init_nf_conntrack_hash_rnd();
- }
+ get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
+
+ seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
(((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
- (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
+ (__force __u16)tuple->dst.u.all) ^ seed);
return reciprocal_scale(hash, nf_ct_expect_hsize);
}
+static bool
+nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_expect *i,
+ const struct nf_conntrack_zone *zone,
+ const struct net *net)
+{
+ return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+ net_eq(net, nf_ct_net(i->master)) &&
+ nf_ct_zone_equal_any(i->master, zone);
+}
+
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net,
const struct nf_conntrack_zone *zone,
if (!net->ct.expect_count)
return NULL;
- h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
- if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- nf_ct_zone_equal_any(i->master, zone))
+ h = nf_ct_expect_dst_hash(net, tuple);
+ hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
+ if (nf_ct_exp_equal(tuple, i, zone, net))
return i;
}
return NULL;
if (!net->ct.expect_count)
return NULL;
- h = nf_ct_expect_dst_hash(tuple);
- hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
+ h = nf_ct_expect_dst_hash(net, tuple);
+ hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
- nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- nf_ct_zone_equal_any(i->master, zone)) {
+ nf_ct_exp_equal(tuple, i, zone, net)) {
exp = i;
break;
}
}
return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
+ net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
return a->master == b->master && a->class == b->class &&
nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+ net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
struct nf_conn_help *master_help = nfct_help(exp->master);
struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(exp);
- unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
+ unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
/* two references: one for hash insert, one for the timer */
atomic_add(2, &exp->use);
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
- hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+ hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
ret = -ESHUTDOWN;
goto out;
}
- h = nf_ct_expect_dst_hash(&expect->tuple);
- hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) {
+ h = nf_ct_expect_dst_hash(net, &expect->tuple);
+ hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
if (del_timer(&i->timeout)) {
nf_ct_unlink_expect(i);
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
- struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
if (n)
return n;
}
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
- struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(hlist_next_rcu(head));
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
}
return head;
}
int nf_conntrack_expect_pernet_init(struct net *net)
{
- int err = -ENOMEM;
-
net->ct.expect_count = 0;
- net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
- if (net->ct.expect_hash == NULL)
- goto err1;
-
- err = exp_proc_init(net);
- if (err < 0)
- goto err2;
-
- return 0;
-err2:
- nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
-err1:
- return err;
+ return exp_proc_init(net);
}
void nf_conntrack_expect_pernet_fini(struct net *net)
{
exp_proc_remove(net);
- nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}
int nf_conntrack_expect_init(void)
0, 0, NULL);
if (!nf_ct_expect_cachep)
return -ENOMEM;
+
+ nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
+ if (!nf_ct_expect_hash) {
+ kmem_cache_destroy(nf_ct_expect_cachep);
+ return -ENOMEM;
+ }
+
return 0;
}
{
rcu_barrier(); /* Wait for call_rcu() before destroy */
kmem_cache_destroy(nf_ct_expect_cachep);
+ nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize);
}
EXPORT_SYMBOL_GPL(nf_ct_helper_hsize);
static unsigned int nf_ct_helper_count __read_mostly;
-static bool nf_ct_auto_assign_helper __read_mostly = true;
+static bool nf_ct_auto_assign_helper __read_mostly = false;
module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
MODULE_PARM_DESC(nf_conntrack_helper,
- "Enable automatic conntrack helper assignment (default 1)");
+ "Enable automatic conntrack helper assignment (default 0)");
#ifdef CONFIG_SYSCTL
static struct ctl_table helper_sysctl_table[] = {
spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
- &net->ct.expect_hash[i], hnode) {
+ &nf_ct_expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((rcu_dereference_protected(
help->helper,
spin_unlock_bh(&pcpu->lock);
}
local_bh_disable();
- for (i = 0; i < net->ct.htable_size; i++) {
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < net->ct.htable_size) {
- hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ if (i < nf_conntrack_htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
unhelp(h, me);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
last = (struct nf_conn *)cb->args[1];
local_bh_disable();
- for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
+ for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart:
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
- if (cb->args[0] >= net->ct.htable_size) {
+ if (cb->args[0] >= nf_conntrack_htable_size) {
spin_unlock(lockp);
goto out;
}
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
- hnnode) {
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]],
+ hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (!net_eq(net, nf_ct_net(ct)))
+ continue;
+
/* Dump entries of a given L3 protocol number.
* If it is not specified, ie. l3proto == 0,
* then dump everything. */
last = (struct nf_conntrack_expect *)cb->args[1];
for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
restart:
- hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]],
+ hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]],
hnode) {
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
+
+ if (!net_eq(nf_ct_net(exp->master), net))
+ continue;
+
if (cb->args[1]) {
if (exp != last)
continue;
spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
- &net->ct.expect_hash[i],
+ &nf_ct_expect_hash[i],
hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
m_help = nfct_help(exp->master);
if (!strcmp(m_help->helper->name, name) &&
del_timer(&exp->timeout)) {
spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
- &net->ct.expect_hash[i],
+ &nf_ct_expect_hash[i],
hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp,
NETLINK_CB(skb).portid,
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
.name = "udp",
+ .allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.print_tuple = udp_print_tuple,
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
.name = "udp",
+ .allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
.print_tuple = udp_print_tuple,
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
.name = "udplite",
+ .allow_clash = true,
.pkt_to_tuple = udplite_pkt_to_tuple,
.invert_tuple = udplite_invert_tuple,
.print_tuple = udplite_print_tuple,
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
.name = "udplite",
+ .allow_clash = true,
.pkt_to_tuple = udplite_pkt_to_tuple,
.invert_tuple = udplite_invert_tuple,
.print_tuple = udplite_print_tuple,
static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
{
- struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
struct hlist_nulls_node *n;
for (st->bucket = 0;
- st->bucket < net->ct.htable_size;
+ st->bucket < nf_conntrack_htable_size;
st->bucket++) {
- n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
struct hlist_nulls_node *head)
{
- struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
- if (++st->bucket >= net->ct.htable_size)
+ if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
}
head = rcu_dereference(
hlist_nulls_first_rcu(
- &net->ct.hash[st->bucket]));
+ &nf_conntrack_hash[st->bucket]));
}
return head;
}
},
{
.procname = "nf_conntrack_buckets",
- .data = &init_net.ct.htable_size,
+ .data = &nf_conntrack_htable_size,
.maxlen = sizeof(unsigned int),
.mode = 0444,
.proc_handler = proc_dointvec,
goto out_kmemdup;
table[1].data = &net->ct.count;
- table[2].data = &net->ct.htable_size;
table[3].data = &net->ct.sysctl_checksum;
table[4].data = &net->ct.sysctl_log_invalid;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
-hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
+
/* Original src, to ensure we map it consistently if possible. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ nf_conntrack_hash_rnd);
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- return reciprocal_scale(hash, net->ct.nat_htable_size);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
- hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
+ hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
/* Copy source part from reply tuple. */
nf_ct_invert_tuplepr(result,
nat = nfct_nat(ct);
nat->ct = ct;
hlist_add_head_rcu(&nat->bysource,
- &net->ct.nat_bysource[srchash]);
+ &nf_nat_bysource[srchash]);
spin_unlock_bh(&nf_nat_lock);
}
}
#endif
-static int __net_init nf_nat_net_init(struct net *net)
-{
- /* Leave them the same for the moment. */
- net->ct.nat_htable_size = net->ct.htable_size;
- net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
- if (!net->ct.nat_bysource)
- return -ENOMEM;
- return 0;
-}
-
static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
- synchronize_rcu();
- nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
static struct pernet_operations nf_nat_net_ops = {
- .init = nf_nat_net_init,
.exit = nf_nat_net_exit,
};
{
int ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
+
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
return 0;
cleanup_extend:
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_TABLE] = { .type = NLA_STRING },
[NFTA_SET_NAME] = { .type = NLA_STRING,
- .len = IFNAMSIZ - 1 },
+ .len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_SET_FLAGS] = { .type = NLA_U32 },
[NFTA_SET_KEY_TYPE] = { .type = NLA_U32 },
[NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
unsigned long *inuse;
unsigned int n = 0, min = 0;
- p = strnchr(name, IFNAMSIZ, '%');
+ p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
if (p != NULL) {
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
- char name[IFNAMSIZ];
+ char name[NFT_SET_MAXNAMELEN];
unsigned int size;
bool create;
u64 timeout;
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+static int nft_setelem_parse_flags(const struct nft_set *set,
+ const struct nlattr *attr, u32 *flags)
+{
+ if (attr == NULL)
+ return 0;
+
+ *flags = ntohl(nla_get_be32(attr));
+ if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ *flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+
+ return 0;
+}
+
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
+ u32 flags = 0;
u64 timeout;
- u32 flags;
u8 ulen;
int err;
nft_set_ext_prepare(&tmpl);
- flags = 0;
- if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
- flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
- if (flags & ~NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
- if (!(set->flags & NFT_SET_INTERVAL) &&
- flags & NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
- if (flags != 0)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
- }
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+ if (flags != 0)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
if (set->flags & NFT_SET_MAP) {
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ struct nft_set_ext_tmpl tmpl;
struct nft_data_desc desc;
struct nft_set_elem elem;
+ struct nft_set_ext *ext;
struct nft_trans *trans;
+ u32 flags = 0;
+ void *priv;
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
if (nla[NFTA_SET_ELEM_KEY] == NULL)
goto err1;
+ nft_set_ext_prepare(&tmpl);
+
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+ if (flags != 0)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+
err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
goto err2;
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len);
+
+ err = -ENOMEM;
+ elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0,
+ GFP_KERNEL);
+ if (elem.priv == NULL)
+ goto err2;
+
+ ext = nft_set_elem_ext(set, elem.priv);
+ if (flags)
+ *nft_set_ext_flags(ext) = flags;
+
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
- goto err2;
+ goto err3;
}
- elem.priv = set->ops->deactivate(set, &elem);
- if (elem.priv == NULL) {
+ priv = set->ops->deactivate(set, &elem);
+ if (priv == NULL) {
err = -ENOENT;
- goto err3;
+ goto err4;
}
+ kfree(elem.priv);
+ elem.priv = priv;
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
-err3:
+err4:
kfree(trans);
+err3:
+ kfree(elem.priv);
err2:
nft_data_uninit(&elem.key.val, desc.type);
err1:
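The delete path now builds a scratch element purely so the parsed NFT_SET_ELEM_INTERVAL_END flag reaches ->deactivate(); the nft_rbtree hunks below rely on it to pick the matching endpoint of an interval. In outline:

	/* Sketch of the flow above:
	 *
	 *   elem.priv = nft_set_elem_init(...)  scratch element carrying flags
	 *   priv = set->ops->deactivate(...)    finds the stored element
	 *   kfree(elem.priv)                    scratch no longer needed
	 *   elem.priv = priv                    transaction logs the real one
	 */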
int i;
local_bh_disable();
- for (i = 0; i < net->ct.htable_size; i++) {
+ for (i = 0; i < nf_conntrack_htable_size; i++) {
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < net->ct.htable_size) {
- hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ if (i < nf_conntrack_htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
untimeout(h, timeout);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_replace(ct,
+ &regs->data[priv->sreg],
+ &regs->data[priv->sreg],
+ NF_CT_LABELS_MAX_SIZE / sizeof(u32));
+ break;
#endif
default:
break;
case NFT_CT_MARK:
len = FIELD_SIZEOF(struct nf_conn, mark);
break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+ len = NF_CT_LABELS_MAX_SIZE;
+ err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
+ if (err)
+ return err;
+ break;
#endif
default:
return -EOPNOTSUPP;
static void nft_ct_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
+ struct nft_ct *priv = nft_expr_priv(expr);
+
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_put(ctx->net);
+ break;
+#endif
+ default:
+ break;
+ }
+
nft_ct_l3proto_module_put(ctx->afi->family);
}
struct nft_set_ext ext;
};
+static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+ (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
+}
+
+static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
+ const struct nft_rbtree_elem *interval)
+{
+ return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+}
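nft_rbtree encodes an interval as two tree nodes, a start element plus one flagged NFT_SET_ELEM_INTERVAL_END; the helpers above let lookup and insertion tell them apart. A worked example of what the tree holds (keys are illustrative):

	/* The interval [10, 20) is stored as two elements:
	 *
	 *   key 10, no flags                   -- interval start, matches
	 *   key 20, NFT_SET_ELEM_INTERVAL_END  -- end marker, never matches
	 *
	 * A lookup of 15 descends to 10 (the closest lower key) and
	 * matches; a lookup of 25 descends to 20, sees the end flag
	 * and fails.
	 */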
static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
const struct nft_rbtree_elem *rbe, *interval = NULL;
const struct rb_node *parent;
u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
+ const void *this;
int d;
spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
+ this = nft_set_ext_key(&rbe->ext);
+ d = memcmp(this, key, set->klen);
if (d < 0) {
parent = parent->rb_left;
+ /* In case of adjacent ranges, we always see the high
+ * part of the range first, before the low one.
+ * So don't update interval if the keys are equal.
+ */
+ if (interval && nft_rbtree_equal(set, this, interval))
+ continue;
interval = rbe;
} else if (d > 0)
parent = parent->rb_right;
parent = parent->rb_left;
continue;
}
- if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(&rbe->ext) &
- NFT_SET_ELEM_INTERVAL_END)
+ if (nft_rbtree_interval_end(rbe))
goto out;
spin_unlock_bh(&nft_rbtree_lock);
else if (d > 0)
p = &parent->rb_right;
else {
- if (nft_set_elem_active(&rbe->ext, genmask))
- return -EEXIST;
- p = &parent->rb_left;
+ if (nft_set_elem_active(&rbe->ext, genmask)) {
+ if (nft_rbtree_interval_end(rbe) &&
+ !nft_rbtree_interval_end(new))
+ p = &parent->rb_left;
+ else if (!nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_end(new))
+ p = &parent->rb_right;
+ else
+ return -EEXIST;
+ }
}
}
rb_link_node(&new->node, parent, p);
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
- struct nft_rbtree_elem *rbe;
+ struct nft_rbtree_elem *rbe, *this = elem->priv;
u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int d;
parent = parent->rb_left;
continue;
}
+ if (nft_rbtree_interval_end(rbe) &&
+ !nft_rbtree_interval_end(this)) {
+ parent = parent->rb_left;
+ continue;
+ } else if (!nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_end(this)) {
+ parent = parent->rb_right;
+ continue;
+ }
nft_set_elem_change_active(set, &rbe->ext);
return rbe;
}
u8 protonum;
l3proto = __nf_ct_l3proto_find(l3num);
- if (!l3proto) {
- pr_debug("ovs_ct_find_existing: Can't get l3proto\n");
- return NULL;
- }
if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
&protonum) <= 0) {
pr_debug("ovs_ct_find_existing: Can't get protonum\n");
return NULL;
}
l4proto = __nf_ct_l4proto_find(l3num, protonum);
- if (!l4proto) {
- pr_debug("ovs_ct_find_existing: Can't get l4proto\n");
- return NULL;
- }
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
protonum, net, &tuple, l3proto, l4proto)) {
pr_debug("ovs_ct_find_existing: Can't get tuple\n");