inet: frags: break the 2GB limit for frags storage
authorEric Dumazet <edumazet@google.com>
Sat, 31 Mar 2018 19:58:53 +0000 (12:58 -0700)
committerDavid S. Miller <davem@davemloft.net>
Sun, 1 Apr 2018 03:25:39 +0000 (23:25 -0400)
Some users are willing to provision huge amounts of memory to be able
to perform reassembly reasonnably well under pressure.

Current memory tracking is using one atomic_t and integers.

Switch to atomic_long_t so that 64bit arches can use more than 2GB,
without any cost for 32bit arches.

Note that this patch avoids an overflow error, if high_thresh was set
to ~2GB, since this test in inet_frag_alloc() was never true :

if (... || frag_mem_limit(nf) > nf->high_thresh)

Tested:

$ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh

<frag DDOS>

$ grep FRAG /proc/net/sockstat
FRAG: inuse 14705885 memory 16000002880

$ nstat -n ; sleep 1 ; nstat | grep Reas
IpReasmReqds                    3317150            0.0
IpReasmFails                    3317112            0.0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/ip-sysctl.txt
include/net/inet_frag.h
net/ieee802154/6lowpan/reassembly.c
net/ipv4/ip_fragment.c
net/ipv4/proc.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/proc.c
net/ipv6/reassembly.c

index 6f2a367..5dc1a04 100644 (file)
@@ -133,10 +133,10 @@ min_adv_mss - INTEGER
 
 IP Fragmentation:
 
-ipfrag_high_thresh - INTEGER
+ipfrag_high_thresh - LONG INTEGER
        Maximum memory used to reassemble IP fragments.
 
-ipfrag_low_thresh - INTEGER
+ipfrag_low_thresh - LONG INTEGER
        (Obsolete since linux-4.17)
        Maximum memory used to reassemble IP fragments before the kernel
        begins to remove incomplete fragment queues to free up resources.
index 95e353e..a52e727 100644 (file)
@@ -8,11 +8,11 @@ struct netns_frags {
        struct rhashtable       rhashtable ____cacheline_aligned_in_smp;
 
        /* Keep atomic mem on separate cachelines in structs that include it */
-       atomic_t                mem ____cacheline_aligned_in_smp;
+       atomic_long_t           mem ____cacheline_aligned_in_smp;
        /* sysctls */
+       long                    high_thresh;
+       long                    low_thresh;
        int                     timeout;
-       int                     high_thresh;
-       int                     low_thresh;
        int                     max_dist;
        struct inet_frags       *f;
 };
@@ -102,7 +102,7 @@ void inet_frags_fini(struct inet_frags *);
 
 static inline int inet_frags_init_net(struct netns_frags *nf)
 {
-       atomic_set(&nf->mem, 0);
+       atomic_long_set(&nf->mem, 0);
        return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
 }
 void inet_frags_exit_net(struct netns_frags *nf);
@@ -119,19 +119,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
 
 /* Memory Tracking Functions. */
 
-static inline int frag_mem_limit(struct netns_frags *nf)
+static inline long frag_mem_limit(const struct netns_frags *nf)
 {
-       return atomic_read(&nf->mem);
+       return atomic_long_read(&nf->mem);
 }
 
-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
 {
-       atomic_sub(i, &nf->mem);
+       atomic_long_sub(val, &nf->mem);
 }
 
-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
 {
-       atomic_add(i, &nf->mem);
+       atomic_long_add(val, &nf->mem);
 }
 
 /* RFC 3168 support :
index 1aec71a..44f148a 100644 (file)
@@ -411,23 +411,23 @@ err:
 }
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table lowpan_frags_ns_ctl_table[] = {
        {
                .procname       = "6lowpanfrag_high_thresh",
                .data           = &init_net.ieee802154_lowpan.frags.high_thresh,
-               .maxlen         = sizeof(int),
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = proc_doulongvec_minmax,
                .extra1         = &init_net.ieee802154_lowpan.frags.low_thresh
        },
        {
                .procname       = "6lowpanfrag_low_thresh",
                .data           = &init_net.ieee802154_lowpan.frags.low_thresh,
-               .maxlen         = sizeof(int),
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = proc_doulongvec_minmax,
                .extra1         = &zero,
                .extra2         = &init_net.ieee802154_lowpan.frags.high_thresh
        },
index b036622..053869f 100644 (file)
@@ -678,23 +678,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 EXPORT_SYMBOL(ip_check_defrag);
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table ip4_frags_ns_ctl_table[] = {
        {
                .procname       = "ipfrag_high_thresh",
                .data           = &init_net.ipv4.frags.high_thresh,
-               .maxlen         = sizeof(int),
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = proc_doulongvec_minmax,
                .extra1         = &init_net.ipv4.frags.low_thresh
        },
        {
                .procname       = "ipfrag_low_thresh",
                .data           = &init_net.ipv4.frags.low_thresh,
-               .maxlen         = sizeof(int),
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = proc_doulongvec_minmax,
                .extra1         = &zero,
                .extra2         = &init_net.ipv4.frags.high_thresh
        },
index aacfce0..a058de6 100644 (file)
@@ -71,7 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
                   sock_prot_inuse_get(net, &udplite_prot));
        seq_printf(seq, "RAW: inuse %d\n",
                   sock_prot_inuse_get(net, &raw_prot));
-       seq_printf(seq,  "FRAG: inuse %u memory %u\n",
+       seq_printf(seq,  "FRAG: inuse %u memory %lu\n",
                   atomic_read(&net->ipv4.frags.rhashtable.nelems),
                   frag_mem_limit(&net->ipv4.frags));
        return 0;
index d866412..603a395 100644 (file)
@@ -63,7 +63,7 @@ struct nf_ct_frag6_skb_cb
 static struct inet_frags nf_frags;
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table nf_ct_frag6_sysctl_table[] = {
        {
@@ -76,18 +76,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
        {
                .procname       = "nf_conntrack_frag6_low_thresh",
                .data           = &init_net.nf_frag.frags.low_thresh,
-               .maxlen         = sizeof(unsigned int),
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = proc_doulongvec_minmax,
                .extra1         = &zero,
                .extra2         = &init_net.nf_frag.frags.high_thresh
        },
        {
                .procname       = "nf_conntrack_frag6_high_thresh",
                .data           = &init_net.nf_frag.frags.high_thresh,
-               .maxlen         = sizeof(unsigned int),
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = proc_doulongvec_minmax,
                .extra1         = &init_net.nf_frag.frags.low_thresh
        },
        { }
index 8befeb9..a85f7e0 100644 (file)
@@ -47,7 +47,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
                        sock_prot_inuse_get(net, &udplitev6_prot));
        seq_printf(seq, "RAW6: inuse %d\n",
                       sock_prot_inuse_get(net, &rawv6_prot));
-       seq_printf(seq, "FRAG6: inuse %u memory %u\n",
+       seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
                   atomic_read(&net->ipv6.frags.rhashtable.nelems),
                   frag_mem_limit(&net->ipv6.frags));
        return 0;
index 2a77fda..905a8ae 100644 (file)
@@ -552,15 +552,15 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
        {
                .procname       = "ip6frag_high_thresh",
                .data           = &init_net.ipv6.frags.high_thresh,
-               .maxlen         = sizeof(int),
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = proc_doulongvec_minmax,
                .extra1         = &init_net.ipv6.frags.low_thresh
        },
        {
                .procname       = "ip6frag_low_thresh",
                .data           = &init_net.ipv6.frags.low_thresh,
-               .maxlen         = sizeof(int),
+               .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &zero,