Merge tag 'mips_fixes_4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan...
[linux-2.6-microblaze.git] / net / ipv4 / sysctl_net_ipv4.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
4  *
5  * Begun April 1, 1996, Mike Shaver.
6  * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
7  */
8
9 #include <linux/mm.h>
10 #include <linux/module.h>
11 #include <linux/sysctl.h>
12 #include <linux/igmp.h>
13 #include <linux/inetdevice.h>
14 #include <linux/seqlock.h>
15 #include <linux/init.h>
16 #include <linux/slab.h>
17 #include <linux/nsproxy.h>
18 #include <linux/swap.h>
19 #include <net/snmp.h>
20 #include <net/icmp.h>
21 #include <net/ip.h>
22 #include <net/route.h>
23 #include <net/tcp.h>
24 #include <net/udp.h>
25 #include <net/cipso_ipv4.h>
26 #include <net/inet_frag.h>
27 #include <net/ping.h>
28 #include <net/protocol.h>
29
30 static int zero;
31 static int one = 1;
32 static int four = 4;
33 static int thousand = 1000;
34 static int gso_max_segs = GSO_MAX_SEGS;
35 static int tcp_retr1_max = 255;
36 static int ip_local_port_range_min[] = { 1, 1 };
37 static int ip_local_port_range_max[] = { 65535, 65535 };
38 static int tcp_adv_win_scale_min = -31;
39 static int tcp_adv_win_scale_max = 31;
40 static int ip_privileged_port_min;
41 static int ip_privileged_port_max = 65535;
42 static int ip_ttl_min = 1;
43 static int ip_ttl_max = 255;
44 static int tcp_syn_retries_min = 1;
45 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
46 static int ip_ping_group_range_min[] = { 0, 0 };
47 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
48
49 /* obsolete */
50 static int sysctl_tcp_low_latency __read_mostly;
51
52 /* Update system visible IP port range */
53 static void set_local_port_range(struct net *net, int range[2])
54 {
55         bool same_parity = !((range[0] ^ range[1]) & 1);
56
57         write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
58         if (same_parity && !net->ipv4.ip_local_ports.warned) {
59                 net->ipv4.ip_local_ports.warned = true;
60                 pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
61         }
62         net->ipv4.ip_local_ports.range[0] = range[0];
63         net->ipv4.ip_local_ports.range[1] = range[1];
64         write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
65 }
66
67 /* Validate changes from /proc interface. */
68 static int ipv4_local_port_range(struct ctl_table *table, int write,
69                                  void __user *buffer,
70                                  size_t *lenp, loff_t *ppos)
71 {
72         struct net *net =
73                 container_of(table->data, struct net, ipv4.ip_local_ports.range);
74         int ret;
75         int range[2];
76         struct ctl_table tmp = {
77                 .data = &range,
78                 .maxlen = sizeof(range),
79                 .mode = table->mode,
80                 .extra1 = &ip_local_port_range_min,
81                 .extra2 = &ip_local_port_range_max,
82         };
83
84         inet_get_local_port_range(net, &range[0], &range[1]);
85
86         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
87
88         if (write && ret == 0) {
89                 /* Ensure that the upper limit is not smaller than the lower,
90                  * and that the lower does not encroach upon the privileged
91                  * port limit.
92                  */
93                 if ((range[1] < range[0]) ||
94                     (range[0] < net->ipv4.sysctl_ip_prot_sock))
95                         ret = -EINVAL;
96                 else
97                         set_local_port_range(net, range);
98         }
99
100         return ret;
101 }
102
103 /* Validate changes from /proc interface. */
104 static int ipv4_privileged_ports(struct ctl_table *table, int write,
105                                 void __user *buffer, size_t *lenp, loff_t *ppos)
106 {
107         struct net *net = container_of(table->data, struct net,
108             ipv4.sysctl_ip_prot_sock);
109         int ret;
110         int pports;
111         int range[2];
112         struct ctl_table tmp = {
113                 .data = &pports,
114                 .maxlen = sizeof(pports),
115                 .mode = table->mode,
116                 .extra1 = &ip_privileged_port_min,
117                 .extra2 = &ip_privileged_port_max,
118         };
119
120         pports = net->ipv4.sysctl_ip_prot_sock;
121
122         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
123
124         if (write && ret == 0) {
125                 inet_get_local_port_range(net, &range[0], &range[1]);
126                 /* Ensure that the local port range doesn't overlap with the
127                  * privileged port range.
128                  */
129                 if (range[0] < pports)
130                         ret = -EINVAL;
131                 else
132                         net->ipv4.sysctl_ip_prot_sock = pports;
133         }
134
135         return ret;
136 }
137
138 static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
139 {
140         kgid_t *data = table->data;
141         struct net *net =
142                 container_of(table->data, struct net, ipv4.ping_group_range.range);
143         unsigned int seq;
144         do {
145                 seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
146
147                 *low = data[0];
148                 *high = data[1];
149         } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
150 }
151
152 /* Update system visible IP port range */
153 static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
154 {
155         kgid_t *data = table->data;
156         struct net *net =
157                 container_of(table->data, struct net, ipv4.ping_group_range.range);
158         write_seqlock(&net->ipv4.ping_group_range.lock);
159         data[0] = low;
160         data[1] = high;
161         write_sequnlock(&net->ipv4.ping_group_range.lock);
162 }
163
164 /* Validate changes from /proc interface. */
165 static int ipv4_ping_group_range(struct ctl_table *table, int write,
166                                  void __user *buffer,
167                                  size_t *lenp, loff_t *ppos)
168 {
169         struct user_namespace *user_ns = current_user_ns();
170         int ret;
171         gid_t urange[2];
172         kgid_t low, high;
173         struct ctl_table tmp = {
174                 .data = &urange,
175                 .maxlen = sizeof(urange),
176                 .mode = table->mode,
177                 .extra1 = &ip_ping_group_range_min,
178                 .extra2 = &ip_ping_group_range_max,
179         };
180
181         inet_get_ping_group_range_table(table, &low, &high);
182         urange[0] = from_kgid_munged(user_ns, low);
183         urange[1] = from_kgid_munged(user_ns, high);
184         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
185
186         if (write && ret == 0) {
187                 low = make_kgid(user_ns, urange[0]);
188                 high = make_kgid(user_ns, urange[1]);
189                 if (!gid_valid(low) || !gid_valid(high) ||
190                     (urange[1] < urange[0]) || gid_lt(high, low)) {
191                         low = make_kgid(&init_user_ns, 1);
192                         high = make_kgid(&init_user_ns, 0);
193                 }
194                 set_ping_group_range(table, low, high);
195         }
196
197         return ret;
198 }
199
200 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
201                                        void __user *buffer, size_t *lenp, loff_t *ppos)
202 {
203         char val[TCP_CA_NAME_MAX];
204         struct ctl_table tbl = {
205                 .data = val,
206                 .maxlen = TCP_CA_NAME_MAX,
207         };
208         int ret;
209
210         tcp_get_default_congestion_control(val);
211
212         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
213         if (write && ret == 0)
214                 ret = tcp_set_default_congestion_control(val);
215         return ret;
216 }
217
218 static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
219                                                  int write,
220                                                  void __user *buffer, size_t *lenp,
221                                                  loff_t *ppos)
222 {
223         struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
224         int ret;
225
226         tbl.data = kmalloc(tbl.maxlen, GFP_USER);
227         if (!tbl.data)
228                 return -ENOMEM;
229         tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
230         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
231         kfree(tbl.data);
232         return ret;
233 }
234
235 static int proc_allowed_congestion_control(struct ctl_table *ctl,
236                                            int write,
237                                            void __user *buffer, size_t *lenp,
238                                            loff_t *ppos)
239 {
240         struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
241         int ret;
242
243         tbl.data = kmalloc(tbl.maxlen, GFP_USER);
244         if (!tbl.data)
245                 return -ENOMEM;
246
247         tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
248         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
249         if (write && ret == 0)
250                 ret = tcp_set_allowed_congestion_control(tbl.data);
251         kfree(tbl.data);
252         return ret;
253 }
254
255 static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
256                                  void __user *buffer, size_t *lenp,
257                                  loff_t *ppos)
258 {
259         struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
260         struct tcp_fastopen_context *ctxt;
261         int ret;
262         u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
263
264         tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
265         if (!tbl.data)
266                 return -ENOMEM;
267
268         rcu_read_lock();
269         ctxt = rcu_dereference(tcp_fastopen_ctx);
270         if (ctxt)
271                 memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
272         else
273                 memset(user_key, 0, sizeof(user_key));
274         rcu_read_unlock();
275
276         snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
277                 user_key[0], user_key[1], user_key[2], user_key[3]);
278         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
279
280         if (write && ret == 0) {
281                 if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
282                            user_key + 2, user_key + 3) != 4) {
283                         ret = -EINVAL;
284                         goto bad_key;
285                 }
286                 /* Generate a dummy secret but don't publish it. This
287                  * is needed so we don't regenerate a new key on the
288                  * first invocation of tcp_fastopen_cookie_gen
289                  */
290                 tcp_fastopen_init_key_once(false);
291                 tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
292         }
293
294 bad_key:
295         pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
296                user_key[0], user_key[1], user_key[2], user_key[3],
297                (char *)tbl.data, ret);
298         kfree(tbl.data);
299         return ret;
300 }
301
302 static void proc_configure_early_demux(int enabled, int protocol)
303 {
304         struct net_protocol *ipprot;
305 #if IS_ENABLED(CONFIG_IPV6)
306         struct inet6_protocol *ip6prot;
307 #endif
308
309         rcu_read_lock();
310
311         ipprot = rcu_dereference(inet_protos[protocol]);
312         if (ipprot)
313                 ipprot->early_demux = enabled ? ipprot->early_demux_handler :
314                                                 NULL;
315
316 #if IS_ENABLED(CONFIG_IPV6)
317         ip6prot = rcu_dereference(inet6_protos[protocol]);
318         if (ip6prot)
319                 ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
320                                                  NULL;
321 #endif
322         rcu_read_unlock();
323 }
324
325 static int proc_tcp_early_demux(struct ctl_table *table, int write,
326                                 void __user *buffer, size_t *lenp, loff_t *ppos)
327 {
328         int ret = 0;
329
330         ret = proc_dointvec(table, write, buffer, lenp, ppos);
331
332         if (write && !ret) {
333                 int enabled = init_net.ipv4.sysctl_tcp_early_demux;
334
335                 proc_configure_early_demux(enabled, IPPROTO_TCP);
336         }
337
338         return ret;
339 }
340
341 static int proc_udp_early_demux(struct ctl_table *table, int write,
342                                 void __user *buffer, size_t *lenp, loff_t *ppos)
343 {
344         int ret = 0;
345
346         ret = proc_dointvec(table, write, buffer, lenp, ppos);
347
348         if (write && !ret) {
349                 int enabled = init_net.ipv4.sysctl_udp_early_demux;
350
351                 proc_configure_early_demux(enabled, IPPROTO_UDP);
352         }
353
354         return ret;
355 }
356
357 static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
358                                              int write,
359                                              void __user *buffer,
360                                              size_t *lenp, loff_t *ppos)
361 {
362         int ret;
363
364         ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
365         if (write && ret == 0)
366                 tcp_fastopen_active_timeout_reset();
367
368         return ret;
369 }
370
371 static int proc_tcp_available_ulp(struct ctl_table *ctl,
372                                   int write,
373                                   void __user *buffer, size_t *lenp,
374                                   loff_t *ppos)
375 {
376         struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, };
377         int ret;
378
379         tbl.data = kmalloc(tbl.maxlen, GFP_USER);
380         if (!tbl.data)
381                 return -ENOMEM;
382         tcp_get_available_ulp(tbl.data, TCP_ULP_BUF_MAX);
383         ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
384         kfree(tbl.data);
385
386         return ret;
387 }
388
389 static struct ctl_table ipv4_table[] = {
390         {
391                 .procname       = "tcp_retrans_collapse",
392                 .data           = &sysctl_tcp_retrans_collapse,
393                 .maxlen         = sizeof(int),
394                 .mode           = 0644,
395                 .proc_handler   = proc_dointvec
396         },
397         {
398                 .procname       = "tcp_max_orphans",
399                 .data           = &sysctl_tcp_max_orphans,
400                 .maxlen         = sizeof(int),
401                 .mode           = 0644,
402                 .proc_handler   = proc_dointvec
403         },
404         {
405                 .procname       = "tcp_fastopen",
406                 .data           = &sysctl_tcp_fastopen,
407                 .maxlen         = sizeof(int),
408                 .mode           = 0644,
409                 .proc_handler   = proc_dointvec,
410         },
411         {
412                 .procname       = "tcp_fastopen_key",
413                 .mode           = 0600,
414                 .maxlen         = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
415                 .proc_handler   = proc_tcp_fastopen_key,
416         },
417         {
418                 .procname       = "tcp_fastopen_blackhole_timeout_sec",
419                 .data           = &sysctl_tcp_fastopen_blackhole_timeout,
420                 .maxlen         = sizeof(int),
421                 .mode           = 0644,
422                 .proc_handler   = proc_tfo_blackhole_detect_timeout,
423                 .extra1         = &zero,
424         },
425         {
426                 .procname       = "tcp_abort_on_overflow",
427                 .data           = &sysctl_tcp_abort_on_overflow,
428                 .maxlen         = sizeof(int),
429                 .mode           = 0644,
430                 .proc_handler   = proc_dointvec
431         },
432         {
433                 .procname       = "tcp_stdurg",
434                 .data           = &sysctl_tcp_stdurg,
435                 .maxlen         = sizeof(int),
436                 .mode           = 0644,
437                 .proc_handler   = proc_dointvec
438         },
439         {
440                 .procname       = "tcp_rfc1337",
441                 .data           = &sysctl_tcp_rfc1337,
442                 .maxlen         = sizeof(int),
443                 .mode           = 0644,
444                 .proc_handler   = proc_dointvec
445         },
446         {
447                 .procname       = "inet_peer_threshold",
448                 .data           = &inet_peer_threshold,
449                 .maxlen         = sizeof(int),
450                 .mode           = 0644,
451                 .proc_handler   = proc_dointvec
452         },
453         {
454                 .procname       = "inet_peer_minttl",
455                 .data           = &inet_peer_minttl,
456                 .maxlen         = sizeof(int),
457                 .mode           = 0644,
458                 .proc_handler   = proc_dointvec_jiffies,
459         },
460         {
461                 .procname       = "inet_peer_maxttl",
462                 .data           = &inet_peer_maxttl,
463                 .maxlen         = sizeof(int),
464                 .mode           = 0644,
465                 .proc_handler   = proc_dointvec_jiffies,
466         },
467         {
468                 .procname       = "tcp_fack",
469                 .data           = &sysctl_tcp_fack,
470                 .maxlen         = sizeof(int),
471                 .mode           = 0644,
472                 .proc_handler   = proc_dointvec
473         },
474         {
475                 .procname       = "tcp_recovery",
476                 .data           = &sysctl_tcp_recovery,
477                 .maxlen         = sizeof(int),
478                 .mode           = 0644,
479                 .proc_handler   = proc_dointvec,
480         },
481         {
482                 .procname       = "tcp_max_reordering",
483                 .data           = &sysctl_tcp_max_reordering,
484                 .maxlen         = sizeof(int),
485                 .mode           = 0644,
486                 .proc_handler   = proc_dointvec
487         },
488         {
489                 .procname       = "tcp_dsack",
490                 .data           = &sysctl_tcp_dsack,
491                 .maxlen         = sizeof(int),
492                 .mode           = 0644,
493                 .proc_handler   = proc_dointvec
494         },
495         {
496                 .procname       = "tcp_mem",
497                 .maxlen         = sizeof(sysctl_tcp_mem),
498                 .data           = &sysctl_tcp_mem,
499                 .mode           = 0644,
500                 .proc_handler   = proc_doulongvec_minmax,
501         },
502         {
503                 .procname       = "tcp_wmem",
504                 .data           = &sysctl_tcp_wmem,
505                 .maxlen         = sizeof(sysctl_tcp_wmem),
506                 .mode           = 0644,
507                 .proc_handler   = proc_dointvec_minmax,
508                 .extra1         = &one,
509         },
510         {
511                 .procname       = "tcp_rmem",
512                 .data           = &sysctl_tcp_rmem,
513                 .maxlen         = sizeof(sysctl_tcp_rmem),
514                 .mode           = 0644,
515                 .proc_handler   = proc_dointvec_minmax,
516                 .extra1         = &one,
517         },
518         {
519                 .procname       = "tcp_app_win",
520                 .data           = &sysctl_tcp_app_win,
521                 .maxlen         = sizeof(int),
522                 .mode           = 0644,
523                 .proc_handler   = proc_dointvec
524         },
525         {
526                 .procname       = "tcp_adv_win_scale",
527                 .data           = &sysctl_tcp_adv_win_scale,
528                 .maxlen         = sizeof(int),
529                 .mode           = 0644,
530                 .proc_handler   = proc_dointvec_minmax,
531                 .extra1         = &tcp_adv_win_scale_min,
532                 .extra2         = &tcp_adv_win_scale_max,
533         },
534         {
535                 .procname       = "tcp_frto",
536                 .data           = &sysctl_tcp_frto,
537                 .maxlen         = sizeof(int),
538                 .mode           = 0644,
539                 .proc_handler   = proc_dointvec
540         },
541         {
542                 .procname       = "tcp_min_rtt_wlen",
543                 .data           = &sysctl_tcp_min_rtt_wlen,
544                 .maxlen         = sizeof(int),
545                 .mode           = 0644,
546                 .proc_handler   = proc_dointvec
547         },
548         {
549                 .procname       = "tcp_low_latency",
550                 .data           = &sysctl_tcp_low_latency,
551                 .maxlen         = sizeof(int),
552                 .mode           = 0644,
553                 .proc_handler   = proc_dointvec
554         },
555         {
556                 .procname       = "tcp_no_metrics_save",
557                 .data           = &sysctl_tcp_nometrics_save,
558                 .maxlen         = sizeof(int),
559                 .mode           = 0644,
560                 .proc_handler   = proc_dointvec,
561         },
562         {
563                 .procname       = "tcp_moderate_rcvbuf",
564                 .data           = &sysctl_tcp_moderate_rcvbuf,
565                 .maxlen         = sizeof(int),
566                 .mode           = 0644,
567                 .proc_handler   = proc_dointvec,
568         },
569         {
570                 .procname       = "tcp_tso_win_divisor",
571                 .data           = &sysctl_tcp_tso_win_divisor,
572                 .maxlen         = sizeof(int),
573                 .mode           = 0644,
574                 .proc_handler   = proc_dointvec,
575         },
576         {
577                 .procname       = "tcp_congestion_control",
578                 .mode           = 0644,
579                 .maxlen         = TCP_CA_NAME_MAX,
580                 .proc_handler   = proc_tcp_congestion_control,
581         },
582         {
583                 .procname       = "tcp_workaround_signed_windows",
584                 .data           = &sysctl_tcp_workaround_signed_windows,
585                 .maxlen         = sizeof(int),
586                 .mode           = 0644,
587                 .proc_handler   = proc_dointvec
588         },
589         {
590                 .procname       = "tcp_limit_output_bytes",
591                 .data           = &sysctl_tcp_limit_output_bytes,
592                 .maxlen         = sizeof(int),
593                 .mode           = 0644,
594                 .proc_handler   = proc_dointvec
595         },
596         {
597                 .procname       = "tcp_challenge_ack_limit",
598                 .data           = &sysctl_tcp_challenge_ack_limit,
599                 .maxlen         = sizeof(int),
600                 .mode           = 0644,
601                 .proc_handler   = proc_dointvec
602         },
603         {
604                 .procname       = "tcp_slow_start_after_idle",
605                 .data           = &sysctl_tcp_slow_start_after_idle,
606                 .maxlen         = sizeof(int),
607                 .mode           = 0644,
608                 .proc_handler   = proc_dointvec
609         },
610 #ifdef CONFIG_NETLABEL
611         {
612                 .procname       = "cipso_cache_enable",
613                 .data           = &cipso_v4_cache_enabled,
614                 .maxlen         = sizeof(int),
615                 .mode           = 0644,
616                 .proc_handler   = proc_dointvec,
617         },
618         {
619                 .procname       = "cipso_cache_bucket_size",
620                 .data           = &cipso_v4_cache_bucketsize,
621                 .maxlen         = sizeof(int),
622                 .mode           = 0644,
623                 .proc_handler   = proc_dointvec,
624         },
625         {
626                 .procname       = "cipso_rbm_optfmt",
627                 .data           = &cipso_v4_rbm_optfmt,
628                 .maxlen         = sizeof(int),
629                 .mode           = 0644,
630                 .proc_handler   = proc_dointvec,
631         },
632         {
633                 .procname       = "cipso_rbm_strictvalid",
634                 .data           = &cipso_v4_rbm_strictvalid,
635                 .maxlen         = sizeof(int),
636                 .mode           = 0644,
637                 .proc_handler   = proc_dointvec,
638         },
639 #endif /* CONFIG_NETLABEL */
640         {
641                 .procname       = "tcp_available_congestion_control",
642                 .maxlen         = TCP_CA_BUF_MAX,
643                 .mode           = 0444,
644                 .proc_handler   = proc_tcp_available_congestion_control,
645         },
646         {
647                 .procname       = "tcp_allowed_congestion_control",
648                 .maxlen         = TCP_CA_BUF_MAX,
649                 .mode           = 0644,
650                 .proc_handler   = proc_allowed_congestion_control,
651         },
652         {
653                 .procname       = "tcp_thin_linear_timeouts",
654                 .data           = &sysctl_tcp_thin_linear_timeouts,
655                 .maxlen         = sizeof(int),
656                 .mode           = 0644,
657                 .proc_handler   = proc_dointvec
658         },
659         {
660                 .procname       = "tcp_early_retrans",
661                 .data           = &sysctl_tcp_early_retrans,
662                 .maxlen         = sizeof(int),
663                 .mode           = 0644,
664                 .proc_handler   = proc_dointvec_minmax,
665                 .extra1         = &zero,
666                 .extra2         = &four,
667         },
668         {
669                 .procname       = "tcp_min_tso_segs",
670                 .data           = &sysctl_tcp_min_tso_segs,
671                 .maxlen         = sizeof(int),
672                 .mode           = 0644,
673                 .proc_handler   = proc_dointvec_minmax,
674                 .extra1         = &one,
675                 .extra2         = &gso_max_segs,
676         },
677         {
678                 .procname       = "tcp_pacing_ss_ratio",
679                 .data           = &sysctl_tcp_pacing_ss_ratio,
680                 .maxlen         = sizeof(int),
681                 .mode           = 0644,
682                 .proc_handler   = proc_dointvec_minmax,
683                 .extra1         = &zero,
684                 .extra2         = &thousand,
685         },
686         {
687                 .procname       = "tcp_pacing_ca_ratio",
688                 .data           = &sysctl_tcp_pacing_ca_ratio,
689                 .maxlen         = sizeof(int),
690                 .mode           = 0644,
691                 .proc_handler   = proc_dointvec_minmax,
692                 .extra1         = &zero,
693                 .extra2         = &thousand,
694         },
695         {
696                 .procname       = "tcp_autocorking",
697                 .data           = &sysctl_tcp_autocorking,
698                 .maxlen         = sizeof(int),
699                 .mode           = 0644,
700                 .proc_handler   = proc_dointvec_minmax,
701                 .extra1         = &zero,
702                 .extra2         = &one,
703         },
704         {
705                 .procname       = "tcp_invalid_ratelimit",
706                 .data           = &sysctl_tcp_invalid_ratelimit,
707                 .maxlen         = sizeof(int),
708                 .mode           = 0644,
709                 .proc_handler   = proc_dointvec_ms_jiffies,
710         },
711         {
712                 .procname       = "tcp_available_ulp",
713                 .maxlen         = TCP_ULP_BUF_MAX,
714                 .mode           = 0444,
715                 .proc_handler   = proc_tcp_available_ulp,
716         },
717         {
718                 .procname       = "icmp_msgs_per_sec",
719                 .data           = &sysctl_icmp_msgs_per_sec,
720                 .maxlen         = sizeof(int),
721                 .mode           = 0644,
722                 .proc_handler   = proc_dointvec_minmax,
723                 .extra1         = &zero,
724         },
725         {
726                 .procname       = "icmp_msgs_burst",
727                 .data           = &sysctl_icmp_msgs_burst,
728                 .maxlen         = sizeof(int),
729                 .mode           = 0644,
730                 .proc_handler   = proc_dointvec_minmax,
731                 .extra1         = &zero,
732         },
733         {
734                 .procname       = "udp_mem",
735                 .data           = &sysctl_udp_mem,
736                 .maxlen         = sizeof(sysctl_udp_mem),
737                 .mode           = 0644,
738                 .proc_handler   = proc_doulongvec_minmax,
739         },
740         {
741                 .procname       = "udp_rmem_min",
742                 .data           = &sysctl_udp_rmem_min,
743                 .maxlen         = sizeof(sysctl_udp_rmem_min),
744                 .mode           = 0644,
745                 .proc_handler   = proc_dointvec_minmax,
746                 .extra1         = &one
747         },
748         {
749                 .procname       = "udp_wmem_min",
750                 .data           = &sysctl_udp_wmem_min,
751                 .maxlen         = sizeof(sysctl_udp_wmem_min),
752                 .mode           = 0644,
753                 .proc_handler   = proc_dointvec_minmax,
754                 .extra1         = &one
755         },
756         { }
757 };
758
759 static struct ctl_table ipv4_net_table[] = {
760         {
761                 .procname       = "icmp_echo_ignore_all",
762                 .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
763                 .maxlen         = sizeof(int),
764                 .mode           = 0644,
765                 .proc_handler   = proc_dointvec
766         },
767         {
768                 .procname       = "icmp_echo_ignore_broadcasts",
769                 .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
770                 .maxlen         = sizeof(int),
771                 .mode           = 0644,
772                 .proc_handler   = proc_dointvec
773         },
774         {
775                 .procname       = "icmp_ignore_bogus_error_responses",
776                 .data           = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
777                 .maxlen         = sizeof(int),
778                 .mode           = 0644,
779                 .proc_handler   = proc_dointvec
780         },
781         {
782                 .procname       = "icmp_errors_use_inbound_ifaddr",
783                 .data           = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
784                 .maxlen         = sizeof(int),
785                 .mode           = 0644,
786                 .proc_handler   = proc_dointvec
787         },
788         {
789                 .procname       = "icmp_ratelimit",
790                 .data           = &init_net.ipv4.sysctl_icmp_ratelimit,
791                 .maxlen         = sizeof(int),
792                 .mode           = 0644,
793                 .proc_handler   = proc_dointvec_ms_jiffies,
794         },
795         {
796                 .procname       = "icmp_ratemask",
797                 .data           = &init_net.ipv4.sysctl_icmp_ratemask,
798                 .maxlen         = sizeof(int),
799                 .mode           = 0644,
800                 .proc_handler   = proc_dointvec
801         },
802         {
803                 .procname       = "ping_group_range",
804                 .data           = &init_net.ipv4.ping_group_range.range,
805                 .maxlen         = sizeof(gid_t)*2,
806                 .mode           = 0644,
807                 .proc_handler   = ipv4_ping_group_range,
808         },
809         {
810                 .procname       = "tcp_ecn",
811                 .data           = &init_net.ipv4.sysctl_tcp_ecn,
812                 .maxlen         = sizeof(int),
813                 .mode           = 0644,
814                 .proc_handler   = proc_dointvec
815         },
816         {
817                 .procname       = "tcp_ecn_fallback",
818                 .data           = &init_net.ipv4.sysctl_tcp_ecn_fallback,
819                 .maxlen         = sizeof(int),
820                 .mode           = 0644,
821                 .proc_handler   = proc_dointvec
822         },
823         {
824                 .procname       = "ip_dynaddr",
825                 .data           = &init_net.ipv4.sysctl_ip_dynaddr,
826                 .maxlen         = sizeof(int),
827                 .mode           = 0644,
828                 .proc_handler   = proc_dointvec
829         },
830         {
831                 .procname       = "ip_early_demux",
832                 .data           = &init_net.ipv4.sysctl_ip_early_demux,
833                 .maxlen         = sizeof(int),
834                 .mode           = 0644,
835                 .proc_handler   = proc_dointvec
836         },
837         {
838                 .procname       = "udp_early_demux",
839                 .data           = &init_net.ipv4.sysctl_udp_early_demux,
840                 .maxlen         = sizeof(int),
841                 .mode           = 0644,
842                 .proc_handler   = proc_udp_early_demux
843         },
844         {
845                 .procname       = "tcp_early_demux",
846                 .data           = &init_net.ipv4.sysctl_tcp_early_demux,
847                 .maxlen         = sizeof(int),
848                 .mode           = 0644,
849                 .proc_handler   = proc_tcp_early_demux
850         },
851         {
852                 .procname       = "ip_default_ttl",
853                 .data           = &init_net.ipv4.sysctl_ip_default_ttl,
854                 .maxlen         = sizeof(int),
855                 .mode           = 0644,
856                 .proc_handler   = proc_dointvec_minmax,
857                 .extra1         = &ip_ttl_min,
858                 .extra2         = &ip_ttl_max,
859         },
860         {
861                 .procname       = "ip_local_port_range",
862                 .maxlen         = sizeof(init_net.ipv4.ip_local_ports.range),
863                 .data           = &init_net.ipv4.ip_local_ports.range,
864                 .mode           = 0644,
865                 .proc_handler   = ipv4_local_port_range,
866         },
867         {
868                 .procname       = "ip_local_reserved_ports",
869                 .data           = &init_net.ipv4.sysctl_local_reserved_ports,
870                 .maxlen         = 65536,
871                 .mode           = 0644,
872                 .proc_handler   = proc_do_large_bitmap,
873         },
874         {
875                 .procname       = "ip_no_pmtu_disc",
876                 .data           = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
877                 .maxlen         = sizeof(int),
878                 .mode           = 0644,
879                 .proc_handler   = proc_dointvec
880         },
881         {
882                 .procname       = "ip_forward_use_pmtu",
883                 .data           = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
884                 .maxlen         = sizeof(int),
885                 .mode           = 0644,
886                 .proc_handler   = proc_dointvec,
887         },
888         {
889                 .procname       = "ip_nonlocal_bind",
890                 .data           = &init_net.ipv4.sysctl_ip_nonlocal_bind,
891                 .maxlen         = sizeof(int),
892                 .mode           = 0644,
893                 .proc_handler   = proc_dointvec
894         },
895         {
896                 .procname       = "fwmark_reflect",
897                 .data           = &init_net.ipv4.sysctl_fwmark_reflect,
898                 .maxlen         = sizeof(int),
899                 .mode           = 0644,
900                 .proc_handler   = proc_dointvec,
901         },
902         {
903                 .procname       = "tcp_fwmark_accept",
904                 .data           = &init_net.ipv4.sysctl_tcp_fwmark_accept,
905                 .maxlen         = sizeof(int),
906                 .mode           = 0644,
907                 .proc_handler   = proc_dointvec,
908         },
909 #ifdef CONFIG_NET_L3_MASTER_DEV
910         {
911                 .procname       = "tcp_l3mdev_accept",
912                 .data           = &init_net.ipv4.sysctl_tcp_l3mdev_accept,
913                 .maxlen         = sizeof(int),
914                 .mode           = 0644,
915                 .proc_handler   = proc_dointvec_minmax,
916                 .extra1         = &zero,
917                 .extra2         = &one,
918         },
919 #endif
920         {
921                 .procname       = "tcp_mtu_probing",
922                 .data           = &init_net.ipv4.sysctl_tcp_mtu_probing,
923                 .maxlen         = sizeof(int),
924                 .mode           = 0644,
925                 .proc_handler   = proc_dointvec,
926         },
927         {
928                 .procname       = "tcp_base_mss",
929                 .data           = &init_net.ipv4.sysctl_tcp_base_mss,
930                 .maxlen         = sizeof(int),
931                 .mode           = 0644,
932                 .proc_handler   = proc_dointvec,
933         },
934         {
935                 .procname       = "tcp_probe_threshold",
936                 .data           = &init_net.ipv4.sysctl_tcp_probe_threshold,
937                 .maxlen         = sizeof(int),
938                 .mode           = 0644,
939                 .proc_handler   = proc_dointvec,
940         },
941         {
942                 .procname       = "tcp_probe_interval",
943                 .data           = &init_net.ipv4.sysctl_tcp_probe_interval,
944                 .maxlen         = sizeof(int),
945                 .mode           = 0644,
946                 .proc_handler   = proc_dointvec,
947         },
948         {
949                 .procname       = "igmp_link_local_mcast_reports",
950                 .data           = &init_net.ipv4.sysctl_igmp_llm_reports,
951                 .maxlen         = sizeof(int),
952                 .mode           = 0644,
953                 .proc_handler   = proc_dointvec
954         },
955         {
956                 .procname       = "igmp_max_memberships",
957                 .data           = &init_net.ipv4.sysctl_igmp_max_memberships,
958                 .maxlen         = sizeof(int),
959                 .mode           = 0644,
960                 .proc_handler   = proc_dointvec
961         },
962         {
963                 .procname       = "igmp_max_msf",
964                 .data           = &init_net.ipv4.sysctl_igmp_max_msf,
965                 .maxlen         = sizeof(int),
966                 .mode           = 0644,
967                 .proc_handler   = proc_dointvec
968         },
969 #ifdef CONFIG_IP_MULTICAST
970         {
971                 .procname       = "igmp_qrv",
972                 .data           = &init_net.ipv4.sysctl_igmp_qrv,
973                 .maxlen         = sizeof(int),
974                 .mode           = 0644,
975                 .proc_handler   = proc_dointvec_minmax,
976                 .extra1         = &one
977         },
978 #endif
979         {
980                 .procname       = "tcp_keepalive_time",
981                 .data           = &init_net.ipv4.sysctl_tcp_keepalive_time,
982                 .maxlen         = sizeof(int),
983                 .mode           = 0644,
984                 .proc_handler   = proc_dointvec_jiffies,
985         },
986         {
987                 .procname       = "tcp_keepalive_probes",
988                 .data           = &init_net.ipv4.sysctl_tcp_keepalive_probes,
989                 .maxlen         = sizeof(int),
990                 .mode           = 0644,
991                 .proc_handler   = proc_dointvec
992         },
993         {
994                 .procname       = "tcp_keepalive_intvl",
995                 .data           = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
996                 .maxlen         = sizeof(int),
997                 .mode           = 0644,
998                 .proc_handler   = proc_dointvec_jiffies,
999         },
1000         {
1001                 .procname       = "tcp_syn_retries",
1002                 .data           = &init_net.ipv4.sysctl_tcp_syn_retries,
1003                 .maxlen         = sizeof(int),
1004                 .mode           = 0644,
1005                 .proc_handler   = proc_dointvec_minmax,
1006                 .extra1         = &tcp_syn_retries_min,
1007                 .extra2         = &tcp_syn_retries_max
1008         },
1009         {
1010                 .procname       = "tcp_synack_retries",
1011                 .data           = &init_net.ipv4.sysctl_tcp_synack_retries,
1012                 .maxlen         = sizeof(int),
1013                 .mode           = 0644,
1014                 .proc_handler   = proc_dointvec
1015         },
1016 #ifdef CONFIG_SYN_COOKIES
1017         {
1018                 .procname       = "tcp_syncookies",
1019                 .data           = &init_net.ipv4.sysctl_tcp_syncookies,
1020                 .maxlen         = sizeof(int),
1021                 .mode           = 0644,
1022                 .proc_handler   = proc_dointvec
1023         },
1024 #endif
1025         {
1026                 .procname       = "tcp_reordering",
1027                 .data           = &init_net.ipv4.sysctl_tcp_reordering,
1028                 .maxlen         = sizeof(int),
1029                 .mode           = 0644,
1030                 .proc_handler   = proc_dointvec
1031         },
1032         {
1033                 .procname       = "tcp_retries1",
1034                 .data           = &init_net.ipv4.sysctl_tcp_retries1,
1035                 .maxlen         = sizeof(int),
1036                 .mode           = 0644,
1037                 .proc_handler   = proc_dointvec_minmax,
1038                 .extra2         = &tcp_retr1_max
1039         },
1040         {
1041                 .procname       = "tcp_retries2",
1042                 .data           = &init_net.ipv4.sysctl_tcp_retries2,
1043                 .maxlen         = sizeof(int),
1044                 .mode           = 0644,
1045                 .proc_handler   = proc_dointvec
1046         },
1047         {
1048                 .procname       = "tcp_orphan_retries",
1049                 .data           = &init_net.ipv4.sysctl_tcp_orphan_retries,
1050                 .maxlen         = sizeof(int),
1051                 .mode           = 0644,
1052                 .proc_handler   = proc_dointvec
1053         },
1054         {
1055                 .procname       = "tcp_fin_timeout",
1056                 .data           = &init_net.ipv4.sysctl_tcp_fin_timeout,
1057                 .maxlen         = sizeof(int),
1058                 .mode           = 0644,
1059                 .proc_handler   = proc_dointvec_jiffies,
1060         },
1061         {
1062                 .procname       = "tcp_notsent_lowat",
1063                 .data           = &init_net.ipv4.sysctl_tcp_notsent_lowat,
1064                 .maxlen         = sizeof(unsigned int),
1065                 .mode           = 0644,
1066                 .proc_handler   = proc_douintvec,
1067         },
1068         {
1069                 .procname       = "tcp_tw_reuse",
1070                 .data           = &init_net.ipv4.sysctl_tcp_tw_reuse,
1071                 .maxlen         = sizeof(int),
1072                 .mode           = 0644,
1073                 .proc_handler   = proc_dointvec
1074         },
1075         {
1076                 .procname       = "tcp_max_tw_buckets",
1077                 .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
1078                 .maxlen         = sizeof(int),
1079                 .mode           = 0644,
1080                 .proc_handler   = proc_dointvec
1081         },
1082         {
1083                 .procname       = "tcp_max_syn_backlog",
1084                 .data           = &init_net.ipv4.sysctl_max_syn_backlog,
1085                 .maxlen         = sizeof(int),
1086                 .mode           = 0644,
1087                 .proc_handler   = proc_dointvec
1088         },
1089 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1090         {
1091                 .procname       = "fib_multipath_use_neigh",
1092                 .data           = &init_net.ipv4.sysctl_fib_multipath_use_neigh,
1093                 .maxlen         = sizeof(int),
1094                 .mode           = 0644,
1095                 .proc_handler   = proc_dointvec_minmax,
1096                 .extra1         = &zero,
1097                 .extra2         = &one,
1098         },
1099         {
1100                 .procname       = "fib_multipath_hash_policy",
1101                 .data           = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
1102                 .maxlen         = sizeof(int),
1103                 .mode           = 0644,
1104                 .proc_handler   = proc_dointvec_minmax,
1105                 .extra1         = &zero,
1106                 .extra2         = &one,
1107         },
1108 #endif
1109         {
1110                 .procname       = "ip_unprivileged_port_start",
1111                 .maxlen         = sizeof(int),
1112                 .data           = &init_net.ipv4.sysctl_ip_prot_sock,
1113                 .mode           = 0644,
1114                 .proc_handler   = ipv4_privileged_ports,
1115         },
1116 #ifdef CONFIG_NET_L3_MASTER_DEV
1117         {
1118                 .procname       = "udp_l3mdev_accept",
1119                 .data           = &init_net.ipv4.sysctl_udp_l3mdev_accept,
1120                 .maxlen         = sizeof(int),
1121                 .mode           = 0644,
1122                 .proc_handler   = proc_dointvec_minmax,
1123                 .extra1         = &zero,
1124                 .extra2         = &one,
1125         },
1126 #endif
1127         {
1128                 .procname       = "tcp_sack",
1129                 .data           = &init_net.ipv4.sysctl_tcp_sack,
1130                 .maxlen         = sizeof(int),
1131                 .mode           = 0644,
1132                 .proc_handler   = proc_dointvec
1133         },
1134         {
1135                 .procname       = "tcp_window_scaling",
1136                 .data           = &init_net.ipv4.sysctl_tcp_window_scaling,
1137                 .maxlen         = sizeof(int),
1138                 .mode           = 0644,
1139                 .proc_handler   = proc_dointvec
1140         },
1141         {
1142                 .procname       = "tcp_timestamps",
1143                 .data           = &init_net.ipv4.sysctl_tcp_timestamps,
1144                 .maxlen         = sizeof(int),
1145                 .mode           = 0644,
1146                 .proc_handler   = proc_dointvec
1147         },
1148         { }
1149 };
1150
1151 static __net_init int ipv4_sysctl_init_net(struct net *net)
1152 {
1153         struct ctl_table *table;
1154
1155         table = ipv4_net_table;
1156         if (!net_eq(net, &init_net)) {
1157                 int i;
1158
1159                 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
1160                 if (!table)
1161                         goto err_alloc;
1162
1163                 /* Update the variables to point into the current struct net */
1164                 for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
1165                         table[i].data += (void *)net - (void *)&init_net;
1166         }
1167
1168         net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
1169         if (!net->ipv4.ipv4_hdr)
1170                 goto err_reg;
1171
1172         net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
1173         if (!net->ipv4.sysctl_local_reserved_ports)
1174                 goto err_ports;
1175
1176         return 0;
1177
1178 err_ports:
1179         unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1180 err_reg:
1181         if (!net_eq(net, &init_net))
1182                 kfree(table);
1183 err_alloc:
1184         return -ENOMEM;
1185 }
1186
1187 static __net_exit void ipv4_sysctl_exit_net(struct net *net)
1188 {
1189         struct ctl_table *table;
1190
1191         kfree(net->ipv4.sysctl_local_reserved_ports);
1192         table = net->ipv4.ipv4_hdr->ctl_table_arg;
1193         unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1194         kfree(table);
1195 }
1196
1197 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
1198         .init = ipv4_sysctl_init_net,
1199         .exit = ipv4_sysctl_exit_net,
1200 };
1201
1202 static __init int sysctl_ipv4_init(void)
1203 {
1204         struct ctl_table_header *hdr;
1205
1206         hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
1207         if (!hdr)
1208                 return -ENOMEM;
1209
1210         if (register_pernet_subsys(&ipv4_sysctl_ops)) {
1211                 unregister_net_sysctl_table(hdr);
1212                 return -ENOMEM;
1213         }
1214
1215         return 0;
1216 }
1217
1218 __initcall(sysctl_ipv4_init);