netfilter: rpfilter/fib: Set ->flowic_uid correctly for user namespaces.
[linux-2.6-microblaze.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
/*
 * Address flags that only make sense for IPv6 addresses.
 * __inet_insert_ifa() clears every one of them from an IPv4 address
 * before linking it into the device list.
 */
#define IPV6ONLY_FLAGS                  \
        (IFA_F_NODAD            |       \
         IFA_F_OPTIMISTIC       |       \
         IFA_F_DADFAILED        |       \
         IFA_F_HOMEADDRESS      |       \
         IFA_F_TENTATIVE        |       \
         IFA_F_MANAGETEMPADDR   |       \
         IFA_F_STABLE_PRIVACY)
69
/*
 * File-scope IPv4 devconf instance.  Slots of .data[] are addressed as
 * IPV4_DEVCONF_<NAME> - 1; entries not listed here stay zero because the
 * object has static storage.  Unlike ipv4_devconf_dflt, this table does
 * not enable ACCEPT_SOURCE_ROUTE.
 * NOTE(review): the code that consumes this instance is outside the visible
 * part of the file - confirm its role there.
 */
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79         },
80 };
81
/*
 * File-scope "default" IPv4 devconf instance.  Same layout as ipv4_devconf
 * (slots addressed as IPV4_DEVCONF_<NAME> - 1, unlisted slots zero), with
 * ACCEPT_SOURCE_ROUTE additionally enabled.
 * NOTE(review): inetdev_init() copies net->ipv4.devconf_dflt into each new
 * in_device; presumably that pointer starts out referring to this object or
 * a copy of it - confirm in the namespace init code, which is not visible
 * here.
 */
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83         .data = {
84                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92         },
93 };
94
/*
 * Read devconf attribute @attr from the per-namespace default IPv4 device
 * configuration, net->ipv4.devconf_dflt.
 *
 * @net is parenthesised in the expansion so that any pointer-valued
 * expression (e.g. "p + 1" or "cond ? a : b") can be passed, not only a
 * plain identifier; "->" would otherwise bind to the last operand of the
 * argument expression.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97
/*
 * Netlink attribute policy for IPv4 address messages; it is passed to
 * nlmsg_parse_deprecated() by inet_rtm_deladdr() and rtm_to_ifaddr().
 * The address attributes, IFA_FLAGS and IFA_RT_PRIORITY are 32-bit values,
 * IFA_LABEL is a string limited to IFNAMSIZ - 1 bytes, and IFA_CACHEINFO
 * is sized by struct ifa_cacheinfo.
 */
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99         [IFA_LOCAL]             = { .type = NLA_U32 },
100         [IFA_ADDRESS]           = { .type = NLA_U32 },
101         [IFA_BROADCAST]         = { .type = NLA_U32 },
102         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
104         [IFA_FLAGS]             = { .type = NLA_U32 },
105         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
106         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
107         [IFA_PROTO]             = { .type = NLA_U8 },
108 };
109
/*
 * Parameter bundle for building an address netlink message.
 * NOTE(review): the functions that consume this struct are outside the
 * visible part of the file.  Judging by the field names it carries the
 * netlink port id and sequence number, the RTM_* event, message flags, a
 * target netns id and an interface index filter - confirm at the users.
 */
110 struct inet_fill_args {
111         u32 portid;
112         u32 seq;
113         int event;
114         unsigned int flags;
115         int netnsid;
116         int ifindex;
117 };
118
/* The address hash table has 1 << IN4_ADDR_HSIZE_SHIFT (256) buckets. */
119 #define IN4_ADDR_HSIZE_SHIFT    8
120 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
121
/*
 * Hash table of configured IPv4 addresses.  The bucket is chosen by
 * inet_addr_hash() from the local address and the owning namespace, and
 * entries are linked through in_ifaddr::hash.  inet_hash_insert() and
 * inet_hash_remove() modify it and assert RTNL; inet_lookup_ifaddr_rcu()
 * walks it with the RCU list iterator.
 */
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126         u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133         u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135         ASSERT_RTNL();
136         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138
/*
 * Remove @ifa from the address hash table using the RCU-aware
 * hlist_del_init_rcu().  RTNL must be held (asserted).
 */
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141         ASSERT_RTNL();
142         hlist_del_init_rcu(&ifa->hash);
143 }
144
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155         struct net_device *result = NULL;
156         struct in_ifaddr *ifa;
157
158         rcu_read_lock();
159         ifa = inet_lookup_ifaddr_rcu(net, addr);
160         if (!ifa) {
161                 struct flowi4 fl4 = { .daddr = addr };
162                 struct fib_result res = { 0 };
163                 struct fib_table *local;
164
165                 /* Fallback to FIB local table so that communication
166                  * over loopback subnets work.
167                  */
168                 local = fib_get_table(net, RT_TABLE_LOCAL);
169                 if (local &&
170                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171                     res.type == RTN_LOCAL)
172                         result = FIB_RES_DEV(res);
173         } else {
174                 result = ifa->ifa_dev->dev;
175         }
176         if (result && devref)
177                 dev_hold(result);
178         rcu_read_unlock();
179         return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186         u32 hash = inet_addr_hash(net, addr);
187         struct in_ifaddr *ifa;
188
189         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190                 if (ifa->ifa_local == addr &&
191                     net_eq(dev_net(ifa->ifa_dev->dev), net))
192                         return ifa;
193
194         return NULL;
195 }
196
/* Forward declaration: used by the add/delete paths before its definition. */
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198
/*
 * inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is added, promoted or removed.
 * inetaddr_validator_chain is called by __inet_insert_ifa() with a
 * struct in_validator_info before a new address is committed.
 */
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202                          struct in_ifaddr __rcu **ifap,
203                          int destroy);
/*
 * Without CONFIG_SYSCTL the per-device sysctl hooks collapse to stubs:
 * registration reports success (0) and unregistration does nothing.
 */
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210         return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216
217 /* Locks all the inet devices. */
218
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227         if (ifa->ifa_dev)
228                 in_dev_put(ifa->ifa_dev);
229         kfree(ifa);
230 }
231
/*
 * Queue @ifa for freeing: inet_rcu_free_ifa() is invoked on it through
 * call_rcu(), so the memory is not released synchronously here.
 */
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236
237 void in_dev_finish_destroy(struct in_device *idev)
238 {
239         struct net_device *dev = idev->dev;
240
241         WARN_ON(idev->ifa_list);
242         WARN_ON(idev->mc_list);
243         kfree(rcu_dereference_protected(idev->mc_hash, 1));
244 #ifdef NET_REFCNT_DEBUG
245         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
246 #endif
247         netdev_put(dev, &idev->dev_tracker);
248         if (!idev->dead)
249                 pr_err("Freeing alive in_device %p\n", idev);
250         else
251                 kfree(idev);
252 }
253 EXPORT_SYMBOL(in_dev_finish_destroy);
254
255 static struct in_device *inetdev_init(struct net_device *dev)
256 {
257         struct in_device *in_dev;
258         int err = -ENOMEM;
259
260         ASSERT_RTNL();
261
262         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
263         if (!in_dev)
264                 goto out;
265         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
266                         sizeof(in_dev->cnf));
267         in_dev->cnf.sysctl = NULL;
268         in_dev->dev = dev;
269         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
270         if (!in_dev->arp_parms)
271                 goto out_kfree;
272         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
273                 dev_disable_lro(dev);
274         /* Reference in_dev->dev */
275         netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
276         /* Account for reference dev->ip_ptr (below) */
277         refcount_set(&in_dev->refcnt, 1);
278
279         err = devinet_sysctl_register(in_dev);
280         if (err) {
281                 in_dev->dead = 1;
282                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
283                 in_dev_put(in_dev);
284                 in_dev = NULL;
285                 goto out;
286         }
287         ip_mc_init_dev(in_dev);
288         if (dev->flags & IFF_UP)
289                 ip_mc_up(in_dev);
290
291         /* we can receive as soon as ip_ptr is set -- do this last */
292         rcu_assign_pointer(dev->ip_ptr, in_dev);
293 out:
294         return in_dev ?: ERR_PTR(err);
295 out_kfree:
296         kfree(in_dev);
297         in_dev = NULL;
298         goto out;
299 }
300
301 static void in_dev_rcu_put(struct rcu_head *head)
302 {
303         struct in_device *idev = container_of(head, struct in_device, rcu_head);
304         in_dev_put(idev);
305 }
306
307 static void inetdev_destroy(struct in_device *in_dev)
308 {
309         struct net_device *dev;
310         struct in_ifaddr *ifa;
311
312         ASSERT_RTNL();
313
314         dev = in_dev->dev;
315
316         in_dev->dead = 1;
317
318         ip_mc_destroy_dev(in_dev);
319
320         while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
321                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
322                 inet_free_ifa(ifa);
323         }
324
325         RCU_INIT_POINTER(dev->ip_ptr, NULL);
326
327         devinet_sysctl_unregister(in_dev);
328         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
329         arp_ifdown(dev);
330
331         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
332 }
333
334 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
335 {
336         const struct in_ifaddr *ifa;
337
338         rcu_read_lock();
339         in_dev_for_each_ifa_rcu(ifa, in_dev) {
340                 if (inet_ifa_match(a, ifa)) {
341                         if (!b || inet_ifa_match(b, ifa)) {
342                                 rcu_read_unlock();
343                                 return 1;
344                         }
345                 }
346         }
347         rcu_read_unlock();
348         return 0;
349 }
350
/*
 * __inet_del_ifa - unlink an address from its device and announce it.
 * @in_dev:  device the address belongs to
 * @ifap:    link (list head or a predecessor's ifa_next) that currently
 *           points at the address to remove
 * @destroy: if non-zero, the removed address is handed to inet_free_ifa()
 * @nlh:     passed through to rtmsg_ifa() for the notifications
 * @portid:  passed through to rtmsg_ifa() for the notifications
 *
 * RTNL must be held (asserted).
 *
 * Unless the device is marked dead, removing a primary address also handles
 * the secondaries of the same subnet (equal mask and inet_ifa_match()):
 * with IN_DEV_PROMOTE_SECONDARIES() unset they are unhashed, unlinked,
 * announced and freed too; with it set, the first such secondary is
 * promoted to primary once the removal has been announced, and
 * fib_add_ifaddr() is called for the matching addresses that follow it.
 */
351 static void __inet_del_ifa(struct in_device *in_dev,
352                            struct in_ifaddr __rcu **ifap,
353                            int destroy, struct nlmsghdr *nlh, u32 portid)
354 {
355         struct in_ifaddr *promote = NULL;
356         struct in_ifaddr *ifa, *ifa1;
357         struct in_ifaddr *last_prim;
358         struct in_ifaddr *prev_prom = NULL;
359         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
360
361         ASSERT_RTNL();
362
            /* ifa1 is the address being removed. */
363         ifa1 = rtnl_dereference(*ifap);
364         last_prim = rtnl_dereference(in_dev->ifa_list);
365         if (in_dev->dead)
366                 goto no_promotions;
367
368         /* 1. Deleting primary ifaddr forces deletion all secondaries
369          * unless alias promotion is set
370          **/
371
372         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
373                 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
374
                    /*
                     * Walk the entries after ifa1.  last_prim ends up as the
                     * last primary seen whose scope is not below ifa1's;
                     * prev_prom is the last entry skipped, i.e. the one in
                     * front of the promotion candidate when the loop breaks.
                     */
375                 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
376                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
377                             ifa1->ifa_scope <= ifa->ifa_scope)
378                                 last_prim = ifa;
379
                            /* Not a secondary of ifa1's subnet: keep it. */
380                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
381                             ifa1->ifa_mask != ifa->ifa_mask ||
382                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
383                                 ifap1 = &ifa->ifa_next;
384                                 prev_prom = ifa;
385                                 continue;
386                         }
387
388                         if (!do_promote) {
                                    /*
                                     * ifap1 is left unchanged so the next
                                     * iteration looks at the successor.
                                     */
389                                 inet_hash_remove(ifa);
390                                 *ifap1 = ifa->ifa_next;
391
392                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
393                                 blocking_notifier_call_chain(&inetaddr_chain,
394                                                 NETDEV_DOWN, ifa);
395                                 inet_free_ifa(ifa);
396                         } else {
397                                 promote = ifa;
398                                 break;
399                         }
400                 }
401         }
402
403         /* On promotion all secondaries from subnet are changing
404          * the primary IP, we must remove all their routes silently
405          * and later to add them back with new prefsrc. Do this
406          * while all addresses are on the device list.
407          */
408         for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
409                 if (ifa1->ifa_mask == ifa->ifa_mask &&
410                     inet_ifa_match(ifa1->ifa_address, ifa))
411                         fib_del_ifaddr(ifa, ifa1);
412         }
413
414 no_promotions:
415         /* 2. Unlink it */
416
417         *ifap = ifa1->ifa_next;
418         inet_hash_remove(ifa1);
419
420         /* 3. Announce address deletion */
421
422         /* Send message first, then call notifier.
423            At first sight, FIB update triggered by notifier
424            will refer to already deleted ifaddr, that could confuse
425            netlink listeners. It is not true: look, gated sees
426            that route deleted and if it still thinks that ifaddr
427            is valid, it will try to restore deleted routes... Grr.
428            So that, this order is correct.
429          */
430         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
431         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
432
433         if (promote) {
434                 struct in_ifaddr *next_sec;
435
436                 next_sec = rtnl_dereference(promote->ifa_next);
                    /*
                     * If entries were skipped in front of the candidate,
                     * splice it out from behind prev_prom and re-link it
                     * directly after last_prim.
                     */
437                 if (prev_prom) {
438                         struct in_ifaddr *last_sec;
439
440                         rcu_assign_pointer(prev_prom->ifa_next, next_sec);
441
442                         last_sec = rtnl_dereference(last_prim->ifa_next);
443                         rcu_assign_pointer(promote->ifa_next, last_sec);
444                         rcu_assign_pointer(last_prim->ifa_next, promote);
445                 }
446
447                 promote->ifa_flags &= ~IFA_F_SECONDARY;
448                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
449                 blocking_notifier_call_chain(&inetaddr_chain,
450                                 NETDEV_UP, promote);
                    /*
                     * Re-add the addresses of the subnet that followed the
                     * promoted one (their routes were removed above).
                     */
451                 for (ifa = next_sec; ifa;
452                      ifa = rtnl_dereference(ifa->ifa_next)) {
453                         if (ifa1->ifa_mask != ifa->ifa_mask ||
454                             !inet_ifa_match(ifa1->ifa_address, ifa))
455                                         continue;
456                         fib_add_ifaddr(ifa);
457                 }
458
459         }
460         if (destroy)
461                 inet_free_ifa(ifa1);
462 }
463
/*
 * Wrapper around __inet_del_ifa() for callers without a netlink request:
 * the notifications are sent with nlh = NULL and portid = 0.
 */
464 static void inet_del_ifa(struct in_device *in_dev,
465                          struct in_ifaddr __rcu **ifap,
466                          int destroy)
467 {
468         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
469 }
470
/*
 * Delayed work item that runs check_lifetime().  It is kicked immediately
 * by __inet_insert_ifa() and re-queued by check_lifetime() itself.
 */
471 static void check_lifetime(struct work_struct *work);
472
473 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
474
475 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
476                              u32 portid, struct netlink_ext_ack *extack)
477 {
478         struct in_ifaddr __rcu **last_primary, **ifap;
479         struct in_device *in_dev = ifa->ifa_dev;
480         struct in_validator_info ivi;
481         struct in_ifaddr *ifa1;
482         int ret;
483
484         ASSERT_RTNL();
485
486         if (!ifa->ifa_local) {
487                 inet_free_ifa(ifa);
488                 return 0;
489         }
490
491         ifa->ifa_flags &= ~IFA_F_SECONDARY;
492         last_primary = &in_dev->ifa_list;
493
494         /* Don't set IPv6 only flags to IPv4 addresses */
495         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
496
497         ifap = &in_dev->ifa_list;
498         ifa1 = rtnl_dereference(*ifap);
499
500         while (ifa1) {
501                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
502                     ifa->ifa_scope <= ifa1->ifa_scope)
503                         last_primary = &ifa1->ifa_next;
504                 if (ifa1->ifa_mask == ifa->ifa_mask &&
505                     inet_ifa_match(ifa1->ifa_address, ifa)) {
506                         if (ifa1->ifa_local == ifa->ifa_local) {
507                                 inet_free_ifa(ifa);
508                                 return -EEXIST;
509                         }
510                         if (ifa1->ifa_scope != ifa->ifa_scope) {
511                                 inet_free_ifa(ifa);
512                                 return -EINVAL;
513                         }
514                         ifa->ifa_flags |= IFA_F_SECONDARY;
515                 }
516
517                 ifap = &ifa1->ifa_next;
518                 ifa1 = rtnl_dereference(*ifap);
519         }
520
521         /* Allow any devices that wish to register ifaddr validtors to weigh
522          * in now, before changes are committed.  The rntl lock is serializing
523          * access here, so the state should not change between a validator call
524          * and a final notify on commit.  This isn't invoked on promotion under
525          * the assumption that validators are checking the address itself, and
526          * not the flags.
527          */
528         ivi.ivi_addr = ifa->ifa_address;
529         ivi.ivi_dev = ifa->ifa_dev;
530         ivi.extack = extack;
531         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
532                                            NETDEV_UP, &ivi);
533         ret = notifier_to_errno(ret);
534         if (ret) {
535                 inet_free_ifa(ifa);
536                 return ret;
537         }
538
539         if (!(ifa->ifa_flags & IFA_F_SECONDARY))
540                 ifap = last_primary;
541
542         rcu_assign_pointer(ifa->ifa_next, *ifap);
543         rcu_assign_pointer(*ifap, ifa);
544
545         inet_hash_insert(dev_net(in_dev->dev), ifa);
546
547         cancel_delayed_work(&check_lifetime_work);
548         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
549
550         /* Send message first, then call notifier.
551            Notifier will trigger FIB update, so that
552            listeners of netlink will know about new ifaddr */
553         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
554         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
555
556         return 0;
557 }
558
/*
 * Wrapper around __inet_insert_ifa() for callers without a netlink request:
 * no nlmsghdr, portid 0 and no extended ack.  Returns its result unchanged.
 */
559 static int inet_insert_ifa(struct in_ifaddr *ifa)
560 {
561         return __inet_insert_ifa(ifa, NULL, 0, NULL);
562 }
563
564 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
565 {
566         struct in_device *in_dev = __in_dev_get_rtnl(dev);
567
568         ASSERT_RTNL();
569
570         if (!in_dev) {
571                 inet_free_ifa(ifa);
572                 return -ENOBUFS;
573         }
574         ipv4_devconf_setall(in_dev);
575         neigh_parms_data_state_setall(in_dev->arp_parms);
576         if (ifa->ifa_dev != in_dev) {
577                 WARN_ON(ifa->ifa_dev);
578                 in_dev_hold(in_dev);
579                 ifa->ifa_dev = in_dev;
580         }
581         if (ipv4_is_loopback(ifa->ifa_local))
582                 ifa->ifa_scope = RT_SCOPE_HOST;
583         return inet_insert_ifa(ifa);
584 }
585
586 /* Caller must hold RCU or RTNL :
587  * We dont take a reference on found in_device
588  */
589 struct in_device *inetdev_by_index(struct net *net, int ifindex)
590 {
591         struct net_device *dev;
592         struct in_device *in_dev = NULL;
593
594         rcu_read_lock();
595         dev = dev_get_by_index_rcu(net, ifindex);
596         if (dev)
597                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
598         rcu_read_unlock();
599         return in_dev;
600 }
601 EXPORT_SYMBOL(inetdev_by_index);
602
603 /* Called only from RTNL semaphored context. No locks. */
604
605 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
606                                     __be32 mask)
607 {
608         struct in_ifaddr *ifa;
609
610         ASSERT_RTNL();
611
612         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
613                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
614                         return ifa;
615         }
616         return NULL;
617 }
618
619 static int ip_mc_autojoin_config(struct net *net, bool join,
620                                  const struct in_ifaddr *ifa)
621 {
622 #if defined(CONFIG_IP_MULTICAST)
623         struct ip_mreqn mreq = {
624                 .imr_multiaddr.s_addr = ifa->ifa_address,
625                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
626         };
627         struct sock *sk = net->ipv4.mc_autojoin_sk;
628         int ret;
629
630         ASSERT_RTNL();
631
632         lock_sock(sk);
633         if (join)
634                 ret = ip_mc_join_group(sk, &mreq);
635         else
636                 ret = ip_mc_leave_group(sk, &mreq);
637         release_sock(sk);
638
639         return ret;
640 #else
641         return -EOPNOTSUPP;
642 #endif
643 }
644
645 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
646                             struct netlink_ext_ack *extack)
647 {
648         struct net *net = sock_net(skb->sk);
649         struct in_ifaddr __rcu **ifap;
650         struct nlattr *tb[IFA_MAX+1];
651         struct in_device *in_dev;
652         struct ifaddrmsg *ifm;
653         struct in_ifaddr *ifa;
654         int err;
655
656         ASSERT_RTNL();
657
658         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
659                                      ifa_ipv4_policy, extack);
660         if (err < 0)
661                 goto errout;
662
663         ifm = nlmsg_data(nlh);
664         in_dev = inetdev_by_index(net, ifm->ifa_index);
665         if (!in_dev) {
666                 err = -ENODEV;
667                 goto errout;
668         }
669
670         for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
671              ifap = &ifa->ifa_next) {
672                 if (tb[IFA_LOCAL] &&
673                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
674                         continue;
675
676                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
677                         continue;
678
679                 if (tb[IFA_ADDRESS] &&
680                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
681                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
682                         continue;
683
684                 if (ipv4_is_multicast(ifa->ifa_address))
685                         ip_mc_autojoin_config(net, false, ifa);
686                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
687                 return 0;
688         }
689
690         err = -EADDRNOTAVAIL;
691 errout:
692         return err;
693 }
694
/*
 * Lifetime value that check_lifetime() treats as "never expires": the
 * corresponding valid/preferred expiry test is skipped for it.
 */
695 #define INFINITY_LIFE_TIME      0xFFFFFFFF
696
/*
 * check_lifetime - work handler that ages non-permanent IPv4 addresses.
 * @work: unused; the handler always operates on check_lifetime_work
 *
 * Each bucket of inet_addr_lst[] is first scanned under rcu_read_lock() to
 * decide whether anything has to change and to track the earliest upcoming
 * deadline in 'next'.  Only when a change is needed is RTNL taken for a
 * second scan of that bucket, which deletes addresses whose valid lifetime
 * has elapsed and marks those whose preferred lifetime has elapsed as
 * IFA_F_DEPRECATED (announced with RTM_NEWADDR).  Finally the work re-queues
 * itself for the computed deadline.
 */
697 static void check_lifetime(struct work_struct *work)
698 {
699         unsigned long now, next, next_sec, next_sched;
700         struct in_ifaddr *ifa;
701         struct hlist_node *n;
702         int i;
703
704         now = jiffies;
705         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
706
707         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
708                 bool change_needed = false;
709
                    /* Pass 1: read-only scan under RCU. */
710                 rcu_read_lock();
711                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
712                         unsigned long age;
713
714                         if (ifa->ifa_flags & IFA_F_PERMANENT)
715                                 continue;
716
717                         /* We try to batch several events at once. */
                            /* age is in seconds (jiffies difference / HZ). */
718                         age = (now - ifa->ifa_tstamp +
719                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
720
721                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
722                             age >= ifa->ifa_valid_lft) {
723                                 change_needed = true;
724                         } else if (ifa->ifa_preferred_lft ==
725                                    INFINITY_LIFE_TIME) {
726                                 continue;
727                         } else if (age >= ifa->ifa_preferred_lft) {
                                    /*
                                     * Preferred lifetime is over: the next
                                     * deadline for this address is the end
                                     * of its valid lifetime.
                                     */
728                                 if (time_before(ifa->ifa_tstamp +
729                                                 ifa->ifa_valid_lft * HZ, next))
730                                         next = ifa->ifa_tstamp +
731                                                ifa->ifa_valid_lft * HZ;
732
733                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
734                                         change_needed = true;
735                         } else if (time_before(ifa->ifa_tstamp +
736                                                ifa->ifa_preferred_lft * HZ,
737                                                next)) {
738                                 next = ifa->ifa_tstamp +
739                                        ifa->ifa_preferred_lft * HZ;
740                         }
741                 }
742                 rcu_read_unlock();
743                 if (!change_needed)
744                         continue;
                    /*
                     * Pass 2: apply the changes under RTNL.  The _safe
                     * iterator is used because entries may be deleted.
                     */
745                 rtnl_lock();
746                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
747                         unsigned long age;
748
749                         if (ifa->ifa_flags & IFA_F_PERMANENT)
750                                 continue;
751
752                         /* We try to batch several events at once. */
753                         age = (now - ifa->ifa_tstamp +
754                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
755
756                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
757                             age >= ifa->ifa_valid_lft) {
758                                 struct in_ifaddr __rcu **ifap;
759                                 struct in_ifaddr *tmp;
760
                                    /*
                                     * inet_del_ifa() needs the link that
                                     * points at ifa, so walk the device's
                                     * address list to find it.
                                     */
761                                 ifap = &ifa->ifa_dev->ifa_list;
762                                 tmp = rtnl_dereference(*ifap);
763                                 while (tmp) {
764                                         if (tmp == ifa) {
765                                                 inet_del_ifa(ifa->ifa_dev,
766                                                              ifap, 1);
767                                                 break;
768                                         }
769                                         ifap = &tmp->ifa_next;
770                                         tmp = rtnl_dereference(*ifap);
771                                 }
772                         } else if (ifa->ifa_preferred_lft !=
773                                    INFINITY_LIFE_TIME &&
774                                    age >= ifa->ifa_preferred_lft &&
775                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
776                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
777                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
778                         }
779                 }
780                 rtnl_unlock();
781         }
782
783         next_sec = round_jiffies_up(next);
784         next_sched = next;
785
786         /* If rounded timeout is accurate enough, accept it. */
787         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
788                 next_sched = next_sec;
789
            /* Re-read jiffies: the scans above may have taken a while. */
790         now = jiffies;
791         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
792         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
793                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
794
795         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
796                         next_sched - now);
797 }
798
799 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
800                              __u32 prefered_lft)
801 {
802         unsigned long timeout;
803
804         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
805
806         timeout = addrconf_timeout_fixup(valid_lft, HZ);
807         if (addrconf_finite_timeout(timeout))
808                 ifa->ifa_valid_lft = timeout;
809         else
810                 ifa->ifa_flags |= IFA_F_PERMANENT;
811
812         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
813         if (addrconf_finite_timeout(timeout)) {
814                 if (timeout == 0)
815                         ifa->ifa_flags |= IFA_F_DEPRECATED;
816                 ifa->ifa_preferred_lft = timeout;
817         }
818         ifa->ifa_tstamp = jiffies;
819         if (!ifa->ifa_cstamp)
820                 ifa->ifa_cstamp = ifa->ifa_tstamp;
821 }
822
823 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
824                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
825                                        struct netlink_ext_ack *extack)
826 {
827         struct nlattr *tb[IFA_MAX+1];
828         struct in_ifaddr *ifa;
829         struct ifaddrmsg *ifm;
830         struct net_device *dev;
831         struct in_device *in_dev;
832         int err;
833
834         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
835                                      ifa_ipv4_policy, extack);
836         if (err < 0)
837                 goto errout;
838
839         ifm = nlmsg_data(nlh);
840         err = -EINVAL;
841         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
842                 goto errout;
843
844         dev = __dev_get_by_index(net, ifm->ifa_index);
845         err = -ENODEV;
846         if (!dev)
847                 goto errout;
848
849         in_dev = __in_dev_get_rtnl(dev);
850         err = -ENOBUFS;
851         if (!in_dev)
852                 goto errout;
853
854         ifa = inet_alloc_ifa();
855         if (!ifa)
856                 /*
857                  * A potential indev allocation can be left alive, it stays
858                  * assigned to its device and is destroy with it.
859                  */
860                 goto errout;
861
862         ipv4_devconf_setall(in_dev);
863         neigh_parms_data_state_setall(in_dev->arp_parms);
864         in_dev_hold(in_dev);
865
866         if (!tb[IFA_ADDRESS])
867                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
868
869         INIT_HLIST_NODE(&ifa->hash);
870         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
871         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
872         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
873                                          ifm->ifa_flags;
874         ifa->ifa_scope = ifm->ifa_scope;
875         ifa->ifa_dev = in_dev;
876
877         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
878         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
879
880         if (tb[IFA_BROADCAST])
881                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
882
883         if (tb[IFA_LABEL])
884                 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
885         else
886                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
887
888         if (tb[IFA_RT_PRIORITY])
889                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
890
891         if (tb[IFA_PROTO])
892                 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
893
894         if (tb[IFA_CACHEINFO]) {
895                 struct ifa_cacheinfo *ci;
896
897                 ci = nla_data(tb[IFA_CACHEINFO]);
898                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
899                         err = -EINVAL;
900                         goto errout_free;
901                 }
902                 *pvalid_lft = ci->ifa_valid;
903                 *pprefered_lft = ci->ifa_prefered;
904         }
905
906         return ifa;
907
908 errout_free:
909         inet_free_ifa(ifa);
910 errout:
911         return ERR_PTR(err);
912 }
913
914 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
915 {
916         struct in_device *in_dev = ifa->ifa_dev;
917         struct in_ifaddr *ifa1;
918
919         if (!ifa->ifa_local)
920                 return NULL;
921
922         in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
923                 if (ifa1->ifa_mask == ifa->ifa_mask &&
924                     inet_ifa_match(ifa1->ifa_address, ifa) &&
925                     ifa1->ifa_local == ifa->ifa_local)
926                         return ifa1;
927         }
928         return NULL;
929 }
930
931 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
932                             struct netlink_ext_ack *extack)
933 {
934         struct net *net = sock_net(skb->sk);
935         struct in_ifaddr *ifa;
936         struct in_ifaddr *ifa_existing;
937         __u32 valid_lft = INFINITY_LIFE_TIME;
938         __u32 prefered_lft = INFINITY_LIFE_TIME;
939
940         ASSERT_RTNL();
941
942         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
943         if (IS_ERR(ifa))
944                 return PTR_ERR(ifa);
945
946         ifa_existing = find_matching_ifa(ifa);
947         if (!ifa_existing) {
948                 /* It would be best to check for !NLM_F_CREATE here but
949                  * userspace already relies on not having to provide this.
950                  */
951                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
952                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
953                         int ret = ip_mc_autojoin_config(net, true, ifa);
954
955                         if (ret < 0) {
956                                 inet_free_ifa(ifa);
957                                 return ret;
958                         }
959                 }
960                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
961                                          extack);
962         } else {
963                 u32 new_metric = ifa->ifa_rt_priority;
964
965                 inet_free_ifa(ifa);
966
967                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
968                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
969                         return -EEXIST;
970                 ifa = ifa_existing;
971
972                 if (ifa->ifa_rt_priority != new_metric) {
973                         fib_modify_prefix_metric(ifa, new_metric);
974                         ifa->ifa_rt_priority = new_metric;
975                 }
976
977                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
978                 cancel_delayed_work(&check_lifetime_work);
979                 queue_delayed_work(system_power_efficient_wq,
980                                 &check_lifetime_work, 0);
981                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
982         }
983         return 0;
984 }
985
986 /*
987  *      Determine a default network mask, based on the IP address.
988  */
989
990 static int inet_abc_len(__be32 addr)
991 {
992         int rc = -1;    /* Something else, probably a multicast. */
993
994         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
995                 rc = 0;
996         else {
997                 __u32 haddr = ntohl(addr);
998                 if (IN_CLASSA(haddr))
999                         rc = 8;
1000                 else if (IN_CLASSB(haddr))
1001                         rc = 16;
1002                 else if (IN_CLASSC(haddr))
1003                         rc = 24;
1004                 else if (IN_CLASSE(haddr))
1005                         rc = 32;
1006         }
1007
1008         return rc;
1009 }
1010
1011
/*
 * Handle the IPv4 interface-address ioctls for the device named in @ifr.
 *
 * @net: network namespace in which the device is looked up
 * @cmd: one of SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK},
 *       SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS;
 *       anything else yields -EINVAL
 * @ifr: request; ifr_name selects the device (optionally as an alias label
 *       containing ':'), ifr_addr / ifr_flags carry the in/out payload
 *
 * The get commands overwrite ifr->ifr_addr with an AF_INET sockaddr holding
 * the requested field.  All set commands require CAP_NET_ADMIN in the
 * namespace's user_ns (-EPERM otherwise); the address set commands also
 * require sin_family == AF_INET.
 *
 * The function takes and releases the RTNL lock itself.
 * Returns 0 on success or a negative errno.
 */
1012 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1013 {
1014         struct sockaddr_in sin_orig;
1015         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1016         struct in_ifaddr __rcu **ifap = NULL;
1017         struct in_device *in_dev;
1018         struct in_ifaddr *ifa = NULL;
1019         struct net_device *dev;
1020         char *colon;
1021         int ret = -EFAULT;
1022         int tryaddrmatch = 0;
1023
1024         ifr->ifr_name[IFNAMSIZ - 1] = 0;
1025
1026         /* save original address for comparison */
1027         memcpy(&sin_orig, sin, sizeof(*sin));
1028
        /* Cut an alias label at the ':' so that the device lookups below
         * only see the part before it; it is restored once the device is
         * found.
         */
1029         colon = strchr(ifr->ifr_name, ':');
1030         if (colon)
1031                 *colon = 0;
1032
1033         dev_load(net, ifr->ifr_name);
1034
1035         switch (cmd) {
1036         case SIOCGIFADDR:       /* Get interface address */
1037         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1038         case SIOCGIFDSTADDR:    /* Get the destination address */
1039         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1040                 /* Note that these ioctls will not sleep,
1041                    so that we do not impose a lock.
1042                    One day we will be forced to put shlock here (I mean SMP)
1043                  */
1044                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1045                 memset(sin, 0, sizeof(*sin));
1046                 sin->sin_family = AF_INET;
1047                 break;
1048
1049         case SIOCSIFFLAGS:
1050                 ret = -EPERM;
1051                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1052                         goto out;
1053                 break;
1054         case SIOCSIFADDR:       /* Set interface address (and family) */
1055         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1056         case SIOCSIFDSTADDR:    /* Set the destination address */
1057         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1058                 ret = -EPERM;
1059                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1060                         goto out;
1061                 ret = -EINVAL;
1062                 if (sin->sin_family != AF_INET)
1063                         goto out;
1064                 break;
1065         default:
1066                 ret = -EINVAL;
1067                 goto out;
1068         }
1069
1070         rtnl_lock();
1071
1072         ret = -ENODEV;
1073         dev = __dev_get_by_name(net, ifr->ifr_name);
1074         if (!dev)
1075                 goto done;
1076
        /* Device found: put the ':' back so the full label can be compared
         * against ifa_label below.
         */
1077         if (colon)
1078                 *colon = ':';
1079
1080         in_dev = __in_dev_get_rtnl(dev);
1081         if (in_dev) {
1082                 if (tryaddrmatch) {
1083                         /* Matthias Andree */
1084                         /* compare label and address (4.4BSD style) */
1085                         /* note: we only do this for a limited set of ioctls
1086                            and only if the original address family was AF_INET.
1087                            This is checked above. */
1088
1089                         for (ifap = &in_dev->ifa_list;
1090                              (ifa = rtnl_dereference(*ifap)) != NULL;
1091                              ifap = &ifa->ifa_next) {
1092                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1093                                     sin_orig.sin_addr.s_addr ==
1094                                                         ifa->ifa_local) {
1095                                         break; /* found */
1096                                 }
1097                         }
1098                 }
1099                 /* we didn't get a match, maybe the application is
1100                    4.3BSD-style and passed in junk so we fall back to
1101                    comparing just the label */
1102                 if (!ifa) {
1103                         for (ifap = &in_dev->ifa_list;
1104                              (ifa = rtnl_dereference(*ifap)) != NULL;
1105                              ifap = &ifa->ifa_next)
1106                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1107                                         break;
1108                 }
1109         }
1110
        /* Without a matching address only SIOCSIFADDR (which can allocate
         * one) and SIOCSIFFLAGS (which can act on the device) may continue.
         */
1111         ret = -EADDRNOTAVAIL;
1112         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1113                 goto done;
1114
1115         switch (cmd) {
1116         case SIOCGIFADDR:       /* Get interface address */
1117                 ret = 0;
1118                 sin->sin_addr.s_addr = ifa->ifa_local;
1119                 break;
1120
1121         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1122                 ret = 0;
1123                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1124                 break;
1125
1126         case SIOCGIFDSTADDR:    /* Get the destination address */
1127                 ret = 0;
1128                 sin->sin_addr.s_addr = ifa->ifa_address;
1129                 break;
1130
1131         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1132                 ret = 0;
1133                 sin->sin_addr.s_addr = ifa->ifa_mask;
1134                 break;
1135
1136         case SIOCSIFFLAGS:
                /* With an alias label the request targets that address:
                 * clearing IFF_UP deletes it.  Otherwise the flags are
                 * applied to the device itself.
                 */
1137                 if (colon) {
1138                         ret = -EADDRNOTAVAIL;
1139                         if (!ifa)
1140                                 break;
1141                         ret = 0;
1142                         if (!(ifr->ifr_flags & IFF_UP))
1143                                 inet_del_ifa(in_dev, ifap, 1);
1144                         break;
1145                 }
1146                 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1147                 break;
1148
1149         case SIOCSIFADDR:       /* Set interface address (and family) */
1150                 ret = -EINVAL;
1151                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1152                         break;
1153
1154                 if (!ifa) {
1155                         ret = -ENOBUFS;
1156                         ifa = inet_alloc_ifa();
1157                         if (!ifa)
1158                                 break;
1159                         INIT_HLIST_NODE(&ifa->hash);
1160                         if (colon)
1161                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1162                         else
1163                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1164                 } else {
1165                         ret = 0;
1166                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1167                                 break;
                        /* NOTE(review): the final argument 0 presumably
                         * unlinks the ifa without freeing it so it can be
                         * reused below - confirm against inet_del_ifa().
                         */
1168                         inet_del_ifa(in_dev, ifap, 0);
1169                         ifa->ifa_broadcast = 0;
1170                         ifa->ifa_scope = 0;
1171                 }
1172
1173                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1174
                /* Non point-to-point devices get the classful default mask
                 * (and, on broadcast devices with a prefix shorter than
                 * /31, the derived broadcast address); point-to-point
                 * devices get a /32.
                 */
1175                 if (!(dev->flags & IFF_POINTOPOINT)) {
1176                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1177                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1178                         if ((dev->flags & IFF_BROADCAST) &&
1179                             ifa->ifa_prefixlen < 31)
1180                                 ifa->ifa_broadcast = ifa->ifa_address |
1181                                                      ~ifa->ifa_mask;
1182                 } else {
1183                         ifa->ifa_prefixlen = 32;
1184                         ifa->ifa_mask = inet_make_mask(32);
1185                 }
1186                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1187                 ret = inet_set_ifa(dev, ifa);
1188                 break;
1189
1190         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1191                 ret = 0;
1192                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1193                         inet_del_ifa(in_dev, ifap, 0);
1194                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1195                         inet_insert_ifa(ifa);
1196                 }
1197                 break;
1198
1199         case SIOCSIFDSTADDR:    /* Set the destination address */
1200                 ret = 0;
1201                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1202                         break;
1203                 ret = -EINVAL;
1204                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1205                         break;
1206                 ret = 0;
1207                 inet_del_ifa(in_dev, ifap, 0);
1208                 ifa->ifa_address = sin->sin_addr.s_addr;
1209                 inet_insert_ifa(ifa);
1210                 break;
1211
1212         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1213
1214                 /*
1215                  *      The mask we set must be legal.
1216                  */
1217                 ret = -EINVAL;
1218                 if (bad_mask(sin->sin_addr.s_addr, 0))
1219                         break;
1220                 ret = 0;
1221                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1222                         __be32 old_mask = ifa->ifa_mask;
1223                         inet_del_ifa(in_dev, ifap, 0);
1224                         ifa->ifa_mask = sin->sin_addr.s_addr;
1225                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1226
1227                         /* See if current broadcast address matches
1228                          * with current netmask, then recalculate
1229                          * the broadcast address. Otherwise it's a
1230                          * funny address, so don't touch it since
1231                          * the user seems to know what (s)he's doing...
1232                          */
1233                         if ((dev->flags & IFF_BROADCAST) &&
1234                             (ifa->ifa_prefixlen < 31) &&
1235                             (ifa->ifa_broadcast ==
1236                              (ifa->ifa_local|~old_mask))) {
1237                                 ifa->ifa_broadcast = (ifa->ifa_local |
1238                                                       ~sin->sin_addr.s_addr);
1239                         }
1240                         inet_insert_ifa(ifa);
1241                 }
1242                 break;
1243         }
1244 done:
1245         rtnl_unlock();
1246 out:
1247         return ret;
1248 }
1249
1250 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1251 {
1252         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1253         const struct in_ifaddr *ifa;
1254         struct ifreq ifr;
1255         int done = 0;
1256
1257         if (WARN_ON(size > sizeof(struct ifreq)))
1258                 goto out;
1259
1260         if (!in_dev)
1261                 goto out;
1262
1263         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1264                 if (!buf) {
1265                         done += size;
1266                         continue;
1267                 }
1268                 if (len < size)
1269                         break;
1270                 memset(&ifr, 0, sizeof(struct ifreq));
1271                 strcpy(ifr.ifr_name, ifa->ifa_label);
1272
1273                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1274                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1275                                                                 ifa->ifa_local;
1276
1277                 if (copy_to_user(buf + done, &ifr, size)) {
1278                         done = -EFAULT;
1279                         break;
1280                 }
1281                 len  -= size;
1282                 done += size;
1283         }
1284 out:
1285         return done;
1286 }
1287
1288 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1289                                  int scope)
1290 {
1291         const struct in_ifaddr *ifa;
1292
1293         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1294                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1295                         continue;
1296                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1297                     ifa->ifa_scope <= scope)
1298                         return ifa->ifa_local;
1299         }
1300
1301         return 0;
1302 }
1303
1304 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1305 {
1306         const struct in_ifaddr *ifa;
1307         __be32 addr = 0;
1308         unsigned char localnet_scope = RT_SCOPE_HOST;
1309         struct in_device *in_dev;
1310         struct net *net = dev_net(dev);
1311         int master_idx;
1312
1313         rcu_read_lock();
1314         in_dev = __in_dev_get_rcu(dev);
1315         if (!in_dev)
1316                 goto no_in_dev;
1317
1318         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1319                 localnet_scope = RT_SCOPE_LINK;
1320
1321         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1322                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1323                         continue;
1324                 if (min(ifa->ifa_scope, localnet_scope) > scope)
1325                         continue;
1326                 if (!dst || inet_ifa_match(dst, ifa)) {
1327                         addr = ifa->ifa_local;
1328                         break;
1329                 }
1330                 if (!addr)
1331                         addr = ifa->ifa_local;
1332         }
1333
1334         if (addr)
1335                 goto out_unlock;
1336 no_in_dev:
1337         master_idx = l3mdev_master_ifindex_rcu(dev);
1338
1339         /* For VRFs, the VRF device takes the place of the loopback device,
1340          * with addresses on it being preferred.  Note in such cases the
1341          * loopback device will be among the devices that fail the master_idx
1342          * equality check in the loop below.
1343          */
1344         if (master_idx &&
1345             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1346             (in_dev = __in_dev_get_rcu(dev))) {
1347                 addr = in_dev_select_addr(in_dev, scope);
1348                 if (addr)
1349                         goto out_unlock;
1350         }
1351
1352         /* Not loopback addresses on loopback should be preferred
1353            in this case. It is important that lo is the first interface
1354            in dev_base list.
1355          */
1356         for_each_netdev_rcu(net, dev) {
1357                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1358                         continue;
1359
1360                 in_dev = __in_dev_get_rcu(dev);
1361                 if (!in_dev)
1362                         continue;
1363
1364                 addr = in_dev_select_addr(in_dev, scope);
1365                 if (addr)
1366                         goto out_unlock;
1367         }
1368 out_unlock:
1369         rcu_read_unlock();
1370         return addr;
1371 }
1372 EXPORT_SYMBOL(inet_select_addr);
1373
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *  - addr: the first ifa_local that equals @local (or any ifa_local when
 *          @local is 0) and whose effective scope - the smaller of
 *          ifa_scope and localnet_scope - does not exceed @scope;
 *  - same: whether some address' subnet matches @local (if non-zero) and
 *          @dst (if non-zero).
 *
 * Returns the selected address only when a subnet match was found as well,
 * otherwise 0.
 */
1374 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1375                               __be32 local, int scope)
1376 {
1377         unsigned char localnet_scope = RT_SCOPE_HOST;
1378         const struct in_ifaddr *ifa;
1379         __be32 addr = 0;
1380         int same = 0;
1381
1382         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1383                 localnet_scope = RT_SCOPE_LINK;
1384
1385         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1386                 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1387
                /* Pick the candidate address once; if the subnet match was
                 * already established earlier, we are done.
                 */
1388                 if (!addr &&
1389                     (local == ifa->ifa_local || !local) &&
1390                     min_scope <= scope) {
1391                         addr = ifa->ifa_local;
1392                         if (same)
1393                                 break;
1394                 }
1395                 if (!same) {
1396                         same = (!local || inet_ifa_match(local, ifa)) &&
1397                                 (!dst || inet_ifa_match(dst, ifa));
1398                         if (same && addr) {
1399                                 if (local || !dst)
1400                                         break;
1401                                 /* Is the selected addr into dst subnet? */
1402                                 if (inet_ifa_match(addr, ifa))
1403                                         break;
1404                                 /* No, then can we use new local src? */
1405                                 if (min_scope <= scope) {
1406                                         addr = ifa->ifa_local;
1407                                         break;
1408                                 }
1409                                 /* search for large dst subnet for addr */
1410                                 same = 0;
1411                         }
1412                 }
1413         }
1414
1415         return same ? addr : 0;
1416 }
1417
1418 /*
1419  * Confirm that local IP address exists using wildcards:
1420  * - net: netns to check, cannot be NULL
1421  * - in_dev: only on this interface, NULL=any interface
1422  * - dst: only in the same subnet as dst, 0=any dst
1423  * - local: address, 0=autoselect the local address
1424  * - scope: maximum allowed scope value for the local address
1425  */
1426 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1427                          __be32 dst, __be32 local, int scope)
1428 {
1429         __be32 addr = 0;
1430         struct net_device *dev;
1431
1432         if (in_dev)
1433                 return confirm_addr_indev(in_dev, dst, local, scope);
1434
1435         rcu_read_lock();
1436         for_each_netdev_rcu(net, dev) {
1437                 in_dev = __in_dev_get_rcu(dev);
1438                 if (in_dev) {
1439                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1440                         if (addr)
1441                                 break;
1442                 }
1443         }
1444         rcu_read_unlock();
1445
1446         return addr;
1447 }
1448 EXPORT_SYMBOL(inet_confirm_addr);
1449
1450 /*
1451  *      Device notifier
1452  */
1453
1454 int register_inetaddr_notifier(struct notifier_block *nb)
1455 {
1456         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1457 }
1458 EXPORT_SYMBOL(register_inetaddr_notifier);
1459
1460 int unregister_inetaddr_notifier(struct notifier_block *nb)
1461 {
1462         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1463 }
1464 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1465
1466 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1467 {
1468         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1469 }
1470 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1471
1472 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1473 {
1474         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1475             nb);
1476 }
1477 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1478
1479 /* Rename ifa_labels for a device name change. Make some effort to preserve
1480  * existing alias numbering and to create unique labels if possible.
1481 */
1482 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1483 {
1484         struct in_ifaddr *ifa;
1485         int named = 0;
1486
1487         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1488                 char old[IFNAMSIZ], *dot;
1489
1490                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1491                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1492                 if (named++ == 0)
1493                         goto skip;
1494                 dot = strchr(old, ':');
1495                 if (!dot) {
1496                         sprintf(old, ":%d", named);
1497                         dot = old;
1498                 }
1499                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1500                         strcat(ifa->ifa_label, dot);
1501                 else
1502                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1503 skip:
1504                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1505         }
1506 }
1507
1508 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1509                                         struct in_device *in_dev)
1510
1511 {
1512         const struct in_ifaddr *ifa;
1513
1514         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1515                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1516                          ifa->ifa_local, dev,
1517                          ifa->ifa_local, NULL,
1518                          dev->dev_addr, NULL);
1519         }
1520 }
1521
1522 /* Called only under RTNL semaphore */
1523
1524 static int inetdev_event(struct notifier_block *this, unsigned long event,
1525                          void *ptr)
1526 {
1527         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1528         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1529
1530         ASSERT_RTNL();
1531
1532         if (!in_dev) {
1533                 if (event == NETDEV_REGISTER) {
1534                         in_dev = inetdev_init(dev);
1535                         if (IS_ERR(in_dev))
1536                                 return notifier_from_errno(PTR_ERR(in_dev));
1537                         if (dev->flags & IFF_LOOPBACK) {
1538                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1539                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1540                         }
1541                 } else if (event == NETDEV_CHANGEMTU) {
1542                         /* Re-enabling IP */
1543                         if (inetdev_valid_mtu(dev->mtu))
1544                                 in_dev = inetdev_init(dev);
1545                 }
1546                 goto out;
1547         }
1548
1549         switch (event) {
1550         case NETDEV_REGISTER:
1551                 pr_debug("%s: bug\n", __func__);
1552                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1553                 break;
1554         case NETDEV_UP:
1555                 if (!inetdev_valid_mtu(dev->mtu))
1556                         break;
1557                 if (dev->flags & IFF_LOOPBACK) {
1558                         struct in_ifaddr *ifa = inet_alloc_ifa();
1559
1560                         if (ifa) {
1561                                 INIT_HLIST_NODE(&ifa->hash);
1562                                 ifa->ifa_local =
1563                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1564                                 ifa->ifa_prefixlen = 8;
1565                                 ifa->ifa_mask = inet_make_mask(8);
1566                                 in_dev_hold(in_dev);
1567                                 ifa->ifa_dev = in_dev;
1568                                 ifa->ifa_scope = RT_SCOPE_HOST;
1569                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1570                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1571                                                  INFINITY_LIFE_TIME);
1572                                 ipv4_devconf_setall(in_dev);
1573                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1574                                 inet_insert_ifa(ifa);
1575                         }
1576                 }
1577                 ip_mc_up(in_dev);
1578                 fallthrough;
1579         case NETDEV_CHANGEADDR:
1580                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1581                         break;
1582                 fallthrough;
1583         case NETDEV_NOTIFY_PEERS:
1584                 /* Send gratuitous ARP to notify of link change */
1585                 inetdev_send_gratuitous_arp(dev, in_dev);
1586                 break;
1587         case NETDEV_DOWN:
1588                 ip_mc_down(in_dev);
1589                 break;
1590         case NETDEV_PRE_TYPE_CHANGE:
1591                 ip_mc_unmap(in_dev);
1592                 break;
1593         case NETDEV_POST_TYPE_CHANGE:
1594                 ip_mc_remap(in_dev);
1595                 break;
1596         case NETDEV_CHANGEMTU:
1597                 if (inetdev_valid_mtu(dev->mtu))
1598                         break;
1599                 /* disable IP when MTU is not enough */
1600                 fallthrough;
1601         case NETDEV_UNREGISTER:
1602                 inetdev_destroy(in_dev);
1603                 break;
1604         case NETDEV_CHANGENAME:
1605                 /* Do not notify about label change, this event is
1606                  * not interesting to applications using netlink.
1607                  */
1608                 inetdev_changename(dev, in_dev);
1609
1610                 devinet_sysctl_unregister(in_dev);
1611                 devinet_sysctl_register(in_dev);
1612                 break;
1613         }
1614 out:
1615         return NOTIFY_DONE;
1616 }
1617
/* Notifier block whose callback is inetdev_event(). */
1618 static struct notifier_block ip_netdev_notifier = {
1619         .notifier_call = inetdev_event,
1620 };
1621
1622 static size_t inet_nlmsg_size(void)
1623 {
1624         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1625                + nla_total_size(4) /* IFA_ADDRESS */
1626                + nla_total_size(4) /* IFA_LOCAL */
1627                + nla_total_size(4) /* IFA_BROADCAST */
1628                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1629                + nla_total_size(4)  /* IFA_FLAGS */
1630                + nla_total_size(1)  /* IFA_PROTO */
1631                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1632                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1633 }
1634
1635 static inline u32 cstamp_delta(unsigned long cstamp)
1636 {
1637         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1638 }
1639
1640 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1641                          unsigned long tstamp, u32 preferred, u32 valid)
1642 {
1643         struct ifa_cacheinfo ci;
1644
1645         ci.cstamp = cstamp_delta(cstamp);
1646         ci.tstamp = cstamp_delta(tstamp);
1647         ci.ifa_prefered = preferred;
1648         ci.ifa_valid = valid;
1649
1650         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1651 }
1652
1653 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1654                             struct inet_fill_args *args)
1655 {
1656         struct ifaddrmsg *ifm;
1657         struct nlmsghdr  *nlh;
1658         u32 preferred, valid;
1659
1660         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1661                         args->flags);
1662         if (!nlh)
1663                 return -EMSGSIZE;
1664
1665         ifm = nlmsg_data(nlh);
1666         ifm->ifa_family = AF_INET;
1667         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1668         ifm->ifa_flags = ifa->ifa_flags;
1669         ifm->ifa_scope = ifa->ifa_scope;
1670         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1671
1672         if (args->netnsid >= 0 &&
1673             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1674                 goto nla_put_failure;
1675
1676         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1677                 preferred = ifa->ifa_preferred_lft;
1678                 valid = ifa->ifa_valid_lft;
1679                 if (preferred != INFINITY_LIFE_TIME) {
1680                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1681
1682                         if (preferred > tval)
1683                                 preferred -= tval;
1684                         else
1685                                 preferred = 0;
1686                         if (valid != INFINITY_LIFE_TIME) {
1687                                 if (valid > tval)
1688                                         valid -= tval;
1689                                 else
1690                                         valid = 0;
1691                         }
1692                 }
1693         } else {
1694                 preferred = INFINITY_LIFE_TIME;
1695                 valid = INFINITY_LIFE_TIME;
1696         }
1697         if ((ifa->ifa_address &&
1698              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1699             (ifa->ifa_local &&
1700              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1701             (ifa->ifa_broadcast &&
1702              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1703             (ifa->ifa_label[0] &&
1704              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1705             (ifa->ifa_proto &&
1706              nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1707             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1708             (ifa->ifa_rt_priority &&
1709              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1710             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1711                           preferred, valid))
1712                 goto nla_put_failure;
1713
1714         nlmsg_end(skb, nlh);
1715         return 0;
1716
1717 nla_put_failure:
1718         nlmsg_cancel(skb, nlh);
1719         return -EMSGSIZE;
1720 }
1721
1722 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1723                                       struct inet_fill_args *fillargs,
1724                                       struct net **tgt_net, struct sock *sk,
1725                                       struct netlink_callback *cb)
1726 {
1727         struct netlink_ext_ack *extack = cb->extack;
1728         struct nlattr *tb[IFA_MAX+1];
1729         struct ifaddrmsg *ifm;
1730         int err, i;
1731
1732         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1733                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1734                 return -EINVAL;
1735         }
1736
1737         ifm = nlmsg_data(nlh);
1738         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1739                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1740                 return -EINVAL;
1741         }
1742
1743         fillargs->ifindex = ifm->ifa_index;
1744         if (fillargs->ifindex) {
1745                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1746                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1747         }
1748
1749         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1750                                             ifa_ipv4_policy, extack);
1751         if (err < 0)
1752                 return err;
1753
1754         for (i = 0; i <= IFA_MAX; ++i) {
1755                 if (!tb[i])
1756                         continue;
1757
1758                 if (i == IFA_TARGET_NETNSID) {
1759                         struct net *net;
1760
1761                         fillargs->netnsid = nla_get_s32(tb[i]);
1762
1763                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1764                         if (IS_ERR(net)) {
1765                                 fillargs->netnsid = -1;
1766                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1767                                 return PTR_ERR(net);
1768                         }
1769                         *tgt_net = net;
1770                 } else {
1771                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1772                         return -EINVAL;
1773                 }
1774         }
1775
1776         return 0;
1777 }
1778
1779 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1780                             struct netlink_callback *cb, int s_ip_idx,
1781                             struct inet_fill_args *fillargs)
1782 {
1783         struct in_ifaddr *ifa;
1784         int ip_idx = 0;
1785         int err;
1786
1787         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1788                 if (ip_idx < s_ip_idx) {
1789                         ip_idx++;
1790                         continue;
1791                 }
1792                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1793                 if (err < 0)
1794                         goto done;
1795
1796                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1797                 ip_idx++;
1798         }
1799         err = 0;
1800
1801 done:
1802         cb->args[2] = ip_idx;
1803
1804         return err;
1805 }
1806
1807 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1808 {
1809         const struct nlmsghdr *nlh = cb->nlh;
1810         struct inet_fill_args fillargs = {
1811                 .portid = NETLINK_CB(cb->skb).portid,
1812                 .seq = nlh->nlmsg_seq,
1813                 .event = RTM_NEWADDR,
1814                 .flags = NLM_F_MULTI,
1815                 .netnsid = -1,
1816         };
1817         struct net *net = sock_net(skb->sk);
1818         struct net *tgt_net = net;
1819         int h, s_h;
1820         int idx, s_idx;
1821         int s_ip_idx;
1822         struct net_device *dev;
1823         struct in_device *in_dev;
1824         struct hlist_head *head;
1825         int err = 0;
1826
1827         s_h = cb->args[0];
1828         s_idx = idx = cb->args[1];
1829         s_ip_idx = cb->args[2];
1830
1831         if (cb->strict_check) {
1832                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1833                                                  skb->sk, cb);
1834                 if (err < 0)
1835                         goto put_tgt_net;
1836
1837                 err = 0;
1838                 if (fillargs.ifindex) {
1839                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1840                         if (!dev) {
1841                                 err = -ENODEV;
1842                                 goto put_tgt_net;
1843                         }
1844
1845                         in_dev = __in_dev_get_rtnl(dev);
1846                         if (in_dev) {
1847                                 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1848                                                        &fillargs);
1849                         }
1850                         goto put_tgt_net;
1851                 }
1852         }
1853
1854         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1855                 idx = 0;
1856                 head = &tgt_net->dev_index_head[h];
1857                 rcu_read_lock();
1858                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1859                           tgt_net->dev_base_seq;
1860                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1861                         if (idx < s_idx)
1862                                 goto cont;
1863                         if (h > s_h || idx > s_idx)
1864                                 s_ip_idx = 0;
1865                         in_dev = __in_dev_get_rcu(dev);
1866                         if (!in_dev)
1867                                 goto cont;
1868
1869                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1870                                                &fillargs);
1871                         if (err < 0) {
1872                                 rcu_read_unlock();
1873                                 goto done;
1874                         }
1875 cont:
1876                         idx++;
1877                 }
1878                 rcu_read_unlock();
1879         }
1880
1881 done:
1882         cb->args[0] = h;
1883         cb->args[1] = idx;
1884 put_tgt_net:
1885         if (fillargs.netnsid >= 0)
1886                 put_net(tgt_net);
1887
1888         return skb->len ? : err;
1889 }
1890
1891 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1892                       u32 portid)
1893 {
1894         struct inet_fill_args fillargs = {
1895                 .portid = portid,
1896                 .seq = nlh ? nlh->nlmsg_seq : 0,
1897                 .event = event,
1898                 .flags = 0,
1899                 .netnsid = -1,
1900         };
1901         struct sk_buff *skb;
1902         int err = -ENOBUFS;
1903         struct net *net;
1904
1905         net = dev_net(ifa->ifa_dev->dev);
1906         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1907         if (!skb)
1908                 goto errout;
1909
1910         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1911         if (err < 0) {
1912                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1913                 WARN_ON(err == -EMSGSIZE);
1914                 kfree_skb(skb);
1915                 goto errout;
1916         }
1917         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1918         return;
1919 errout:
1920         if (err < 0)
1921                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1922 }
1923
1924 static size_t inet_get_link_af_size(const struct net_device *dev,
1925                                     u32 ext_filter_mask)
1926 {
1927         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1928
1929         if (!in_dev)
1930                 return 0;
1931
1932         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1933 }
1934
1935 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1936                              u32 ext_filter_mask)
1937 {
1938         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1939         struct nlattr *nla;
1940         int i;
1941
1942         if (!in_dev)
1943                 return -ENODATA;
1944
1945         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1946         if (!nla)
1947                 return -EMSGSIZE;
1948
1949         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1950                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1951
1952         return 0;
1953 }
1954
1955 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1956         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1957 };
1958
1959 static int inet_validate_link_af(const struct net_device *dev,
1960                                  const struct nlattr *nla,
1961                                  struct netlink_ext_ack *extack)
1962 {
1963         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1964         int err, rem;
1965
1966         if (dev && !__in_dev_get_rtnl(dev))
1967                 return -EAFNOSUPPORT;
1968
1969         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1970                                           inet_af_policy, extack);
1971         if (err < 0)
1972                 return err;
1973
1974         if (tb[IFLA_INET_CONF]) {
1975                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1976                         int cfgid = nla_type(a);
1977
1978                         if (nla_len(a) < 4)
1979                                 return -EINVAL;
1980
1981                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1982                                 return -EINVAL;
1983                 }
1984         }
1985
1986         return 0;
1987 }
1988
1989 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
1990                             struct netlink_ext_ack *extack)
1991 {
1992         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1993         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1994         int rem;
1995
1996         if (!in_dev)
1997                 return -EAFNOSUPPORT;
1998
1999         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2000                 return -EINVAL;
2001
2002         if (tb[IFLA_INET_CONF]) {
2003                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2004                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2005         }
2006
2007         return 0;
2008 }
2009
2010 static int inet_netconf_msgsize_devconf(int type)
2011 {
2012         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2013                    + nla_total_size(4); /* NETCONFA_IFINDEX */
2014         bool all = false;
2015
2016         if (type == NETCONFA_ALL)
2017                 all = true;
2018
2019         if (all || type == NETCONFA_FORWARDING)
2020                 size += nla_total_size(4);
2021         if (all || type == NETCONFA_RP_FILTER)
2022                 size += nla_total_size(4);
2023         if (all || type == NETCONFA_MC_FORWARDING)
2024                 size += nla_total_size(4);
2025         if (all || type == NETCONFA_BC_FORWARDING)
2026                 size += nla_total_size(4);
2027         if (all || type == NETCONFA_PROXY_NEIGH)
2028                 size += nla_total_size(4);
2029         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2030                 size += nla_total_size(4);
2031
2032         return size;
2033 }
2034
2035 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2036                                      struct ipv4_devconf *devconf, u32 portid,
2037                                      u32 seq, int event, unsigned int flags,
2038                                      int type)
2039 {
2040         struct nlmsghdr  *nlh;
2041         struct netconfmsg *ncm;
2042         bool all = false;
2043
2044         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2045                         flags);
2046         if (!nlh)
2047                 return -EMSGSIZE;
2048
2049         if (type == NETCONFA_ALL)
2050                 all = true;
2051
2052         ncm = nlmsg_data(nlh);
2053         ncm->ncm_family = AF_INET;
2054
2055         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2056                 goto nla_put_failure;
2057
2058         if (!devconf)
2059                 goto out;
2060
2061         if ((all || type == NETCONFA_FORWARDING) &&
2062             nla_put_s32(skb, NETCONFA_FORWARDING,
2063                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2064                 goto nla_put_failure;
2065         if ((all || type == NETCONFA_RP_FILTER) &&
2066             nla_put_s32(skb, NETCONFA_RP_FILTER,
2067                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2068                 goto nla_put_failure;
2069         if ((all || type == NETCONFA_MC_FORWARDING) &&
2070             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2071                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2072                 goto nla_put_failure;
2073         if ((all || type == NETCONFA_BC_FORWARDING) &&
2074             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2075                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2076                 goto nla_put_failure;
2077         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2078             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2079                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2080                 goto nla_put_failure;
2081         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2082             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2083                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2084                 goto nla_put_failure;
2085
2086 out:
2087         nlmsg_end(skb, nlh);
2088         return 0;
2089
2090 nla_put_failure:
2091         nlmsg_cancel(skb, nlh);
2092         return -EMSGSIZE;
2093 }
2094
2095 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2096                                  int ifindex, struct ipv4_devconf *devconf)
2097 {
2098         struct sk_buff *skb;
2099         int err = -ENOBUFS;
2100
2101         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2102         if (!skb)
2103                 goto errout;
2104
2105         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2106                                         event, 0, type);
2107         if (err < 0) {
2108                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2109                 WARN_ON(err == -EMSGSIZE);
2110                 kfree_skb(skb);
2111                 goto errout;
2112         }
2113         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2114         return;
2115 errout:
2116         if (err < 0)
2117                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2118 }
2119
2120 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2121         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2122         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2123         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2124         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2125         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2126 };
2127
2128 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2129                                       const struct nlmsghdr *nlh,
2130                                       struct nlattr **tb,
2131                                       struct netlink_ext_ack *extack)
2132 {
2133         int i, err;
2134
2135         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2136                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2137                 return -EINVAL;
2138         }
2139
2140         if (!netlink_strict_get_check(skb))
2141                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2142                                               tb, NETCONFA_MAX,
2143                                               devconf_ipv4_policy, extack);
2144
2145         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2146                                             tb, NETCONFA_MAX,
2147                                             devconf_ipv4_policy, extack);
2148         if (err)
2149                 return err;
2150
2151         for (i = 0; i <= NETCONFA_MAX; i++) {
2152                 if (!tb[i])
2153                         continue;
2154
2155                 switch (i) {
2156                 case NETCONFA_IFINDEX:
2157                         break;
2158                 default:
2159                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2160                         return -EINVAL;
2161                 }
2162         }
2163
2164         return 0;
2165 }
2166
2167 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2168                                     struct nlmsghdr *nlh,
2169                                     struct netlink_ext_ack *extack)
2170 {
2171         struct net *net = sock_net(in_skb->sk);
2172         struct nlattr *tb[NETCONFA_MAX+1];
2173         struct sk_buff *skb;
2174         struct ipv4_devconf *devconf;
2175         struct in_device *in_dev;
2176         struct net_device *dev;
2177         int ifindex;
2178         int err;
2179
2180         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2181         if (err)
2182                 goto errout;
2183
2184         err = -EINVAL;
2185         if (!tb[NETCONFA_IFINDEX])
2186                 goto errout;
2187
2188         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2189         switch (ifindex) {
2190         case NETCONFA_IFINDEX_ALL:
2191                 devconf = net->ipv4.devconf_all;
2192                 break;
2193         case NETCONFA_IFINDEX_DEFAULT:
2194                 devconf = net->ipv4.devconf_dflt;
2195                 break;
2196         default:
2197                 dev = __dev_get_by_index(net, ifindex);
2198                 if (!dev)
2199                         goto errout;
2200                 in_dev = __in_dev_get_rtnl(dev);
2201                 if (!in_dev)
2202                         goto errout;
2203                 devconf = &in_dev->cnf;
2204                 break;
2205         }
2206
2207         err = -ENOBUFS;
2208         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2209         if (!skb)
2210                 goto errout;
2211
2212         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2213                                         NETLINK_CB(in_skb).portid,
2214                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2215                                         NETCONFA_ALL);
2216         if (err < 0) {
2217                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2218                 WARN_ON(err == -EMSGSIZE);
2219                 kfree_skb(skb);
2220                 goto errout;
2221         }
2222         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2223 errout:
2224         return err;
2225 }
2226
2227 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2228                                      struct netlink_callback *cb)
2229 {
2230         const struct nlmsghdr *nlh = cb->nlh;
2231         struct net *net = sock_net(skb->sk);
2232         int h, s_h;
2233         int idx, s_idx;
2234         struct net_device *dev;
2235         struct in_device *in_dev;
2236         struct hlist_head *head;
2237
2238         if (cb->strict_check) {
2239                 struct netlink_ext_ack *extack = cb->extack;
2240                 struct netconfmsg *ncm;
2241
2242                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2243                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2244                         return -EINVAL;
2245                 }
2246
2247                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2248                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2249                         return -EINVAL;
2250                 }
2251         }
2252
2253         s_h = cb->args[0];
2254         s_idx = idx = cb->args[1];
2255
2256         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2257                 idx = 0;
2258                 head = &net->dev_index_head[h];
2259                 rcu_read_lock();
2260                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2261                           net->dev_base_seq;
2262                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2263                         if (idx < s_idx)
2264                                 goto cont;
2265                         in_dev = __in_dev_get_rcu(dev);
2266                         if (!in_dev)
2267                                 goto cont;
2268
2269                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2270                                                       &in_dev->cnf,
2271                                                       NETLINK_CB(cb->skb).portid,
2272                                                       nlh->nlmsg_seq,
2273                                                       RTM_NEWNETCONF,
2274                                                       NLM_F_MULTI,
2275                                                       NETCONFA_ALL) < 0) {
2276                                 rcu_read_unlock();
2277                                 goto done;
2278                         }
2279                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2280 cont:
2281                         idx++;
2282                 }
2283                 rcu_read_unlock();
2284         }
2285         if (h == NETDEV_HASHENTRIES) {
2286                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2287                                               net->ipv4.devconf_all,
2288                                               NETLINK_CB(cb->skb).portid,
2289                                               nlh->nlmsg_seq,
2290                                               RTM_NEWNETCONF, NLM_F_MULTI,
2291                                               NETCONFA_ALL) < 0)
2292                         goto done;
2293                 else
2294                         h++;
2295         }
2296         if (h == NETDEV_HASHENTRIES + 1) {
2297                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2298                                               net->ipv4.devconf_dflt,
2299                                               NETLINK_CB(cb->skb).portid,
2300                                               nlh->nlmsg_seq,
2301                                               RTM_NEWNETCONF, NLM_F_MULTI,
2302                                               NETCONFA_ALL) < 0)
2303                         goto done;
2304                 else
2305                         h++;
2306         }
2307 done:
2308         cb->args[0] = h;
2309         cb->args[1] = idx;
2310
2311         return skb->len;
2312 }
2313
2314 #ifdef CONFIG_SYSCTL
2315
2316 static void devinet_copy_dflt_conf(struct net *net, int i)
2317 {
2318         struct net_device *dev;
2319
2320         rcu_read_lock();
2321         for_each_netdev_rcu(net, dev) {
2322                 struct in_device *in_dev;
2323
2324                 in_dev = __in_dev_get_rcu(dev);
2325                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2326                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2327         }
2328         rcu_read_unlock();
2329 }
2330
2331 /* called with RTNL locked */
2332 static void inet_forward_change(struct net *net)
2333 {
2334         struct net_device *dev;
2335         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2336
2337         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2338         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2339         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2340                                     NETCONFA_FORWARDING,
2341                                     NETCONFA_IFINDEX_ALL,
2342                                     net->ipv4.devconf_all);
2343         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344                                     NETCONFA_FORWARDING,
2345                                     NETCONFA_IFINDEX_DEFAULT,
2346                                     net->ipv4.devconf_dflt);
2347
2348         for_each_netdev(net, dev) {
2349                 struct in_device *in_dev;
2350
2351                 if (on)
2352                         dev_disable_lro(dev);
2353
2354                 in_dev = __in_dev_get_rtnl(dev);
2355                 if (in_dev) {
2356                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2357                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2358                                                     NETCONFA_FORWARDING,
2359                                                     dev->ifindex, &in_dev->cnf);
2360                 }
2361         }
2362 }
2363
2364 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2365 {
2366         if (cnf == net->ipv4.devconf_dflt)
2367                 return NETCONFA_IFINDEX_DEFAULT;
2368         else if (cnf == net->ipv4.devconf_all)
2369                 return NETCONFA_IFINDEX_ALL;
2370         else {
2371                 struct in_device *idev
2372                         = container_of(cnf, struct in_device, cnf);
2373                 return idev->dev->ifindex;
2374         }
2375 }
2376
2377 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2378                              void *buffer, size_t *lenp, loff_t *ppos)
2379 {
2380         int old_value = *(int *)ctl->data;
2381         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2382         int new_value = *(int *)ctl->data;
2383
2384         if (write) {
2385                 struct ipv4_devconf *cnf = ctl->extra1;
2386                 struct net *net = ctl->extra2;
2387                 int i = (int *)ctl->data - cnf->data;
2388                 int ifindex;
2389
2390                 set_bit(i, cnf->state);
2391
2392                 if (cnf == net->ipv4.devconf_dflt)
2393                         devinet_copy_dflt_conf(net, i);
2394                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2395                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2396                         if ((new_value == 0) && (old_value != 0))
2397                                 rt_cache_flush(net);
2398
2399                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2400                     new_value != old_value)
2401                         rt_cache_flush(net);
2402
2403                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2404                     new_value != old_value) {
2405                         ifindex = devinet_conf_ifindex(net, cnf);
2406                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2407                                                     NETCONFA_RP_FILTER,
2408                                                     ifindex, cnf);
2409                 }
2410                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2411                     new_value != old_value) {
2412                         ifindex = devinet_conf_ifindex(net, cnf);
2413                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2414                                                     NETCONFA_PROXY_NEIGH,
2415                                                     ifindex, cnf);
2416                 }
2417                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2418                     new_value != old_value) {
2419                         ifindex = devinet_conf_ifindex(net, cnf);
2420                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2421                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2422                                                     ifindex, cnf);
2423                 }
2424         }
2425
2426         return ret;
2427 }
2428
2429 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2430                                   void *buffer, size_t *lenp, loff_t *ppos)
2431 {
2432         int *valp = ctl->data;
2433         int val = *valp;
2434         loff_t pos = *ppos;
2435         struct net *net = ctl->extra2;
2436         int ret;
2437
2438         if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2439                 return -EPERM;
2440
2441         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2442
2443         if (write && *valp != val) {
2444                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2445                         if (!rtnl_trylock()) {
2446                                 /* Restore the original values before restarting */
2447                                 *valp = val;
2448                                 *ppos = pos;
2449                                 return restart_syscall();
2450                         }
2451                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2452                                 inet_forward_change(net);
2453                         } else {
2454                                 struct ipv4_devconf *cnf = ctl->extra1;
2455                                 struct in_device *idev =
2456                                         container_of(cnf, struct in_device, cnf);
2457                                 if (*valp)
2458                                         dev_disable_lro(idev->dev);
2459                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2460                                                             NETCONFA_FORWARDING,
2461                                                             idev->dev->ifindex,
2462                                                             cnf);
2463                         }
2464                         rtnl_unlock();
2465                         rt_cache_flush(net);
2466                 } else
2467                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2468                                                     NETCONFA_FORWARDING,
2469                                                     NETCONFA_IFINDEX_DEFAULT,
2470                                                     net->ipv4.devconf_dflt);
2471         }
2472
2473         return ret;
2474 }
2475
2476 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2477                                 void *buffer, size_t *lenp, loff_t *ppos)
2478 {
2479         int *valp = ctl->data;
2480         int val = *valp;
2481         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2482         struct net *net = ctl->extra2;
2483
2484         if (write && *valp != val)
2485                 rt_cache_flush(net);
2486
2487         return ret;
2488 }
2489
/*
 * Builders for the entries of the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY() emits one struct ctl_table initializer whose .data
 * addresses slot IPV4_DEVCONF_<attr> - 1 of the template ipv4_devconf and
 * whose .extra1 points at that template.  __devinet_sysctl_register() later
 * rebases .data and overwrites .extra1/.extra2 in its private copy.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, perm, handler) \
        { \
                .procname       = name, \
                .data           = &ipv4_devconf.data[ \
                                        IPV4_DEVCONF_ ## attr - 1], \
                .maxlen         = sizeof(int), \
                .mode           = perm, \
                .proc_handler   = handler, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry served by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry served by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-chosen proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, handler) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, handler)

/* Mode 0644 entry served by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2512
2513 static struct devinet_sysctl_table {
2514         struct ctl_table_header *sysctl_header;
2515         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2516 } devinet_sysctl = {
2517         .devinet_vars = {
2518                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2519                                              devinet_sysctl_forward),
2520                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2521                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2522
2523                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2524                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2525                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2526                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2527                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2528                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2529                                         "accept_source_route"),
2530                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2531                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2532                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2533                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2534                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2535                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2536                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2537                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2538                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2539                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2540                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2541                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2542                 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2543                                         "arp_evict_nocarrier"),
2544                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2545                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2546                                         "force_igmp_version"),
2547                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2548                                         "igmpv2_unsolicited_report_interval"),
2549                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2550                                         "igmpv3_unsolicited_report_interval"),
2551                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2552                                         "ignore_routes_with_linkdown"),
2553                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2554                                         "drop_gratuitous_arp"),
2555
2556                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2557                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2558                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2559                                               "promote_secondaries"),
2560                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2561                                               "route_localnet"),
2562                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2563                                               "drop_unicast_in_l2_multicast"),
2564         },
2565 };
2566
2567 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2568                                      int ifindex, struct ipv4_devconf *p)
2569 {
2570         int i;
2571         struct devinet_sysctl_table *t;
2572         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2573
2574         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2575         if (!t)
2576                 goto out;
2577
2578         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2579                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2580                 t->devinet_vars[i].extra1 = p;
2581                 t->devinet_vars[i].extra2 = net;
2582         }
2583
2584         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2585
2586         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2587         if (!t->sysctl_header)
2588                 goto free;
2589
2590         p->sysctl = t;
2591
2592         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2593                                     ifindex, p);
2594         return 0;
2595
2596 free:
2597         kfree(t);
2598 out:
2599         return -ENOMEM;
2600 }
2601
2602 static void __devinet_sysctl_unregister(struct net *net,
2603                                         struct ipv4_devconf *cnf, int ifindex)
2604 {
2605         struct devinet_sysctl_table *t = cnf->sysctl;
2606
2607         if (t) {
2608                 cnf->sysctl = NULL;
2609                 unregister_net_sysctl_table(t->sysctl_header);
2610                 kfree(t);
2611         }
2612
2613         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2614 }
2615
2616 static int devinet_sysctl_register(struct in_device *idev)
2617 {
2618         int err;
2619
2620         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2621                 return -EINVAL;
2622
2623         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2624         if (err)
2625                 return err;
2626         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2627                                         idev->dev->ifindex, &idev->cnf);
2628         if (err)
2629                 neigh_sysctl_unregister(idev->arp_parms);
2630         return err;
2631 }
2632
2633 static void devinet_sysctl_unregister(struct in_device *idev)
2634 {
2635         struct net *net = dev_net(idev->dev);
2636
2637         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2638         neigh_sysctl_unregister(idev->arp_parms);
2639 }
2640
2641 static struct ctl_table ctl_forward_entry[] = {
2642         {
2643                 .procname       = "ip_forward",
2644                 .data           = &ipv4_devconf.data[
2645                                         IPV4_DEVCONF_FORWARDING - 1],
2646                 .maxlen         = sizeof(int),
2647                 .mode           = 0644,
2648                 .proc_handler   = devinet_sysctl_forward,
2649                 .extra1         = &ipv4_devconf,
2650                 .extra2         = &init_net,
2651         },
2652         { },
2653 };
2654 #endif
2655
2656 static __net_init int devinet_init_net(struct net *net)
2657 {
2658         int err;
2659         struct ipv4_devconf *all, *dflt;
2660 #ifdef CONFIG_SYSCTL
2661         struct ctl_table *tbl;
2662         struct ctl_table_header *forw_hdr;
2663 #endif
2664
2665         err = -ENOMEM;
2666         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2667         if (!all)
2668                 goto err_alloc_all;
2669
2670         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2671         if (!dflt)
2672                 goto err_alloc_dflt;
2673
2674 #ifdef CONFIG_SYSCTL
2675         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2676         if (!tbl)
2677                 goto err_alloc_ctl;
2678
2679         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2680         tbl[0].extra1 = all;
2681         tbl[0].extra2 = net;
2682 #endif
2683
2684         if (!net_eq(net, &init_net)) {
2685                 switch (net_inherit_devconf()) {
2686                 case 3:
2687                         /* copy from the current netns */
2688                         memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2689                                sizeof(ipv4_devconf));
2690                         memcpy(dflt,
2691                                current->nsproxy->net_ns->ipv4.devconf_dflt,
2692                                sizeof(ipv4_devconf_dflt));
2693                         break;
2694                 case 0:
2695                 case 1:
2696                         /* copy from init_net */
2697                         memcpy(all, init_net.ipv4.devconf_all,
2698                                sizeof(ipv4_devconf));
2699                         memcpy(dflt, init_net.ipv4.devconf_dflt,
2700                                sizeof(ipv4_devconf_dflt));
2701                         break;
2702                 case 2:
2703                         /* use compiled values */
2704                         break;
2705                 }
2706         }
2707
2708 #ifdef CONFIG_SYSCTL
2709         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2710         if (err < 0)
2711                 goto err_reg_all;
2712
2713         err = __devinet_sysctl_register(net, "default",
2714                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2715         if (err < 0)
2716                 goto err_reg_dflt;
2717
2718         err = -ENOMEM;
2719         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2720         if (!forw_hdr)
2721                 goto err_reg_ctl;
2722         net->ipv4.forw_hdr = forw_hdr;
2723 #endif
2724
2725         net->ipv4.devconf_all = all;
2726         net->ipv4.devconf_dflt = dflt;
2727         return 0;
2728
2729 #ifdef CONFIG_SYSCTL
2730 err_reg_ctl:
2731         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2732 err_reg_dflt:
2733         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2734 err_reg_all:
2735         kfree(tbl);
2736 err_alloc_ctl:
2737 #endif
2738         kfree(dflt);
2739 err_alloc_dflt:
2740         kfree(all);
2741 err_alloc_all:
2742         return err;
2743 }
2744
2745 static __net_exit void devinet_exit_net(struct net *net)
2746 {
2747 #ifdef CONFIG_SYSCTL
2748         struct ctl_table *tbl;
2749
2750         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2751         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2752         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2753                                     NETCONFA_IFINDEX_DEFAULT);
2754         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2755                                     NETCONFA_IFINDEX_ALL);
2756         kfree(tbl);
2757 #endif
2758         kfree(net->ipv4.devconf_dflt);
2759         kfree(net->ipv4.devconf_all);
2760 }
2761
2762 static __net_initdata struct pernet_operations devinet_ops = {
2763         .init = devinet_init_net,
2764         .exit = devinet_exit_net,
2765 };
2766
2767 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2768         .family           = AF_INET,
2769         .fill_link_af     = inet_fill_link_af,
2770         .get_link_af_size = inet_get_link_af_size,
2771         .validate_link_af = inet_validate_link_af,
2772         .set_link_af      = inet_set_link_af,
2773 };
2774
2775 void __init devinet_init(void)
2776 {
2777         int i;
2778
2779         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2780                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2781
2782         register_pernet_subsys(&devinet_ops);
2783         register_netdevice_notifier(&ip_netdev_notifier);
2784
2785         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2786
2787         rtnl_af_register(&inet_af_ops);
2788
2789         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2790         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2791         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2792         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2793                       inet_netconf_dump_devconf, 0);
2794 }