Linux 6.9-rc1
[linux-2.6-microblaze.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
/*
 * Address flags that only make sense for IPv6 addresses.  They are
 * cleared from every IPv4 address before it is put on a device list.
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_TENTATIVE | IFA_F_OPTIMISTIC | IFA_F_NODAD | \
		 IFA_F_DADFAILED | IFA_F_HOMEADDRESS | \
		 IFA_F_STABLE_PRIVACY | IFA_F_MANAGETEMPADDR)
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79         },
80 };
81
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83         .data = {
84                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92         },
93 };
94
/*
 * Read devconf attribute @attr from the default configuration that
 * @net points to (net->ipv4.devconf_dflt).
 *
 * @net is parenthesised so that an argument which is itself an
 * expression (a cast, a conditional, pointer arithmetic, ...) is
 * evaluated as a whole before '->' is applied to it.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99         [IFA_LOCAL]             = { .type = NLA_U32 },
100         [IFA_ADDRESS]           = { .type = NLA_U32 },
101         [IFA_BROADCAST]         = { .type = NLA_U32 },
102         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
104         [IFA_FLAGS]             = { .type = NLA_U32 },
105         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
106         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
107         [IFA_PROTO]             = { .type = NLA_U8 },
108 };
109
110 struct inet_fill_args {
111         u32 portid;
112         u32 seq;
113         int event;
114         unsigned int flags;
115         int netnsid;
116         int ifindex;
117 };
118
119 #define IN4_ADDR_HSIZE_SHIFT    8
120 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
121
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126         u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133         u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135         ASSERT_RTNL();
136         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141         ASSERT_RTNL();
142         hlist_del_init_rcu(&ifa->hash);
143 }
144
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155         struct net_device *result = NULL;
156         struct in_ifaddr *ifa;
157
158         rcu_read_lock();
159         ifa = inet_lookup_ifaddr_rcu(net, addr);
160         if (!ifa) {
161                 struct flowi4 fl4 = { .daddr = addr };
162                 struct fib_result res = { 0 };
163                 struct fib_table *local;
164
165                 /* Fallback to FIB local table so that communication
166                  * over loopback subnets work.
167                  */
168                 local = fib_get_table(net, RT_TABLE_LOCAL);
169                 if (local &&
170                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171                     res.type == RTN_LOCAL)
172                         result = FIB_RES_DEV(res);
173         } else {
174                 result = ifa->ifa_dev->dev;
175         }
176         if (result && devref)
177                 dev_hold(result);
178         rcu_read_unlock();
179         return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186         u32 hash = inet_addr_hash(net, addr);
187         struct in_ifaddr *ifa;
188
189         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190                 if (ifa->ifa_local == addr &&
191                     net_eq(dev_net(ifa->ifa_dev->dev), net))
192                         return ifa;
193
194         return NULL;
195 }
196
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202                          struct in_ifaddr __rcu **ifap,
203                          int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210         return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216
217 /* Locks all the inet devices. */
218
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227         if (ifa->ifa_dev)
228                 in_dev_put(ifa->ifa_dev);
229         kfree(ifa);
230 }
231
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239         struct in_device *idev = container_of(head, struct in_device, rcu_head);
240
241         kfree(rcu_dereference_protected(idev->mc_hash, 1));
242         kfree(idev);
243 }
244
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247         struct net_device *dev = idev->dev;
248
249         WARN_ON(idev->ifa_list);
250         WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254         netdev_put(dev, &idev->dev_tracker);
255         if (!idev->dead)
256                 pr_err("Freeing alive in_device %p\n", idev);
257         else
258                 call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264         struct in_device *in_dev;
265         int err = -ENOMEM;
266
267         ASSERT_RTNL();
268
269         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270         if (!in_dev)
271                 goto out;
272         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273                         sizeof(in_dev->cnf));
274         in_dev->cnf.sysctl = NULL;
275         in_dev->dev = dev;
276         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277         if (!in_dev->arp_parms)
278                 goto out_kfree;
279         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280                 dev_disable_lro(dev);
281         /* Reference in_dev->dev */
282         netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283         /* Account for reference dev->ip_ptr (below) */
284         refcount_set(&in_dev->refcnt, 1);
285
286         err = devinet_sysctl_register(in_dev);
287         if (err) {
288                 in_dev->dead = 1;
289                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290                 in_dev_put(in_dev);
291                 in_dev = NULL;
292                 goto out;
293         }
294         ip_mc_init_dev(in_dev);
295         if (dev->flags & IFF_UP)
296                 ip_mc_up(in_dev);
297
298         /* we can receive as soon as ip_ptr is set -- do this last */
299         rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301         return in_dev ?: ERR_PTR(err);
302 out_kfree:
303         kfree(in_dev);
304         in_dev = NULL;
305         goto out;
306 }
307
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310         struct net_device *dev;
311         struct in_ifaddr *ifa;
312
313         ASSERT_RTNL();
314
315         dev = in_dev->dev;
316
317         in_dev->dead = 1;
318
319         ip_mc_destroy_dev(in_dev);
320
321         while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323                 inet_free_ifa(ifa);
324         }
325
326         RCU_INIT_POINTER(dev->ip_ptr, NULL);
327
328         devinet_sysctl_unregister(in_dev);
329         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330         arp_ifdown(dev);
331
332         in_dev_put(in_dev);
333 }
334
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337         const struct in_ifaddr *ifa;
338
339         rcu_read_lock();
340         in_dev_for_each_ifa_rcu(ifa, in_dev) {
341                 if (inet_ifa_match(a, ifa)) {
342                         if (!b || inet_ifa_match(b, ifa)) {
343                                 rcu_read_unlock();
344                                 return 1;
345                         }
346                 }
347         }
348         rcu_read_unlock();
349         return 0;
350 }
351
352 static void __inet_del_ifa(struct in_device *in_dev,
353                            struct in_ifaddr __rcu **ifap,
354                            int destroy, struct nlmsghdr *nlh, u32 portid)
355 {
356         struct in_ifaddr *promote = NULL;
357         struct in_ifaddr *ifa, *ifa1;
358         struct in_ifaddr __rcu **last_prim;
359         struct in_ifaddr *prev_prom = NULL;
360         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361
362         ASSERT_RTNL();
363
364         ifa1 = rtnl_dereference(*ifap);
365         last_prim = ifap;
366         if (in_dev->dead)
367                 goto no_promotions;
368
369         /* 1. Deleting primary ifaddr forces deletion all secondaries
370          * unless alias promotion is set
371          **/
372
373         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374                 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375
376                 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
377                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378                             ifa1->ifa_scope <= ifa->ifa_scope)
379                                 last_prim = &ifa->ifa_next;
380
381                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382                             ifa1->ifa_mask != ifa->ifa_mask ||
383                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
384                                 ifap1 = &ifa->ifa_next;
385                                 prev_prom = ifa;
386                                 continue;
387                         }
388
389                         if (!do_promote) {
390                                 inet_hash_remove(ifa);
391                                 *ifap1 = ifa->ifa_next;
392
393                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394                                 blocking_notifier_call_chain(&inetaddr_chain,
395                                                 NETDEV_DOWN, ifa);
396                                 inet_free_ifa(ifa);
397                         } else {
398                                 promote = ifa;
399                                 break;
400                         }
401                 }
402         }
403
404         /* On promotion all secondaries from subnet are changing
405          * the primary IP, we must remove all their routes silently
406          * and later to add them back with new prefsrc. Do this
407          * while all addresses are on the device list.
408          */
409         for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410                 if (ifa1->ifa_mask == ifa->ifa_mask &&
411                     inet_ifa_match(ifa1->ifa_address, ifa))
412                         fib_del_ifaddr(ifa, ifa1);
413         }
414
415 no_promotions:
416         /* 2. Unlink it */
417
418         *ifap = ifa1->ifa_next;
419         inet_hash_remove(ifa1);
420
421         /* 3. Announce address deletion */
422
423         /* Send message first, then call notifier.
424            At first sight, FIB update triggered by notifier
425            will refer to already deleted ifaddr, that could confuse
426            netlink listeners. It is not true: look, gated sees
427            that route deleted and if it still thinks that ifaddr
428            is valid, it will try to restore deleted routes... Grr.
429            So that, this order is correct.
430          */
431         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433
434         if (promote) {
435                 struct in_ifaddr *next_sec;
436
437                 next_sec = rtnl_dereference(promote->ifa_next);
438                 if (prev_prom) {
439                         struct in_ifaddr *last_sec;
440
441                         rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442
443                         last_sec = rtnl_dereference(*last_prim);
444                         rcu_assign_pointer(promote->ifa_next, last_sec);
445                         rcu_assign_pointer(*last_prim, promote);
446                 }
447
448                 promote->ifa_flags &= ~IFA_F_SECONDARY;
449                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450                 blocking_notifier_call_chain(&inetaddr_chain,
451                                 NETDEV_UP, promote);
452                 for (ifa = next_sec; ifa;
453                      ifa = rtnl_dereference(ifa->ifa_next)) {
454                         if (ifa1->ifa_mask != ifa->ifa_mask ||
455                             !inet_ifa_match(ifa1->ifa_address, ifa))
456                                         continue;
457                         fib_add_ifaddr(ifa);
458                 }
459
460         }
461         if (destroy)
462                 inet_free_ifa(ifa1);
463 }
464
465 static void inet_del_ifa(struct in_device *in_dev,
466                          struct in_ifaddr __rcu **ifap,
467                          int destroy)
468 {
469         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471
/* Handler of check_lifetime_work. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477                              u32 portid, struct netlink_ext_ack *extack)
478 {
479         struct in_ifaddr __rcu **last_primary, **ifap;
480         struct in_device *in_dev = ifa->ifa_dev;
481         struct in_validator_info ivi;
482         struct in_ifaddr *ifa1;
483         int ret;
484
485         ASSERT_RTNL();
486
487         if (!ifa->ifa_local) {
488                 inet_free_ifa(ifa);
489                 return 0;
490         }
491
492         ifa->ifa_flags &= ~IFA_F_SECONDARY;
493         last_primary = &in_dev->ifa_list;
494
495         /* Don't set IPv6 only flags to IPv4 addresses */
496         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497
498         ifap = &in_dev->ifa_list;
499         ifa1 = rtnl_dereference(*ifap);
500
501         while (ifa1) {
502                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503                     ifa->ifa_scope <= ifa1->ifa_scope)
504                         last_primary = &ifa1->ifa_next;
505                 if (ifa1->ifa_mask == ifa->ifa_mask &&
506                     inet_ifa_match(ifa1->ifa_address, ifa)) {
507                         if (ifa1->ifa_local == ifa->ifa_local) {
508                                 inet_free_ifa(ifa);
509                                 return -EEXIST;
510                         }
511                         if (ifa1->ifa_scope != ifa->ifa_scope) {
512                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513                                 inet_free_ifa(ifa);
514                                 return -EINVAL;
515                         }
516                         ifa->ifa_flags |= IFA_F_SECONDARY;
517                 }
518
519                 ifap = &ifa1->ifa_next;
520                 ifa1 = rtnl_dereference(*ifap);
521         }
522
523         /* Allow any devices that wish to register ifaddr validtors to weigh
524          * in now, before changes are committed.  The rntl lock is serializing
525          * access here, so the state should not change between a validator call
526          * and a final notify on commit.  This isn't invoked on promotion under
527          * the assumption that validators are checking the address itself, and
528          * not the flags.
529          */
530         ivi.ivi_addr = ifa->ifa_address;
531         ivi.ivi_dev = ifa->ifa_dev;
532         ivi.extack = extack;
533         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534                                            NETDEV_UP, &ivi);
535         ret = notifier_to_errno(ret);
536         if (ret) {
537                 inet_free_ifa(ifa);
538                 return ret;
539         }
540
541         if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542                 ifap = last_primary;
543
544         rcu_assign_pointer(ifa->ifa_next, *ifap);
545         rcu_assign_pointer(*ifap, ifa);
546
547         inet_hash_insert(dev_net(in_dev->dev), ifa);
548
549         cancel_delayed_work(&check_lifetime_work);
550         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551
552         /* Send message first, then call notifier.
553            Notifier will trigger FIB update, so that
554            listeners of netlink will know about new ifaddr */
555         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557
558         return 0;
559 }
560
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563         return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
565
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568         struct in_device *in_dev = __in_dev_get_rtnl(dev);
569
570         ASSERT_RTNL();
571
572         if (!in_dev) {
573                 inet_free_ifa(ifa);
574                 return -ENOBUFS;
575         }
576         ipv4_devconf_setall(in_dev);
577         neigh_parms_data_state_setall(in_dev->arp_parms);
578         if (ifa->ifa_dev != in_dev) {
579                 WARN_ON(ifa->ifa_dev);
580                 in_dev_hold(in_dev);
581                 ifa->ifa_dev = in_dev;
582         }
583         if (ipv4_is_loopback(ifa->ifa_local))
584                 ifa->ifa_scope = RT_SCOPE_HOST;
585         return inet_insert_ifa(ifa);
586 }
587
588 /* Caller must hold RCU or RTNL :
589  * We dont take a reference on found in_device
590  */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
592 {
593         struct net_device *dev;
594         struct in_device *in_dev = NULL;
595
596         rcu_read_lock();
597         dev = dev_get_by_index_rcu(net, ifindex);
598         if (dev)
599                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600         rcu_read_unlock();
601         return in_dev;
602 }
603 EXPORT_SYMBOL(inetdev_by_index);
604
605 /* Called only from RTNL semaphored context. No locks. */
606
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608                                     __be32 mask)
609 {
610         struct in_ifaddr *ifa;
611
612         ASSERT_RTNL();
613
614         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616                         return ifa;
617         }
618         return NULL;
619 }
620
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622                                  const struct in_ifaddr *ifa)
623 {
624 #if defined(CONFIG_IP_MULTICAST)
625         struct ip_mreqn mreq = {
626                 .imr_multiaddr.s_addr = ifa->ifa_address,
627                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
628         };
629         struct sock *sk = net->ipv4.mc_autojoin_sk;
630         int ret;
631
632         ASSERT_RTNL();
633
634         lock_sock(sk);
635         if (join)
636                 ret = ip_mc_join_group(sk, &mreq);
637         else
638                 ret = ip_mc_leave_group(sk, &mreq);
639         release_sock(sk);
640
641         return ret;
642 #else
643         return -EOPNOTSUPP;
644 #endif
645 }
646
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648                             struct netlink_ext_ack *extack)
649 {
650         struct net *net = sock_net(skb->sk);
651         struct in_ifaddr __rcu **ifap;
652         struct nlattr *tb[IFA_MAX+1];
653         struct in_device *in_dev;
654         struct ifaddrmsg *ifm;
655         struct in_ifaddr *ifa;
656         int err;
657
658         ASSERT_RTNL();
659
660         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661                                      ifa_ipv4_policy, extack);
662         if (err < 0)
663                 goto errout;
664
665         ifm = nlmsg_data(nlh);
666         in_dev = inetdev_by_index(net, ifm->ifa_index);
667         if (!in_dev) {
668                 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669                 err = -ENODEV;
670                 goto errout;
671         }
672
673         for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674              ifap = &ifa->ifa_next) {
675                 if (tb[IFA_LOCAL] &&
676                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677                         continue;
678
679                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680                         continue;
681
682                 if (tb[IFA_ADDRESS] &&
683                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685                         continue;
686
687                 if (ipv4_is_multicast(ifa->ifa_address))
688                         ip_mc_autojoin_config(net, false, ifa);
689                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690                 return 0;
691         }
692
693         NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694         err = -EADDRNOTAVAIL;
695 errout:
696         return err;
697 }
698
699 #define INFINITY_LIFE_TIME      0xFFFFFFFF
700
701 static void check_lifetime(struct work_struct *work)
702 {
703         unsigned long now, next, next_sec, next_sched;
704         struct in_ifaddr *ifa;
705         struct hlist_node *n;
706         int i;
707
708         now = jiffies;
709         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
710
711         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712                 bool change_needed = false;
713
714                 rcu_read_lock();
715                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716                         unsigned long age, tstamp;
717                         u32 preferred_lft;
718                         u32 valid_lft;
719                         u32 flags;
720
721                         flags = READ_ONCE(ifa->ifa_flags);
722                         if (flags & IFA_F_PERMANENT)
723                                 continue;
724
725                         preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
726                         valid_lft = READ_ONCE(ifa->ifa_valid_lft);
727                         tstamp = READ_ONCE(ifa->ifa_tstamp);
728                         /* We try to batch several events at once. */
729                         age = (now - tstamp +
730                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
731
732                         if (valid_lft != INFINITY_LIFE_TIME &&
733                             age >= valid_lft) {
734                                 change_needed = true;
735                         } else if (preferred_lft ==
736                                    INFINITY_LIFE_TIME) {
737                                 continue;
738                         } else if (age >= preferred_lft) {
739                                 if (time_before(tstamp + valid_lft * HZ, next))
740                                         next = tstamp + valid_lft * HZ;
741
742                                 if (!(flags & IFA_F_DEPRECATED))
743                                         change_needed = true;
744                         } else if (time_before(tstamp + preferred_lft * HZ,
745                                                next)) {
746                                 next = tstamp + preferred_lft * HZ;
747                         }
748                 }
749                 rcu_read_unlock();
750                 if (!change_needed)
751                         continue;
752                 rtnl_lock();
753                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
754                         unsigned long age;
755
756                         if (ifa->ifa_flags & IFA_F_PERMANENT)
757                                 continue;
758
759                         /* We try to batch several events at once. */
760                         age = (now - ifa->ifa_tstamp +
761                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
762
763                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
764                             age >= ifa->ifa_valid_lft) {
765                                 struct in_ifaddr __rcu **ifap;
766                                 struct in_ifaddr *tmp;
767
768                                 ifap = &ifa->ifa_dev->ifa_list;
769                                 tmp = rtnl_dereference(*ifap);
770                                 while (tmp) {
771                                         if (tmp == ifa) {
772                                                 inet_del_ifa(ifa->ifa_dev,
773                                                              ifap, 1);
774                                                 break;
775                                         }
776                                         ifap = &tmp->ifa_next;
777                                         tmp = rtnl_dereference(*ifap);
778                                 }
779                         } else if (ifa->ifa_preferred_lft !=
780                                    INFINITY_LIFE_TIME &&
781                                    age >= ifa->ifa_preferred_lft &&
782                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
783                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
784                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
785                         }
786                 }
787                 rtnl_unlock();
788         }
789
790         next_sec = round_jiffies_up(next);
791         next_sched = next;
792
793         /* If rounded timeout is accurate enough, accept it. */
794         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
795                 next_sched = next_sec;
796
797         now = jiffies;
798         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
799         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
800                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
801
802         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
803                         next_sched - now);
804 }
805
806 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
807                              __u32 prefered_lft)
808 {
809         unsigned long timeout;
810         u32 flags;
811
812         flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
813
814         timeout = addrconf_timeout_fixup(valid_lft, HZ);
815         if (addrconf_finite_timeout(timeout))
816                 WRITE_ONCE(ifa->ifa_valid_lft, timeout);
817         else
818                 flags |= IFA_F_PERMANENT;
819
820         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
821         if (addrconf_finite_timeout(timeout)) {
822                 if (timeout == 0)
823                         flags |= IFA_F_DEPRECATED;
824                 WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
825         }
826         WRITE_ONCE(ifa->ifa_flags, flags);
827         WRITE_ONCE(ifa->ifa_tstamp, jiffies);
828         if (!ifa->ifa_cstamp)
829                 WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
830 }
831
832 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
833                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
834                                        struct netlink_ext_ack *extack)
835 {
836         struct nlattr *tb[IFA_MAX+1];
837         struct in_ifaddr *ifa;
838         struct ifaddrmsg *ifm;
839         struct net_device *dev;
840         struct in_device *in_dev;
841         int err;
842
843         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
844                                      ifa_ipv4_policy, extack);
845         if (err < 0)
846                 goto errout;
847
848         ifm = nlmsg_data(nlh);
849         err = -EINVAL;
850
851         if (ifm->ifa_prefixlen > 32) {
852                 NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
853                 goto errout;
854         }
855
856         if (!tb[IFA_LOCAL]) {
857                 NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
858                 goto errout;
859         }
860
861         dev = __dev_get_by_index(net, ifm->ifa_index);
862         err = -ENODEV;
863         if (!dev) {
864                 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
865                 goto errout;
866         }
867
868         in_dev = __in_dev_get_rtnl(dev);
869         err = -ENOBUFS;
870         if (!in_dev)
871                 goto errout;
872
873         ifa = inet_alloc_ifa();
874         if (!ifa)
875                 /*
876                  * A potential indev allocation can be left alive, it stays
877                  * assigned to its device and is destroy with it.
878                  */
879                 goto errout;
880
881         ipv4_devconf_setall(in_dev);
882         neigh_parms_data_state_setall(in_dev->arp_parms);
883         in_dev_hold(in_dev);
884
885         if (!tb[IFA_ADDRESS])
886                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
887
888         INIT_HLIST_NODE(&ifa->hash);
889         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
890         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
891         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
892                                          ifm->ifa_flags;
893         ifa->ifa_scope = ifm->ifa_scope;
894         ifa->ifa_dev = in_dev;
895
896         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
897         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
898
899         if (tb[IFA_BROADCAST])
900                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
901
902         if (tb[IFA_LABEL])
903                 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
904         else
905                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
906
907         if (tb[IFA_RT_PRIORITY])
908                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
909
910         if (tb[IFA_PROTO])
911                 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
912
913         if (tb[IFA_CACHEINFO]) {
914                 struct ifa_cacheinfo *ci;
915
916                 ci = nla_data(tb[IFA_CACHEINFO]);
917                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
918                         NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
919                         err = -EINVAL;
920                         goto errout_free;
921                 }
922                 *pvalid_lft = ci->ifa_valid;
923                 *pprefered_lft = ci->ifa_prefered;
924         }
925
926         return ifa;
927
928 errout_free:
929         inet_free_ifa(ifa);
930 errout:
931         return ERR_PTR(err);
932 }
933
934 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
935 {
936         struct in_device *in_dev = ifa->ifa_dev;
937         struct in_ifaddr *ifa1;
938
939         if (!ifa->ifa_local)
940                 return NULL;
941
942         in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
943                 if (ifa1->ifa_mask == ifa->ifa_mask &&
944                     inet_ifa_match(ifa1->ifa_address, ifa) &&
945                     ifa1->ifa_local == ifa->ifa_local)
946                         return ifa1;
947         }
948         return NULL;
949 }
950
951 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
952                             struct netlink_ext_ack *extack)
953 {
954         struct net *net = sock_net(skb->sk);
955         struct in_ifaddr *ifa;
956         struct in_ifaddr *ifa_existing;
957         __u32 valid_lft = INFINITY_LIFE_TIME;
958         __u32 prefered_lft = INFINITY_LIFE_TIME;
959
960         ASSERT_RTNL();
961
962         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
963         if (IS_ERR(ifa))
964                 return PTR_ERR(ifa);
965
966         ifa_existing = find_matching_ifa(ifa);
967         if (!ifa_existing) {
968                 /* It would be best to check for !NLM_F_CREATE here but
969                  * userspace already relies on not having to provide this.
970                  */
971                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
972                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
973                         int ret = ip_mc_autojoin_config(net, true, ifa);
974
975                         if (ret < 0) {
976                                 NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
977                                 inet_free_ifa(ifa);
978                                 return ret;
979                         }
980                 }
981                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
982                                          extack);
983         } else {
984                 u32 new_metric = ifa->ifa_rt_priority;
985                 u8 new_proto = ifa->ifa_proto;
986
987                 inet_free_ifa(ifa);
988
989                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
990                     !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
991                         NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
992                         return -EEXIST;
993                 }
994                 ifa = ifa_existing;
995
996                 if (ifa->ifa_rt_priority != new_metric) {
997                         fib_modify_prefix_metric(ifa, new_metric);
998                         ifa->ifa_rt_priority = new_metric;
999                 }
1000
1001                 ifa->ifa_proto = new_proto;
1002
1003                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1004                 cancel_delayed_work(&check_lifetime_work);
1005                 queue_delayed_work(system_power_efficient_wq,
1006                                 &check_lifetime_work, 0);
1007                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1008         }
1009         return 0;
1010 }
1011
1012 /*
1013  *      Determine a default network mask, based on the IP address.
1014  */
1015
1016 static int inet_abc_len(__be32 addr)
1017 {
1018         int rc = -1;    /* Something else, probably a multicast. */
1019
1020         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1021                 rc = 0;
1022         else {
1023                 __u32 haddr = ntohl(addr);
1024                 if (IN_CLASSA(haddr))
1025                         rc = 8;
1026                 else if (IN_CLASSB(haddr))
1027                         rc = 16;
1028                 else if (IN_CLASSC(haddr))
1029                         rc = 24;
1030                 else if (IN_CLASSE(haddr))
1031                         rc = 32;
1032         }
1033
1034         return rc;
1035 }
1036
1037
/*
 * Handle the IPv4 address related interface ioctls on an in-kernel copy of
 * the request.
 *
 * @net: network namespace the interface is looked up in
 * @cmd: one of SIOC{G,S}IF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @ifr: request; ifr_name selects the device (optionally "dev:alias"),
 *       ifr_addr carries the address in or out
 *
 * For the "get" commands the answer is written into @ifr->ifr_addr.  The
 * "set" commands and SIOCSIFFLAGS require CAP_NET_ADMIN in the namespace's
 * user namespace.
 *
 * Returns 0 on success or a negative errno.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
        struct sockaddr_in sin_orig;
        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
        /* Link that points at the matched ifa; this is what inet_del_ifa() takes. */
        struct in_ifaddr __rcu **ifap = NULL;
        struct in_device *in_dev;
        struct in_ifaddr *ifa = NULL;
        struct net_device *dev;
        char *colon;
        int ret = -EFAULT;
        int tryaddrmatch = 0;

        /* Force NUL termination of the interface name. */
        ifr->ifr_name[IFNAMSIZ - 1] = 0;

        /* save original address for comparison */
        memcpy(&sin_orig, sin, sizeof(*sin));

        /*
         * Temporarily cut an alias label ("eth0:1") down to the bare device
         * name for the device load/lookup; the colon is put back once the
         * device has been found.
         */
        colon = strchr(ifr->ifr_name, ':');
        if (colon)
                *colon = 0;

        dev_load(net, ifr->ifr_name);

        /* First pass over cmd: permission and argument checks, no locking. */
        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
        case SIOCGIFBRDADDR:    /* Get the broadcast address */
        case SIOCGIFDSTADDR:    /* Get the destination address */
        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                /* Historical note: these ioctls will not sleep,
                   so that we do not impose a lock.
                   One day we will be forced to put shlock here (I mean SMP).
                   The lookup further down nevertheless runs under
                   rtnl_lock() like every other command.
                 */
                /* Only trust the passed-in address if it claims to be IPv4. */
                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                /* Prepare a clean AF_INET reply; the address is filled in later. */
                memset(sin, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                break;

        case SIOCSIFFLAGS:
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                break;
        case SIOCSIFADDR:       /* Set interface address (and family) */
        case SIOCSIFBRDADDR:    /* Set the broadcast address */
        case SIOCSIFDSTADDR:    /* Set the destination address */
        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                ret = -EINVAL;
                if (sin->sin_family != AF_INET)
                        goto out;
                break;
        default:
                ret = -EINVAL;
                goto out;
        }

        rtnl_lock();

        ret = -ENODEV;
        dev = __dev_get_by_name(net, ifr->ifr_name);
        if (!dev)
                goto done;

        /* Restore the full label so it can be compared with ifa_label. */
        if (colon)
                *colon = ':';

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                if (tryaddrmatch) {
                        /* Matthias Andree */
                        /* compare label and address (4.4BSD style) */
                        /* note: we only do this for a limited set of ioctls
                           and only if the original address family was AF_INET.
                           This is checked above. */

                        for (ifap = &in_dev->ifa_list;
                             (ifa = rtnl_dereference(*ifap)) != NULL;
                             ifap = &ifa->ifa_next) {
                                if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
                                    sin_orig.sin_addr.s_addr ==
                                                        ifa->ifa_local) {
                                        break; /* found */
                                }
                        }
                }
                /* we didn't get a match, maybe the application is
                   4.3BSD-style and passed in junk so we fall back to
                   comparing just the label */
                if (!ifa) {
                        for (ifap = &in_dev->ifa_list;
                             (ifa = rtnl_dereference(*ifap)) != NULL;
                             ifap = &ifa->ifa_next)
                                if (!strcmp(ifr->ifr_name, ifa->ifa_label))
                                        break;
                }
        }

        /*
         * Every command except SIOCSIFADDR (which may create the address)
         * and SIOCSIFFLAGS (which may act on the device itself) needs an
         * existing address entry.
         */
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;

        /* Second pass over cmd: do the work, under RTNL. */
        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_local;
                break;

        case SIOCGIFBRDADDR:    /* Get the broadcast address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_broadcast;
                break;

        case SIOCGIFDSTADDR:    /* Get the destination address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_address;
                break;

        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_mask;
                break;

        case SIOCSIFFLAGS:
                if (colon) {
                        /*
                         * Flags set on an alias label: clearing IFF_UP
                         * removes that one address (final argument 1 here,
                         * unlike the delete/modify/reinsert paths below,
                         * which pass 0 and keep using the ifa).  The device
                         * flags themselves are not touched.
                         */
                        ret = -EADDRNOTAVAIL;
                        if (!ifa)
                                break;
                        ret = 0;
                        if (!(ifr->ifr_flags & IFF_UP))
                                inet_del_ifa(in_dev, ifap, 1);
                        break;
                }
                ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
                break;

        case SIOCSIFADDR:       /* Set interface address (and family) */
                /* Reject addresses that have no class-based default mask. */
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;

                if (!ifa) {
                        /* No entry for this label yet: create one. */
                        ret = -ENOBUFS;
                        ifa = inet_alloc_ifa();
                        if (!ifa)
                                break;
                        INIT_HLIST_NODE(&ifa->hash);
                        if (colon)
                                memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
                        else
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                } else {
                        /* Same address already set: nothing to do. */
                        ret = 0;
                        if (ifa->ifa_local == sin->sin_addr.s_addr)
                                break;
                        /* Unlink the entry, then reset it before reuse. */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
                        ifa->ifa_scope = 0;
                }

                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

                if (!(dev->flags & IFF_POINTOPOINT)) {
                        /* Class-based default mask; broadcast only below /31. */
                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
                        if ((dev->flags & IFF_BROADCAST) &&
                            ifa->ifa_prefixlen < 31)
                                ifa->ifa_broadcast = ifa->ifa_address |
                                                     ~ifa->ifa_mask;
                } else {
                        /* Point-to-point links get a host (/32) mask. */
                        ifa->ifa_prefixlen = 32;
                        ifa->ifa_mask = inet_make_mask(32);
                }
                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
                ret = inet_set_ifa(dev, ifa);
                break;

        case SIOCSIFBRDADDR:    /* Set the broadcast address */
                ret = 0;
                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* Unlink, modify, re-insert. */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        inet_insert_ifa(ifa);
                }
                break;

        case SIOCSIFDSTADDR:    /* Set the destination address */
                ret = 0;
                if (ifa->ifa_address == sin->sin_addr.s_addr)
                        break;
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;
                ret = 0;
                /* Unlink, modify, re-insert. */
                inet_del_ifa(in_dev, ifap, 0);
                ifa->ifa_address = sin->sin_addr.s_addr;
                inet_insert_ifa(ifa);
                break;

        case SIOCSIFNETMASK:    /* Set the netmask for the interface */

                /*
                 *      The mask we set must be legal.
                 */
                ret = -EINVAL;
                if (bad_mask(sin->sin_addr.s_addr, 0))
                        break;
                ret = 0;
                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                        __be32 old_mask = ifa->ifa_mask;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

                        /* See if current broadcast address matches
                         * with current netmask, then recalculate
                         * the broadcast address. Otherwise it's a
                         * funny address, so don't touch it since
                         * the user seems to know what (s)he's doing...
                         */
                        if ((dev->flags & IFF_BROADCAST) &&
                            (ifa->ifa_prefixlen < 31) &&
                            (ifa->ifa_broadcast ==
                             (ifa->ifa_local|~old_mask))) {
                                ifa->ifa_broadcast = (ifa->ifa_local |
                                                      ~sin->sin_addr.s_addr);
                        }
                        inet_insert_ifa(ifa);
                }
                break;
        }
done:
        rtnl_unlock();
out:
        return ret;
}
1275
1276 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1277 {
1278         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1279         const struct in_ifaddr *ifa;
1280         struct ifreq ifr;
1281         int done = 0;
1282
1283         if (WARN_ON(size > sizeof(struct ifreq)))
1284                 goto out;
1285
1286         if (!in_dev)
1287                 goto out;
1288
1289         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1290                 if (!buf) {
1291                         done += size;
1292                         continue;
1293                 }
1294                 if (len < size)
1295                         break;
1296                 memset(&ifr, 0, sizeof(struct ifreq));
1297                 strcpy(ifr.ifr_name, ifa->ifa_label);
1298
1299                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1300                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1301                                                                 ifa->ifa_local;
1302
1303                 if (copy_to_user(buf + done, &ifr, size)) {
1304                         done = -EFAULT;
1305                         break;
1306                 }
1307                 len  -= size;
1308                 done += size;
1309         }
1310 out:
1311         return done;
1312 }
1313
1314 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1315                                  int scope)
1316 {
1317         const struct in_ifaddr *ifa;
1318
1319         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1320                 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1321                         continue;
1322                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1323                     ifa->ifa_scope <= scope)
1324                         return ifa->ifa_local;
1325         }
1326
1327         return 0;
1328 }
1329
1330 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1331 {
1332         const struct in_ifaddr *ifa;
1333         __be32 addr = 0;
1334         unsigned char localnet_scope = RT_SCOPE_HOST;
1335         struct in_device *in_dev;
1336         struct net *net = dev_net(dev);
1337         int master_idx;
1338
1339         rcu_read_lock();
1340         in_dev = __in_dev_get_rcu(dev);
1341         if (!in_dev)
1342                 goto no_in_dev;
1343
1344         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1345                 localnet_scope = RT_SCOPE_LINK;
1346
1347         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1348                 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1349                         continue;
1350                 if (min(ifa->ifa_scope, localnet_scope) > scope)
1351                         continue;
1352                 if (!dst || inet_ifa_match(dst, ifa)) {
1353                         addr = ifa->ifa_local;
1354                         break;
1355                 }
1356                 if (!addr)
1357                         addr = ifa->ifa_local;
1358         }
1359
1360         if (addr)
1361                 goto out_unlock;
1362 no_in_dev:
1363         master_idx = l3mdev_master_ifindex_rcu(dev);
1364
1365         /* For VRFs, the VRF device takes the place of the loopback device,
1366          * with addresses on it being preferred.  Note in such cases the
1367          * loopback device will be among the devices that fail the master_idx
1368          * equality check in the loop below.
1369          */
1370         if (master_idx &&
1371             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1372             (in_dev = __in_dev_get_rcu(dev))) {
1373                 addr = in_dev_select_addr(in_dev, scope);
1374                 if (addr)
1375                         goto out_unlock;
1376         }
1377
1378         /* Not loopback addresses on loopback should be preferred
1379            in this case. It is important that lo is the first interface
1380            in dev_base list.
1381          */
1382         for_each_netdev_rcu(net, dev) {
1383                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1384                         continue;
1385
1386                 in_dev = __in_dev_get_rcu(dev);
1387                 if (!in_dev)
1388                         continue;
1389
1390                 addr = in_dev_select_addr(in_dev, scope);
1391                 if (addr)
1392                         goto out_unlock;
1393         }
1394 out_unlock:
1395         rcu_read_unlock();
1396         return addr;
1397 }
1398 EXPORT_SYMBOL(inet_select_addr);
1399
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *  - addr: a candidate local address (equal to @local, or any address when
 *          @local is 0) whose effective scope is within @scope;
 *  - same: whether some address' subnet matches both @local and @dst, with
 *          a value of 0 for either acting as a wildcard.
 *
 * Returns the candidate address only if a subnet match was found, else 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        unsigned char localnet_scope = RT_SCOPE_HOST;
        const struct in_ifaddr *ifa;
        __be32 addr = 0;
        int same = 0;

        if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
                localnet_scope = RT_SCOPE_LINK;

        in_dev_for_each_ifa_rcu(ifa, in_dev) {
                /* Scope used for the checks below: capped at localnet_scope. */
                unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

                /* Pick the first acceptable candidate address. */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    min_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet already confirmed earlier: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        /* Does this entry's subnet cover local and dst? */
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* Only the "autoselect for a given dst"
                                 * case needs the extra checks below.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (min_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        }

        return same ? addr : 0;
}
1443
1444 /*
1445  * Confirm that local IP address exists using wildcards:
1446  * - net: netns to check, cannot be NULL
1447  * - in_dev: only on this interface, NULL=any interface
1448  * - dst: only in the same subnet as dst, 0=any dst
1449  * - local: address, 0=autoselect the local address
1450  * - scope: maximum allowed scope value for the local address
1451  */
1452 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1453                          __be32 dst, __be32 local, int scope)
1454 {
1455         __be32 addr = 0;
1456         struct net_device *dev;
1457
1458         if (in_dev)
1459                 return confirm_addr_indev(in_dev, dst, local, scope);
1460
1461         rcu_read_lock();
1462         for_each_netdev_rcu(net, dev) {
1463                 in_dev = __in_dev_get_rcu(dev);
1464                 if (in_dev) {
1465                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1466                         if (addr)
1467                                 break;
1468                 }
1469         }
1470         rcu_read_unlock();
1471
1472         return addr;
1473 }
1474 EXPORT_SYMBOL(inet_confirm_addr);
1475
1476 /*
1477  *      Device notifier
1478  */
1479
1480 int register_inetaddr_notifier(struct notifier_block *nb)
1481 {
1482         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1483 }
1484 EXPORT_SYMBOL(register_inetaddr_notifier);
1485
1486 int unregister_inetaddr_notifier(struct notifier_block *nb)
1487 {
1488         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1489 }
1490 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1491
1492 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1493 {
1494         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1495 }
1496 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1497
1498 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1499 {
1500         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1501             nb);
1502 }
1503 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1504
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
 *
 * The first address on the list gets the bare new device name.  Every later
 * address gets the new name plus a ":suffix": its previous suffix if the old
 * label had one, otherwise ":<n>" where n is its 1-based position on the
 * list.  An RTM_NEWADDR message is sent for every address.
 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
        struct in_ifaddr *ifa;
        int named = 0;  /* number of addresses visited so far */

        in_dev_for_each_ifa_rtnl(ifa, in_dev) {
                char old[IFNAMSIZ], *dot;

                /* Keep the old label around to recover its alias suffix. */
                memcpy(old, ifa->ifa_label, IFNAMSIZ);
                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                /* First address: the plain device name is the label. */
                if (named++ == 0)
                        goto skip;
                dot = strchr(old, ':');
                if (!dot) {
                        /* No previous alias suffix: synthesize ":<n>",
                         * reusing old[] as scratch space.
                         */
                        sprintf(old, ":%d", named);
                        dot = old;
                }
                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
                        strcat(ifa->ifa_label, dot);
                else
                        /* Name plus suffix does not fit: overwrite the tail
                         * of the label so that the suffix and its NUL end
                         * exactly at the last byte.
                         */
                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
        }
}
1533
1534 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1535                                         struct in_device *in_dev)
1536
1537 {
1538         const struct in_ifaddr *ifa;
1539
1540         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1541                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1542                          ifa->ifa_local, dev,
1543                          ifa->ifa_local, NULL,
1544                          dev->dev_addr, NULL);
1545         }
1546 }
1547
/*
 * Netdevice notifier callback for the IPv4 per-device state.
 *
 * Called only under RTNL semaphore.
 *
 * Returns NOTIFY_DONE in every case except a failed inetdev_init() on
 * NETDEV_REGISTER, where the error is passed back via notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /* No IPv4 state on this device yet: only two events can create it. */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (IS_ERR(in_dev))
                                return notifier_from_errno(PTR_ERR(in_dev));
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        /* Note: the inetdev_init() result is not checked
                         * here; NOTIFY_DONE is returned either way.
                         */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* Unexpected: an in_device exists before registration.
                 * Log it and clear the device's ip_ptr.
                 */
                pr_debug("%s: bug\n", __func__);
                RCU_INIT_POINTER(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                /* Loopback devices get 127.0.0.1/8, host scope, with
                 * infinite lifetimes, when they come up.
                 */
                if (dev->flags & IFF_LOOPBACK) {
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                INIT_HLIST_NODE(&ifa->hash);
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
                                                 INFINITY_LIFE_TIME);
                                ipv4_devconf_setall(in_dev);
                                neigh_parms_data_state_setall(in_dev->arp_parms);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                fallthrough;
        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR only announce when ARP_NOTIFY is on. */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                fallthrough;
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                fallthrough;
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Relabel every address after the new device name;
                 * inetdev_changename() sends an RTM_NEWADDR message for
                 * each address it relabels.  Then re-register the sysctl
                 * entries.
                 */
                inetdev_changename(dev, in_dev);

                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1643
1644 static struct notifier_block ip_netdev_notifier = {
1645         .notifier_call = inetdev_event,
1646 };
1647
1648 static size_t inet_nlmsg_size(void)
1649 {
1650         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1651                + nla_total_size(4) /* IFA_ADDRESS */
1652                + nla_total_size(4) /* IFA_LOCAL */
1653                + nla_total_size(4) /* IFA_BROADCAST */
1654                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1655                + nla_total_size(4)  /* IFA_FLAGS */
1656                + nla_total_size(1)  /* IFA_PROTO */
1657                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1658                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1659 }
1660
1661 static inline u32 cstamp_delta(unsigned long cstamp)
1662 {
1663         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1664 }
1665
1666 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1667                          unsigned long tstamp, u32 preferred, u32 valid)
1668 {
1669         struct ifa_cacheinfo ci;
1670
1671         ci.cstamp = cstamp_delta(cstamp);
1672         ci.tstamp = cstamp_delta(tstamp);
1673         ci.ifa_prefered = preferred;
1674         ci.ifa_valid = valid;
1675
1676         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1677 }
1678
1679 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1680                             struct inet_fill_args *args)
1681 {
1682         struct ifaddrmsg *ifm;
1683         struct nlmsghdr  *nlh;
1684         unsigned long tstamp;
1685         u32 preferred, valid;
1686
1687         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1688                         args->flags);
1689         if (!nlh)
1690                 return -EMSGSIZE;
1691
1692         ifm = nlmsg_data(nlh);
1693         ifm->ifa_family = AF_INET;
1694         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1695         ifm->ifa_flags = READ_ONCE(ifa->ifa_flags);
1696         ifm->ifa_scope = ifa->ifa_scope;
1697         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1698
1699         if (args->netnsid >= 0 &&
1700             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1701                 goto nla_put_failure;
1702
1703         tstamp = READ_ONCE(ifa->ifa_tstamp);
1704         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1705                 preferred = READ_ONCE(ifa->ifa_preferred_lft);
1706                 valid = READ_ONCE(ifa->ifa_valid_lft);
1707                 if (preferred != INFINITY_LIFE_TIME) {
1708                         long tval = (jiffies - tstamp) / HZ;
1709
1710                         if (preferred > tval)
1711                                 preferred -= tval;
1712                         else
1713                                 preferred = 0;
1714                         if (valid != INFINITY_LIFE_TIME) {
1715                                 if (valid > tval)
1716                                         valid -= tval;
1717                                 else
1718                                         valid = 0;
1719                         }
1720                 }
1721         } else {
1722                 preferred = INFINITY_LIFE_TIME;
1723                 valid = INFINITY_LIFE_TIME;
1724         }
1725         if ((ifa->ifa_address &&
1726              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1727             (ifa->ifa_local &&
1728              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1729             (ifa->ifa_broadcast &&
1730              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1731             (ifa->ifa_label[0] &&
1732              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1733             (ifa->ifa_proto &&
1734              nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1735             nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) ||
1736             (ifa->ifa_rt_priority &&
1737              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1738             put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1739                           preferred, valid))
1740                 goto nla_put_failure;
1741
1742         nlmsg_end(skb, nlh);
1743         return 0;
1744
1745 nla_put_failure:
1746         nlmsg_cancel(skb, nlh);
1747         return -EMSGSIZE;
1748 }
1749
1750 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1751                                       struct inet_fill_args *fillargs,
1752                                       struct net **tgt_net, struct sock *sk,
1753                                       struct netlink_callback *cb)
1754 {
1755         struct netlink_ext_ack *extack = cb->extack;
1756         struct nlattr *tb[IFA_MAX+1];
1757         struct ifaddrmsg *ifm;
1758         int err, i;
1759
1760         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1761                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1762                 return -EINVAL;
1763         }
1764
1765         ifm = nlmsg_data(nlh);
1766         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1767                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1768                 return -EINVAL;
1769         }
1770
1771         fillargs->ifindex = ifm->ifa_index;
1772         if (fillargs->ifindex) {
1773                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1774                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1775         }
1776
1777         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1778                                             ifa_ipv4_policy, extack);
1779         if (err < 0)
1780                 return err;
1781
1782         for (i = 0; i <= IFA_MAX; ++i) {
1783                 if (!tb[i])
1784                         continue;
1785
1786                 if (i == IFA_TARGET_NETNSID) {
1787                         struct net *net;
1788
1789                         fillargs->netnsid = nla_get_s32(tb[i]);
1790
1791                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1792                         if (IS_ERR(net)) {
1793                                 fillargs->netnsid = -1;
1794                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1795                                 return PTR_ERR(net);
1796                         }
1797                         *tgt_net = net;
1798                 } else {
1799                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1800                         return -EINVAL;
1801                 }
1802         }
1803
1804         return 0;
1805 }
1806
1807 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1808                             struct netlink_callback *cb, int *s_ip_idx,
1809                             struct inet_fill_args *fillargs)
1810 {
1811         struct in_ifaddr *ifa;
1812         int ip_idx = 0;
1813         int err;
1814
1815         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1816                 if (ip_idx < *s_ip_idx) {
1817                         ip_idx++;
1818                         continue;
1819                 }
1820                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1821                 if (err < 0)
1822                         goto done;
1823
1824                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1825                 ip_idx++;
1826         }
1827         err = 0;
1828         ip_idx = 0;
1829 done:
1830         *s_ip_idx = ip_idx;
1831
1832         return err;
1833 }
1834
1835 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1836  */
1837 static u32 inet_base_seq(const struct net *net)
1838 {
1839         u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1840                   READ_ONCE(net->dev_base_seq);
1841
1842         /* Must not return 0 (see nl_dump_check_consistent()).
1843          * Chose a value far away from 0.
1844          */
1845         if (!res)
1846                 res = 0x80000000;
1847         return res;
1848 }
1849
1850 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1851 {
1852         const struct nlmsghdr *nlh = cb->nlh;
1853         struct inet_fill_args fillargs = {
1854                 .portid = NETLINK_CB(cb->skb).portid,
1855                 .seq = nlh->nlmsg_seq,
1856                 .event = RTM_NEWADDR,
1857                 .flags = NLM_F_MULTI,
1858                 .netnsid = -1,
1859         };
1860         struct net *net = sock_net(skb->sk);
1861         struct net *tgt_net = net;
1862         struct {
1863                 unsigned long ifindex;
1864                 int ip_idx;
1865         } *ctx = (void *)cb->ctx;
1866         struct in_device *in_dev;
1867         struct net_device *dev;
1868         int err = 0;
1869
1870         rcu_read_lock();
1871         if (cb->strict_check) {
1872                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1873                                                  skb->sk, cb);
1874                 if (err < 0)
1875                         goto done;
1876
1877                 if (fillargs.ifindex) {
1878                         err = -ENODEV;
1879                         dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1880                         if (!dev)
1881                                 goto done;
1882                         in_dev = __in_dev_get_rcu(dev);
1883                         if (!in_dev)
1884                                 goto done;
1885                         err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1886                                                &fillargs);
1887                         goto done;
1888                 }
1889         }
1890
1891         cb->seq = inet_base_seq(tgt_net);
1892
1893         for_each_netdev_dump(net, dev, ctx->ifindex) {
1894                 in_dev = __in_dev_get_rcu(dev);
1895                 if (!in_dev)
1896                         continue;
1897                 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1898                                        &fillargs);
1899                 if (err < 0)
1900                         goto done;
1901         }
1902 done:
1903         if (fillargs.netnsid >= 0)
1904                 put_net(tgt_net);
1905         rcu_read_unlock();
1906         return err;
1907 }
1908
1909 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1910                       u32 portid)
1911 {
1912         struct inet_fill_args fillargs = {
1913                 .portid = portid,
1914                 .seq = nlh ? nlh->nlmsg_seq : 0,
1915                 .event = event,
1916                 .flags = 0,
1917                 .netnsid = -1,
1918         };
1919         struct sk_buff *skb;
1920         int err = -ENOBUFS;
1921         struct net *net;
1922
1923         net = dev_net(ifa->ifa_dev->dev);
1924         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1925         if (!skb)
1926                 goto errout;
1927
1928         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1929         if (err < 0) {
1930                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1931                 WARN_ON(err == -EMSGSIZE);
1932                 kfree_skb(skb);
1933                 goto errout;
1934         }
1935         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1936         return;
1937 errout:
1938         if (err < 0)
1939                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1940 }
1941
1942 static size_t inet_get_link_af_size(const struct net_device *dev,
1943                                     u32 ext_filter_mask)
1944 {
1945         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1946
1947         if (!in_dev)
1948                 return 0;
1949
1950         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1951 }
1952
1953 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1954                              u32 ext_filter_mask)
1955 {
1956         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1957         struct nlattr *nla;
1958         int i;
1959
1960         if (!in_dev)
1961                 return -ENODATA;
1962
1963         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1964         if (!nla)
1965                 return -EMSGSIZE;
1966
1967         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1968                 ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
1969
1970         return 0;
1971 }
1972
1973 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1974         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1975 };
1976
1977 static int inet_validate_link_af(const struct net_device *dev,
1978                                  const struct nlattr *nla,
1979                                  struct netlink_ext_ack *extack)
1980 {
1981         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1982         int err, rem;
1983
1984         if (dev && !__in_dev_get_rtnl(dev))
1985                 return -EAFNOSUPPORT;
1986
1987         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1988                                           inet_af_policy, extack);
1989         if (err < 0)
1990                 return err;
1991
1992         if (tb[IFLA_INET_CONF]) {
1993                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1994                         int cfgid = nla_type(a);
1995
1996                         if (nla_len(a) < 4)
1997                                 return -EINVAL;
1998
1999                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2000                                 return -EINVAL;
2001                 }
2002         }
2003
2004         return 0;
2005 }
2006
2007 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2008                             struct netlink_ext_ack *extack)
2009 {
2010         struct in_device *in_dev = __in_dev_get_rtnl(dev);
2011         struct nlattr *a, *tb[IFLA_INET_MAX+1];
2012         int rem;
2013
2014         if (!in_dev)
2015                 return -EAFNOSUPPORT;
2016
2017         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2018                 return -EINVAL;
2019
2020         if (tb[IFLA_INET_CONF]) {
2021                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2022                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2023         }
2024
2025         return 0;
2026 }
2027
2028 static int inet_netconf_msgsize_devconf(int type)
2029 {
2030         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2031                    + nla_total_size(4); /* NETCONFA_IFINDEX */
2032         bool all = false;
2033
2034         if (type == NETCONFA_ALL)
2035                 all = true;
2036
2037         if (all || type == NETCONFA_FORWARDING)
2038                 size += nla_total_size(4);
2039         if (all || type == NETCONFA_RP_FILTER)
2040                 size += nla_total_size(4);
2041         if (all || type == NETCONFA_MC_FORWARDING)
2042                 size += nla_total_size(4);
2043         if (all || type == NETCONFA_BC_FORWARDING)
2044                 size += nla_total_size(4);
2045         if (all || type == NETCONFA_PROXY_NEIGH)
2046                 size += nla_total_size(4);
2047         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2048                 size += nla_total_size(4);
2049
2050         return size;
2051 }
2052
2053 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2054                                      const struct ipv4_devconf *devconf,
2055                                      u32 portid, u32 seq, int event,
2056                                      unsigned int flags, int type)
2057 {
2058         struct nlmsghdr  *nlh;
2059         struct netconfmsg *ncm;
2060         bool all = false;
2061
2062         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2063                         flags);
2064         if (!nlh)
2065                 return -EMSGSIZE;
2066
2067         if (type == NETCONFA_ALL)
2068                 all = true;
2069
2070         ncm = nlmsg_data(nlh);
2071         ncm->ncm_family = AF_INET;
2072
2073         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2074                 goto nla_put_failure;
2075
2076         if (!devconf)
2077                 goto out;
2078
2079         if ((all || type == NETCONFA_FORWARDING) &&
2080             nla_put_s32(skb, NETCONFA_FORWARDING,
2081                         IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2082                 goto nla_put_failure;
2083         if ((all || type == NETCONFA_RP_FILTER) &&
2084             nla_put_s32(skb, NETCONFA_RP_FILTER,
2085                         IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2086                 goto nla_put_failure;
2087         if ((all || type == NETCONFA_MC_FORWARDING) &&
2088             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2089                         IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2090                 goto nla_put_failure;
2091         if ((all || type == NETCONFA_BC_FORWARDING) &&
2092             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2093                         IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2094                 goto nla_put_failure;
2095         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2096             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2097                         IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2098                 goto nla_put_failure;
2099         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2100             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2101                         IPV4_DEVCONF_RO(*devconf,
2102                                         IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2103                 goto nla_put_failure;
2104
2105 out:
2106         nlmsg_end(skb, nlh);
2107         return 0;
2108
2109 nla_put_failure:
2110         nlmsg_cancel(skb, nlh);
2111         return -EMSGSIZE;
2112 }
2113
2114 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2115                                  int ifindex, struct ipv4_devconf *devconf)
2116 {
2117         struct sk_buff *skb;
2118         int err = -ENOBUFS;
2119
2120         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2121         if (!skb)
2122                 goto errout;
2123
2124         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2125                                         event, 0, type);
2126         if (err < 0) {
2127                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2128                 WARN_ON(err == -EMSGSIZE);
2129                 kfree_skb(skb);
2130                 goto errout;
2131         }
2132         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2133         return;
2134 errout:
2135         if (err < 0)
2136                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2137 }
2138
2139 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2140         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2141         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2142         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2143         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2144         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2145 };
2146
2147 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2148                                       const struct nlmsghdr *nlh,
2149                                       struct nlattr **tb,
2150                                       struct netlink_ext_ack *extack)
2151 {
2152         int i, err;
2153
2154         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2155                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2156                 return -EINVAL;
2157         }
2158
2159         if (!netlink_strict_get_check(skb))
2160                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2161                                               tb, NETCONFA_MAX,
2162                                               devconf_ipv4_policy, extack);
2163
2164         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2165                                             tb, NETCONFA_MAX,
2166                                             devconf_ipv4_policy, extack);
2167         if (err)
2168                 return err;
2169
2170         for (i = 0; i <= NETCONFA_MAX; i++) {
2171                 if (!tb[i])
2172                         continue;
2173
2174                 switch (i) {
2175                 case NETCONFA_IFINDEX:
2176                         break;
2177                 default:
2178                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2179                         return -EINVAL;
2180                 }
2181         }
2182
2183         return 0;
2184 }
2185
2186 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2187                                     struct nlmsghdr *nlh,
2188                                     struct netlink_ext_ack *extack)
2189 {
2190         struct net *net = sock_net(in_skb->sk);
2191         struct nlattr *tb[NETCONFA_MAX + 1];
2192         const struct ipv4_devconf *devconf;
2193         struct in_device *in_dev = NULL;
2194         struct net_device *dev = NULL;
2195         struct sk_buff *skb;
2196         int ifindex;
2197         int err;
2198
2199         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2200         if (err)
2201                 return err;
2202
2203         if (!tb[NETCONFA_IFINDEX])
2204                 return -EINVAL;
2205
2206         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2207         switch (ifindex) {
2208         case NETCONFA_IFINDEX_ALL:
2209                 devconf = net->ipv4.devconf_all;
2210                 break;
2211         case NETCONFA_IFINDEX_DEFAULT:
2212                 devconf = net->ipv4.devconf_dflt;
2213                 break;
2214         default:
2215                 err = -ENODEV;
2216                 dev = dev_get_by_index(net, ifindex);
2217                 if (dev)
2218                         in_dev = in_dev_get(dev);
2219                 if (!in_dev)
2220                         goto errout;
2221                 devconf = &in_dev->cnf;
2222                 break;
2223         }
2224
2225         err = -ENOBUFS;
2226         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2227         if (!skb)
2228                 goto errout;
2229
2230         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2231                                         NETLINK_CB(in_skb).portid,
2232                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2233                                         NETCONFA_ALL);
2234         if (err < 0) {
2235                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2236                 WARN_ON(err == -EMSGSIZE);
2237                 kfree_skb(skb);
2238                 goto errout;
2239         }
2240         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2241 errout:
2242         if (in_dev)
2243                 in_dev_put(in_dev);
2244         dev_put(dev);
2245         return err;
2246 }
2247
2248 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2249                                      struct netlink_callback *cb)
2250 {
2251         const struct nlmsghdr *nlh = cb->nlh;
2252         struct net *net = sock_net(skb->sk);
2253         struct {
2254                 unsigned long ifindex;
2255                 unsigned int all_default;
2256         } *ctx = (void *)cb->ctx;
2257         const struct in_device *in_dev;
2258         struct net_device *dev;
2259         int err = 0;
2260
2261         if (cb->strict_check) {
2262                 struct netlink_ext_ack *extack = cb->extack;
2263                 struct netconfmsg *ncm;
2264
2265                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2266                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2267                         return -EINVAL;
2268                 }
2269
2270                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2271                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2272                         return -EINVAL;
2273                 }
2274         }
2275
2276         rcu_read_lock();
2277         for_each_netdev_dump(net, dev, ctx->ifindex) {
2278                 in_dev = __in_dev_get_rcu(dev);
2279                 if (!in_dev)
2280                         continue;
2281                 err = inet_netconf_fill_devconf(skb, dev->ifindex,
2282                                                 &in_dev->cnf,
2283                                                 NETLINK_CB(cb->skb).portid,
2284                                                 nlh->nlmsg_seq,
2285                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2286                                                 NETCONFA_ALL);
2287                 if (err < 0)
2288                         goto done;
2289         }
2290         if (ctx->all_default == 0) {
2291                 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2292                                                 net->ipv4.devconf_all,
2293                                                 NETLINK_CB(cb->skb).portid,
2294                                                 nlh->nlmsg_seq,
2295                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2296                                                 NETCONFA_ALL);
2297                 if (err < 0)
2298                         goto done;
2299                 ctx->all_default++;
2300         }
2301         if (ctx->all_default == 1) {
2302                 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2303                                                 net->ipv4.devconf_dflt,
2304                                                 NETLINK_CB(cb->skb).portid,
2305                                                 nlh->nlmsg_seq,
2306                                                 RTM_NEWNETCONF, NLM_F_MULTI,
2307                                                 NETCONFA_ALL);
2308                 if (err < 0)
2309                         goto done;
2310                 ctx->all_default++;
2311         }
2312 done:
2313         rcu_read_unlock();
2314         return err;
2315 }
2316
2317 #ifdef CONFIG_SYSCTL
2318
2319 static void devinet_copy_dflt_conf(struct net *net, int i)
2320 {
2321         struct net_device *dev;
2322
2323         rcu_read_lock();
2324         for_each_netdev_rcu(net, dev) {
2325                 struct in_device *in_dev;
2326
2327                 in_dev = __in_dev_get_rcu(dev);
2328                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2329                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2330         }
2331         rcu_read_unlock();
2332 }
2333
2334 /* called with RTNL locked */
2335 static void inet_forward_change(struct net *net)
2336 {
2337         struct net_device *dev;
2338         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2339
2340         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2341         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2342         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2343                                     NETCONFA_FORWARDING,
2344                                     NETCONFA_IFINDEX_ALL,
2345                                     net->ipv4.devconf_all);
2346         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2347                                     NETCONFA_FORWARDING,
2348                                     NETCONFA_IFINDEX_DEFAULT,
2349                                     net->ipv4.devconf_dflt);
2350
2351         for_each_netdev(net, dev) {
2352                 struct in_device *in_dev;
2353
2354                 if (on)
2355                         dev_disable_lro(dev);
2356
2357                 in_dev = __in_dev_get_rtnl(dev);
2358                 if (in_dev) {
2359                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2360                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2361                                                     NETCONFA_FORWARDING,
2362                                                     dev->ifindex, &in_dev->cnf);
2363                 }
2364         }
2365 }
2366
2367 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2368 {
2369         if (cnf == net->ipv4.devconf_dflt)
2370                 return NETCONFA_IFINDEX_DEFAULT;
2371         else if (cnf == net->ipv4.devconf_all)
2372                 return NETCONFA_IFINDEX_ALL;
2373         else {
2374                 struct in_device *idev
2375                         = container_of(cnf, struct in_device, cnf);
2376                 return idev->dev->ifindex;
2377         }
2378 }
2379
2380 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2381                              void *buffer, size_t *lenp, loff_t *ppos)
2382 {
2383         int old_value = *(int *)ctl->data;
2384         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2385         int new_value = *(int *)ctl->data;
2386
2387         if (write) {
2388                 struct ipv4_devconf *cnf = ctl->extra1;
2389                 struct net *net = ctl->extra2;
2390                 int i = (int *)ctl->data - cnf->data;
2391                 int ifindex;
2392
2393                 set_bit(i, cnf->state);
2394
2395                 if (cnf == net->ipv4.devconf_dflt)
2396                         devinet_copy_dflt_conf(net, i);
2397                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2398                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2399                         if ((new_value == 0) && (old_value != 0))
2400                                 rt_cache_flush(net);
2401
2402                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2403                     new_value != old_value)
2404                         rt_cache_flush(net);
2405
2406                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2407                     new_value != old_value) {
2408                         ifindex = devinet_conf_ifindex(net, cnf);
2409                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2410                                                     NETCONFA_RP_FILTER,
2411                                                     ifindex, cnf);
2412                 }
2413                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2414                     new_value != old_value) {
2415                         ifindex = devinet_conf_ifindex(net, cnf);
2416                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2417                                                     NETCONFA_PROXY_NEIGH,
2418                                                     ifindex, cnf);
2419                 }
2420                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2421                     new_value != old_value) {
2422                         ifindex = devinet_conf_ifindex(net, cnf);
2423                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2424                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2425                                                     ifindex, cnf);
2426                 }
2427         }
2428
2429         return ret;
2430 }
2431
2432 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2433                                   void *buffer, size_t *lenp, loff_t *ppos)
2434 {
2435         int *valp = ctl->data;
2436         int val = *valp;
2437         loff_t pos = *ppos;
2438         struct net *net = ctl->extra2;
2439         int ret;
2440
2441         if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2442                 return -EPERM;
2443
2444         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2445
2446         if (write && *valp != val) {
2447                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2448                         if (!rtnl_trylock()) {
2449                                 /* Restore the original values before restarting */
2450                                 *valp = val;
2451                                 *ppos = pos;
2452                                 return restart_syscall();
2453                         }
2454                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2455                                 inet_forward_change(net);
2456                         } else {
2457                                 struct ipv4_devconf *cnf = ctl->extra1;
2458                                 struct in_device *idev =
2459                                         container_of(cnf, struct in_device, cnf);
2460                                 if (*valp)
2461                                         dev_disable_lro(idev->dev);
2462                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2463                                                             NETCONFA_FORWARDING,
2464                                                             idev->dev->ifindex,
2465                                                             cnf);
2466                         }
2467                         rtnl_unlock();
2468                         rt_cache_flush(net);
2469                 } else
2470                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2471                                                     NETCONFA_FORWARDING,
2472                                                     NETCONFA_IFINDEX_DEFAULT,
2473                                                     net->ipv4.devconf_dflt);
2474         }
2475
2476         return ret;
2477 }
2478
2479 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2480                                 void *buffer, size_t *lenp, loff_t *ppos)
2481 {
2482         int *valp = ctl->data;
2483         int val = *valp;
2484         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2485         struct net *net = ctl->extra2;
2486
2487         if (write && *valp != val)
2488                 rt_cache_flush(net);
2489
2490         return ret;
2491 }
2492
/*
 * Initializer for one ctl_table entry of the devinet template table.
 *
 * @attr: IPV4_DEVCONF_* suffix; the entry's ->data points at slot
 *        (IPV4_DEVCONF_<attr> - 1) of ipv4_devconf.data.
 * @name: file name of the sysctl.
 * @mval: file mode.
 * @proc: proc_handler to install.
 *
 * ->extra1 is pointed at ipv4_devconf; ->extra2 is left unset here.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry served by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry served by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry served by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2515
2516 static struct devinet_sysctl_table {
2517         struct ctl_table_header *sysctl_header;
2518         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2519 } devinet_sysctl = {
2520         .devinet_vars = {
2521                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2522                                              devinet_sysctl_forward),
2523                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2524                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2525
2526                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2527                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2528                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2529                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2530                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2531                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2532                                         "accept_source_route"),
2533                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2534                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2535                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2536                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2537                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2538                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2539                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2540                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2541                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2542                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2543                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2544                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2545                 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2546                                         "arp_evict_nocarrier"),
2547                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2548                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2549                                         "force_igmp_version"),
2550                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2551                                         "igmpv2_unsolicited_report_interval"),
2552                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2553                                         "igmpv3_unsolicited_report_interval"),
2554                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2555                                         "ignore_routes_with_linkdown"),
2556                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2557                                         "drop_gratuitous_arp"),
2558
2559                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2560                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2561                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2562                                               "promote_secondaries"),
2563                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2564                                               "route_localnet"),
2565                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2566                                               "drop_unicast_in_l2_multicast"),
2567         },
2568 };
2569
2570 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2571                                      int ifindex, struct ipv4_devconf *p)
2572 {
2573         int i;
2574         struct devinet_sysctl_table *t;
2575         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2576
2577         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2578         if (!t)
2579                 goto out;
2580
2581         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2582                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2583                 t->devinet_vars[i].extra1 = p;
2584                 t->devinet_vars[i].extra2 = net;
2585         }
2586
2587         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2588
2589         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2590         if (!t->sysctl_header)
2591                 goto free;
2592
2593         p->sysctl = t;
2594
2595         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2596                                     ifindex, p);
2597         return 0;
2598
2599 free:
2600         kfree(t);
2601 out:
2602         return -ENOMEM;
2603 }
2604
2605 static void __devinet_sysctl_unregister(struct net *net,
2606                                         struct ipv4_devconf *cnf, int ifindex)
2607 {
2608         struct devinet_sysctl_table *t = cnf->sysctl;
2609
2610         if (t) {
2611                 cnf->sysctl = NULL;
2612                 unregister_net_sysctl_table(t->sysctl_header);
2613                 kfree(t);
2614         }
2615
2616         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2617 }
2618
2619 static int devinet_sysctl_register(struct in_device *idev)
2620 {
2621         int err;
2622
2623         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2624                 return -EINVAL;
2625
2626         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2627         if (err)
2628                 return err;
2629         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2630                                         idev->dev->ifindex, &idev->cnf);
2631         if (err)
2632                 neigh_sysctl_unregister(idev->arp_parms);
2633         return err;
2634 }
2635
2636 static void devinet_sysctl_unregister(struct in_device *idev)
2637 {
2638         struct net *net = dev_net(idev->dev);
2639
2640         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2641         neigh_sysctl_unregister(idev->arp_parms);
2642 }
2643
2644 static struct ctl_table ctl_forward_entry[] = {
2645         {
2646                 .procname       = "ip_forward",
2647                 .data           = &ipv4_devconf.data[
2648                                         IPV4_DEVCONF_FORWARDING - 1],
2649                 .maxlen         = sizeof(int),
2650                 .mode           = 0644,
2651                 .proc_handler   = devinet_sysctl_forward,
2652                 .extra1         = &ipv4_devconf,
2653                 .extra2         = &init_net,
2654         },
2655         { },
2656 };
2657 #endif
2658
2659 static __net_init int devinet_init_net(struct net *net)
2660 {
2661         int err;
2662         struct ipv4_devconf *all, *dflt;
2663 #ifdef CONFIG_SYSCTL
2664         struct ctl_table *tbl;
2665         struct ctl_table_header *forw_hdr;
2666 #endif
2667
2668         err = -ENOMEM;
2669         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2670         if (!all)
2671                 goto err_alloc_all;
2672
2673         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2674         if (!dflt)
2675                 goto err_alloc_dflt;
2676
2677 #ifdef CONFIG_SYSCTL
2678         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2679         if (!tbl)
2680                 goto err_alloc_ctl;
2681
2682         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2683         tbl[0].extra1 = all;
2684         tbl[0].extra2 = net;
2685 #endif
2686
2687         if (!net_eq(net, &init_net)) {
2688                 switch (net_inherit_devconf()) {
2689                 case 3:
2690                         /* copy from the current netns */
2691                         memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2692                                sizeof(ipv4_devconf));
2693                         memcpy(dflt,
2694                                current->nsproxy->net_ns->ipv4.devconf_dflt,
2695                                sizeof(ipv4_devconf_dflt));
2696                         break;
2697                 case 0:
2698                 case 1:
2699                         /* copy from init_net */
2700                         memcpy(all, init_net.ipv4.devconf_all,
2701                                sizeof(ipv4_devconf));
2702                         memcpy(dflt, init_net.ipv4.devconf_dflt,
2703                                sizeof(ipv4_devconf_dflt));
2704                         break;
2705                 case 2:
2706                         /* use compiled values */
2707                         break;
2708                 }
2709         }
2710
2711 #ifdef CONFIG_SYSCTL
2712         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2713         if (err < 0)
2714                 goto err_reg_all;
2715
2716         err = __devinet_sysctl_register(net, "default",
2717                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2718         if (err < 0)
2719                 goto err_reg_dflt;
2720
2721         err = -ENOMEM;
2722         forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2723                                           ARRAY_SIZE(ctl_forward_entry));
2724         if (!forw_hdr)
2725                 goto err_reg_ctl;
2726         net->ipv4.forw_hdr = forw_hdr;
2727 #endif
2728
2729         net->ipv4.devconf_all = all;
2730         net->ipv4.devconf_dflt = dflt;
2731         return 0;
2732
2733 #ifdef CONFIG_SYSCTL
2734 err_reg_ctl:
2735         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2736 err_reg_dflt:
2737         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2738 err_reg_all:
2739         kfree(tbl);
2740 err_alloc_ctl:
2741 #endif
2742         kfree(dflt);
2743 err_alloc_dflt:
2744         kfree(all);
2745 err_alloc_all:
2746         return err;
2747 }
2748
2749 static __net_exit void devinet_exit_net(struct net *net)
2750 {
2751 #ifdef CONFIG_SYSCTL
2752         struct ctl_table *tbl;
2753
2754         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2755         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2756         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2757                                     NETCONFA_IFINDEX_DEFAULT);
2758         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2759                                     NETCONFA_IFINDEX_ALL);
2760         kfree(tbl);
2761 #endif
2762         kfree(net->ipv4.devconf_dflt);
2763         kfree(net->ipv4.devconf_all);
2764 }
2765
2766 static __net_initdata struct pernet_operations devinet_ops = {
2767         .init = devinet_init_net,
2768         .exit = devinet_exit_net,
2769 };
2770
2771 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2772         .family           = AF_INET,
2773         .fill_link_af     = inet_fill_link_af,
2774         .get_link_af_size = inet_get_link_af_size,
2775         .validate_link_af = inet_validate_link_af,
2776         .set_link_af      = inet_set_link_af,
2777 };
2778
2779 void __init devinet_init(void)
2780 {
2781         int i;
2782
2783         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2784                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2785
2786         register_pernet_subsys(&devinet_ops);
2787         register_netdevice_notifier(&ip_netdev_notifier);
2788
2789         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2790
2791         rtnl_af_register(&inet_af_ops);
2792
2793         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2794         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2795         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2796                       RTNL_FLAG_DUMP_UNLOCKED);
2797         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2798                       inet_netconf_dump_devconf,
2799                       RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
2800 }