media: imx274: get rid of mode_index
[linux-2.6-microblaze.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/*
 * Access attribute @attr of the default devconf of namespace @net.
 *
 * @net is parenthesised so that any pointer-valued expression may be passed
 * safely.  @attr is forwarded untouched to IPV4_DEVCONF().
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
103 };
104
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127         ASSERT_RTNL();
128         hlist_del_init_rcu(&ifa->hash);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         struct net_device *result = NULL;
142         struct in_ifaddr *ifa;
143
144         rcu_read_lock();
145         ifa = inet_lookup_ifaddr_rcu(net, addr);
146         if (!ifa) {
147                 struct flowi4 fl4 = { .daddr = addr };
148                 struct fib_result res = { 0 };
149                 struct fib_table *local;
150
151                 /* Fallback to FIB local table so that communication
152                  * over loopback subnets work.
153                  */
154                 local = fib_get_table(net, RT_TABLE_LOCAL);
155                 if (local &&
156                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
157                     res.type == RTN_LOCAL)
158                         result = FIB_RES_DEV(res);
159         } else {
160                 result = ifa->ifa_dev->dev;
161         }
162         if (result && devref)
163                 dev_hold(result);
164         rcu_read_unlock();
165         return result;
166 }
167 EXPORT_SYMBOL(__ip_dev_find);
168
169 /* called under RCU lock */
170 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
171 {
172         u32 hash = inet_addr_hash(net, addr);
173         struct in_ifaddr *ifa;
174
175         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
176                 if (ifa->ifa_local == addr &&
177                     net_eq(dev_net(ifa->ifa_dev->dev), net))
178                         return ifa;
179
180         return NULL;
181 }
182
183 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
184
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
186 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
187 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
188                          int destroy);
189 #ifdef CONFIG_SYSCTL
190 static int devinet_sysctl_register(struct in_device *idev);
191 static void devinet_sysctl_unregister(struct in_device *idev);
192 #else
193 static int devinet_sysctl_register(struct in_device *idev)
194 {
195         return 0;
196 }
197 static void devinet_sysctl_unregister(struct in_device *idev)
198 {
199 }
200 #endif
201
202 /* Locks all the inet devices. */
203
204 static struct in_ifaddr *inet_alloc_ifa(void)
205 {
206         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
207 }
208
209 static void inet_rcu_free_ifa(struct rcu_head *head)
210 {
211         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
212         if (ifa->ifa_dev)
213                 in_dev_put(ifa->ifa_dev);
214         kfree(ifa);
215 }
216
217 static void inet_free_ifa(struct in_ifaddr *ifa)
218 {
219         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
220 }
221
222 void in_dev_finish_destroy(struct in_device *idev)
223 {
224         struct net_device *dev = idev->dev;
225
226         WARN_ON(idev->ifa_list);
227         WARN_ON(idev->mc_list);
228         kfree(rcu_dereference_protected(idev->mc_hash, 1));
229 #ifdef NET_REFCNT_DEBUG
230         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
231 #endif
232         dev_put(dev);
233         if (!idev->dead)
234                 pr_err("Freeing alive in_device %p\n", idev);
235         else
236                 kfree(idev);
237 }
238 EXPORT_SYMBOL(in_dev_finish_destroy);
239
240 static struct in_device *inetdev_init(struct net_device *dev)
241 {
242         struct in_device *in_dev;
243         int err = -ENOMEM;
244
245         ASSERT_RTNL();
246
247         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
248         if (!in_dev)
249                 goto out;
250         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
251                         sizeof(in_dev->cnf));
252         in_dev->cnf.sysctl = NULL;
253         in_dev->dev = dev;
254         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
255         if (!in_dev->arp_parms)
256                 goto out_kfree;
257         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
258                 dev_disable_lro(dev);
259         /* Reference in_dev->dev */
260         dev_hold(dev);
261         /* Account for reference dev->ip_ptr (below) */
262         refcount_set(&in_dev->refcnt, 1);
263
264         err = devinet_sysctl_register(in_dev);
265         if (err) {
266                 in_dev->dead = 1;
267                 in_dev_put(in_dev);
268                 in_dev = NULL;
269                 goto out;
270         }
271         ip_mc_init_dev(in_dev);
272         if (dev->flags & IFF_UP)
273                 ip_mc_up(in_dev);
274
275         /* we can receive as soon as ip_ptr is set -- do this last */
276         rcu_assign_pointer(dev->ip_ptr, in_dev);
277 out:
278         return in_dev ?: ERR_PTR(err);
279 out_kfree:
280         kfree(in_dev);
281         in_dev = NULL;
282         goto out;
283 }
284
285 static void in_dev_rcu_put(struct rcu_head *head)
286 {
287         struct in_device *idev = container_of(head, struct in_device, rcu_head);
288         in_dev_put(idev);
289 }
290
291 static void inetdev_destroy(struct in_device *in_dev)
292 {
293         struct in_ifaddr *ifa;
294         struct net_device *dev;
295
296         ASSERT_RTNL();
297
298         dev = in_dev->dev;
299
300         in_dev->dead = 1;
301
302         ip_mc_destroy_dev(in_dev);
303
304         while ((ifa = in_dev->ifa_list) != NULL) {
305                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
306                 inet_free_ifa(ifa);
307         }
308
309         RCU_INIT_POINTER(dev->ip_ptr, NULL);
310
311         devinet_sysctl_unregister(in_dev);
312         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
313         arp_ifdown(dev);
314
315         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
316 }
317
318 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
319 {
320         rcu_read_lock();
321         for_primary_ifa(in_dev) {
322                 if (inet_ifa_match(a, ifa)) {
323                         if (!b || inet_ifa_match(b, ifa)) {
324                                 rcu_read_unlock();
325                                 return 1;
326                         }
327                 }
328         } endfor_ifa(in_dev);
329         rcu_read_unlock();
330         return 0;
331 }
332
333 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
334                          int destroy, struct nlmsghdr *nlh, u32 portid)
335 {
336         struct in_ifaddr *promote = NULL;
337         struct in_ifaddr *ifa, *ifa1 = *ifap;
338         struct in_ifaddr *last_prim = in_dev->ifa_list;
339         struct in_ifaddr *prev_prom = NULL;
340         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
341
342         ASSERT_RTNL();
343
344         if (in_dev->dead)
345                 goto no_promotions;
346
347         /* 1. Deleting primary ifaddr forces deletion all secondaries
348          * unless alias promotion is set
349          **/
350
351         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
352                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
353
354                 while ((ifa = *ifap1) != NULL) {
355                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
356                             ifa1->ifa_scope <= ifa->ifa_scope)
357                                 last_prim = ifa;
358
359                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
360                             ifa1->ifa_mask != ifa->ifa_mask ||
361                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
362                                 ifap1 = &ifa->ifa_next;
363                                 prev_prom = ifa;
364                                 continue;
365                         }
366
367                         if (!do_promote) {
368                                 inet_hash_remove(ifa);
369                                 *ifap1 = ifa->ifa_next;
370
371                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
372                                 blocking_notifier_call_chain(&inetaddr_chain,
373                                                 NETDEV_DOWN, ifa);
374                                 inet_free_ifa(ifa);
375                         } else {
376                                 promote = ifa;
377                                 break;
378                         }
379                 }
380         }
381
382         /* On promotion all secondaries from subnet are changing
383          * the primary IP, we must remove all their routes silently
384          * and later to add them back with new prefsrc. Do this
385          * while all addresses are on the device list.
386          */
387         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
388                 if (ifa1->ifa_mask == ifa->ifa_mask &&
389                     inet_ifa_match(ifa1->ifa_address, ifa))
390                         fib_del_ifaddr(ifa, ifa1);
391         }
392
393 no_promotions:
394         /* 2. Unlink it */
395
396         *ifap = ifa1->ifa_next;
397         inet_hash_remove(ifa1);
398
399         /* 3. Announce address deletion */
400
401         /* Send message first, then call notifier.
402            At first sight, FIB update triggered by notifier
403            will refer to already deleted ifaddr, that could confuse
404            netlink listeners. It is not true: look, gated sees
405            that route deleted and if it still thinks that ifaddr
406            is valid, it will try to restore deleted routes... Grr.
407            So that, this order is correct.
408          */
409         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
410         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
411
412         if (promote) {
413                 struct in_ifaddr *next_sec = promote->ifa_next;
414
415                 if (prev_prom) {
416                         prev_prom->ifa_next = promote->ifa_next;
417                         promote->ifa_next = last_prim->ifa_next;
418                         last_prim->ifa_next = promote;
419                 }
420
421                 promote->ifa_flags &= ~IFA_F_SECONDARY;
422                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
423                 blocking_notifier_call_chain(&inetaddr_chain,
424                                 NETDEV_UP, promote);
425                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
426                         if (ifa1->ifa_mask != ifa->ifa_mask ||
427                             !inet_ifa_match(ifa1->ifa_address, ifa))
428                                         continue;
429                         fib_add_ifaddr(ifa);
430                 }
431
432         }
433         if (destroy)
434                 inet_free_ifa(ifa1);
435 }
436
437 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
438                          int destroy)
439 {
440         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
441 }
442
/* Delayed work item that runs check_lifetime() (defined further down). */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
446
447 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
448                              u32 portid, struct netlink_ext_ack *extack)
449 {
450         struct in_device *in_dev = ifa->ifa_dev;
451         struct in_ifaddr *ifa1, **ifap, **last_primary;
452         struct in_validator_info ivi;
453         int ret;
454
455         ASSERT_RTNL();
456
457         if (!ifa->ifa_local) {
458                 inet_free_ifa(ifa);
459                 return 0;
460         }
461
462         ifa->ifa_flags &= ~IFA_F_SECONDARY;
463         last_primary = &in_dev->ifa_list;
464
465         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
466              ifap = &ifa1->ifa_next) {
467                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
468                     ifa->ifa_scope <= ifa1->ifa_scope)
469                         last_primary = &ifa1->ifa_next;
470                 if (ifa1->ifa_mask == ifa->ifa_mask &&
471                     inet_ifa_match(ifa1->ifa_address, ifa)) {
472                         if (ifa1->ifa_local == ifa->ifa_local) {
473                                 inet_free_ifa(ifa);
474                                 return -EEXIST;
475                         }
476                         if (ifa1->ifa_scope != ifa->ifa_scope) {
477                                 inet_free_ifa(ifa);
478                                 return -EINVAL;
479                         }
480                         ifa->ifa_flags |= IFA_F_SECONDARY;
481                 }
482         }
483
484         /* Allow any devices that wish to register ifaddr validtors to weigh
485          * in now, before changes are committed.  The rntl lock is serializing
486          * access here, so the state should not change between a validator call
487          * and a final notify on commit.  This isn't invoked on promotion under
488          * the assumption that validators are checking the address itself, and
489          * not the flags.
490          */
491         ivi.ivi_addr = ifa->ifa_address;
492         ivi.ivi_dev = ifa->ifa_dev;
493         ivi.extack = extack;
494         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
495                                            NETDEV_UP, &ivi);
496         ret = notifier_to_errno(ret);
497         if (ret) {
498                 inet_free_ifa(ifa);
499                 return ret;
500         }
501
502         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
503                 prandom_seed((__force u32) ifa->ifa_local);
504                 ifap = last_primary;
505         }
506
507         ifa->ifa_next = *ifap;
508         *ifap = ifa;
509
510         inet_hash_insert(dev_net(in_dev->dev), ifa);
511
512         cancel_delayed_work(&check_lifetime_work);
513         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
514
515         /* Send message first, then call notifier.
516            Notifier will trigger FIB update, so that
517            listeners of netlink will know about new ifaddr */
518         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
519         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
520
521         return 0;
522 }
523
524 static int inet_insert_ifa(struct in_ifaddr *ifa)
525 {
526         return __inet_insert_ifa(ifa, NULL, 0, NULL);
527 }
528
529 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
530 {
531         struct in_device *in_dev = __in_dev_get_rtnl(dev);
532
533         ASSERT_RTNL();
534
535         if (!in_dev) {
536                 inet_free_ifa(ifa);
537                 return -ENOBUFS;
538         }
539         ipv4_devconf_setall(in_dev);
540         neigh_parms_data_state_setall(in_dev->arp_parms);
541         if (ifa->ifa_dev != in_dev) {
542                 WARN_ON(ifa->ifa_dev);
543                 in_dev_hold(in_dev);
544                 ifa->ifa_dev = in_dev;
545         }
546         if (ipv4_is_loopback(ifa->ifa_local))
547                 ifa->ifa_scope = RT_SCOPE_HOST;
548         return inet_insert_ifa(ifa);
549 }
550
551 /* Caller must hold RCU or RTNL :
552  * We dont take a reference on found in_device
553  */
554 struct in_device *inetdev_by_index(struct net *net, int ifindex)
555 {
556         struct net_device *dev;
557         struct in_device *in_dev = NULL;
558
559         rcu_read_lock();
560         dev = dev_get_by_index_rcu(net, ifindex);
561         if (dev)
562                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
563         rcu_read_unlock();
564         return in_dev;
565 }
566 EXPORT_SYMBOL(inetdev_by_index);
567
568 /* Called only from RTNL semaphored context. No locks. */
569
570 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
571                                     __be32 mask)
572 {
573         ASSERT_RTNL();
574
575         for_primary_ifa(in_dev) {
576                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
577                         return ifa;
578         } endfor_ifa(in_dev);
579         return NULL;
580 }
581
582 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
583 {
584         struct ip_mreqn mreq = {
585                 .imr_multiaddr.s_addr = ifa->ifa_address,
586                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
587         };
588         int ret;
589
590         ASSERT_RTNL();
591
592         lock_sock(sk);
593         if (join)
594                 ret = ip_mc_join_group(sk, &mreq);
595         else
596                 ret = ip_mc_leave_group(sk, &mreq);
597         release_sock(sk);
598
599         return ret;
600 }
601
602 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
603                             struct netlink_ext_ack *extack)
604 {
605         struct net *net = sock_net(skb->sk);
606         struct nlattr *tb[IFA_MAX+1];
607         struct in_device *in_dev;
608         struct ifaddrmsg *ifm;
609         struct in_ifaddr *ifa, **ifap;
610         int err = -EINVAL;
611
612         ASSERT_RTNL();
613
614         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
615                           extack);
616         if (err < 0)
617                 goto errout;
618
619         ifm = nlmsg_data(nlh);
620         in_dev = inetdev_by_index(net, ifm->ifa_index);
621         if (!in_dev) {
622                 err = -ENODEV;
623                 goto errout;
624         }
625
626         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
627              ifap = &ifa->ifa_next) {
628                 if (tb[IFA_LOCAL] &&
629                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
630                         continue;
631
632                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
633                         continue;
634
635                 if (tb[IFA_ADDRESS] &&
636                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
637                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
638                         continue;
639
640                 if (ipv4_is_multicast(ifa->ifa_address))
641                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
642                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
643                 return 0;
644         }
645
646         err = -EADDRNOTAVAIL;
647 errout:
648         return err;
649 }
650
651 #define INFINITY_LIFE_TIME      0xFFFFFFFF
652
653 static void check_lifetime(struct work_struct *work)
654 {
655         unsigned long now, next, next_sec, next_sched;
656         struct in_ifaddr *ifa;
657         struct hlist_node *n;
658         int i;
659
660         now = jiffies;
661         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
662
663         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
664                 bool change_needed = false;
665
666                 rcu_read_lock();
667                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
668                         unsigned long age;
669
670                         if (ifa->ifa_flags & IFA_F_PERMANENT)
671                                 continue;
672
673                         /* We try to batch several events at once. */
674                         age = (now - ifa->ifa_tstamp +
675                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
676
677                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
678                             age >= ifa->ifa_valid_lft) {
679                                 change_needed = true;
680                         } else if (ifa->ifa_preferred_lft ==
681                                    INFINITY_LIFE_TIME) {
682                                 continue;
683                         } else if (age >= ifa->ifa_preferred_lft) {
684                                 if (time_before(ifa->ifa_tstamp +
685                                                 ifa->ifa_valid_lft * HZ, next))
686                                         next = ifa->ifa_tstamp +
687                                                ifa->ifa_valid_lft * HZ;
688
689                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
690                                         change_needed = true;
691                         } else if (time_before(ifa->ifa_tstamp +
692                                                ifa->ifa_preferred_lft * HZ,
693                                                next)) {
694                                 next = ifa->ifa_tstamp +
695                                        ifa->ifa_preferred_lft * HZ;
696                         }
697                 }
698                 rcu_read_unlock();
699                 if (!change_needed)
700                         continue;
701                 rtnl_lock();
702                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
703                         unsigned long age;
704
705                         if (ifa->ifa_flags & IFA_F_PERMANENT)
706                                 continue;
707
708                         /* We try to batch several events at once. */
709                         age = (now - ifa->ifa_tstamp +
710                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
711
712                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
713                             age >= ifa->ifa_valid_lft) {
714                                 struct in_ifaddr **ifap;
715
716                                 for (ifap = &ifa->ifa_dev->ifa_list;
717                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
718                                         if (*ifap == ifa) {
719                                                 inet_del_ifa(ifa->ifa_dev,
720                                                              ifap, 1);
721                                                 break;
722                                         }
723                                 }
724                         } else if (ifa->ifa_preferred_lft !=
725                                    INFINITY_LIFE_TIME &&
726                                    age >= ifa->ifa_preferred_lft &&
727                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
728                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
729                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
730                         }
731                 }
732                 rtnl_unlock();
733         }
734
735         next_sec = round_jiffies_up(next);
736         next_sched = next;
737
738         /* If rounded timeout is accurate enough, accept it. */
739         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
740                 next_sched = next_sec;
741
742         now = jiffies;
743         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
744         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
745                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
746
747         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
748                         next_sched - now);
749 }
750
751 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
752                              __u32 prefered_lft)
753 {
754         unsigned long timeout;
755
756         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
757
758         timeout = addrconf_timeout_fixup(valid_lft, HZ);
759         if (addrconf_finite_timeout(timeout))
760                 ifa->ifa_valid_lft = timeout;
761         else
762                 ifa->ifa_flags |= IFA_F_PERMANENT;
763
764         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
765         if (addrconf_finite_timeout(timeout)) {
766                 if (timeout == 0)
767                         ifa->ifa_flags |= IFA_F_DEPRECATED;
768                 ifa->ifa_preferred_lft = timeout;
769         }
770         ifa->ifa_tstamp = jiffies;
771         if (!ifa->ifa_cstamp)
772                 ifa->ifa_cstamp = ifa->ifa_tstamp;
773 }
774
775 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
776                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
777 {
778         struct nlattr *tb[IFA_MAX+1];
779         struct in_ifaddr *ifa;
780         struct ifaddrmsg *ifm;
781         struct net_device *dev;
782         struct in_device *in_dev;
783         int err;
784
785         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
786                           NULL);
787         if (err < 0)
788                 goto errout;
789
790         ifm = nlmsg_data(nlh);
791         err = -EINVAL;
792         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
793                 goto errout;
794
795         dev = __dev_get_by_index(net, ifm->ifa_index);
796         err = -ENODEV;
797         if (!dev)
798                 goto errout;
799
800         in_dev = __in_dev_get_rtnl(dev);
801         err = -ENOBUFS;
802         if (!in_dev)
803                 goto errout;
804
805         ifa = inet_alloc_ifa();
806         if (!ifa)
807                 /*
808                  * A potential indev allocation can be left alive, it stays
809                  * assigned to its device and is destroy with it.
810                  */
811                 goto errout;
812
813         ipv4_devconf_setall(in_dev);
814         neigh_parms_data_state_setall(in_dev->arp_parms);
815         in_dev_hold(in_dev);
816
817         if (!tb[IFA_ADDRESS])
818                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
819
820         INIT_HLIST_NODE(&ifa->hash);
821         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
822         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
823         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
824                                          ifm->ifa_flags;
825         ifa->ifa_scope = ifm->ifa_scope;
826         ifa->ifa_dev = in_dev;
827
828         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
829         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
830
831         if (tb[IFA_BROADCAST])
832                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
833
834         if (tb[IFA_LABEL])
835                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
836         else
837                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
838
839         if (tb[IFA_RT_PRIORITY])
840                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
841
842         if (tb[IFA_CACHEINFO]) {
843                 struct ifa_cacheinfo *ci;
844
845                 ci = nla_data(tb[IFA_CACHEINFO]);
846                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
847                         err = -EINVAL;
848                         goto errout_free;
849                 }
850                 *pvalid_lft = ci->ifa_valid;
851                 *pprefered_lft = ci->ifa_prefered;
852         }
853
854         return ifa;
855
856 errout_free:
857         inet_free_ifa(ifa);
858 errout:
859         return ERR_PTR(err);
860 }
861
862 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
863 {
864         struct in_device *in_dev = ifa->ifa_dev;
865         struct in_ifaddr *ifa1, **ifap;
866
867         if (!ifa->ifa_local)
868                 return NULL;
869
870         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
871              ifap = &ifa1->ifa_next) {
872                 if (ifa1->ifa_mask == ifa->ifa_mask &&
873                     inet_ifa_match(ifa1->ifa_address, ifa) &&
874                     ifa1->ifa_local == ifa->ifa_local)
875                         return ifa1;
876         }
877         return NULL;
878 }
879
880 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
881                             struct netlink_ext_ack *extack)
882 {
883         struct net *net = sock_net(skb->sk);
884         struct in_ifaddr *ifa;
885         struct in_ifaddr *ifa_existing;
886         __u32 valid_lft = INFINITY_LIFE_TIME;
887         __u32 prefered_lft = INFINITY_LIFE_TIME;
888
889         ASSERT_RTNL();
890
891         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
892         if (IS_ERR(ifa))
893                 return PTR_ERR(ifa);
894
895         ifa_existing = find_matching_ifa(ifa);
896         if (!ifa_existing) {
897                 /* It would be best to check for !NLM_F_CREATE here but
898                  * userspace already relies on not having to provide this.
899                  */
900                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
901                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
902                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
903                                                true, ifa);
904
905                         if (ret < 0) {
906                                 inet_free_ifa(ifa);
907                                 return ret;
908                         }
909                 }
910                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
911                                          extack);
912         } else {
913                 u32 new_metric = ifa->ifa_rt_priority;
914
915                 inet_free_ifa(ifa);
916
917                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
918                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
919                         return -EEXIST;
920                 ifa = ifa_existing;
921
922                 if (ifa->ifa_rt_priority != new_metric) {
923                         fib_modify_prefix_metric(ifa, new_metric);
924                         ifa->ifa_rt_priority = new_metric;
925                 }
926
927                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
928                 cancel_delayed_work(&check_lifetime_work);
929                 queue_delayed_work(system_power_efficient_wq,
930                                 &check_lifetime_work, 0);
931                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
932         }
933         return 0;
934 }
935
936 /*
937  *      Determine a default network mask, based on the IP address.
938  */
939
940 static int inet_abc_len(__be32 addr)
941 {
942         int rc = -1;    /* Something else, probably a multicast. */
943
944         if (ipv4_is_zeronet(addr))
945                 rc = 0;
946         else {
947                 __u32 haddr = ntohl(addr);
948
949                 if (IN_CLASSA(haddr))
950                         rc = 8;
951                 else if (IN_CLASSB(haddr))
952                         rc = 16;
953                 else if (IN_CLASSC(haddr))
954                         rc = 24;
955         }
956
957         return rc;
958 }
959
960
961 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
962 {
963         struct sockaddr_in sin_orig;
964         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
965         struct in_device *in_dev;
966         struct in_ifaddr **ifap = NULL;
967         struct in_ifaddr *ifa = NULL;
968         struct net_device *dev;
969         char *colon;
970         int ret = -EFAULT;
971         int tryaddrmatch = 0;
972
973         ifr->ifr_name[IFNAMSIZ - 1] = 0;
974
975         /* save original address for comparison */
976         memcpy(&sin_orig, sin, sizeof(*sin));
977
978         colon = strchr(ifr->ifr_name, ':');
979         if (colon)
980                 *colon = 0;
981
982         dev_load(net, ifr->ifr_name);
983
984         switch (cmd) {
985         case SIOCGIFADDR:       /* Get interface address */
986         case SIOCGIFBRDADDR:    /* Get the broadcast address */
987         case SIOCGIFDSTADDR:    /* Get the destination address */
988         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
989                 /* Note that these ioctls will not sleep,
990                    so that we do not impose a lock.
991                    One day we will be forced to put shlock here (I mean SMP)
992                  */
993                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
994                 memset(sin, 0, sizeof(*sin));
995                 sin->sin_family = AF_INET;
996                 break;
997
998         case SIOCSIFFLAGS:
999                 ret = -EPERM;
1000                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1001                         goto out;
1002                 break;
1003         case SIOCSIFADDR:       /* Set interface address (and family) */
1004         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1005         case SIOCSIFDSTADDR:    /* Set the destination address */
1006         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1007                 ret = -EPERM;
1008                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1009                         goto out;
1010                 ret = -EINVAL;
1011                 if (sin->sin_family != AF_INET)
1012                         goto out;
1013                 break;
1014         default:
1015                 ret = -EINVAL;
1016                 goto out;
1017         }
1018
1019         rtnl_lock();
1020
1021         ret = -ENODEV;
1022         dev = __dev_get_by_name(net, ifr->ifr_name);
1023         if (!dev)
1024                 goto done;
1025
1026         if (colon)
1027                 *colon = ':';
1028
1029         in_dev = __in_dev_get_rtnl(dev);
1030         if (in_dev) {
1031                 if (tryaddrmatch) {
1032                         /* Matthias Andree */
1033                         /* compare label and address (4.4BSD style) */
1034                         /* note: we only do this for a limited set of ioctls
1035                            and only if the original address family was AF_INET.
1036                            This is checked above. */
1037                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1038                              ifap = &ifa->ifa_next) {
1039                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1040                                     sin_orig.sin_addr.s_addr ==
1041                                                         ifa->ifa_local) {
1042                                         break; /* found */
1043                                 }
1044                         }
1045                 }
1046                 /* we didn't get a match, maybe the application is
1047                    4.3BSD-style and passed in junk so we fall back to
1048                    comparing just the label */
1049                 if (!ifa) {
1050                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1051                              ifap = &ifa->ifa_next)
1052                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1053                                         break;
1054                 }
1055         }
1056
1057         ret = -EADDRNOTAVAIL;
1058         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1059                 goto done;
1060
1061         switch (cmd) {
1062         case SIOCGIFADDR:       /* Get interface address */
1063                 ret = 0;
1064                 sin->sin_addr.s_addr = ifa->ifa_local;
1065                 break;
1066
1067         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1068                 ret = 0;
1069                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1070                 break;
1071
1072         case SIOCGIFDSTADDR:    /* Get the destination address */
1073                 ret = 0;
1074                 sin->sin_addr.s_addr = ifa->ifa_address;
1075                 break;
1076
1077         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1078                 ret = 0;
1079                 sin->sin_addr.s_addr = ifa->ifa_mask;
1080                 break;
1081
1082         case SIOCSIFFLAGS:
1083                 if (colon) {
1084                         ret = -EADDRNOTAVAIL;
1085                         if (!ifa)
1086                                 break;
1087                         ret = 0;
1088                         if (!(ifr->ifr_flags & IFF_UP))
1089                                 inet_del_ifa(in_dev, ifap, 1);
1090                         break;
1091                 }
1092                 ret = dev_change_flags(dev, ifr->ifr_flags);
1093                 break;
1094
1095         case SIOCSIFADDR:       /* Set interface address (and family) */
1096                 ret = -EINVAL;
1097                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1098                         break;
1099
1100                 if (!ifa) {
1101                         ret = -ENOBUFS;
1102                         ifa = inet_alloc_ifa();
1103                         if (!ifa)
1104                                 break;
1105                         INIT_HLIST_NODE(&ifa->hash);
1106                         if (colon)
1107                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1108                         else
1109                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1110                 } else {
1111                         ret = 0;
1112                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1113                                 break;
1114                         inet_del_ifa(in_dev, ifap, 0);
1115                         ifa->ifa_broadcast = 0;
1116                         ifa->ifa_scope = 0;
1117                 }
1118
1119                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1120
1121                 if (!(dev->flags & IFF_POINTOPOINT)) {
1122                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1123                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1124                         if ((dev->flags & IFF_BROADCAST) &&
1125                             ifa->ifa_prefixlen < 31)
1126                                 ifa->ifa_broadcast = ifa->ifa_address |
1127                                                      ~ifa->ifa_mask;
1128                 } else {
1129                         ifa->ifa_prefixlen = 32;
1130                         ifa->ifa_mask = inet_make_mask(32);
1131                 }
1132                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1133                 ret = inet_set_ifa(dev, ifa);
1134                 break;
1135
1136         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1137                 ret = 0;
1138                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1139                         inet_del_ifa(in_dev, ifap, 0);
1140                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1141                         inet_insert_ifa(ifa);
1142                 }
1143                 break;
1144
1145         case SIOCSIFDSTADDR:    /* Set the destination address */
1146                 ret = 0;
1147                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1148                         break;
1149                 ret = -EINVAL;
1150                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1151                         break;
1152                 ret = 0;
1153                 inet_del_ifa(in_dev, ifap, 0);
1154                 ifa->ifa_address = sin->sin_addr.s_addr;
1155                 inet_insert_ifa(ifa);
1156                 break;
1157
1158         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1159
1160                 /*
1161                  *      The mask we set must be legal.
1162                  */
1163                 ret = -EINVAL;
1164                 if (bad_mask(sin->sin_addr.s_addr, 0))
1165                         break;
1166                 ret = 0;
1167                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1168                         __be32 old_mask = ifa->ifa_mask;
1169                         inet_del_ifa(in_dev, ifap, 0);
1170                         ifa->ifa_mask = sin->sin_addr.s_addr;
1171                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1172
1173                         /* See if current broadcast address matches
1174                          * with current netmask, then recalculate
1175                          * the broadcast address. Otherwise it's a
1176                          * funny address, so don't touch it since
1177                          * the user seems to know what (s)he's doing...
1178                          */
1179                         if ((dev->flags & IFF_BROADCAST) &&
1180                             (ifa->ifa_prefixlen < 31) &&
1181                             (ifa->ifa_broadcast ==
1182                              (ifa->ifa_local|~old_mask))) {
1183                                 ifa->ifa_broadcast = (ifa->ifa_local |
1184                                                       ~sin->sin_addr.s_addr);
1185                         }
1186                         inet_insert_ifa(ifa);
1187                 }
1188                 break;
1189         }
1190 done:
1191         rtnl_unlock();
1192 out:
1193         return ret;
1194 }
1195
1196 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1197 {
1198         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1199         struct in_ifaddr *ifa;
1200         struct ifreq ifr;
1201         int done = 0;
1202
1203         if (WARN_ON(size > sizeof(struct ifreq)))
1204                 goto out;
1205
1206         if (!in_dev)
1207                 goto out;
1208
1209         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1210                 if (!buf) {
1211                         done += size;
1212                         continue;
1213                 }
1214                 if (len < size)
1215                         break;
1216                 memset(&ifr, 0, sizeof(struct ifreq));
1217                 strcpy(ifr.ifr_name, ifa->ifa_label);
1218
1219                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1220                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1221                                                                 ifa->ifa_local;
1222
1223                 if (copy_to_user(buf + done, &ifr, size)) {
1224                         done = -EFAULT;
1225                         break;
1226                 }
1227                 len  -= size;
1228                 done += size;
1229         }
1230 out:
1231         return done;
1232 }
1233
1234 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1235                                  int scope)
1236 {
1237         for_primary_ifa(in_dev) {
1238                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1239                     ifa->ifa_scope <= scope)
1240                         return ifa->ifa_local;
1241         } endfor_ifa(in_dev);
1242
1243         return 0;
1244 }
1245
1246 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1247 {
1248         __be32 addr = 0;
1249         struct in_device *in_dev;
1250         struct net *net = dev_net(dev);
1251         int master_idx;
1252
1253         rcu_read_lock();
1254         in_dev = __in_dev_get_rcu(dev);
1255         if (!in_dev)
1256                 goto no_in_dev;
1257
1258         for_primary_ifa(in_dev) {
1259                 if (ifa->ifa_scope > scope)
1260                         continue;
1261                 if (!dst || inet_ifa_match(dst, ifa)) {
1262                         addr = ifa->ifa_local;
1263                         break;
1264                 }
1265                 if (!addr)
1266                         addr = ifa->ifa_local;
1267         } endfor_ifa(in_dev);
1268
1269         if (addr)
1270                 goto out_unlock;
1271 no_in_dev:
1272         master_idx = l3mdev_master_ifindex_rcu(dev);
1273
1274         /* For VRFs, the VRF device takes the place of the loopback device,
1275          * with addresses on it being preferred.  Note in such cases the
1276          * loopback device will be among the devices that fail the master_idx
1277          * equality check in the loop below.
1278          */
1279         if (master_idx &&
1280             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1281             (in_dev = __in_dev_get_rcu(dev))) {
1282                 addr = in_dev_select_addr(in_dev, scope);
1283                 if (addr)
1284                         goto out_unlock;
1285         }
1286
1287         /* Not loopback addresses on loopback should be preferred
1288            in this case. It is important that lo is the first interface
1289            in dev_base list.
1290          */
1291         for_each_netdev_rcu(net, dev) {
1292                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1293                         continue;
1294
1295                 in_dev = __in_dev_get_rcu(dev);
1296                 if (!in_dev)
1297                         continue;
1298
1299                 addr = in_dev_select_addr(in_dev, scope);
1300                 if (addr)
1301                         goto out_unlock;
1302         }
1303 out_unlock:
1304         rcu_read_unlock();
1305         return addr;
1306 }
1307 EXPORT_SYMBOL(inet_select_addr);
1308
1309 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1310                               __be32 local, int scope)
1311 {
1312         int same = 0;
1313         __be32 addr = 0;
1314
1315         for_ifa(in_dev) {
1316                 if (!addr &&
1317                     (local == ifa->ifa_local || !local) &&
1318                     ifa->ifa_scope <= scope) {
1319                         addr = ifa->ifa_local;
1320                         if (same)
1321                                 break;
1322                 }
1323                 if (!same) {
1324                         same = (!local || inet_ifa_match(local, ifa)) &&
1325                                 (!dst || inet_ifa_match(dst, ifa));
1326                         if (same && addr) {
1327                                 if (local || !dst)
1328                                         break;
1329                                 /* Is the selected addr into dst subnet? */
1330                                 if (inet_ifa_match(addr, ifa))
1331                                         break;
1332                                 /* No, then can we use new local src? */
1333                                 if (ifa->ifa_scope <= scope) {
1334                                         addr = ifa->ifa_local;
1335                                         break;
1336                                 }
1337                                 /* search for large dst subnet for addr */
1338                                 same = 0;
1339                         }
1340                 }
1341         } endfor_ifa(in_dev);
1342
1343         return same ? addr : 0;
1344 }
1345
1346 /*
1347  * Confirm that local IP address exists using wildcards:
1348  * - net: netns to check, cannot be NULL
1349  * - in_dev: only on this interface, NULL=any interface
1350  * - dst: only in the same subnet as dst, 0=any dst
1351  * - local: address, 0=autoselect the local address
1352  * - scope: maximum allowed scope value for the local address
1353  */
1354 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1355                          __be32 dst, __be32 local, int scope)
1356 {
1357         __be32 addr = 0;
1358         struct net_device *dev;
1359
1360         if (in_dev)
1361                 return confirm_addr_indev(in_dev, dst, local, scope);
1362
1363         rcu_read_lock();
1364         for_each_netdev_rcu(net, dev) {
1365                 in_dev = __in_dev_get_rcu(dev);
1366                 if (in_dev) {
1367                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1368                         if (addr)
1369                                 break;
1370                 }
1371         }
1372         rcu_read_unlock();
1373
1374         return addr;
1375 }
1376 EXPORT_SYMBOL(inet_confirm_addr);
1377
1378 /*
1379  *      Device notifier
1380  */
1381
1382 int register_inetaddr_notifier(struct notifier_block *nb)
1383 {
1384         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1385 }
1386 EXPORT_SYMBOL(register_inetaddr_notifier);
1387
1388 int unregister_inetaddr_notifier(struct notifier_block *nb)
1389 {
1390         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1391 }
1392 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1393
1394 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1395 {
1396         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1397 }
1398 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1399
1400 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1401 {
1402         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1403             nb);
1404 }
1405 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1406
1407 /* Rename ifa_labels for a device name change. Make some effort to preserve
1408  * existing alias numbering and to create unique labels if possible.
1409 */
1410 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1411 {
1412         struct in_ifaddr *ifa;
1413         int named = 0;
1414
1415         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1416                 char old[IFNAMSIZ], *dot;
1417
1418                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1419                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1420                 if (named++ == 0)
1421                         goto skip;
1422                 dot = strchr(old, ':');
1423                 if (!dot) {
1424                         sprintf(old, ":%d", named);
1425                         dot = old;
1426                 }
1427                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1428                         strcat(ifa->ifa_label, dot);
1429                 else
1430                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1431 skip:
1432                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1433         }
1434 }
1435
1436 static bool inetdev_valid_mtu(unsigned int mtu)
1437 {
1438         return mtu >= IPV4_MIN_MTU;
1439 }
1440
1441 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1442                                         struct in_device *in_dev)
1443
1444 {
1445         struct in_ifaddr *ifa;
1446
1447         for (ifa = in_dev->ifa_list; ifa;
1448              ifa = ifa->ifa_next) {
1449                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1450                          ifa->ifa_local, dev,
1451                          ifa->ifa_local, NULL,
1452                          dev->dev_addr, NULL);
1453         }
1454 }
1455
1456 /* Called only under RTNL semaphore */
1457
1458 static int inetdev_event(struct notifier_block *this, unsigned long event,
1459                          void *ptr)
1460 {
1461         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1462         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1463
1464         ASSERT_RTNL();
1465
1466         if (!in_dev) {
1467                 if (event == NETDEV_REGISTER) {
1468                         in_dev = inetdev_init(dev);
1469                         if (IS_ERR(in_dev))
1470                                 return notifier_from_errno(PTR_ERR(in_dev));
1471                         if (dev->flags & IFF_LOOPBACK) {
1472                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1473                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1474                         }
1475                 } else if (event == NETDEV_CHANGEMTU) {
1476                         /* Re-enabling IP */
1477                         if (inetdev_valid_mtu(dev->mtu))
1478                                 in_dev = inetdev_init(dev);
1479                 }
1480                 goto out;
1481         }
1482
1483         switch (event) {
1484         case NETDEV_REGISTER:
1485                 pr_debug("%s: bug\n", __func__);
1486                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1487                 break;
1488         case NETDEV_UP:
1489                 if (!inetdev_valid_mtu(dev->mtu))
1490                         break;
1491                 if (dev->flags & IFF_LOOPBACK) {
1492                         struct in_ifaddr *ifa = inet_alloc_ifa();
1493
1494                         if (ifa) {
1495                                 INIT_HLIST_NODE(&ifa->hash);
1496                                 ifa->ifa_local =
1497                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1498                                 ifa->ifa_prefixlen = 8;
1499                                 ifa->ifa_mask = inet_make_mask(8);
1500                                 in_dev_hold(in_dev);
1501                                 ifa->ifa_dev = in_dev;
1502                                 ifa->ifa_scope = RT_SCOPE_HOST;
1503                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1504                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1505                                                  INFINITY_LIFE_TIME);
1506                                 ipv4_devconf_setall(in_dev);
1507                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1508                                 inet_insert_ifa(ifa);
1509                         }
1510                 }
1511                 ip_mc_up(in_dev);
1512                 /* fall through */
1513         case NETDEV_CHANGEADDR:
1514                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1515                         break;
1516                 /* fall through */
1517         case NETDEV_NOTIFY_PEERS:
1518                 /* Send gratuitous ARP to notify of link change */
1519                 inetdev_send_gratuitous_arp(dev, in_dev);
1520                 break;
1521         case NETDEV_DOWN:
1522                 ip_mc_down(in_dev);
1523                 break;
1524         case NETDEV_PRE_TYPE_CHANGE:
1525                 ip_mc_unmap(in_dev);
1526                 break;
1527         case NETDEV_POST_TYPE_CHANGE:
1528                 ip_mc_remap(in_dev);
1529                 break;
1530         case NETDEV_CHANGEMTU:
1531                 if (inetdev_valid_mtu(dev->mtu))
1532                         break;
1533                 /* disable IP when MTU is not enough */
1534                 /* fall through */
1535         case NETDEV_UNREGISTER:
1536                 inetdev_destroy(in_dev);
1537                 break;
1538         case NETDEV_CHANGENAME:
1539                 /* Do not notify about label change, this event is
1540                  * not interesting to applications using netlink.
1541                  */
1542                 inetdev_changename(dev, in_dev);
1543
1544                 devinet_sysctl_unregister(in_dev);
1545                 devinet_sysctl_register(in_dev);
1546                 break;
1547         }
1548 out:
1549         return NOTIFY_DONE;
1550 }
1551
1552 static struct notifier_block ip_netdev_notifier = {
1553         .notifier_call = inetdev_event,
1554 };
1555
1556 static size_t inet_nlmsg_size(void)
1557 {
1558         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1559                + nla_total_size(4) /* IFA_ADDRESS */
1560                + nla_total_size(4) /* IFA_LOCAL */
1561                + nla_total_size(4) /* IFA_BROADCAST */
1562                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1563                + nla_total_size(4)  /* IFA_FLAGS */
1564                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1565                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1566 }
1567
1568 static inline u32 cstamp_delta(unsigned long cstamp)
1569 {
1570         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1571 }
1572
1573 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1574                          unsigned long tstamp, u32 preferred, u32 valid)
1575 {
1576         struct ifa_cacheinfo ci;
1577
1578         ci.cstamp = cstamp_delta(cstamp);
1579         ci.tstamp = cstamp_delta(tstamp);
1580         ci.ifa_prefered = preferred;
1581         ci.ifa_valid = valid;
1582
1583         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1584 }
1585
1586 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1587                             u32 portid, u32 seq, int event, unsigned int flags)
1588 {
1589         struct ifaddrmsg *ifm;
1590         struct nlmsghdr  *nlh;
1591         u32 preferred, valid;
1592
1593         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1594         if (!nlh)
1595                 return -EMSGSIZE;
1596
1597         ifm = nlmsg_data(nlh);
1598         ifm->ifa_family = AF_INET;
1599         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1600         ifm->ifa_flags = ifa->ifa_flags;
1601         ifm->ifa_scope = ifa->ifa_scope;
1602         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1603
1604         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1605                 preferred = ifa->ifa_preferred_lft;
1606                 valid = ifa->ifa_valid_lft;
1607                 if (preferred != INFINITY_LIFE_TIME) {
1608                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1609
1610                         if (preferred > tval)
1611                                 preferred -= tval;
1612                         else
1613                                 preferred = 0;
1614                         if (valid != INFINITY_LIFE_TIME) {
1615                                 if (valid > tval)
1616                                         valid -= tval;
1617                                 else
1618                                         valid = 0;
1619                         }
1620                 }
1621         } else {
1622                 preferred = INFINITY_LIFE_TIME;
1623                 valid = INFINITY_LIFE_TIME;
1624         }
1625         if ((ifa->ifa_address &&
1626              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1627             (ifa->ifa_local &&
1628              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1629             (ifa->ifa_broadcast &&
1630              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1631             (ifa->ifa_label[0] &&
1632              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1633             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1634             (ifa->ifa_rt_priority &&
1635              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1636             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1637                           preferred, valid))
1638                 goto nla_put_failure;
1639
1640         nlmsg_end(skb, nlh);
1641         return 0;
1642
1643 nla_put_failure:
1644         nlmsg_cancel(skb, nlh);
1645         return -EMSGSIZE;
1646 }
1647
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (flagged NLM_F_MULTI)
 * for every address on the ifa_list of every device found in the
 * dev_index_head hash table of the namespace of skb->sk.
 *
 * The walk position is read from and written back to cb->args:
 *   args[0] - index of the hash bucket
 *   args[1] - position of the device inside that bucket
 *   args[2] - position of the address inside that device's ifa_list
 *
 * Returns skb->len.
 */
1648 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1649 {
1650         struct net *net = sock_net(skb->sk);
1651         int h, s_h;
1652         int idx, s_idx;
1653         int ip_idx, s_ip_idx;
1654         struct net_device *dev;
1655         struct in_device *in_dev;
1656         struct in_ifaddr *ifa;
1657         struct hlist_head *head;
1658
             /* Starting position saved by the previous invocation (see "done"). */
1659         s_h = cb->args[0];
1660         s_idx = idx = cb->args[1];
1661         s_ip_idx = ip_idx = cb->args[2];
1662
             /* s_idx only applies to the first bucket visited; it is zeroed afterwards. */
1663         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1664                 idx = 0;
1665                 head = &net->dev_index_head[h];
1666                 rcu_read_lock();
                     /* Value that nl_dump_check_consistent() below checks against. */
1667                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1668                           net->dev_base_seq;
1669                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
                             /* Skip devices located before the starting position. */
1670                         if (idx < s_idx)
1671                                 goto cont;
                             /*
                              * The saved address offset is only meaningful for
                              * the device at the starting position.
                              */
1672                         if (h > s_h || idx > s_idx)
1673                                 s_ip_idx = 0;
1674                         in_dev = __in_dev_get_rcu(dev);
1675                         if (!in_dev)
1676                                 goto cont;
1677
1678                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1679                              ifa = ifa->ifa_next, ip_idx++) {
1680                                 if (ip_idx < s_ip_idx)
1681                                         continue;
                                     /*
                                      * On failure stop here; h, idx and ip_idx
                                      * are stored at "done" as they stand.
                                      */
1682                                 if (inet_fill_ifaddr(skb, ifa,
1683                                              NETLINK_CB(cb->skb).portid,
1684                                              cb->nlh->nlmsg_seq,
1685                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1686                                         rcu_read_unlock();
1687                                         goto done;
1688                                 }
1689                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1690                         }
1691 cont:
1692                         idx++;
1693                 }
1694                 rcu_read_unlock();
1695         }
1696
1697 done:
             /* Record the current position in cb->args. */
1698         cb->args[0] = h;
1699         cb->args[1] = idx;
1700         cb->args[2] = ip_idx;
1701
1702         return skb->len;
1703 }
1704
1705 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1706                       u32 portid)
1707 {
1708         struct sk_buff *skb;
1709         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1710         int err = -ENOBUFS;
1711         struct net *net;
1712
1713         net = dev_net(ifa->ifa_dev->dev);
1714         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1715         if (!skb)
1716                 goto errout;
1717
1718         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1719         if (err < 0) {
1720                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1721                 WARN_ON(err == -EMSGSIZE);
1722                 kfree_skb(skb);
1723                 goto errout;
1724         }
1725         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1726         return;
1727 errout:
1728         if (err < 0)
1729                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1730 }
1731
1732 static size_t inet_get_link_af_size(const struct net_device *dev,
1733                                     u32 ext_filter_mask)
1734 {
1735         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1736
1737         if (!in_dev)
1738                 return 0;
1739
1740         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1741 }
1742
1743 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1744                              u32 ext_filter_mask)
1745 {
1746         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1747         struct nlattr *nla;
1748         int i;
1749
1750         if (!in_dev)
1751                 return -ENODATA;
1752
1753         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1754         if (!nla)
1755                 return -EMSGSIZE;
1756
1757         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1758                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1759
1760         return 0;
1761 }
1762
/*
 * Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF is declared as a nested attribute.
 */
1763 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1764         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1765 };
1766
1767 static int inet_validate_link_af(const struct net_device *dev,
1768                                  const struct nlattr *nla)
1769 {
1770         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1771         int err, rem;
1772
1773         if (dev && !__in_dev_get_rcu(dev))
1774                 return -EAFNOSUPPORT;
1775
1776         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1777         if (err < 0)
1778                 return err;
1779
1780         if (tb[IFLA_INET_CONF]) {
1781                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1782                         int cfgid = nla_type(a);
1783
1784                         if (nla_len(a) < 4)
1785                                 return -EINVAL;
1786
1787                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1788                                 return -EINVAL;
1789                 }
1790         }
1791
1792         return 0;
1793 }
1794
1795 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1796 {
1797         struct in_device *in_dev = __in_dev_get_rcu(dev);
1798         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1799         int rem;
1800
1801         if (!in_dev)
1802                 return -EAFNOSUPPORT;
1803
1804         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1805                 BUG();
1806
1807         if (tb[IFLA_INET_CONF]) {
1808                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1809                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1810         }
1811
1812         return 0;
1813 }
1814
1815 static int inet_netconf_msgsize_devconf(int type)
1816 {
1817         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1818                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1819         bool all = false;
1820
1821         if (type == NETCONFA_ALL)
1822                 all = true;
1823
1824         if (all || type == NETCONFA_FORWARDING)
1825                 size += nla_total_size(4);
1826         if (all || type == NETCONFA_RP_FILTER)
1827                 size += nla_total_size(4);
1828         if (all || type == NETCONFA_MC_FORWARDING)
1829                 size += nla_total_size(4);
1830         if (all || type == NETCONFA_PROXY_NEIGH)
1831                 size += nla_total_size(4);
1832         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1833                 size += nla_total_size(4);
1834
1835         return size;
1836 }
1837
1838 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1839                                      struct ipv4_devconf *devconf, u32 portid,
1840                                      u32 seq, int event, unsigned int flags,
1841                                      int type)
1842 {
1843         struct nlmsghdr  *nlh;
1844         struct netconfmsg *ncm;
1845         bool all = false;
1846
1847         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1848                         flags);
1849         if (!nlh)
1850                 return -EMSGSIZE;
1851
1852         if (type == NETCONFA_ALL)
1853                 all = true;
1854
1855         ncm = nlmsg_data(nlh);
1856         ncm->ncm_family = AF_INET;
1857
1858         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1859                 goto nla_put_failure;
1860
1861         if (!devconf)
1862                 goto out;
1863
1864         if ((all || type == NETCONFA_FORWARDING) &&
1865             nla_put_s32(skb, NETCONFA_FORWARDING,
1866                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1867                 goto nla_put_failure;
1868         if ((all || type == NETCONFA_RP_FILTER) &&
1869             nla_put_s32(skb, NETCONFA_RP_FILTER,
1870                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1871                 goto nla_put_failure;
1872         if ((all || type == NETCONFA_MC_FORWARDING) &&
1873             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1874                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1875                 goto nla_put_failure;
1876         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1877             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1878                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1879                 goto nla_put_failure;
1880         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1881             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1882                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1883                 goto nla_put_failure;
1884
1885 out:
1886         nlmsg_end(skb, nlh);
1887         return 0;
1888
1889 nla_put_failure:
1890         nlmsg_cancel(skb, nlh);
1891         return -EMSGSIZE;
1892 }
1893
1894 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1895                                  int ifindex, struct ipv4_devconf *devconf)
1896 {
1897         struct sk_buff *skb;
1898         int err = -ENOBUFS;
1899
1900         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1901         if (!skb)
1902                 goto errout;
1903
1904         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1905                                         event, 0, type);
1906         if (err < 0) {
1907                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1908                 WARN_ON(err == -EMSGSIZE);
1909                 kfree_skb(skb);
1910                 goto errout;
1911         }
1912         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1913         return;
1914 errout:
1915         if (err < 0)
1916                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1917 }
1918
/*
 * Attribute policy handed to nlmsg_parse() by inet_netconf_get_devconf().
 * Each listed attribute is given .len = sizeof(int).
 * NOTE(review): NETCONFA_MC_FORWARDING has no entry here although it is
 * emitted by inet_netconf_fill_devconf() - confirm this is intentional.
 */
1919 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1920         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1921         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1922         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1923         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1924         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1925 };
1926
1927 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1928                                     struct nlmsghdr *nlh,
1929                                     struct netlink_ext_ack *extack)
1930 {
1931         struct net *net = sock_net(in_skb->sk);
1932         struct nlattr *tb[NETCONFA_MAX+1];
1933         struct netconfmsg *ncm;
1934         struct sk_buff *skb;
1935         struct ipv4_devconf *devconf;
1936         struct in_device *in_dev;
1937         struct net_device *dev;
1938         int ifindex;
1939         int err;
1940
1941         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1942                           devconf_ipv4_policy, extack);
1943         if (err < 0)
1944                 goto errout;
1945
1946         err = -EINVAL;
1947         if (!tb[NETCONFA_IFINDEX])
1948                 goto errout;
1949
1950         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1951         switch (ifindex) {
1952         case NETCONFA_IFINDEX_ALL:
1953                 devconf = net->ipv4.devconf_all;
1954                 break;
1955         case NETCONFA_IFINDEX_DEFAULT:
1956                 devconf = net->ipv4.devconf_dflt;
1957                 break;
1958         default:
1959                 dev = __dev_get_by_index(net, ifindex);
1960                 if (!dev)
1961                         goto errout;
1962                 in_dev = __in_dev_get_rtnl(dev);
1963                 if (!in_dev)
1964                         goto errout;
1965                 devconf = &in_dev->cnf;
1966                 break;
1967         }
1968
1969         err = -ENOBUFS;
1970         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1971         if (!skb)
1972                 goto errout;
1973
1974         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1975                                         NETLINK_CB(in_skb).portid,
1976                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1977                                         NETCONFA_ALL);
1978         if (err < 0) {
1979                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1980                 WARN_ON(err == -EMSGSIZE);
1981                 kfree_skb(skb);
1982                 goto errout;
1983         }
1984         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1985 errout:
1986         return err;
1987 }
1988
/*
 * Netlink dump callback: emit one RTM_NEWNETCONF message (flagged
 * NLM_F_MULTI, type NETCONFA_ALL) for every device with an in_device
 * found in the dev_index_head hash table, followed by one message for
 * the namespace's devconf_all and one for its devconf_dflt.
 *
 * The walk position is read from and written back to cb->args:
 *   args[0] - hash bucket index; NETDEV_HASHENTRIES and
 *             NETDEV_HASHENTRIES + 1 stand for the "all" and "default"
 *             entries respectively
 *   args[1] - position of the device inside the bucket
 *
 * Returns skb->len.
 */
1989 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1990                                      struct netlink_callback *cb)
1991 {
1992         struct net *net = sock_net(skb->sk);
1993         int h, s_h;
1994         int idx, s_idx;
1995         struct net_device *dev;
1996         struct in_device *in_dev;
1997         struct hlist_head *head;
1998
             /* Starting position saved by the previous invocation (see "done"). */
1999         s_h = cb->args[0];
2000         s_idx = idx = cb->args[1];
2001
             /* s_idx only applies to the first bucket visited; it is zeroed afterwards. */
2002         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2003                 idx = 0;
2004                 head = &net->dev_index_head[h];
2005                 rcu_read_lock();
                     /* Value that nl_dump_check_consistent() below checks against. */
2006                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2007                           net->dev_base_seq;
2008                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
                             /* Skip devices located before the starting position. */
2009                         if (idx < s_idx)
2010                                 goto cont;
2011                         in_dev = __in_dev_get_rcu(dev);
2012                         if (!in_dev)
2013                                 goto cont;
2014
                             /* On failure stop here; h and idx are stored at "done". */
2015                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2016                                                       &in_dev->cnf,
2017                                                       NETLINK_CB(cb->skb).portid,
2018                                                       cb->nlh->nlmsg_seq,
2019                                                       RTM_NEWNETCONF,
2020                                                       NLM_F_MULTI,
2021                                                       NETCONFA_ALL) < 0) {
2022                                 rcu_read_unlock();
2023                                 goto done;
2024                         }
2025                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2026 cont:
2027                         idx++;
2028                 }
2029                 rcu_read_unlock();
2030         }
             /* All buckets walked: emit the "all" entry, then advance h past it. */
2031         if (h == NETDEV_HASHENTRIES) {
2032                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2033                                               net->ipv4.devconf_all,
2034                                               NETLINK_CB(cb->skb).portid,
2035                                               cb->nlh->nlmsg_seq,
2036                                               RTM_NEWNETCONF, NLM_F_MULTI,
2037                                               NETCONFA_ALL) < 0)
2038                         goto done;
2039                 else
2040                         h++;
2041         }
             /* "all" entry done: emit the "default" entry, then advance h past it. */
2042         if (h == NETDEV_HASHENTRIES + 1) {
2043                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2044                                               net->ipv4.devconf_dflt,
2045                                               NETLINK_CB(cb->skb).portid,
2046                                               cb->nlh->nlmsg_seq,
2047                                               RTM_NEWNETCONF, NLM_F_MULTI,
2048                                               NETCONFA_ALL) < 0)
2049                         goto done;
2050                 else
2051                         h++;
2052         }
2053 done:
             /* Record the current position in cb->args. */
2054         cb->args[0] = h;
2055         cb->args[1] = idx;
2056
2057         return skb->len;
2058 }
2059
2060 #ifdef CONFIG_SYSCTL
2061
2062 static void devinet_copy_dflt_conf(struct net *net, int i)
2063 {
2064         struct net_device *dev;
2065
2066         rcu_read_lock();
2067         for_each_netdev_rcu(net, dev) {
2068                 struct in_device *in_dev;
2069
2070                 in_dev = __in_dev_get_rcu(dev);
2071                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2072                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2073         }
2074         rcu_read_unlock();
2075 }
2076
2077 /* called with RTNL locked */
2078 static void inet_forward_change(struct net *net)
2079 {
2080         struct net_device *dev;
2081         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2082
2083         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2084         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2085         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2086                                     NETCONFA_FORWARDING,
2087                                     NETCONFA_IFINDEX_ALL,
2088                                     net->ipv4.devconf_all);
2089         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2090                                     NETCONFA_FORWARDING,
2091                                     NETCONFA_IFINDEX_DEFAULT,
2092                                     net->ipv4.devconf_dflt);
2093
2094         for_each_netdev(net, dev) {
2095                 struct in_device *in_dev;
2096
2097                 if (on)
2098                         dev_disable_lro(dev);
2099
2100                 in_dev = __in_dev_get_rtnl(dev);
2101                 if (in_dev) {
2102                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2103                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2104                                                     NETCONFA_FORWARDING,
2105                                                     dev->ifindex, &in_dev->cnf);
2106                 }
2107         }
2108 }
2109
2110 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2111 {
2112         if (cnf == net->ipv4.devconf_dflt)
2113                 return NETCONFA_IFINDEX_DEFAULT;
2114         else if (cnf == net->ipv4.devconf_all)
2115                 return NETCONFA_IFINDEX_ALL;
2116         else {
2117                 struct in_device *idev
2118                         = container_of(cnf, struct in_device, cnf);
2119                 return idev->dev->ifindex;
2120         }
2121 }
2122
2123 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2124                              void __user *buffer,
2125                              size_t *lenp, loff_t *ppos)
2126 {
2127         int old_value = *(int *)ctl->data;
2128         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2129         int new_value = *(int *)ctl->data;
2130
2131         if (write) {
2132                 struct ipv4_devconf *cnf = ctl->extra1;
2133                 struct net *net = ctl->extra2;
2134                 int i = (int *)ctl->data - cnf->data;
2135                 int ifindex;
2136
2137                 set_bit(i, cnf->state);
2138
2139                 if (cnf == net->ipv4.devconf_dflt)
2140                         devinet_copy_dflt_conf(net, i);
2141                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2142                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2143                         if ((new_value == 0) && (old_value != 0))
2144                                 rt_cache_flush(net);
2145
2146                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2147                     new_value != old_value) {
2148                         ifindex = devinet_conf_ifindex(net, cnf);
2149                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2150                                                     NETCONFA_RP_FILTER,
2151                                                     ifindex, cnf);
2152                 }
2153                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2154                     new_value != old_value) {
2155                         ifindex = devinet_conf_ifindex(net, cnf);
2156                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2157                                                     NETCONFA_PROXY_NEIGH,
2158                                                     ifindex, cnf);
2159                 }
2160                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2161                     new_value != old_value) {
2162                         ifindex = devinet_conf_ifindex(net, cnf);
2163                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2164                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2165                                                     ifindex, cnf);
2166                 }
2167         }
2168
2169         return ret;
2170 }
2171
2172 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2173                                   void __user *buffer,
2174                                   size_t *lenp, loff_t *ppos)
2175 {
2176         int *valp = ctl->data;
2177         int val = *valp;
2178         loff_t pos = *ppos;
2179         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2180
2181         if (write && *valp != val) {
2182                 struct net *net = ctl->extra2;
2183
2184                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2185                         if (!rtnl_trylock()) {
2186                                 /* Restore the original values before restarting */
2187                                 *valp = val;
2188                                 *ppos = pos;
2189                                 return restart_syscall();
2190                         }
2191                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2192                                 inet_forward_change(net);
2193                         } else {
2194                                 struct ipv4_devconf *cnf = ctl->extra1;
2195                                 struct in_device *idev =
2196                                         container_of(cnf, struct in_device, cnf);
2197                                 if (*valp)
2198                                         dev_disable_lro(idev->dev);
2199                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2200                                                             NETCONFA_FORWARDING,
2201                                                             idev->dev->ifindex,
2202                                                             cnf);
2203                         }
2204                         rtnl_unlock();
2205                         rt_cache_flush(net);
2206                 } else
2207                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2208                                                     NETCONFA_FORWARDING,
2209                                                     NETCONFA_IFINDEX_DEFAULT,
2210                                                     net->ipv4.devconf_dflt);
2211         }
2212
2213         return ret;
2214 }
2215
2216 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2217                                 void __user *buffer,
2218                                 size_t *lenp, loff_t *ppos)
2219 {
2220         int *valp = ctl->data;
2221         int val = *valp;
2222         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2223         struct net *net = ctl->extra2;
2224
2225         if (write && *valp != val)
2226                 rt_cache_flush(net);
2227
2228         return ret;
2229 }
2230
/*
 * Initializer for one struct ctl_table entry of the devinet_sysctl
 * template.  .data points at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf and .extra1 at ipv4_devconf itself;
 * __devinet_sysctl_register() re-targets both for the set being
 * registered.
 */
2231 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2232         { \
2233                 .procname       = name, \
2234                 .data           = ipv4_devconf.data + \
2235                                   IPV4_DEVCONF_ ## attr - 1, \
2236                 .maxlen         = sizeof(int), \
2237                 .mode           = mval, \
2238                 .proc_handler   = proc, \
2239                 .extra1         = &ipv4_devconf, \
2240         }
2241
/* Mode 0644 entry handled by devinet_conf_proc(). */
2242 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2243         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2244
/* Mode 0444 entry handled by devinet_conf_proc(). */
2245 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2246         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2247
/* Mode 0644 entry with a caller-supplied handler. */
2248 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2249         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2250
/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
2251 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2252         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2253
/*
 * Template of the per-devconf sysctl table.  __devinet_sysctl_register()
 * duplicates it with kmemdup() and adjusts .data/.extra1/.extra2 of each
 * entry for the set being registered.
 * NOTE(review): the array has __IPV4_DEVCONF_MAX slots and the register
 * loop skips the last one - presumably the zeroed terminator; confirm.
 */
2254 static struct devinet_sysctl_table {
2255         struct ctl_table_header *sysctl_header;
2256         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2257 } devinet_sysctl = {
2258         .devinet_vars = {
                     /* "forwarding" has its own handler, devinet_sysctl_forward() */
2259                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2260                                              devinet_sysctl_forward),
2261                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2262
                     /* entries handled by devinet_conf_proc() */
2263                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2264                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2265                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2266                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2267                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2268                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2269                                         "accept_source_route"),
2270                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2271                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2272                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2273                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2274                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2275                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2276                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2277                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2278                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2279                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2280                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2281                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2282                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2283                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2284                                         "force_igmp_version"),
2285                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2286                                         "igmpv2_unsolicited_report_interval"),
2287                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2288                                         "igmpv3_unsolicited_report_interval"),
2289                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2290                                         "ignore_routes_with_linkdown"),
2291                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2292                                         "drop_gratuitous_arp"),
2293
                     /* entries handled by ipv4_doint_and_flush() */
2294                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2295                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2296                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2297                                               "promote_secondaries"),
2298                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2299                                               "route_localnet"),
2300                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2301                                               "drop_unicast_in_l2_multicast"),
2302         },
2303 };
2304
2305 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2306                                      int ifindex, struct ipv4_devconf *p)
2307 {
2308         int i;
2309         struct devinet_sysctl_table *t;
2310         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2311
2312         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2313         if (!t)
2314                 goto out;
2315
2316         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2317                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2318                 t->devinet_vars[i].extra1 = p;
2319                 t->devinet_vars[i].extra2 = net;
2320         }
2321
2322         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2323
2324         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2325         if (!t->sysctl_header)
2326                 goto free;
2327
2328         p->sysctl = t;
2329
2330         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2331                                     ifindex, p);
2332         return 0;
2333
2334 free:
2335         kfree(t);
2336 out:
2337         return -ENOBUFS;
2338 }
2339
2340 static void __devinet_sysctl_unregister(struct net *net,
2341                                         struct ipv4_devconf *cnf, int ifindex)
2342 {
2343         struct devinet_sysctl_table *t = cnf->sysctl;
2344
2345         if (t) {
2346                 cnf->sysctl = NULL;
2347                 unregister_net_sysctl_table(t->sysctl_header);
2348                 kfree(t);
2349         }
2350
2351         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2352 }
2353
2354 static int devinet_sysctl_register(struct in_device *idev)
2355 {
2356         int err;
2357
2358         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2359                 return -EINVAL;
2360
2361         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2362         if (err)
2363                 return err;
2364         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2365                                         idev->dev->ifindex, &idev->cnf);
2366         if (err)
2367                 neigh_sysctl_unregister(idev->arp_parms);
2368         return err;
2369 }
2370
2371 static void devinet_sysctl_unregister(struct in_device *idev)
2372 {
2373         struct net *net = dev_net(idev->dev);
2374
2375         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2376         neigh_sysctl_unregister(idev->arp_parms);
2377 }
2378
/*
 * Table holding the single "ip_forward" sysctl entry.  As written it points
 * at the FORWARDING slot of the global ipv4_devconf with init_net in
 * .extra2; devinet_init_net() duplicates it and re-targets .data, .extra1
 * and .extra2 for namespaces other than init_net.
 */
2379 static struct ctl_table ctl_forward_entry[] = {
2380         {
2381                 .procname       = "ip_forward",
2382                 .data           = &ipv4_devconf.data[
2383                                         IPV4_DEVCONF_FORWARDING - 1],
2384                 .maxlen         = sizeof(int),
2385                 .mode           = 0644,
2386                 .proc_handler   = devinet_sysctl_forward,
2387                 .extra1         = &ipv4_devconf,
2388                 .extra2         = &init_net,
2389         },
             /* empty terminating entry */
2390         { },
2391 };
2392 #endif
2393
2394 static __net_init int devinet_init_net(struct net *net)
2395 {
2396         int err;
2397         struct ipv4_devconf *all, *dflt;
2398 #ifdef CONFIG_SYSCTL
2399         struct ctl_table *tbl = ctl_forward_entry;
2400         struct ctl_table_header *forw_hdr;
2401 #endif
2402
2403         err = -ENOMEM;
2404         all = &ipv4_devconf;
2405         dflt = &ipv4_devconf_dflt;
2406
2407         if (!net_eq(net, &init_net)) {
2408                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2409                 if (!all)
2410                         goto err_alloc_all;
2411
2412                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2413                 if (!dflt)
2414                         goto err_alloc_dflt;
2415
2416 #ifdef CONFIG_SYSCTL
2417                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2418                 if (!tbl)
2419                         goto err_alloc_ctl;
2420
2421                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2422                 tbl[0].extra1 = all;
2423                 tbl[0].extra2 = net;
2424 #endif
2425         }
2426
2427 #ifdef CONFIG_SYSCTL
2428         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2429         if (err < 0)
2430                 goto err_reg_all;
2431
2432         err = __devinet_sysctl_register(net, "default",
2433                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2434         if (err < 0)
2435                 goto err_reg_dflt;
2436
2437         err = -ENOMEM;
2438         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2439         if (!forw_hdr)
2440                 goto err_reg_ctl;
2441         net->ipv4.forw_hdr = forw_hdr;
2442 #endif
2443
2444         net->ipv4.devconf_all = all;
2445         net->ipv4.devconf_dflt = dflt;
2446         return 0;
2447
2448 #ifdef CONFIG_SYSCTL
2449 err_reg_ctl:
2450         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2451 err_reg_dflt:
2452         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2453 err_reg_all:
2454         if (tbl != ctl_forward_entry)
2455                 kfree(tbl);
2456 err_alloc_ctl:
2457 #endif
2458         if (dflt != &ipv4_devconf_dflt)
2459                 kfree(dflt);
2460 err_alloc_dflt:
2461         if (all != &ipv4_devconf)
2462                 kfree(all);
2463 err_alloc_all:
2464         return err;
2465 }
2466
2467 static __net_exit void devinet_exit_net(struct net *net)
2468 {
2469 #ifdef CONFIG_SYSCTL
2470         struct ctl_table *tbl;
2471
2472         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2473         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2474         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2475                                     NETCONFA_IFINDEX_DEFAULT);
2476         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2477                                     NETCONFA_IFINDEX_ALL);
2478         kfree(tbl);
2479 #endif
2480         kfree(net->ipv4.devconf_dflt);
2481         kfree(net->ipv4.devconf_all);
2482 }
2483
2484 static __net_initdata struct pernet_operations devinet_ops = {
2485         .init = devinet_init_net,
2486         .exit = devinet_exit_net,
2487 };
2488
2489 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2490         .family           = AF_INET,
2491         .fill_link_af     = inet_fill_link_af,
2492         .get_link_af_size = inet_get_link_af_size,
2493         .validate_link_af = inet_validate_link_af,
2494         .set_link_af      = inet_set_link_af,
2495 };
2496
2497 void __init devinet_init(void)
2498 {
2499         int i;
2500
2501         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2502                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2503
2504         register_pernet_subsys(&devinet_ops);
2505
2506         register_gifconf(PF_INET, inet_gifconf);
2507         register_netdevice_notifier(&ip_netdev_notifier);
2508
2509         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2510
2511         rtnl_af_register(&inet_af_ops);
2512
2513         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2514         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2515         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2516         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2517                       inet_netconf_dump_devconf, 0);
2518 }