// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET3 IP device support routines.
 *
 * Derived from the IP parts of dev.c 1.0.19
 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 * Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 * Alan Cox, <gw4pts@gw4pts.ampr.org>
 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
 * Cyrus Durgin: updated for kmod
 * Matthias Andree: in devinet_ioctl, compare label and
 * address (4.4BSD alias style support),
 * fall back to comparing just the label
 */
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
52 #include <linux/sysctl.h>
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
/* IFA_F_* flags that only make sense for IPv6 addresses; they are masked
 * off IPv4 addresses before insertion (see __inet_insert_ifa below).
 */
65 #define IPV6ONLY_FLAGS \
66 (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
/* Built-in per-device configuration values. The IPV4_DEVCONF_* indices are
 * 1-based, hence the "- 1" in each designated initializer.
 * NOTE(review): the surrounding ".data = { ... }" wrapper lines appear to
 * have been dropped by extraction.
 */
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
78 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
/* Defaults for the "default" devconf tree; differs from ipv4_devconf above
 * by also enabling ACCEPT_SOURCE_ROUTE.
 */
82 static struct ipv4_devconf ipv4_devconf_dflt = {
84 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
91 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
/* Convenience accessor for attr in this netns's default devconf table. */
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/* Netlink attribute validation policy for RTM_NEWADDR / RTM_DELADDR. */
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 [IFA_LOCAL] = { .type = NLA_U32 },
100 [IFA_ADDRESS] = { .type = NLA_U32 },
101 [IFA_BROADCAST] = { .type = NLA_U32 },
102 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
104 [IFA_FLAGS] = { .type = NLA_U32 },
105 [IFA_RT_PRIORITY] = { .type = NLA_U32 },
106 [IFA_TARGET_NETNSID] = { .type = NLA_S32 },
107 [IFA_PROTO] = { .type = NLA_U8 },
110 struct inet_fill_args {
/* Global hash of all IPv4 addresses (all namespaces), 256 buckets keyed by
 * local address — see inet_addr_hash() below.
 */
119 #define IN4_ADDR_HSIZE_SHIFT 8
120 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
126 u32 val = (__force u32) addr ^ net_hash_mix(net);
128 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
/* Link @ifa into the global address hash; _rcu variant keeps concurrent
 * readers (inet_lookup_ifaddr_rcu) safe.
 */
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
133 u32 hash = inet_addr_hash(net, ifa->ifa_local);
136 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
/* Unlink @ifa from the hash; _init variant leaves the node re-hashable. */
139 static void inet_hash_remove(struct in_ifaddr *ifa)
142 hlist_del_init_rcu(&ifa->hash);
146 * __ip_dev_find - find the first device with a given source address.
147 * @net: the net namespace
148 * @addr: the source address
149 * @devref: if true, take a reference on the found device
151 * If a caller uses devref=false, it should be protected by RCU, or RTNL
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
155 struct net_device *result = NULL;
156 struct in_ifaddr *ifa;
/* Fast path: exact local-address match via the global hash. */
159 ifa = inet_lookup_ifaddr_rcu(net, addr);
/* NOTE(review): the "if (!ifa) {" branch header around this slow path
 * appears to have been dropped by extraction.
 */
161 struct flowi4 fl4 = { .daddr = addr };
162 struct fib_result res = { 0 };
163 struct fib_table *local;
165 /* Fallback to FIB local table so that communication
166 * over loopback subnets work.
168 local = fib_get_table(net, RT_TABLE_LOCAL);
170 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 res.type == RTN_LOCAL)
172 result = FIB_RES_DEV(res);
174 result = ifa->ifa_dev->dev;
/* Only pin the device when the caller asked for a reference. */
176 if (result && devref)
181 EXPORT_SYMBOL(__ip_dev_find);
183 /* called under RCU lock */
/* Exact-match lookup of @addr in inet_addr_lst, filtered to @net since the
 * hash table is shared across all namespaces.
 */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
186 u32 hash = inet_addr_hash(net, addr);
187 struct in_ifaddr *ifa;
189 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 if (ifa->ifa_local == addr &&
191 net_eq(dev_net(ifa->ifa_dev->dev), net))
/* Forward declarations and the two address-event notifier chains.
 * NOTE(review): a CONFIG_SYSCTL #ifdef/#else pair appears to have been lost
 * here — both the sysctl declarations and their no-op stubs are visible.
 */
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 struct in_ifaddr __rcu **ifap,
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
208 static int devinet_sysctl_register(struct in_device *idev)
212 static void devinet_sysctl_unregister(struct in_device *idev)
217 /* Locks all the inet devices. */
219 static struct in_ifaddr *inet_alloc_ifa(void)
221 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
/* RCU callback: drop the ifa's hold on its in_device after a grace period.
 * NOTE(review): the trailing kfree(ifa) line appears lost to extraction.
 */
224 static void inet_rcu_free_ifa(struct rcu_head *head)
226 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
228 in_dev_put(ifa->ifa_dev);
/* Defer freeing until RCU readers traversing the hash/lists have drained. */
232 static void inet_free_ifa(struct in_ifaddr *ifa)
234 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/* RCU callback for in_device teardown; releases the multicast hash.
 * NOTE(review): the kfree(idev) that should follow appears lost to
 * extraction.
 */
237 static void in_dev_free_rcu(struct rcu_head *head)
239 struct in_device *idev = container_of(head, struct in_device, rcu_head);
241 kfree(rcu_dereference_protected(idev->mc_hash, 1));
/* Final teardown once the last reference to @idev is gone: drop the netdev
 * tracker reference and free the in_device after an RCU grace period.
 */
245 void in_dev_finish_destroy(struct in_device *idev)
247 struct net_device *dev = idev->dev;
/* Both lists must already be empty at this point. */
249 WARN_ON(idev->ifa_list);
250 WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
254 netdev_put(dev, &idev->dev_tracker);
256 pr_err("Freeing alive in_device %p\n", idev);
258 call_rcu(&idev->rcu_head, in_dev_free_rcu);
260 EXPORT_SYMBOL(in_dev_finish_destroy);
/* Create and attach an in_device for @dev: clone the netns default devconf,
 * set up ARP parms, sysctls and multicast state, then publish via ip_ptr.
 */
262 static struct in_device *inetdev_init(struct net_device *dev)
264 struct in_device *in_dev;
269 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL)
/* Inherit the namespace-wide defaults; per-device sysctls come later. */
272 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 sizeof(in_dev->cnf));
274 in_dev->cnf.sysctl = NULL;
276 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 if (!in_dev->arp_parms)
/* LRO is incompatible with forwarding. */
279 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 dev_disable_lro(dev);
281 /* Reference in_dev->dev */
282 netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 /* Account for reference dev->ip_ptr (below) */
284 refcount_set(&in_dev->refcnt, 1);
286 err = devinet_sysctl_register(in_dev);
289 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
294 ip_mc_init_dev(in_dev);
295 if (dev->flags & IFF_UP)
298 /* we can receive as soon as ip_ptr is set -- do this last */
299 rcu_assign_pointer(dev->ip_ptr, in_dev);
301 return in_dev ?: ERR_PTR(err);
/* Tear down an in_device: destroy multicast state, delete every remaining
 * address, unpublish ip_ptr, then drop sysctls and ARP parms.
 */
308 static void inetdev_destroy(struct in_device *in_dev)
310 struct net_device *dev;
311 struct in_ifaddr *ifa;
319 ip_mc_destroy_dev(in_dev);
/* Delete from the head each time; inet_del_ifa relinks the list. */
321 while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
326 RCU_INIT_POINTER(dev->ip_ptr, NULL);
328 devinet_sysctl_unregister(in_dev);
329 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
/* Return whether @a (and @b, if non-zero) falls within one of @in_dev's
 * configured subnets.
 */
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
337 const struct in_ifaddr *ifa;
340 in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 if (inet_ifa_match(a, ifa)) {
342 if (!b || inet_ifa_match(b, ifa)) {
/* Core address deletion. Removes *ifap from the per-device list and the
 * global hash, announces RTM_DELADDR, and — when promote_secondaries is
 * enabled — promotes a matching secondary to primary in its place.
 * Runs under RTNL; list updates use rcu_assign_pointer for lockless readers.
 */
352 static void __inet_del_ifa(struct in_device *in_dev,
353 struct in_ifaddr __rcu **ifap,
354 int destroy, struct nlmsghdr *nlh, u32 portid)
356 struct in_ifaddr *promote = NULL;
357 struct in_ifaddr *ifa, *ifa1;
358 struct in_ifaddr __rcu **last_prim;
359 struct in_ifaddr *prev_prom = NULL;
360 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
364 ifa1 = rtnl_dereference(*ifap);
369 /* 1. Deleting primary ifaddr forces deletion all secondaries
370 * unless alias promotion is set
373 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
376 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
/* Track where the last primary of suitable scope sits, for relinking the
 * promoted address behind it later.
 */
377 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378 ifa1->ifa_scope <= ifa->ifa_scope)
379 last_prim = &ifa->ifa_next;
381 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382 ifa1->ifa_mask != ifa->ifa_mask ||
383 !inet_ifa_match(ifa1->ifa_address, ifa)) {
384 ifap1 = &ifa->ifa_next;
/* Secondary in the deleted primary's subnet: either delete it too, or
 * remember it for promotion.
 */
390 inet_hash_remove(ifa);
391 *ifap1 = ifa->ifa_next;
393 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394 blocking_notifier_call_chain(&inetaddr_chain,
404 /* On promotion all secondaries from subnet are changing
405 * the primary IP, we must remove all their routes silently
406 * and later to add them back with new prefsrc. Do this
407 * while all addresses are on the device list.
409 for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410 if (ifa1->ifa_mask == ifa->ifa_mask &&
411 inet_ifa_match(ifa1->ifa_address, ifa))
412 fib_del_ifaddr(ifa, ifa1);
/* 2. Unlink ifa1 itself from list and hash. */
418 *ifap = ifa1->ifa_next;
419 inet_hash_remove(ifa1);
421 /* 3. Announce address deletion */
423 /* Send message first, then call notifier.
424 At first sight, FIB update triggered by notifier
425 will refer to already deleted ifaddr, that could confuse
426 netlink listeners. It is not true: look, gated sees
427 that route deleted and if it still thinks that ifaddr
428 is valid, it will try to restore deleted routes... Grr.
429 So that, this order is correct.
431 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
435 struct in_ifaddr *next_sec;
437 next_sec = rtnl_dereference(promote->ifa_next);
439 struct in_ifaddr *last_sec;
/* Move the promoted entry to sit after the last primary. */
441 rcu_assign_pointer(prev_prom->ifa_next, next_sec);
443 last_sec = rtnl_dereference(*last_prim);
444 rcu_assign_pointer(promote->ifa_next, last_sec);
445 rcu_assign_pointer(*last_prim, promote);
448 promote->ifa_flags &= ~IFA_F_SECONDARY;
449 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450 blocking_notifier_call_chain(&inetaddr_chain,
/* Re-add routes for the remaining secondaries with the new prefsrc. */
452 for (ifa = next_sec; ifa;
453 ifa = rtnl_dereference(ifa->ifa_next)) {
454 if (ifa1->ifa_mask != ifa->ifa_mask ||
455 !inet_ifa_match(ifa1->ifa_address, ifa))
/* Non-netlink wrapper: delete without an originating message or portid. */
465 static void inet_del_ifa(struct in_device *in_dev,
466 struct in_ifaddr __rcu **ifap,
469 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/* Address-lifetime expiry runs as deferrable work (see check_lifetime). */
472 static void check_lifetime(struct work_struct *work);
474 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
/* Core address insertion under RTNL: classify primary vs secondary against
 * existing addresses, run validator notifiers, link into the per-device
 * list and the global hash, then announce RTM_NEWADDR.
 * On any early failure paths the ifa is freed (inet_free_ifa); on success
 * ownership passes to the device list.
 */
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477 u32 portid, struct netlink_ext_ack *extack)
479 struct in_ifaddr __rcu **last_primary, **ifap;
480 struct in_device *in_dev = ifa->ifa_dev;
481 struct in_validator_info ivi;
482 struct in_ifaddr *ifa1;
487 if (!ifa->ifa_local) {
492 ifa->ifa_flags &= ~IFA_F_SECONDARY;
493 last_primary = &in_dev->ifa_list;
495 /* Don't set IPv6 only flags to IPv4 addresses */
496 ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
/* Walk the list: find the insertion point and decide secondary status. */
498 ifap = &in_dev->ifa_list;
499 ifa1 = rtnl_dereference(*ifap);
502 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503 ifa->ifa_scope <= ifa1->ifa_scope)
504 last_primary = &ifa1->ifa_next;
505 if (ifa1->ifa_mask == ifa->ifa_mask &&
506 inet_ifa_match(ifa1->ifa_address, ifa)) {
507 if (ifa1->ifa_local == ifa->ifa_local) {
511 if (ifa1->ifa_scope != ifa->ifa_scope) {
512 NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
/* Same subnet as an existing address: the new one is a secondary. */
516 ifa->ifa_flags |= IFA_F_SECONDARY;
519 ifap = &ifa1->ifa_next;
520 ifa1 = rtnl_dereference(*ifap);
523 /* Allow any devices that wish to register ifaddr validtors to weigh
524 * in now, before changes are committed. The rntl lock is serializing
525 * access here, so the state should not change between a validator call
526 * and a final notify on commit. This isn't invoked on promotion under
527 * the assumption that validators are checking the address itself, and
530 ivi.ivi_addr = ifa->ifa_address;
531 ivi.ivi_dev = ifa->ifa_dev;
533 ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
535 ret = notifier_to_errno(ret);
/* Primaries are appended after the last primary of suitable scope. */
541 if (!(ifa->ifa_flags & IFA_F_SECONDARY))
544 rcu_assign_pointer(ifa->ifa_next, *ifap);
545 rcu_assign_pointer(*ifap, ifa);
547 inet_hash_insert(dev_net(in_dev->dev), ifa);
/* Re-arm lifetime expiry immediately so the new address is considered. */
549 cancel_delayed_work(&check_lifetime_work);
550 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
552 /* Send message first, then call notifier.
553 Notifier will trigger FIB update, so that
554 listeners of netlink will know about new ifaddr */
555 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/* Insert without an originating netlink message (ioctl paths). */
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
563 return __inet_insert_ifa(ifa, NULL, 0, NULL);
/* Bind a freshly allocated @ifa to @dev's in_device and insert it.
 * Loopback addresses are forced to host scope.
 */
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
568 struct in_device *in_dev = __in_dev_get_rtnl(dev);
576 ipv4_devconf_setall(in_dev);
577 neigh_parms_data_state_setall(in_dev->arp_parms);
/* A pre-set ifa_dev would indicate the ifa is already owned elsewhere. */
578 if (ifa->ifa_dev != in_dev) {
579 WARN_ON(ifa->ifa_dev);
581 ifa->ifa_dev = in_dev;
583 if (ipv4_is_loopback(ifa->ifa_local))
584 ifa->ifa_scope = RT_SCOPE_HOST;
585 return inet_insert_ifa(ifa);
588 /* Caller must hold RCU or RTNL :
589 * We dont take a reference on found in_device
/* Resolve an ifindex to its in_device; NULL when the device has none. */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
593 struct net_device *dev;
594 struct in_device *in_dev = NULL;
597 dev = dev_get_by_index_rcu(net, ifindex);
599 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
603 EXPORT_SYMBOL(inetdev_by_index);
605 /* Called only from RTNL semaphored context. No locks. */
/* Find the first address on @in_dev matching @prefix under the given mask. */
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
610 struct in_ifaddr *ifa;
614 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
/* Join or leave the multicast group @ifa->ifa_address on behalf of the
 * kernel's autojoin socket (IFA_F_MCAUTOJOIN). Compiled out without
 * CONFIG_IP_MULTICAST.
 */
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622 const struct in_ifaddr *ifa)
624 #if defined(CONFIG_IP_MULTICAST)
625 struct ip_mreqn mreq = {
626 .imr_multiaddr.s_addr = ifa->ifa_address,
627 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
629 struct sock *sk = net->ipv4.mc_autojoin_sk;
636 ret = ip_mc_join_group(sk, &mreq);
638 ret = ip_mc_leave_group(sk, &mreq);
/* RTM_DELADDR handler: find the address matching the request's local
 * address / label / prefix and delete it, leaving its multicast group if
 * it was autojoined.
 */
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648 struct netlink_ext_ack *extack)
650 struct net *net = sock_net(skb->sk);
651 struct in_ifaddr __rcu **ifap;
652 struct nlattr *tb[IFA_MAX+1];
653 struct in_device *in_dev;
654 struct ifaddrmsg *ifm;
655 struct in_ifaddr *ifa;
660 err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661 ifa_ipv4_policy, extack);
665 ifm = nlmsg_data(nlh);
666 in_dev = inetdev_by_index(net, ifm->ifa_index);
668 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
/* Scan the device's list for an entry matching every supplied attribute. */
673 for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674 ifap = &ifa->ifa_next) {
676 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
679 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
682 if (tb[IFA_ADDRESS] &&
683 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
687 if (ipv4_is_multicast(ifa->ifa_address))
688 ip_mc_autojoin_config(net, false, ifa);
689 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
/* Loop fell through: no entry matched the request. */
693 NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694 err = -EADDRNOTAVAIL;
699 #define INFINITY_LIFE_TIME 0xFFFFFFFF
/* Periodic worker: expire addresses whose valid lifetime has passed and
 * mark as DEPRECATED those past their preferred lifetime. Runs a cheap
 * RCU scan first; only buckets needing changes are rescanned (presumably
 * under RTNL — the lock/unlock lines are not visible here; NOTE(review)).
 * Finally reschedules itself for the next interesting timestamp.
 */
701 static void check_lifetime(struct work_struct *work)
703 unsigned long now, next, next_sec, next_sched;
704 struct in_ifaddr *ifa;
705 struct hlist_node *n;
709 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
711 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712 bool change_needed = false;
/* Pass 1 (RCU): decide whether this bucket needs changes at all. */
715 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716 unsigned long age, tstamp;
721 flags = READ_ONCE(ifa->ifa_flags);
722 if (flags & IFA_F_PERMANENT)
725 preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
726 valid_lft = READ_ONCE(ifa->ifa_valid_lft);
727 tstamp = READ_ONCE(ifa->ifa_tstamp);
728 /* We try to batch several events at once. */
729 age = (now - tstamp +
730 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
732 if (valid_lft != INFINITY_LIFE_TIME &&
734 change_needed = true;
735 } else if (preferred_lft ==
736 INFINITY_LIFE_TIME) {
738 } else if (age >= preferred_lft) {
739 if (time_before(tstamp + valid_lft * HZ, next))
740 next = tstamp + valid_lft * HZ;
742 if (!(flags & IFA_F_DEPRECATED))
743 change_needed = true;
744 } else if (time_before(tstamp + preferred_lft * HZ,
746 next = tstamp + preferred_lft * HZ;
/* Pass 2 (_safe: entries may be deleted): apply the changes. */
753 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
756 if (ifa->ifa_flags & IFA_F_PERMANENT)
759 /* We try to batch several events at once. */
760 age = (now - ifa->ifa_tstamp +
761 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
763 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
764 age >= ifa->ifa_valid_lft) {
765 struct in_ifaddr __rcu **ifap;
766 struct in_ifaddr *tmp;
/* Locate ifa within its device list so it can be unlinked. */
768 ifap = &ifa->ifa_dev->ifa_list;
769 tmp = rtnl_dereference(*ifap);
772 inet_del_ifa(ifa->ifa_dev,
776 ifap = &tmp->ifa_next;
777 tmp = rtnl_dereference(*ifap);
779 } else if (ifa->ifa_preferred_lft !=
780 INFINITY_LIFE_TIME &&
781 age >= ifa->ifa_preferred_lft &&
782 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
783 ifa->ifa_flags |= IFA_F_DEPRECATED;
784 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
790 next_sec = round_jiffies_up(next);
793 /* If rounded timeout is accurate enough, accept it. */
794 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
795 next_sched = next_sec;
798 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
799 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
800 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
802 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
/* Translate user-supplied valid/preferred lifetimes into ifa state:
 * infinite valid => IFA_F_PERMANENT; zero preferred => IFA_F_DEPRECATED.
 * Timestamps use WRITE_ONCE so check_lifetime's RCU pass can READ_ONCE.
 */
806 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
809 unsigned long timeout;
812 flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
814 timeout = addrconf_timeout_fixup(valid_lft, HZ);
815 if (addrconf_finite_timeout(timeout))
816 WRITE_ONCE(ifa->ifa_valid_lft, timeout);
818 flags |= IFA_F_PERMANENT;
820 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
821 if (addrconf_finite_timeout(timeout)) {
823 flags |= IFA_F_DEPRECATED;
824 WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
826 WRITE_ONCE(ifa->ifa_flags, flags);
827 WRITE_ONCE(ifa->ifa_tstamp, jiffies);
/* Creation stamp is set once, on first configuration. */
828 if (!ifa->ifa_cstamp)
829 WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
/* Parse an RTM_NEWADDR request into a newly allocated in_ifaddr.
 * Returns the ifa (owned by the caller until inserted) or ERR_PTR();
 * lifetimes from IFA_CACHEINFO are returned via the out-parameters.
 */
832 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
833 __u32 *pvalid_lft, __u32 *pprefered_lft,
834 struct netlink_ext_ack *extack)
836 struct nlattr *tb[IFA_MAX+1];
837 struct in_ifaddr *ifa;
838 struct ifaddrmsg *ifm;
839 struct net_device *dev;
840 struct in_device *in_dev;
843 err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
844 ifa_ipv4_policy, extack);
848 ifm = nlmsg_data(nlh);
851 if (ifm->ifa_prefixlen > 32) {
852 NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
856 if (!tb[IFA_LOCAL]) {
857 NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
861 dev = __dev_get_by_index(net, ifm->ifa_index);
864 NL_SET_ERR_MSG(extack, "ipv4: Device not found");
868 in_dev = __in_dev_get_rtnl(dev);
873 ifa = inet_alloc_ifa();
876 * A potential indev allocation can be left alive, it stays
877 * assigned to its device and is destroy with it.
881 ipv4_devconf_setall(in_dev);
882 neigh_parms_data_state_setall(in_dev->arp_parms);
/* With no IFA_ADDRESS the local address doubles as the peer address. */
885 if (!tb[IFA_ADDRESS])
886 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
888 INIT_HLIST_NODE(&ifa->hash);
889 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
890 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
891 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
893 ifa->ifa_scope = ifm->ifa_scope;
894 ifa->ifa_dev = in_dev;
896 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
897 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
899 if (tb[IFA_BROADCAST])
900 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
/* Label falls back to the device name when none is supplied. */
903 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
905 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
907 if (tb[IFA_RT_PRIORITY])
908 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
911 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
913 if (tb[IFA_CACHEINFO]) {
914 struct ifa_cacheinfo *ci;
916 ci = nla_data(tb[IFA_CACHEINFO]);
/* Reject zero valid lifetime and preferred > valid. */
917 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
918 NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
922 *pvalid_lft = ci->ifa_valid;
923 *pprefered_lft = ci->ifa_prefered;
/* Look for an existing address on the same device with identical subnet
 * and local address — used to distinguish "create" from "replace".
 */
934 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
936 struct in_device *in_dev = ifa->ifa_dev;
937 struct in_ifaddr *ifa1;
942 in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
943 if (ifa1->ifa_mask == ifa->ifa_mask &&
944 inet_ifa_match(ifa1->ifa_address, ifa) &&
945 ifa1->ifa_local == ifa->ifa_local)
/* RTM_NEWADDR handler: insert a new address, or — when one already exists
 * and NLM_F_REPLACE is set — update its metric, proto and lifetimes.
 */
951 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
952 struct netlink_ext_ack *extack)
954 struct net *net = sock_net(skb->sk);
955 struct in_ifaddr *ifa;
956 struct in_ifaddr *ifa_existing;
957 __u32 valid_lft = INFINITY_LIFE_TIME;
958 __u32 prefered_lft = INFINITY_LIFE_TIME;
962 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
966 ifa_existing = find_matching_ifa(ifa);
968 /* It would be best to check for !NLM_F_CREATE here but
969 * userspace already relies on not having to provide this.
971 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
972 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
973 int ret = ip_mc_autojoin_config(net, true, ifa);
976 NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
981 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
/* Replace path: the parsed ifa is only a carrier for the new values. */
984 u32 new_metric = ifa->ifa_rt_priority;
985 u8 new_proto = ifa->ifa_proto;
989 if (nlh->nlmsg_flags & NLM_F_EXCL ||
990 !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
991 NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
/* A metric change requires the prefix routes to be re-keyed. */
996 if (ifa->ifa_rt_priority != new_metric) {
997 fib_modify_prefix_metric(ifa, new_metric);
998 ifa->ifa_rt_priority = new_metric;
1001 ifa->ifa_proto = new_proto;
1003 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1004 cancel_delayed_work(&check_lifetime_work);
1005 queue_delayed_work(system_power_efficient_wq,
1006 &check_lifetime_work, 0);
1007 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1013 * Determine a default network mask, based on the IP address.
/* Returns the classful prefix length for @addr, or a negative value for
 * addresses with no sensible default (multicast, class E, ...).
 */
1016 static int inet_abc_len(__be32 addr)
1018 int rc = -1; /* Something else, probably a multicast. */
1020 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1023 __u32 haddr = ntohl(addr);
1024 if (IN_CLASSA(haddr))
1026 else if (IN_CLASSB(haddr))
1028 else if (IN_CLASSC(haddr))
1030 else if (IN_CLASSE(haddr))
/* Legacy SIOC{G,S}IF* ioctl handler: get/set address, broadcast, peer,
 * netmask and flags for a device (optionally a ":n" labelled alias).
 * SIOCS* variants require CAP_NET_ADMIN in the device's user namespace.
 */
1038 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1040 struct sockaddr_in sin_orig;
1041 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1042 struct in_ifaddr __rcu **ifap = NULL;
1043 struct in_device *in_dev;
1044 struct in_ifaddr *ifa = NULL;
1045 struct net_device *dev;
1048 int tryaddrmatch = 0;
/* Defensively terminate the user-supplied name. */
1050 ifr->ifr_name[IFNAMSIZ - 1] = 0;
1052 /* save original address for comparison */
1053 memcpy(&sin_orig, sin, sizeof(*sin));
1055 colon = strchr(ifr->ifr_name, ':');
1059 dev_load(net, ifr->ifr_name);
1062 case SIOCGIFADDR: /* Get interface address */
1063 case SIOCGIFBRDADDR: /* Get the broadcast address */
1064 case SIOCGIFDSTADDR: /* Get the destination address */
1065 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1066 /* Note that these ioctls will not sleep,
1067 so that we do not impose a lock.
1068 One day we will be forced to put shlock here (I mean SMP)
1070 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1071 memset(sin, 0, sizeof(*sin));
1072 sin->sin_family = AF_INET;
1077 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1080 case SIOCSIFADDR: /* Set interface address (and family) */
1081 case SIOCSIFBRDADDR: /* Set the broadcast address */
1082 case SIOCSIFDSTADDR: /* Set the destination address */
1083 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1085 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1088 if (sin->sin_family != AF_INET)
1099 dev = __dev_get_by_name(net, ifr->ifr_name)
1106 in_dev = __in_dev_get_rtnl(dev);
1109 /* Matthias Andree */
1110 /* compare label and address (4.4BSD style) */
1111 /* note: we only do this for a limited set of ioctls
1112 and only if the original address family was AF_INET.
1113 This is checked above. */
1115 for (ifap = &in_dev->ifa_list;
1116 (ifa = rtnl_dereference(*ifap)) != NULL;
1117 ifap = &ifa->ifa_next) {
1118 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1119 sin_orig.sin_addr.s_addr ==
1125 /* we didn't get a match, maybe the application is
1126 4.3BSD-style and passed in junk so we fall back to
1127 comparing just the label */
1129 for (ifap = &in_dev->ifa_list;
1130 (ifa = rtnl_dereference(*ifap)) != NULL;
1131 ifap = &ifa->ifa_next)
1132 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
/* SIOCSIFADDR/SIOCSIFFLAGS may proceed without an existing address. */
1137 ret = -EADDRNOTAVAIL;
1138 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1142 case SIOCGIFADDR: /* Get interface address */
1144 sin->sin_addr.s_addr = ifa->ifa_local;
1147 case SIOCGIFBRDADDR: /* Get the broadcast address */
1149 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1152 case SIOCGIFDSTADDR: /* Get the destination address */
1154 sin->sin_addr.s_addr = ifa->ifa_address;
1157 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1159 sin->sin_addr.s_addr = ifa->ifa_mask;
1164 ret = -EADDRNOTAVAIL;
/* Taking an alias down just deletes it. */
1168 if (!(ifr->ifr_flags & IFF_UP))
1169 inet_del_ifa(in_dev, ifap, 1);
1172 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1175 case SIOCSIFADDR: /* Set interface address (and family) */
1177 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1182 ifa = inet_alloc_ifa();
1185 INIT_HLIST_NODE(&ifa->hash);
1187 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1189 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/* Re-setting the same address is a no-op; otherwise delete + re-add. */
1192 if (ifa->ifa_local == sin->sin_addr.s_addr)
1194 inet_del_ifa(in_dev, ifap, 0);
1195 ifa->ifa_broadcast = 0;
1199 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
/* Non-PtP links get a classful default mask and broadcast address. */
1201 if (!(dev->flags & IFF_POINTOPOINT)) {
1202 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1203 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1204 if ((dev->flags & IFF_BROADCAST) &&
1205 ifa->ifa_prefixlen < 31)
1206 ifa->ifa_broadcast = ifa->ifa_address |
1209 ifa->ifa_prefixlen = 32;
1210 ifa->ifa_mask = inet_make_mask(32);
1212 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1213 ret = inet_set_ifa(dev, ifa);
1216 case SIOCSIFBRDADDR: /* Set the broadcast address */
1218 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1219 inet_del_ifa(in_dev, ifap, 0);
1220 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1221 inet_insert_ifa(ifa);
1225 case SIOCSIFDSTADDR: /* Set the destination address */
1227 if (ifa->ifa_address == sin->sin_addr.s_addr)
1230 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1233 inet_del_ifa(in_dev, ifap, 0);
1234 ifa->ifa_address = sin->sin_addr.s_addr;
1235 inet_insert_ifa(ifa);
1238 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1241 * The mask we set must be legal.
1244 if (bad_mask(sin->sin_addr.s_addr, 0))
1247 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1248 __be32 old_mask = ifa->ifa_mask;
1249 inet_del_ifa(in_dev, ifap, 0);
1250 ifa->ifa_mask = sin->sin_addr.s_addr;
1251 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1253 /* See if current broadcast address matches
1254 * with current netmask, then recalculate
1255 * the broadcast address. Otherwise it's a
1256 * funny address, so don't touch it since
1257 * the user seems to know what (s)he's doing...
1259 if ((dev->flags & IFF_BROADCAST) &&
1260 (ifa->ifa_prefixlen < 31) &&
1261 (ifa->ifa_broadcast ==
1262 (ifa->ifa_local|~old_mask))) {
1263 ifa->ifa_broadcast = (ifa->ifa_local |
1264 ~sin->sin_addr.s_addr);
1266 inet_insert_ifa(ifa);
/* SIOCGIFCONF helper: copy one struct ifreq (label + local address) per
 * address on @dev into the user buffer; returns bytes written.
 */
1276 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1278 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1279 const struct in_ifaddr *ifa;
/* Caller-supplied entry size must not exceed a full ifreq. */
1283 if (WARN_ON(size > sizeof(struct ifreq)))
1289 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1296 memset(&ifr, 0, sizeof(struct ifreq));
1297 strcpy(ifr.ifr_name, ifa->ifa_label);
1299 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1300 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1303 if (copy_to_user(buf + done, &ifr, size)) {
/* First primary, non-link-scope address on @in_dev whose scope fits
 * within @scope; 0 when none qualifies.
 */
1314 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1317 const struct in_ifaddr *ifa;
1319 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1320 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1322 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1323 ifa->ifa_scope <= scope)
1324 return ifa->ifa_local;
/* Choose a source address on @dev suitable for talking to @dst within
 * @scope; falls back to the L3 master device (VRF) and then to any other
 * device in the namespace.
 */
1330 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1332 const struct in_ifaddr *ifa;
1334 unsigned char localnet_scope = RT_SCOPE_HOST;
1335 struct in_device *in_dev;
1336 struct net *net = dev_net(dev);
1340 in_dev = __in_dev_get_rcu(dev);
/* route_localnet relaxes the scope treatment of 127/8 addresses. */
1344 if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1345 localnet_scope = RT_SCOPE_LINK;
1347 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1348 if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1350 if (min(ifa->ifa_scope, localnet_scope) > scope)
/* Prefer an address in dst's subnet; remember any in-scope one. */
1352 if (!dst || inet_ifa_match(dst, ifa)) {
1353 addr = ifa->ifa_local;
1357 addr = ifa->ifa_local;
1363 master_idx = l3mdev_master_ifindex_rcu(dev);
1365 /* For VRFs, the VRF device takes the place of the loopback device,
1366 * with addresses on it being preferred. Note in such cases the
1367 * loopback device will be among the devices that fail the master_idx
1368 * equality check in the loop below.
1371 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1372 (in_dev = __in_dev_get_rcu(dev))) {
1373 addr = in_dev_select_addr(in_dev, scope);
1378 /* Not loopback addresses on loopback should be preferred
1379 in this case. It is important that lo is the first interface
1382 for_each_netdev_rcu(net, dev) {
1383 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1386 in_dev = __in_dev_get_rcu(dev);
1390 addr = in_dev_select_addr(in_dev, scope);
1398 EXPORT_SYMBOL(inet_select_addr);
1400 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1401 __be32 local, int scope)
1403 unsigned char localnet_scope = RT_SCOPE_HOST;
1404 const struct in_ifaddr *ifa;
1408 if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1409 localnet_scope = RT_SCOPE_LINK;
1411 in_dev_for_each_ifa_rcu(ifa, in_dev) {
1412 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1415 (local == ifa->ifa_local || !local) &&
1416 min_scope <= scope) {
1417 addr = ifa->ifa_local;
1422 same = (!local || inet_ifa_match(local, ifa)) &&
1423 (!dst || inet_ifa_match(dst, ifa));
1427 /* Is the selected addr into dst subnet? */
1428 if (inet_ifa_match(addr, ifa))
1430 /* No, then can we use new local src? */
1431 if (min_scope <= scope) {
1432 addr = ifa->ifa_local;
1435 /* search for large dst subnet for addr */
1441 return same ? addr : 0;
1445 * Confirm that local IP address exists using wildcards:
1446 * - net: netns to check, cannot be NULL
1447 * - in_dev: only on this interface, NULL=any interface
1448 * - dst: only in the same subnet as dst, 0=any dst
1449 * - local: address, 0=autoselect the local address
1450 * - scope: maximum allowed scope value for the local address
1452 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1453 __be32 dst, __be32 local, int scope)
1456 struct net_device *dev;
/* A specific in_dev restricts the search to that interface. */
1459 return confirm_addr_indev(in_dev, dst, local, scope);
/* Otherwise scan every device in the namespace until one confirms. */
1462 for_each_netdev_rcu(net, dev) {
1463 in_dev = __in_dev_get_rcu(dev);
1465 addr = confirm_addr_indev(in_dev, dst, local, scope);
1474 EXPORT_SYMBOL(inet_confirm_addr);
/* Subscribe @nb to IPv4 address add/delete events (blocking chain). */
1480 int register_inetaddr_notifier(struct notifier_block *nb)
1482 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1484 EXPORT_SYMBOL(register_inetaddr_notifier);
/* Remove @nb from the IPv4 address notifier chain. */
1486 int unregister_inetaddr_notifier(struct notifier_block *nb)
1488 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1490 EXPORT_SYMBOL(unregister_inetaddr_notifier);
/* Subscribe @nb to the validator chain, which can veto address changes. */
1492 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1494 return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1496 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
/* Remove @nb from the IPv4 address validator notifier chain. */
1498 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1500 return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1503 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1505 /* Rename ifa_labels for a device name change. Make some effort to preserve
1506 * existing alias numbering and to create unique labels if possible.
1508 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1510 struct in_ifaddr *ifa;
/* Walk all addresses under RTNL; for each, rebuild the label from the
 * new device name plus the old ":N" alias suffix (if any).
 */
1513 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1514 char old[IFNAMSIZ], *dot;
1516 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1517 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/* Locate an alias suffix in the saved label */
1520 dot = strchr(old, ':');
/* NOTE(review): the branch assigning 'named' is elided in this view */
1522 sprintf(old, ":%d", named);
/* Append the alias suffix; if it would overflow IFNAMSIZ, overwrite
 * the tail of the label so the suffix is always preserved.
 */
1525 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1526 strcat(ifa->ifa_label, dot);
1528 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
/* Announce the relabelled address via netlink */
1530 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/* Emit a gratuitous ARP (sender == target == ifa_local) for every
 * address on @in_dev, so neighbours refresh their caches.  Runs under
 * RTNL (uses the _rtnl iterator).
 */
1534 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1535 struct in_device *in_dev)
1538 const struct in_ifaddr *ifa;
1540 in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1541 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1542 ifa->ifa_local, dev,
1543 ifa->ifa_local, NULL,
1544 dev->dev_addr, NULL);
1548 /* Called only under RTNL semaphore */
/* Netdevice notifier: reacts to lifecycle events (register, MTU change,
 * address change, rename, unregister, ...) by creating/destroying the
 * per-device in_device and keeping IPv4 state in sync.
 * NOTE(review): several case bodies and the surrounding switch scaffolding
 * are elided in this view; the visible lines are annotated best-effort.
 */
1550 static int inetdev_event(struct notifier_block *this, unsigned long event,
1553 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1554 struct in_device *in_dev = __in_dev_get_rtnl(dev);
/* No in_device yet: create one on REGISTER, or on CHANGEMTU when the
 * new MTU becomes valid again (IP was previously disabled).
 */
1559 if (event == NETDEV_REGISTER) {
1560 in_dev = inetdev_init(dev);
1562 return notifier_from_errno(PTR_ERR(in_dev));
1563 if (dev->flags & IFF_LOOPBACK) {
/* Loopback never needs IPsec transforms/policy lookups */
1564 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1565 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1567 } else if (event == NETDEV_CHANGEMTU) {
1568 /* Re-enabling IP */
1569 if (inetdev_valid_mtu(dev->mtu))
1570 in_dev = inetdev_init(dev);
1576 case NETDEV_REGISTER:
/* in_dev already exists at REGISTER time: should be impossible */
1577 pr_debug("%s: bug\n", __func__);
1578 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1581 if (!inetdev_valid_mtu(dev->mtu))
/* Loopback coming up: auto-assign 127.0.0.1/8 */
1583 if (dev->flags & IFF_LOOPBACK) {
1584 struct in_ifaddr *ifa = inet_alloc_ifa();
1587 INIT_HLIST_NODE(&ifa->hash);
1589 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1590 ifa->ifa_prefixlen = 8;
1591 ifa->ifa_mask = inet_make_mask(8);
1592 in_dev_hold(in_dev);
1593 ifa->ifa_dev = in_dev;
1594 ifa->ifa_scope = RT_SCOPE_HOST;
1595 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1596 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1597 INFINITY_LIFE_TIME);
1598 ipv4_devconf_setall(in_dev);
1599 neigh_parms_data_state_setall(in_dev->arp_parms);
1600 inet_insert_ifa(ifa);
1605 case NETDEV_CHANGEADDR:
1606 if (!IN_DEV_ARP_NOTIFY(in_dev))
/* fall through (CHANGEADDR with arp_notify acts like NOTIFY_PEERS) */
1609 case NETDEV_NOTIFY_PEERS:
1610 /* Send gratuitous ARP to notify of link change */
1611 inetdev_send_gratuitous_arp(dev, in_dev);
1616 case NETDEV_PRE_TYPE_CHANGE:
1617 ip_mc_unmap(in_dev);
1619 case NETDEV_POST_TYPE_CHANGE:
1620 ip_mc_remap(in_dev);
1622 case NETDEV_CHANGEMTU:
1623 if (inetdev_valid_mtu(dev->mtu))
1625 /* disable IP when MTU is not enough */
1627 case NETDEV_UNREGISTER:
1628 inetdev_destroy(in_dev);
1630 case NETDEV_CHANGENAME:
1631 /* Do not notify about label change, this event is
1632 * not interesting to applications using netlink.
1634 inetdev_changename(dev, in_dev);
/* Re-register sysctls under the new interface name */
1636 devinet_sysctl_unregister(in_dev);
1637 devinet_sysctl_register(in_dev);
/* Notifier block hooking inetdev_event into netdevice events. */
1644 static struct notifier_block ip_netdev_notifier = {
1645 .notifier_call = inetdev_event,
/* Worst-case netlink message size for one RTM_NEWADDR/RTM_DELADDR,
 * summing the header plus every attribute inet_fill_ifaddr may emit.
 * Must stay in sync with inet_fill_ifaddr (see the -EMSGSIZE WARN_ON
 * in rtmsg_ifa).
 */
1648 static size_t inet_nlmsg_size(void)
1650 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1651 + nla_total_size(4) /* IFA_ADDRESS */
1652 + nla_total_size(4) /* IFA_LOCAL */
1653 + nla_total_size(4) /* IFA_BROADCAST */
1654 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1655 + nla_total_size(4) /* IFA_FLAGS */
1656 + nla_total_size(1) /* IFA_PROTO */
1657 + nla_total_size(4) /* IFA_RT_PRIORITY */
1658 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
/* Convert a jiffies timestamp to hundredths of a second since boot,
 * relative to INITIAL_JIFFIES (the userspace-visible cstamp format).
 */
1661 static inline u32 cstamp_delta(unsigned long cstamp)
1663 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
/* Append an IFA_CACHEINFO attribute (create/update stamps in 1/100s,
 * preferred/valid lifetimes) to @skb.  Returns nla_put()'s result.
 */
1666 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1667 unsigned long tstamp, u32 preferred, u32 valid)
1669 struct ifa_cacheinfo ci;
1671 ci.cstamp = cstamp_delta(cstamp);
1672 ci.tstamp = cstamp_delta(tstamp);
1673 ci.ifa_prefered = preferred;
1674 ci.ifa_valid = valid;
1676 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/* Serialize one in_ifaddr into a netlink message (header + IFA_*
 * attributes).  Returns 0 on success; on overflow the message is
 * cancelled and an error returned (error path partially elided here).
 * Attribute set must match inet_nlmsg_size().
 */
1679 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1680 struct inet_fill_args *args)
1682 struct ifaddrmsg *ifm;
1683 struct nlmsghdr *nlh;
1684 unsigned long tstamp;
1685 u32 preferred, valid;
1687 nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1692 ifm = nlmsg_data(nlh);
1693 ifm->ifa_family = AF_INET;
1694 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
/* READ_ONCE: flags/lifetimes may be updated concurrently (lockless read) */
1695 ifm->ifa_flags = READ_ONCE(ifa->ifa_flags);
1696 ifm->ifa_scope = ifa->ifa_scope;
1697 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1699 if (args->netnsid >= 0 &&
1700 nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1701 goto nla_put_failure;
1703 tstamp = READ_ONCE(ifa->ifa_tstamp);
/* Non-permanent addresses report remaining lifetimes, aged by the
 * elapsed time since ifa_tstamp; permanent ones report INFINITY.
 */
1704 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1705 preferred = READ_ONCE(ifa->ifa_preferred_lft);
1706 valid = READ_ONCE(ifa->ifa_valid_lft);
1707 if (preferred != INFINITY_LIFE_TIME) {
1708 long tval = (jiffies - tstamp) / HZ;
1710 if (preferred > tval)
1714 if (valid != INFINITY_LIFE_TIME) {
1722 preferred = INFINITY_LIFE_TIME;
1723 valid = INFINITY_LIFE_TIME;
/* Optional attributes are only emitted when non-zero/non-empty */
1725 if ((ifa->ifa_address &&
1726 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1728 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1729 (ifa->ifa_broadcast &&
1730 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1731 (ifa->ifa_label[0] &&
1732 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1734 nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1735 nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) ||
1736 (ifa->ifa_rt_priority &&
1737 nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1738 put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1740 goto nla_put_failure;
1742 nlmsg_end(skb, nlh);
1746 nlmsg_cancel(skb, nlh);
/* Strict-mode validation of an RTM_GETADDR dump request: checks header
 * size, rejects filled-in header fields, records the ifindex filter,
 * and allows only IFA_TARGET_NETNSID among the attributes (resolving
 * it to a target netns).  Returns 0 or a negative errno with extack set.
 */
1750 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1751 struct inet_fill_args *fillargs,
1752 struct net **tgt_net, struct sock *sk,
1753 struct netlink_callback *cb)
1755 struct netlink_ext_ack *extack = cb->extack;
1756 struct nlattr *tb[IFA_MAX+1];
1757 struct ifaddrmsg *ifm;
1760 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1761 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
/* Dump requests must leave prefixlen/flags/scope zero */
1765 ifm = nlmsg_data(nlh);
1766 if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1767 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1771 fillargs->ifindex = ifm->ifa_index;
1772 if (fillargs->ifindex) {
1773 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1774 fillargs->flags |= NLM_F_DUMP_FILTERED;
1777 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1778 ifa_ipv4_policy, extack);
1782 for (i = 0; i <= IFA_MAX; ++i) {
1786 if (i == IFA_TARGET_NETNSID) {
1789 fillargs->netnsid = nla_get_s32(tb[i]);
/* May fail if the caller lacks capability over the target netns */
1791 net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1793 fillargs->netnsid = -1;
1794 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1795 return PTR_ERR(net);
1799 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
/* Dump every address of one in_device into @skb, resuming from
 * *s_ip_idx after a partial dump.  NOTE(review): index bookkeeping and
 * the return value plumbing are partially elided in this view.
 */
1807 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1808 struct netlink_callback *cb, int *s_ip_idx,
1809 struct inet_fill_args *fillargs)
1811 struct in_ifaddr *ifa;
1815 in_dev_for_each_ifa_rcu(ifa, in_dev) {
/* Skip entries already emitted in a previous dump round */
1816 if (ip_idx < *s_ip_idx) {
1820 err = inet_fill_ifaddr(skb, ifa, fillargs);
1824 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1835 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1837 static u32 inet_base_seq(const struct net *net)
1839 u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1840 READ_ONCE(net->dev_base_seq);
1842 /* Must not return 0 (see nl_dump_check_consistent()).
1843 * Choose a value far away from 0.
/* RTM_GETADDR dump handler: optionally strict-validates the request,
 * then dumps either a single interface (ifindex filter) or all netdevs
 * in the namespace, using cb->ctx to resume across rounds.
 * NOTE(review): RCU bracketing and error returns are elided in this view.
 */
1850 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1852 const struct nlmsghdr *nlh = cb->nlh;
1853 struct inet_fill_args fillargs = {
1854 .portid = NETLINK_CB(cb->skb).portid,
1855 .seq = nlh->nlmsg_seq,
1856 .event = RTM_NEWADDR,
1857 .flags = NLM_F_MULTI,
1860 struct net *net = sock_net(skb->sk);
1861 struct net *tgt_net = net;
/* Per-dump resume state stored in the netlink callback ctx */
1863 unsigned long ifindex;
1865 } *ctx = (void *)cb->ctx;
1866 struct in_device *in_dev;
1867 struct net_device *dev;
1871 if (cb->strict_check) {
1872 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
/* Filtered dump: a single interface was requested */
1877 if (fillargs.ifindex) {
1879 dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1882 in_dev = __in_dev_get_rcu(dev);
1885 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
/* Seed the consistency seq so concurrent changes mark the dump */
1891 cb->seq = inet_base_seq(tgt_net);
1893 for_each_netdev_dump(net, dev, ctx->ifindex) {
1894 in_dev = __in_dev_get_rcu(dev);
1897 err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
/* Drop the reference taken on a caller-specified target netns */
1903 if (fillargs.netnsid >= 0)
/* Broadcast an address event (RTM_NEWADDR/RTM_DELADDR) to the
 * RTNLGRP_IPV4_IFADDR multicast group; on failure report via
 * rtnl_set_sk_err.  @nlh/@portid echo the triggering request, if any.
 */
1909 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1912 struct inet_fill_args fillargs = {
1914 .seq = nlh ? nlh->nlmsg_seq : 0,
1919 struct sk_buff *skb;
1923 net = dev_net(ifa->ifa_dev->dev);
1924 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1928 err = inet_fill_ifaddr(skb, ifa, &fillargs);
1930 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1931 WARN_ON(err == -EMSGSIZE);
1935 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1939 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/* Size needed for the IFLA_INET_CONF nest when filling link AF info
 * (one u32 per devconf entry).  NOTE(review): the NULL-in_dev early
 * return is elided in this view.
 */
1942 static size_t inet_get_link_af_size(const struct net_device *dev,
1943 u32 ext_filter_mask)
1945 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1950 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/* Fill IFLA_INET_CONF with a lockless snapshot of every devconf value
 * for @dev (READ_ONCE per entry; callable under RCU or RTNL).
 */
1953 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1954 u32 ext_filter_mask)
1956 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1963 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1967 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1968 ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
/* Netlink policy for IFLA_INET_* attributes in link AF operations. */
1973 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1974 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/* Validate an IFLA_AF_SPEC/AF_INET payload before inet_set_link_af is
 * applied: the device must have an in_device, and every nested
 * IFLA_INET_CONF entry must carry a known devconf index.
 */
1977 static int inet_validate_link_af(const struct net_device *dev,
1978 const struct nlattr *nla,
1979 struct netlink_ext_ack *extack)
1981 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1984 if (dev && !__in_dev_get_rtnl(dev))
1985 return -EAFNOSUPPORT;
1987 err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1988 inet_af_policy, extack);
1992 if (tb[IFLA_INET_CONF]) {
1993 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1994 int cfgid = nla_type(a);
/* Reject out-of-range devconf ids (1..IPV4_DEVCONF_MAX) */
1999 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/* Apply an already-validated IFLA_AF_SPEC/AF_INET payload: write each
 * nested IFLA_INET_CONF value into the device's devconf.  Runs under
 * RTNL (uses __in_dev_get_rtnl).
 */
2007 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2008 struct netlink_ext_ack *extack)
2010 struct in_device *in_dev = __in_dev_get_rtnl(dev);
2011 struct nlattr *a, *tb[IFLA_INET_MAX+1];
2015 return -EAFNOSUPPORT;
/* No policy here: inet_validate_link_af already checked the payload */
2017 if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2020 if (tb[IFLA_INET_CONF]) {
2021 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2022 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/* Worst-case RTM_NEWNETCONF message size for attribute @type
 * (NETCONFA_ALL sizes for every attribute).  Must stay in sync with
 * inet_netconf_fill_devconf (see the -EMSGSIZE WARN_ONs at callers).
 */
2028 static int inet_netconf_msgsize_devconf(int type)
2030 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2031 + nla_total_size(4); /* NETCONFA_IFINDEX */
2034 if (type == NETCONFA_ALL)
2037 if (all || type == NETCONFA_FORWARDING)
2038 size += nla_total_size(4);
2039 if (all || type == NETCONFA_RP_FILTER)
2040 size += nla_total_size(4);
2041 if (all || type == NETCONFA_MC_FORWARDING)
2042 size += nla_total_size(4);
2043 if (all || type == NETCONFA_BC_FORWARDING)
2044 size += nla_total_size(4);
2045 if (all || type == NETCONFA_PROXY_NEIGH)
2046 size += nla_total_size(4);
2047 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2048 size += nla_total_size(4);
/* Serialize one netconf notification: header, NETCONFA_IFINDEX, then
 * the requested attribute (or all of them for NETCONFA_ALL) read from
 * @devconf.  Returns 0 on success; cancels the message on overflow.
 * Attribute set must match inet_netconf_msgsize_devconf().
 */
2053 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2054 const struct ipv4_devconf *devconf,
2055 u32 portid, u32 seq, int event,
2056 unsigned int flags, int type)
2058 struct nlmsghdr *nlh;
2059 struct netconfmsg *ncm;
2062 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2067 if (type == NETCONFA_ALL)
2070 ncm = nlmsg_data(nlh);
2071 ncm->ncm_family = AF_INET;
2073 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2074 goto nla_put_failure;
/* NOTE(review): the NULL-devconf early-out is elided in this view */
2079 if ((all || type == NETCONFA_FORWARDING) &&
2080 nla_put_s32(skb, NETCONFA_FORWARDING,
2081 IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2082 goto nla_put_failure;
2083 if ((all || type == NETCONFA_RP_FILTER) &&
2084 nla_put_s32(skb, NETCONFA_RP_FILTER,
2085 IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2086 goto nla_put_failure;
2087 if ((all || type == NETCONFA_MC_FORWARDING) &&
2088 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2089 IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2090 goto nla_put_failure;
2091 if ((all || type == NETCONFA_BC_FORWARDING) &&
2092 nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2093 IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2094 goto nla_put_failure;
/* NETCONFA_PROXY_NEIGH maps to the PROXY_ARP devconf entry */
2095 if ((all || type == NETCONFA_PROXY_NEIGH) &&
2096 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2097 IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2098 goto nla_put_failure;
2099 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2100 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2101 IPV4_DEVCONF_RO(*devconf,
2102 IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2103 goto nla_put_failure;
2106 nlmsg_end(skb, nlh);
2110 nlmsg_cancel(skb, nlh);
/* Build and multicast an RTM_NEWNETCONF/RTM_DELNETCONF notification for
 * @type on @ifindex to RTNLGRP_IPV4_NETCONF; report errors via
 * rtnl_set_sk_err.
 */
2114 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2115 int ifindex, struct ipv4_devconf *devconf)
2117 struct sk_buff *skb;
2120 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2124 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2127 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2128 WARN_ON(err == -EMSGSIZE);
2132 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2136 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/* Netlink policy for NETCONFA_* attributes in netconf get requests. */
2139 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2140 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
2141 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
2142 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
2143 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
2144 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
/* Validate an RTM_GETNETCONF request.  Non-strict sockets get a plain
 * parse; strict sockets additionally allow only NETCONFA_IFINDEX among
 * the supplied attributes.
 */
2147 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2148 const struct nlmsghdr *nlh,
2150 struct netlink_ext_ack *extack)
2154 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2155 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2159 if (!netlink_strict_get_check(skb))
2160 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2162 devconf_ipv4_policy, extack);
2164 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2166 devconf_ipv4_policy, extack);
2170 for (i = 0; i <= NETCONFA_MAX; i++) {
2175 case NETCONFA_IFINDEX:
2178 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
/* RTM_GETNETCONF doit handler: resolve NETCONFA_IFINDEX (ALL, DEFAULT,
 * or a real device) to the matching ipv4_devconf, then unicast a full
 * netconf snapshot back to the requester.
 * NOTE(review): reference-release and error paths are elided here.
 */
2186 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2187 struct nlmsghdr *nlh,
2188 struct netlink_ext_ack *extack)
2190 struct net *net = sock_net(in_skb->sk);
2191 struct nlattr *tb[NETCONFA_MAX + 1];
2192 const struct ipv4_devconf *devconf;
2193 struct in_device *in_dev = NULL;
2194 struct net_device *dev = NULL;
2195 struct sk_buff *skb;
2199 err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2203 if (!tb[NETCONFA_IFINDEX])
2206 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2208 case NETCONFA_IFINDEX_ALL:
2209 devconf = net->ipv4.devconf_all;
2211 case NETCONFA_IFINDEX_DEFAULT:
2212 devconf = net->ipv4.devconf_dflt;
/* default: a real ifindex — takes refs on both dev and in_dev */
2216 dev = dev_get_by_index(net, ifindex);
2218 in_dev = in_dev_get(dev);
2221 devconf = &in_dev->cnf;
/* NETCONFA_ALL sizing: reply carries every attribute */
2226 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2230 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2231 NETLINK_CB(in_skb).portid,
2232 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2235 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2236 WARN_ON(err == -EMSGSIZE);
2240 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/* RTM_GETNETCONF dump handler: one entry per netdev, then the "all"
 * and "default" pseudo-entries.  cb->ctx tracks both the netdev cursor
 * and which pseudo-entries were already emitted (all_default).
 */
2248 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2249 struct netlink_callback *cb)
2251 const struct nlmsghdr *nlh = cb->nlh;
2252 struct net *net = sock_net(skb->sk);
2254 unsigned long ifindex;
2255 unsigned int all_default;
2256 } *ctx = (void *)cb->ctx;
2257 const struct in_device *in_dev;
2258 struct net_device *dev;
/* Strict mode: header must be exact and carry no attributes */
2261 if (cb->strict_check) {
2262 struct netlink_ext_ack *extack = cb->extack;
2263 struct netconfmsg *ncm;
2265 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2266 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2270 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2271 NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2277 for_each_netdev_dump(net, dev, ctx->ifindex) {
2278 in_dev = __in_dev_get_rcu(dev);
2281 err = inet_netconf_fill_devconf(skb, dev->ifindex,
2283 NETLINK_CB(cb->skb).portid,
2285 RTM_NEWNETCONF, NLM_F_MULTI,
/* Pseudo-entry: namespace-wide "all" config */
2290 if (ctx->all_default == 0) {
2291 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2292 net->ipv4.devconf_all,
2293 NETLINK_CB(cb->skb).portid,
2295 RTM_NEWNETCONF, NLM_F_MULTI,
/* Pseudo-entry: defaults inherited by new devices */
2301 if (ctx->all_default == 1) {
2302 err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2303 net->ipv4.devconf_dflt,
2304 NETLINK_CB(cb->skb).portid,
2306 RTM_NEWNETCONF, NLM_F_MULTI,
2317 #ifdef CONFIG_SYSCTL
/* Propagate a changed "default" devconf entry @i to every device in
 * @net that has not explicitly overridden it (its state bit is clear).
 */
2319 static void devinet_copy_dflt_conf(struct net *net, int i)
2321 struct net_device *dev;
2324 for_each_netdev_rcu(net, dev) {
2325 struct in_device *in_dev;
2327 in_dev = __in_dev_get_rcu(dev);
2328 if (in_dev && !test_bit(i, in_dev->cnf.state))
2329 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2334 /* called with RTNL locked */
/* Toggle IPv4 forwarding namespace-wide: mirror the "all" setting into
 * "default", flip accept_redirects accordingly, push the new value to
 * every device, and send a netconf notification for each change.
 */
2335 static void inet_forward_change(struct net *net)
2337 struct net_device *dev;
2338 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
/* A router should not accept redirects; a host should */
2340 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2341 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2342 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2343 NETCONFA_FORWARDING,
2344 NETCONFA_IFINDEX_ALL,
2345 net->ipv4.devconf_all);
2346 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2347 NETCONFA_FORWARDING,
2348 NETCONFA_IFINDEX_DEFAULT,
2349 net->ipv4.devconf_dflt);
2351 for_each_netdev(net, dev) {
2352 struct in_device *in_dev;
/* LRO-merged frames must not be forwarded (dev_disable_lro call
 * is conditional; condition elided in this view).
 */
2355 dev_disable_lro(dev);
2357 in_dev = __in_dev_get_rtnl(dev);
2359 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2360 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2361 NETCONFA_FORWARDING,
2362 dev->ifindex, &in_dev->cnf);
/* Map an ipv4_devconf pointer back to the ifindex used in netconf
 * notifications: the "default"/"all" sentinels, or the owning device's
 * real ifindex (recovered via container_of into its in_device).
 */
2367 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2369 if (cnf == net->ipv4.devconf_dflt)
2370 return NETCONFA_IFINDEX_DEFAULT;
2371 else if (cnf == net->ipv4.devconf_all)
2372 return NETCONFA_IFINDEX_ALL;
2374 struct in_device *idev
2375 = container_of(cnf, struct in_device, cnf);
2376 return idev->dev->ifindex;
/* Generic sysctl handler for devconf entries: performs the integer
 * read/write, marks the entry as explicitly set, propagates default
 * changes, flushes the route cache for entries that affect routing
 * decisions, and emits netconf notifications for the announced ones.
 */
2380 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2381 void *buffer, size_t *lenp, loff_t *ppos)
2383 int old_value = *(int *)ctl->data;
2384 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2385 int new_value = *(int *)ctl->data;
2388 struct ipv4_devconf *cnf = ctl->extra1;
2389 struct net *net = ctl->extra2;
/* Recover the devconf index from the written slot's offset */
2390 int i = (int *)ctl->data - cnf->data;
/* Remember this entry was set explicitly (blocks default propagation) */
2393 set_bit(i, cnf->state);
2395 if (cnf == net->ipv4.devconf_dflt)
2396 devinet_copy_dflt_conf(net, i);
2397 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2398 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2399 if ((new_value == 0) && (old_value != 0))
2400 rt_cache_flush(net);
2402 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2403 new_value != old_value)
2404 rt_cache_flush(net);
2406 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2407 new_value != old_value) {
2408 ifindex = devinet_conf_ifindex(net, cnf);
2409 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2413 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2414 new_value != old_value) {
2415 ifindex = devinet_conf_ifindex(net, cnf);
2416 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2417 NETCONFA_PROXY_NEIGH,
2420 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2421 new_value != old_value) {
2422 ifindex = devinet_conf_ifindex(net, cnf);
2423 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2424 NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
/* Sysctl handler for "forwarding" entries (and ip_forward).  Requires
 * CAP_NET_ADMIN in the owning user-ns; on a value change it takes RTNL
 * (restarting the syscall if contended), applies namespace-wide or
 * per-device semantics, flushes routes, and notifies via netconf.
 */
2432 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2433 void *buffer, size_t *lenp, loff_t *ppos)
2435 int *valp = ctl->data;
2438 struct net *net = ctl->extra2;
2441 if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2444 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2446 if (write && *valp != val) {
/* The "default" entry needs no RTNL work; everything else does */
2447 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2448 if (!rtnl_trylock()) {
2449 /* Restore the original values before restarting */
2452 return restart_syscall();
2454 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2455 inet_forward_change(net);
2457 struct ipv4_devconf *cnf = ctl->extra1;
2458 struct in_device *idev =
2459 container_of(cnf, struct in_device, cnf);
/* Per-device enable: LRO must be off on a forwarding device */
2461 dev_disable_lro(idev->dev);
2462 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2463 NETCONFA_FORWARDING,
2468 rt_cache_flush(net);
2470 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2471 NETCONFA_FORWARDING,
2472 NETCONFA_IFINDEX_DEFAULT,
2473 net->ipv4.devconf_dflt);
/* Sysctl handler that flushes the IPv4 route cache whenever the
 * integer value actually changed on a write.
 */
2479 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2480 void *buffer, size_t *lenp, loff_t *ppos)
2482 int *valp = ctl->data;
2484 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2485 struct net *net = ctl->extra2;
2487 if (write && *valp != val)
2488 rt_cache_flush(net);
/* Builders for devinet sysctl table entries.  Each entry's .data points
 * into the template ipv4_devconf; __devinet_sysctl_register rebases it
 * onto the actual per-net/per-device ipv4_devconf at registration time.
 */
2493 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2496 .data = ipv4_devconf.data + \
2497 IPV4_DEVCONF_ ## attr - 1, \
2498 .maxlen = sizeof(int), \
2500 .proc_handler = proc, \
2501 .extra1 = &ipv4_devconf, \
/* Read-write entry using the generic devconf handler */
2504 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2505 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
/* Read-only entry (mode 0444) */
2507 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2508 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
/* Entry with a custom proc handler */
2510 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2511 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
/* Entry whose change flushes the route cache */
2513 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2514 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/* Template sysctl table for net/ipv4/conf/<dev>/*; kmemdup'ed and
 * rebased per interface by __devinet_sysctl_register.
 */
2516 static struct devinet_sysctl_table {
2517 struct ctl_table_header *sysctl_header;
2518 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2519 } devinet_sysctl = {
2521 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2522 devinet_sysctl_forward),
2523 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2524 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2526 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2527 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2528 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2529 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2530 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2531 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2532 "accept_source_route"),
2533 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2534 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2535 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2536 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2537 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2538 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2539 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2540 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2541 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2542 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2543 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2544 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2545 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2546 "arp_evict_nocarrier"),
2547 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2548 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2549 "force_igmp_version"),
2550 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2551 "igmpv2_unsolicited_report_interval"),
2552 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2553 "igmpv3_unsolicited_report_interval"),
2554 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2555 "ignore_routes_with_linkdown"),
2556 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2557 "drop_gratuitous_arp"),
2559 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2560 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2561 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2562 "promote_secondaries"),
2563 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2565 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2566 "drop_unicast_in_l2_multicast"),
/* Duplicate the template sysctl table, rebase each entry's .data from
 * the template ipv4_devconf onto @p, register it under
 * net/ipv4/conf/<dev_name>, and announce the full config via netconf.
 * NOTE(review): error unwinding is elided in this view.
 */
2570 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2571 int ifindex, struct ipv4_devconf *p)
2574 struct devinet_sysctl_table *t;
2575 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2577 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
/* Rebase pointers: template offsets -> this devconf instance */
2581 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2582 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2583 t->devinet_vars[i].extra1 = p;
2584 t->devinet_vars[i].extra2 = net;
2587 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2589 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2590 if (!t->sysctl_header)
2595 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
/* Tear down the per-devconf sysctl table (if present) and announce the
 * removal with an RTM_DELNETCONF notification.
 */
2605 static void __devinet_sysctl_unregister(struct net *net,
2606 struct ipv4_devconf *cnf, int ifindex)
2608 struct devinet_sysctl_table *t = cnf->sysctl;
2612 unregister_net_sysctl_table(t->sysctl_header);
2616 inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
/* Register both neigh (ARP) and devinet sysctls for @idev; rejects
 * device names sysctl cannot represent, and rolls back the neigh
 * registration if the devinet one fails.
 */
2619 static int devinet_sysctl_register(struct in_device *idev)
2623 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2626 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2629 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2630 idev->dev->ifindex, &idev->cnf);
2632 neigh_sysctl_unregister(idev->arp_parms);
/* Mirror of devinet_sysctl_register: remove devinet then neigh sysctls. */
2636 static void devinet_sysctl_unregister(struct in_device *idev)
2638 struct net *net = dev_net(idev->dev);
2640 __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2641 neigh_sysctl_unregister(idev->arp_parms);
/* Template for the global net/ipv4/ip_forward sysctl; .data/.extra1/
 * .extra2 are rebased per-namespace in devinet_init_net.
 */
2644 static struct ctl_table ctl_forward_entry[] = {
2646 .procname = "ip_forward",
2647 .data = &ipv4_devconf.data[
2648 IPV4_DEVCONF_FORWARDING - 1],
2649 .maxlen = sizeof(int),
2651 .proc_handler = devinet_sysctl_forward,
2652 .extra1 = &ipv4_devconf,
2653 .extra2 = &init_net,
/* Per-netns init: allocate the "all" and "default" devconf copies
 * (seeded from the compiled-in templates, the parent netns, or
 * init_net depending on net.core.devconf_inherit_init_net), then
 * register the "all"/"default" sysctl trees and ip_forward.
 * NOTE(review): several error labels are elided in this view.
 */
2659 static __net_init int devinet_init_net(struct net *net)
2662 struct ipv4_devconf *all, *dflt;
2663 #ifdef CONFIG_SYSCTL
2664 struct ctl_table *tbl;
2665 struct ctl_table_header *forw_hdr;
2669 all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2673 dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2675 goto err_alloc_dflt;
2677 #ifdef CONFIG_SYSCTL
2678 tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
/* Rebase the ip_forward template onto this namespace's "all" config */
2682 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2683 tbl[0].extra1 = all;
2684 tbl[0].extra2 = net;
2687 if (!net_eq(net, &init_net)) {
2688 switch (net_inherit_devconf()) {
2690 /* copy from the current netns */
2691 memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2692 sizeof(ipv4_devconf));
2694 current->nsproxy->net_ns->ipv4.devconf_dflt,
2695 sizeof(ipv4_devconf_dflt));
2699 /* copy from init_net */
2700 memcpy(all, init_net.ipv4.devconf_all,
2701 sizeof(ipv4_devconf));
2702 memcpy(dflt, init_net.ipv4.devconf_dflt,
2703 sizeof(ipv4_devconf_dflt));
2706 /* use compiled values */
2711 #ifdef CONFIG_SYSCTL
2712 err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2716 err = __devinet_sysctl_register(net, "default",
2717 NETCONFA_IFINDEX_DEFAULT, dflt);
2722 forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2723 ARRAY_SIZE(ctl_forward_entry));
2726 net->ipv4.forw_hdr = forw_hdr;
2729 net->ipv4.devconf_all = all;
2730 net->ipv4.devconf_dflt = dflt;
/* Error unwinding: undo registrations in reverse order */
2733 #ifdef CONFIG_SYSCTL
2735 __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2737 __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
/* Per-netns teardown: unregister sysctl trees in reverse order of
 * devinet_init_net, then free the devconf copies.
 */
2749 static __net_exit void devinet_exit_net(struct net *net)
2751 #ifdef CONFIG_SYSCTL
2752 struct ctl_table *tbl;
2754 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2755 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2756 __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2757 NETCONFA_IFINDEX_DEFAULT);
2758 __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2759 NETCONFA_IFINDEX_ALL);
2762 kfree(net->ipv4.devconf_dflt);
2763 kfree(net->ipv4.devconf_all);
/* Pernet operations wiring devinet init/exit into netns lifecycle. */
2766 static __net_initdata struct pernet_operations devinet_ops = {
2767 .init = devinet_init_net,
2768 .exit = devinet_exit_net,
/* rtnetlink per-address-family ops for AF_INET link attributes. */
2771 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2773 .fill_link_af = inet_fill_link_af,
2774 .get_link_af_size = inet_get_link_af_size,
2775 .validate_link_af = inet_validate_link_af,
2776 .set_link_af = inet_set_link_af,
2779 void __init devinet_init(void)
2783 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2784 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2786 register_pernet_subsys(&devinet_ops);
2787 register_netdevice_notifier(&ip_netdev_notifier);
2789 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2791 rtnl_af_register(&inet_af_ops);
2793 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2794 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2795 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2796 RTNL_FLAG_DUMP_UNLOCKED);
2797 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2798 inet_netconf_dump_devconf,
2799 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);