// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>
#define NEIGH_DEBUG 1

#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF
static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans/updates of the hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     It will result in deadlocks if the backend/driver wants to use the
     neighbour cache.
   - If an entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks into neighbour tables.
 */
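/* For illustration only (a sketch, not part of the original file): a
 * caller that needs to do heavy work on an entry follows the rules
 * above by pinning the entry and dropping the table lock first:
 *
 *	write_lock_bh(&tbl->lock);
 *	n = ...scan hash buckets under tbl->lock...;
 *	neigh_hold(n);			// take a reference
 *	write_unlock_bh(&tbl->lock);	// no callbacks while locked
 *	...non-trivial actions, may take other locks...
 *	neigh_release(n);		// may trigger neigh_destroy()
 */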
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
/*
 * It is a uniform random distribution over the interval
 * (1/2)*base ... (3/2)*base.  It corresponds to the default IPv6
 * settings and is not overridable, because it is a really
 * reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
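/* Worked example (illustrative numbers, not from the original source):
 * with base = 30 * HZ, prandom_u32() % base is uniform over [0, 30s)
 * and adding base >> 1 shifts that to [15s, 45s), i.e. exactly the
 * (1/2)*base ... (3/2)*base window described above.
 */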
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}
static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}
static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
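/* Worked example (illustrative numbers only): with gc_thresh2 = 512 and
 * 600 entries on the gc_list, max_clean is 88; the loop above evicts
 * unreferenced entries that are FAILED, multicast, or not updated
 * within the last 5 seconds, until 88 are shrunk or the list ends.
 */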
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				rcu_dereference_protected(n->next,
					lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
static struct neighbour *___neigh_create(struct neigh_table *tbl,
					 const void *pkey,
					 struct net_device *dev,
					 bool exempt_from_gc, bool want_ref)
{
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neigh_hash_table *nht;

	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
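/* Sketch of the folding (illustrative): the last four bytes of the key
 * are xor-folded down to four bits by the shift cascade above, so proxy
 * entries always spread over PNEIGH_HASHMASK + 1 = 16 buckets,
 * independent of the protocol's key length.
 */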
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
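/* For IPv4/ARP, for instance, ops->output is typically the slow
 * revalidating path neigh_resolve_output() and ops->connected_output is
 * the fast path neigh_connected_output() defined further below, so
 * suspect/connect simply toggle between the two function pointers.
 */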
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
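/* Worked example (illustrative, assuming the common defaults
 * ucast_solicit = 3, app_solicit = 0, mcast_solicit = 3,
 * mcast_resolicit = 0): an entry in NUD_INCOMPLETE may send
 * 3 + 0 + 3 = 6 probes, while one already in NUD_PROBE gets only
 * 3 + 0 + 0 = 3 before being declared NUD_FAILED.
 */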
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate place. report_unreachable() is a very
	   complicated routine. In particular, it can hit the same
	   neighbour entry!

	   So we try to be careful and avoid an infinite loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold a reference count on the entry.
 */
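/* Example (sketch, not from this file): a caller that learned a fresh
 * link-layer address from traffic typically does
 *
 *	neigh_update(n, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE, 0);
 *
 * as neigh_event_ns() below does, while administrative changes arriving
 * over netlink additionally pass NEIGH_UPDATE_F_ADMIN.
 */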
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = jiffies +
			prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);

	dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;
static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	RCU_INIT_POINTER(tbl->nht, NULL);

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}
const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	neigh_release(neigh);

out:
	return err;
}
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		long flush_delta = now - tbl->last_flush;
		long rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;
			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2521 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2523 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2524 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2527 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2529 struct net_device *master;
2531 if (!master_idx)
2532 return false;
2534 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2535 if (!master || master->ifindex != master_idx)
2536 return true;
2538 return false;
2539 }
2541 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2543 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2544 return true;
2546 return false;
2547 }
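/* These two predicates implement the NDA_IFINDEX and NDA_MASTER dump
 * filters parsed by neigh_valid_dump_req() below; e.g.
 * "ip -4 neigh show dev eth0" (iproute2; names illustrative) makes the
 * kernel skip non-matching entries instead of filtering in userspace,
 * and the reply is flagged NLM_F_DUMP_FILTERED.
 */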
2549 struct neigh_dump_filter {
2550 int master_idx;
2551 int dev_idx;
2552 };
2554 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2555 struct netlink_callback *cb,
2556 struct neigh_dump_filter *filter)
2558 struct net *net = sock_net(skb->sk);
2559 struct neighbour *n;
2560 int rc, h, s_h = cb->args[1];
2561 int idx, s_idx = idx = cb->args[2];
2562 struct neigh_hash_table *nht;
2563 unsigned int flags = NLM_F_MULTI;
2565 if (filter->dev_idx || filter->master_idx)
2566 flags |= NLM_F_DUMP_FILTERED;
2568 rcu_read_lock_bh();
2569 nht = rcu_dereference_bh(tbl->nht);
2571 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2572 if (h > s_h)
2573 s_idx = 0;
2574 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2575 n != NULL;
2576 n = rcu_dereference_bh(n->next)) {
2577 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2578 goto next;
2579 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2580 neigh_master_filtered(n->dev, filter->master_idx))
2581 goto next;
2582 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2583 cb->nlh->nlmsg_seq,
2584 RTM_NEWNEIGH,
2585 flags) < 0) {
2586 rc = -1;
2587 goto out;
2588 }
2589 next:
2590 idx++;
2591 }
2592 }
2593 rc = skb->len;
2594 out:
2595 rcu_read_unlock_bh();
2596 cb->args[1] = h;
2597 cb->args[2] = idx;
2598 return rc;
2599 }
2601 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2602 struct netlink_callback *cb,
2603 struct neigh_dump_filter *filter)
2605 struct pneigh_entry *n;
2606 struct net *net = sock_net(skb->sk);
2607 int rc, h, s_h = cb->args[3];
2608 int idx, s_idx = idx = cb->args[4];
2609 unsigned int flags = NLM_F_MULTI;
2611 if (filter->dev_idx || filter->master_idx)
2612 flags |= NLM_F_DUMP_FILTERED;
2614 read_lock_bh(&tbl->lock);
2616 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2617 if (h > s_h)
2618 s_idx = 0;
2619 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2620 if (idx < s_idx || pneigh_net(n) != net)
2621 goto next;
2622 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2623 neigh_master_filtered(n->dev, filter->master_idx))
2624 goto next;
2625 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2626 cb->nlh->nlmsg_seq,
2627 RTM_NEWNEIGH, flags, tbl) < 0) {
2628 read_unlock_bh(&tbl->lock);
2629 rc = -1;
2630 goto out;
2631 }
2632 next:
2633 idx++;
2634 }
2635 }
2637 read_unlock_bh(&tbl->lock);
2638 rc = skb->len;
2639 out:
2640 cb->args[3] = h;
2641 cb->args[4] = idx;
2642 return rc;
2643 }
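/* Note that neigh_dump_table() and pneigh_dump_table() keep their
 * cursors in disjoint cb->args[] slots (args[1]/args[2] versus
 * args[3]/args[4]), so either walk can be resumed independently across
 * the multiple callbacks of one dump.
 */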
2646 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2647 bool strict_check,
2648 struct neigh_dump_filter *filter,
2649 struct netlink_ext_ack *extack)
2651 struct nlattr *tb[NDA_MAX + 1];
2652 int err, i;
2654 if (strict_check) {
2655 struct ndmsg *ndm;
2657 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2658 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2659 return -EINVAL;
2660 }
2662 ndm = nlmsg_data(nlh);
2663 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2664 ndm->ndm_state || ndm->ndm_type) {
2665 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2669 if (ndm->ndm_flags & ~NTF_PROXY) {
2670 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2674 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2675 tb, NDA_MAX, nda_policy,
2676 extack);
2677 } else {
2678 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2679 NDA_MAX, nda_policy, extack);
2680 }
2681 if (err < 0)
2682 return err;
2684 for (i = 0; i <= NDA_MAX; ++i) {
2685 if (!tb[i])
2686 continue;
2688 /* all new attributes should require strict_check */
2689 switch (i) {
2690 case NDA_IFINDEX:
2691 filter->dev_idx = nla_get_u32(tb[i]);
2692 break;
2693 case NDA_MASTER:
2694 filter->master_idx = nla_get_u32(tb[i]);
2695 break;
2696 default:
2697 if (strict_check) {
2698 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2699 return -EINVAL;
2700 }
2701 }
2702 }
2704 return 0;
2705 }
2707 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2709 const struct nlmsghdr *nlh = cb->nlh;
2710 struct neigh_dump_filter filter = {};
2711 struct neigh_table *tbl;
2712 int t, family, s_t;
2713 int proxy = 0;
2714 int err;
2716 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2718 /* check for full ndmsg structure presence, family member is
2719 * the same for both structures
2720 */
2721 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2722 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2723 proxy = 1;
2725 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2726 if (err < 0 && cb->strict_check)
2727 return err;
2729 s_t = cb->args[0];
2731 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2732 tbl = neigh_tables[t];
2734 if (!tbl)
2735 continue;
2736 if (t < s_t || (family && tbl->family != family))
2737 continue;
2738 if (t > s_t)
2739 memset(&cb->args[1], 0, sizeof(cb->args) -
2740 sizeof(cb->args[0]));
2741 if (proxy)
2742 err = pneigh_dump_table(tbl, skb, cb, &filter);
2743 else
2744 err = neigh_dump_table(tbl, skb, cb, &filter);
2745 if (err < 0)
2746 break;
2747 }
2749 cb->args[0] = t;
2751 return skb->len;
2752 }
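/* Dump resumption in a nutshell: netlink calls neigh_dump_info()
 * repeatedly until it stops producing data, and the cb->args[] array
 * is the only state carried between calls. A sketch of the contract
 * (illustrative):
 *
 *	call 1: args all zero          -> fill skb, record the cursor
 *	call N: skip below the cursor  -> continue where N-1 stopped
 *	last  : nothing left to add    -> netlink appends NLMSG_DONE
 */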
2753 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2754 struct neigh_table **tbl,
2755 void **dst, int *dev_idx, u8 *ndm_flags,
2756 struct netlink_ext_ack *extack)
2758 struct nlattr *tb[NDA_MAX + 1];
2759 struct ndmsg *ndm;
2760 int err, i;
2762 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2763 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2764 return -EINVAL;
2765 }
2767 ndm = nlmsg_data(nlh);
2768 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2769 ndm->ndm_type) {
2770 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2771 return -EINVAL;
2772 }
2774 if (ndm->ndm_flags & ~NTF_PROXY) {
2775 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2779 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2780 NDA_MAX, nda_policy, extack);
2781 if (err < 0)
2782 return err;
2784 *ndm_flags = ndm->ndm_flags;
2785 *dev_idx = ndm->ndm_ifindex;
2786 *tbl = neigh_find_table(ndm->ndm_family);
2787 if (*tbl == NULL) {
2788 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2789 return -EAFNOSUPPORT;
2790 }
2792 for (i = 0; i <= NDA_MAX; ++i) {
2793 if (!tb[i])
2794 continue;
2796 switch (i) {
2797 case NDA_DST:
2798 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2799 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2800 return -EINVAL;
2801 }
2802 *dst = nla_data(tb[i]);
2803 break;
2804 default:
2805 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2806 return -EINVAL;
2807 }
2808 }
2810 return 0;
2811 }
2813 static inline size_t neigh_nlmsg_size(void)
2815 return NLMSG_ALIGN(sizeof(struct ndmsg))
2816 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2817 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2818 + nla_total_size(sizeof(struct nda_cacheinfo))
2819 + nla_total_size(4) /* NDA_PROBES */
2820 + nla_total_size(1); /* NDA_PROTOCOL */
2821 }
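/* Worked example of the accounting above, assuming the usual netlink
 * layout (each attribute costs NLA_HDRLEN = 4 bytes plus its payload,
 * rounded up to a 4-byte boundary by nla_total_size()):
 *
 *	NLMSG_ALIGN(sizeof(struct ndmsg))               =  12
 *	nla_total_size(MAX_ADDR_LEN = 32), NDA_DST      =  36
 *	nla_total_size(32), NDA_LLADDR                  =  36
 *	nla_total_size(sizeof(struct nda_cacheinfo)=16) =  20
 *	nla_total_size(4), NDA_PROBES                   =   8
 *	nla_total_size(1), NDA_PROTOCOL                 =   8
 *	                                          total = 120 bytes
 *
 * i.e. the allocation is a worst-case bound; shorter hardware
 * addresses simply leave a few bytes unused.
 */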
2823 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2824 u32 pid, u32 seq)
2826 struct sk_buff *skb;
2827 int err = 0;
2829 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2830 if (!skb)
2831 return -ENOBUFS;
2833 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2834 if (err) {
2835 kfree_skb(skb);
2836 goto errout;
2837 }
2839 err = rtnl_unicast(skb, net, pid);
2840 errout:
2841 return err;
2842 }
2844 static inline size_t pneigh_nlmsg_size(void)
2846 return NLMSG_ALIGN(sizeof(struct ndmsg))
2847 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2848 + nla_total_size(1); /* NDA_PROTOCOL */
2849 }
2851 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2852 u32 pid, u32 seq, struct neigh_table *tbl)
2854 struct sk_buff *skb;
2855 int err = 0;
2857 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2858 if (!skb)
2859 return -ENOBUFS;
2861 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2862 if (err) {
2863 kfree_skb(skb);
2864 goto errout;
2865 }
2867 err = rtnl_unicast(skb, net, pid);
2868 errout:
2869 return err;
2870 }
2872 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2873 struct netlink_ext_ack *extack)
2875 struct net *net = sock_net(in_skb->sk);
2876 struct net_device *dev = NULL;
2877 struct neigh_table *tbl = NULL;
2878 struct neighbour *neigh;
2879 void *dst = NULL;
2880 u8 ndm_flags = 0;
2881 int dev_idx = 0;
2882 int err;
2884 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2885 extack);
2886 if (err < 0)
2887 return err;
2889 if (dev_idx) {
2890 dev = __dev_get_by_index(net, dev_idx);
2891 if (!dev) {
2892 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2893 return -ENODEV;
2894 }
2895 }
2897 if (!dst) {
2898 NL_SET_ERR_MSG(extack, "Network address not specified");
2899 return -EINVAL;
2900 }
2902 if (ndm_flags & NTF_PROXY) {
2903 struct pneigh_entry *pn;
2905 pn = pneigh_lookup(tbl, net, dst, dev, 0);
2906 if (!pn) {
2907 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
2908 return -ENOENT;
2909 }
2910 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
2911 nlh->nlmsg_seq, tbl);
2912 }
2914 if (!dev) {
2915 NL_SET_ERR_MSG(extack, "No device specified");
2916 return -EINVAL;
2917 }
2919 neigh = neigh_lookup(tbl, dst, dev);
2920 if (!neigh) {
2921 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
2922 return -ENOENT;
2923 }
2925 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
2926 nlh->nlmsg_seq);
2928 neigh_release(neigh);
2930 return err;
2931 }
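/* This handler serves point lookups such as
 * "ip neigh get 192.0.2.1 dev eth0" (iproute2; address and device
 * illustrative); the NTF_PROXY branch above is selected by
 * "ip neigh get proxy ... dev ...".
 */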
2933 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2935 int chain;
2936 struct neigh_hash_table *nht;
2938 rcu_read_lock_bh();
2939 nht = rcu_dereference_bh(tbl->nht);
2941 read_lock(&tbl->lock); /* avoid resizes */
2942 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2943 struct neighbour *n;
2945 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2946 n != NULL;
2947 n = rcu_dereference_bh(n->next))
2948 cb(n, cookie);
2949 }
2950 read_unlock(&tbl->lock);
2951 rcu_read_unlock_bh();
2952 }
2953 EXPORT_SYMBOL(neigh_for_each);
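/* A minimal neigh_for_each() user (sketch, not part of this file;
 * "arp_tbl" is the IPv4 table exported by net/ipv4/arp.c). The
 * callback runs with tbl->lock read-held and BH disabled, so it must
 * not sleep or call back into the neighbour table:
 *
 *	static void count_valid(struct neighbour *n, void *cookie)
 *	{
 *		if (n->nud_state & NUD_VALID)
 *			(*(unsigned int *)cookie)++;
 *	}
 *
 *	unsigned int nr_valid = 0;
 *	neigh_for_each(&arp_tbl, count_valid, &nr_valid);
 */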
2955 /* The tbl->lock must be held as a writer and BH disabled. */
2956 void __neigh_for_each_release(struct neigh_table *tbl,
2957 int (*cb)(struct neighbour *))
2959 int chain;
2960 struct neigh_hash_table *nht;
2962 nht = rcu_dereference_protected(tbl->nht,
2963 lockdep_is_held(&tbl->lock));
2964 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2965 struct neighbour *n;
2966 struct neighbour __rcu **np;
2968 np = &nht->hash_buckets[chain];
2969 while ((n = rcu_dereference_protected(*np,
2970 lockdep_is_held(&tbl->lock))) != NULL) {
2971 int release;
2973 write_lock(&n->lock);
2974 release = cb(n);
2975 if (release) {
2976 rcu_assign_pointer(*np,
2977 rcu_dereference_protected(n->next,
2978 lockdep_is_held(&tbl->lock)));
2979 neigh_mark_dead(n);
2980 } else
2981 np = &n->next;
2982 write_unlock(&n->lock);
2983 if (release)
2984 neigh_cleanup_and_release(n);
2985 }
2986 }
2987 }
2988 EXPORT_SYMBOL(__neigh_for_each_release);
2990 int neigh_xmit(int index, struct net_device *dev,
2991 const void *addr, struct sk_buff *skb)
2993 int err = -EAFNOSUPPORT;
2994 if (likely(index < NEIGH_NR_TABLES)) {
2995 struct neigh_table *tbl;
2996 struct neighbour *neigh;
2998 tbl = neigh_tables[index];
2999 if (!tbl)
3000 goto out_kfree_skb;
3001 rcu_read_lock_bh();
3002 if (index == NEIGH_ARP_TABLE) {
3003 u32 key = *((u32 *)addr);
3005 neigh = __ipv4_neigh_lookup_noref(dev, key);
3006 } else {
3007 neigh = __neigh_lookup_noref(tbl, addr, dev);
3008 }
3009 if (!neigh)
3010 neigh = __neigh_create(tbl, addr, dev, false);
3011 err = PTR_ERR(neigh);
3012 if (IS_ERR(neigh)) {
3013 rcu_read_unlock_bh();
3014 goto out_kfree_skb;
3015 }
3016 err = neigh->output(neigh, skb);
3017 rcu_read_unlock_bh();
3018 }
3019 else if (index == NEIGH_LINK_TABLE) {
3020 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3021 addr, NULL, skb->len);
3022 if (err < 0)
3023 goto out_kfree_skb;
3024 err = dev_queue_xmit(skb);
3025 }
3026 out:
3027 return err;
3028 out_kfree_skb:
3029 kfree_skb(skb);
3030 goto out;
3031 }
3032 EXPORT_SYMBOL(neigh_xmit);
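/* Typical neigh_xmit() use from a tunnel/overlay driver (sketch; the
 * gateway address is illustrative). For NEIGH_ARP_TABLE the address is
 * a 4-byte IPv4 key, and the skb is always consumed:
 *
 *	__be32 gw = htonl(0xc0000201);	// 192.0.2.1, documentation range
 *
 *	err = neigh_xmit(NEIGH_ARP_TABLE, dev, &gw, skb);
 *
 * NEIGH_LINK_TABLE instead treats @addr as an L2 address and bypasses
 * resolution entirely.
 */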
3034 #ifdef CONFIG_PROC_FS
3036 static struct neighbour *neigh_get_first(struct seq_file *seq)
3038 struct neigh_seq_state *state = seq->private;
3039 struct net *net = seq_file_net(seq);
3040 struct neigh_hash_table *nht = state->nht;
3041 struct neighbour *n = NULL;
3042 int bucket;
3044 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3045 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3046 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3048 while (n) {
3049 if (!net_eq(dev_net(n->dev), net))
3050 goto next;
3051 if (state->neigh_sub_iter) {
3052 loff_t fakep = 0;
3053 void *v;
3055 v = state->neigh_sub_iter(state, n, &fakep);
3056 if (!v)
3057 goto next;
3058 }
3059 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3060 break;
3061 if (n->nud_state & ~NUD_NOARP)
3062 break;
3063 next:
3064 n = rcu_dereference_bh(n->next);
3065 }
3067 if (n)
3068 break;
3069 }
3070 state->bucket = bucket;
3072 return n;
3073 }
3075 static struct neighbour *neigh_get_next(struct seq_file *seq,
3076 struct neighbour *n,
3077 loff_t *pos)
3079 struct neigh_seq_state *state = seq->private;
3080 struct net *net = seq_file_net(seq);
3081 struct neigh_hash_table *nht = state->nht;
3083 if (state->neigh_sub_iter) {
3084 void *v = state->neigh_sub_iter(state, n, pos);
3085 if (v)
3086 return n;
3087 }
3088 n = rcu_dereference_bh(n->next);
3090 while (1) {
3091 while (n) {
3092 if (!net_eq(dev_net(n->dev), net))
3093 goto next;
3094 if (state->neigh_sub_iter) {
3095 void *v = state->neigh_sub_iter(state, n, pos);
3096 if (v)
3097 return n;
3098 goto next;
3099 }
3100 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3101 break;
3103 if (n->nud_state & ~NUD_NOARP)
3104 break;
3105 next:
3106 n = rcu_dereference_bh(n->next);
3107 }
3109 if (n)
3110 break;
3112 if (++state->bucket >= (1 << nht->hash_shift))
3113 break;
3115 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
3116 }
3118 if (n && pos)
3119 --(*pos);
3120 return n;
3121 }
3123 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3125 struct neighbour *n = neigh_get_first(seq);
3127 if (n) {
3128 --(*pos);
3129 while (*pos) {
3130 n = neigh_get_next(seq, n, pos);
3131 if (!n)
3132 break;
3133 }
3134 }
3135 return *pos ? NULL : n;
3138 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3140 struct neigh_seq_state *state = seq->private;
3141 struct net *net = seq_file_net(seq);
3142 struct neigh_table *tbl = state->tbl;
3143 struct pneigh_entry *pn = NULL;
3144 int bucket = state->bucket;
3146 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3147 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3148 pn = tbl->phash_buckets[bucket];
3149 while (pn && !net_eq(pneigh_net(pn), net))
3150 pn = pn->next;
3151 if (pn)
3152 break;
3153 }
3154 state->bucket = bucket;
3156 return pn;
3157 }
3159 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3160 struct pneigh_entry *pn,
3161 loff_t *pos)
3163 struct neigh_seq_state *state = seq->private;
3164 struct net *net = seq_file_net(seq);
3165 struct neigh_table *tbl = state->tbl;
3167 do {
3168 pn = pn->next;
3169 } while (pn && !net_eq(pneigh_net(pn), net));
3171 while (!pn) {
3172 if (++state->bucket > PNEIGH_HASHMASK)
3173 break;
3174 pn = tbl->phash_buckets[state->bucket];
3175 while (pn && !net_eq(pneigh_net(pn), net))
3176 pn = pn->next;
3177 if (pn)
3178 break;
3179 }
3181 if (pn && pos)
3182 --(*pos);
3184 return pn;
3185 }
3187 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3189 struct pneigh_entry *pn = pneigh_get_first(seq);
3191 if (pn) {
3192 --(*pos);
3193 while (*pos) {
3194 pn = pneigh_get_next(seq, pn, pos);
3195 if (!pn)
3196 break;
3197 }
3198 }
3199 return *pos ? NULL : pn;
3202 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3204 struct neigh_seq_state *state = seq->private;
3205 void *rc;
3206 loff_t idxpos = *pos;
3208 rc = neigh_get_idx(seq, &idxpos);
3209 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3210 rc = pneigh_get_idx(seq, &idxpos);
3212 return rc;
3213 }
3215 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3216 __acquires(tbl->lock)
3217 __acquires(rcu_bh)
3219 struct neigh_seq_state *state = seq->private;
3221 state->tbl = tbl;
3222 state->bucket = 0;
3223 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3225 rcu_read_lock_bh();
3226 state->nht = rcu_dereference_bh(tbl->nht);
3227 read_lock(&tbl->lock);
3229 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3231 EXPORT_SYMBOL(neigh_seq_start);
3233 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3235 struct neigh_seq_state *state;
3236 void *rc;
3238 if (v == SEQ_START_TOKEN) {
3239 rc = neigh_get_first(seq);
3240 goto out;
3241 }
3243 state = seq->private;
3244 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3245 rc = neigh_get_next(seq, v, NULL);
3246 if (rc)
3247 goto out;
3248 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3249 rc = pneigh_get_first(seq);
3250 } else {
3251 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3252 rc = pneigh_get_next(seq, v, NULL);
3253 }
3254 out:
3255 ++(*pos);
3256 return rc;
3257 }
3258 EXPORT_SYMBOL(neigh_seq_next);
3260 void neigh_seq_stop(struct seq_file *seq, void *v)
3261 __releases(tbl->lock)
3262 __releases(rcu_bh)
3264 struct neigh_seq_state *state = seq->private;
3265 struct neigh_table *tbl = state->tbl;
3267 read_unlock(&tbl->lock);
3268 rcu_read_unlock_bh();
3269 }
3270 EXPORT_SYMBOL(neigh_seq_stop);
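/* A protocol wires these helpers into its own seq_file the way
 * net/ipv4/arp.c does for /proc/net/arp (sketch, simplified):
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * neigh_seq_next()/neigh_seq_stop() can be used as .next/.stop
 * directly; the seq_file private area must be a
 * struct neigh_seq_state.
 */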
3272 /* statistics via seq_file */
3274 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3276 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3277 int cpu;
3279 if (*pos == 0)
3280 return SEQ_START_TOKEN;
3282 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3283 if (!cpu_possible(cpu))
3284 continue;
3285 *pos = cpu+1;
3286 return per_cpu_ptr(tbl->stats, cpu);
3287 }
3288 return NULL;
3289 }
3291 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3293 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3294 int cpu;
3296 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3297 if (!cpu_possible(cpu))
3298 continue;
3299 *pos = cpu+1;
3300 return per_cpu_ptr(tbl->stats, cpu);
3301 }
3302 (*pos)++;
3303 return NULL;
3304 }
3306 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3311 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3313 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3314 struct neigh_statistics *st = v;
3316 if (v == SEQ_START_TOKEN) {
3317 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3321 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3322 "%08lx %08lx %08lx %08lx %08lx %08lx\n",
3323 atomic_read(&tbl->entries),
3325 st->allocs,
3326 st->destroys,
3327 st->hash_grows,
3329 st->lookups,
3330 st->hits,
3332 st->res_failed,
3334 st->rcv_probes_mcast,
3335 st->rcv_probes_ucast,
3337 st->periodic_gc_runs,
3338 st->forced_gc_runs,
3339 st->unres_discards,
3340 st->table_fulls
3341 );
3343 return 0;
3344 }
3346 static const struct seq_operations neigh_stat_seq_ops = {
3347 .start = neigh_stat_seq_start,
3348 .next = neigh_stat_seq_next,
3349 .stop = neigh_stat_seq_stop,
3350 .show = neigh_stat_seq_show,
3351 };
3352 #endif /* CONFIG_PROC_FS */
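/* Each table exposes these counters as /proc/net/stat/<table>_cache
 * (e.g. arp_cache, ndisc_cache): one header line, then one row of hex
 * fields per possible CPU. "entries" repeats the global table size in
 * every row; the remaining columns are per-CPU event counts, so a
 * reader sums them across rows (and would parse them with
 * strtoul(..., 16)).
 */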
3354 static void __neigh_notify(struct neighbour *n, int type, int flags,
3355 u32 pid)
3357 struct net *net = dev_net(n->dev);
3358 struct sk_buff *skb;
3359 int err = -ENOBUFS;
3361 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3362 if (skb == NULL)
3363 goto errout;
3365 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3366 if (err < 0) {
3367 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3368 WARN_ON(err == -EMSGSIZE);
3369 kfree_skb(skb);
3370 goto errout;
3371 }
3372 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3373 return;
3375 errout:
3376 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3377 }
3379 void neigh_app_ns(struct neighbour *n)
3381 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3382 }
3383 EXPORT_SYMBOL(neigh_app_ns);
3385 #ifdef CONFIG_SYSCTL
3386 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3388 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3389 void *buffer, size_t *lenp, loff_t *ppos)
3391 int size, ret;
3392 struct ctl_table tmp = *ctl;
3394 tmp.extra1 = SYSCTL_ZERO;
3395 tmp.extra2 = &unres_qlen_max;
3396 tmp.data = &size;
3398 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3399 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3401 if (write && !ret)
3402 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3403 return ret;
3404 }
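/* unres_qlen is only a packet-count view of unres_qlen_bytes; the
 * stored value is always bytes. Illustrative round trip (the truesize
 * figure is hypothetical, it depends on architecture and struct
 * sk_buff layout):
 *
 *	SKB_TRUESIZE(ETH_FRAME_LEN) ~= 2048 bytes
 *	write unres_qlen = 32  ->  stores 32 * 2048 = 65536 bytes
 *	read  unres_qlen       ->  65536 / 2048    = 32 packets
 *
 * Because of the divide-then-multiply, an unres_qlen_bytes value that
 * is not a multiple of the truesize reads back rounded down.
 */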
3406 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3407 int family)
3409 switch (family) {
3410 case AF_INET:
3411 return __in_dev_arp_parms_get_rcu(dev);
3412 case AF_INET6:
3413 return __in6_dev_nd_parms_get_rcu(dev);
3414 }
3415 return NULL;
3416 }
3418 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3419 int index)
3421 struct net_device *dev;
3422 int family = neigh_parms_family(p);
3424 rcu_read_lock();
3425 for_each_netdev_rcu(net, dev) {
3426 struct neigh_parms *dst_p =
3427 neigh_get_dev_parms_rcu(dev, family);
3429 if (dst_p && !test_bit(index, dst_p->data_state))
3430 dst_p->data[index] = p->data[index];
3431 }
3432 rcu_read_unlock();
3433 }
3435 static void neigh_proc_update(struct ctl_table *ctl, int write)
3437 struct net_device *dev = ctl->extra1;
3438 struct neigh_parms *p = ctl->extra2;
3439 struct net *net = neigh_parms_net(p);
3440 int index = (int *) ctl->data - p->data;
3442 if (!write)
3443 return;
3445 set_bit(index, p->data_state);
3446 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3447 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3448 if (!dev) /* NULL dev means this is default value */
3449 neigh_copy_dflt_parms(net, p, index);
3450 }
3452 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3453 void *buffer, size_t *lenp,
3456 struct ctl_table tmp = *ctl;
3457 int ret;
3459 tmp.extra1 = SYSCTL_ZERO;
3460 tmp.extra2 = SYSCTL_INT_MAX;
3462 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3463 neigh_proc_update(ctl, write);
3464 return ret;
3465 }
3467 int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
3468 size_t *lenp, loff_t *ppos)
3470 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3472 neigh_proc_update(ctl, write);
3473 return ret;
3474 }
3475 EXPORT_SYMBOL(neigh_proc_dointvec);
3477 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
3478 size_t *lenp, loff_t *ppos)
3480 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3482 neigh_proc_update(ctl, write);
3483 return ret;
3484 }
3485 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3487 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3488 void *buffer, size_t *lenp,
3491 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3493 neigh_proc_update(ctl, write);
3494 return ret;
3495 }
3497 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3498 void *buffer, size_t *lenp, loff_t *ppos)
3500 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3502 neigh_proc_update(ctl, write);
3503 return ret;
3504 }
3505 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3507 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3508 void *buffer, size_t *lenp,
3511 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3513 neigh_proc_update(ctl, write);
3514 return ret;
3515 }
3517 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3518 void *buffer, size_t *lenp,
3521 struct neigh_parms *p = ctl->extra2;
3522 int ret;
3524 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3525 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3526 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3527 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3528 else
3529 ret = -1;
3531 if (write && ret == 0) {
3532 /* update reachable_time as well, otherwise, the change will
3533 * only be effective after the next time neigh_periodic_work
3534 * decides to recompute it
3535 */
3536 p->reachable_time =
3537 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3538 }
3539 return ret;
3540 }
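/* Because of the immediate recomputation above, e.g.
 * "sysctl -w net.ipv4.neigh.default.base_reachable_time_ms=30000"
 * (path illustrative) re-randomizes reachable_time right away via
 * neigh_rand_reach_time() rather than waiting for the periodic GC
 * work to notice the new base value.
 */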
3542 #define NEIGH_PARMS_DATA_OFFSET(index) \
3543 (&((struct neigh_parms *) 0)->data[index])
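/* NEIGH_PARMS_DATA_OFFSET() is offsetof() in disguise: it "indexes" a
 * NULL struct neigh_parms pointer to record where data[index] lives.
 * The template entries below therefore store offsets, not addresses,
 * and neigh_sysctl_register() rebases a kmemdup()ed copy per device
 * with a single addition:
 *
 *	t->neigh_vars[i].data += (long) p;	// offset -> real address
 */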
3545 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3546 [NEIGH_VAR_ ## attr] = { \
3547 .procname = name, \
3548 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3549 .maxlen = sizeof(int), \
3550 .mode = mval, \
3551 .proc_handler = proc, \
3552 }
3554 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3555 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3557 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3558 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3560 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3561 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3563 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3564 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3566 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3567 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3569 static struct neigh_sysctl_table {
3570 struct ctl_table_header *sysctl_header;
3571 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3572 } neigh_sysctl_template __read_mostly = {
3573 .neigh_vars = {
3574 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3575 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3576 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3577 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3578 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3579 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3580 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3581 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3582 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3583 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3584 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3585 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3586 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3587 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3588 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3589 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3590 [NEIGH_VAR_GC_INTERVAL] = {
3591 .procname = "gc_interval",
3592 .maxlen = sizeof(int),
3593 .mode = 0644,
3594 .proc_handler = proc_dointvec_jiffies,
3595 },
3596 [NEIGH_VAR_GC_THRESH1] = {
3597 .procname = "gc_thresh1",
3598 .maxlen = sizeof(int),
3599 .mode = 0644,
3600 .extra1 = SYSCTL_ZERO,
3601 .extra2 = SYSCTL_INT_MAX,
3602 .proc_handler = proc_dointvec_minmax,
3603 },
3604 [NEIGH_VAR_GC_THRESH2] = {
3605 .procname = "gc_thresh2",
3606 .maxlen = sizeof(int),
3607 .mode = 0644,
3608 .extra1 = SYSCTL_ZERO,
3609 .extra2 = SYSCTL_INT_MAX,
3610 .proc_handler = proc_dointvec_minmax,
3611 },
3612 [NEIGH_VAR_GC_THRESH3] = {
3613 .procname = "gc_thresh3",
3614 .maxlen = sizeof(int),
3615 .mode = 0644,
3616 .extra1 = SYSCTL_ZERO,
3617 .extra2 = SYSCTL_INT_MAX,
3618 .proc_handler = proc_dointvec_minmax,
3619 },
3620 {},
3621 },
3622 };
3624 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3625 proc_handler *handler)
3627 int i;
3628 struct neigh_sysctl_table *t;
3629 const char *dev_name_source;
3630 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3631 char *p_name;
3633 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3634 if (!t)
3635 goto err;
3637 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3638 t->neigh_vars[i].data += (long) p;
3639 t->neigh_vars[i].extra1 = dev;
3640 t->neigh_vars[i].extra2 = p;
3641 }
3643 if (dev) {
3644 dev_name_source = dev->name;
3645 /* Terminate the table early */
3646 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3647 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3648 } else {
3649 struct neigh_table *tbl = p->tbl;
3650 dev_name_source = "default";
3651 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3652 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3653 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3654 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3655 }
3657 if (handler) {
3658 /* RetransTime */
3659 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3660 /* ReachableTime */
3661 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3662 /* RetransTime (in milliseconds)*/
3663 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3664 /* ReachableTime (in milliseconds) */
3665 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3666 } else {
3667 /* Those handlers will update p->reachable_time after
3668 * base_reachable_time(_ms) is set to ensure the new timer starts being
3669 * applied after the next neighbour update instead of waiting for
3670 * neigh_periodic_work to update its value (can be multiple minutes)
3671 * So any handler that replaces them should do this as well.
3672 */
3673 /* ReachableTime */
3674 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3675 neigh_proc_base_reachable_time;
3676 /* ReachableTime (in milliseconds) */
3677 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3678 neigh_proc_base_reachable_time;
3681 /* Don't export sysctls to unprivileged users */
3682 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3683 t->neigh_vars[0].procname = NULL;
3685 switch (neigh_parms_family(p)) {
3686 case AF_INET:
3687 p_name = "ipv4";
3688 break;
3689 case AF_INET6:
3690 p_name = "ipv6";
3691 break;
3692 default:
3693 BUG();
3694 }
3696 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3697 p_name, dev_name_source);
3698 t->sysctl_header =
3699 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3700 if (!t->sysctl_header)
3701 goto free;
3703 p->sysctl_table = t;
3704 return 0;
3706 free:
3707 kfree(t);
3708 err:
3709 return -ENOBUFS;
3710 }
3711 EXPORT_SYMBOL(neigh_sysctl_register);
3713 void neigh_sysctl_unregister(struct neigh_parms *p)
3715 if (p->sysctl_table) {
3716 struct neigh_sysctl_table *t = p->sysctl_table;
3717 p->sysctl_table = NULL;
3718 unregister_net_sysctl_table(t->sysctl_header);
3719 kfree(t);
3720 }
3721 }
3722 EXPORT_SYMBOL(neigh_sysctl_unregister);
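/* The registered table appears under
 * /proc/sys/net/<ipv4|ipv6>/neigh/<ifname or "default">/, e.g.
 * /proc/sys/net/ipv4/neigh/eth0/gc_stale_time (interface name
 * illustrative). Only the "default" directory exposes gc_interval and
 * the gc_thresh* knobs, because the per-device copy is truncated at
 * NEIGH_VAR_GC_INTERVAL in neigh_sysctl_register() above.
 */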
3724 #endif /* CONFIG_SYSCTL */
3726 static int __init neigh_init(void)
3728 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3729 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3730 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3732 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3733 0);
3734 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3736 return 0;
3737 }
3739 subsys_initcall(neigh_init);