Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-microblaze.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43
44 #define DEBUG
45 #define NEIGH_DEBUG 1
46 #define neigh_dbg(level, fmt, ...)              \
47 do {                                            \
48         if (level <= NEIGH_DEBUG)               \
49                 pr_debug(fmt, ##__VA_ARGS__);   \
50 } while (0)
51
52 #define PNEIGH_HASHMASK         0xF
53
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
56                            u32 pid);
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59                                     struct net_device *dev);
60
61 #ifdef CONFIG_PROC_FS
62 static const struct seq_operations neigh_stat_seq_ops;
63 #endif
64
65 /*
66    Neighbour hash table buckets are protected with rwlock tbl->lock.
67
68    - All the scans/updates to hash buckets MUST be made under this lock.
69    - NOTHING clever should be made under this lock: no callbacks
70      to protocol backends, no attempts to send something to network.
71      It will result in deadlocks, if backend/driver wants to use neighbour
72      cache.
73    - If the entry requires some non-trivial actions, increase
74      its reference count and release table lock.
75
76    Neighbour entries are protected:
77    - with reference count.
78    - with rwlock neigh->lock
79
80    Reference count prevents destruction.
81
82    neigh->lock mainly serializes ll address data and its validity state.
83    However, the same lock is used to protect other entry fields:
84     - timer
85     - resolution queue
86
87    Again, nothing clever shall be made under neigh->lock,
88    the most complicated procedure, which we allow is dev->hard_header.
89    It is supposed, that dev->hard_header is simplistic and does
90    not make callbacks to neighbour tables.
91  */
92
/* Fallback output handler installed on dead or unusable entries: drop
 * the packet and tell the caller the network is down.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
98
/* Final teardown of an unhashed neighbour entry: run the optional
 * per-parms cleanup hook, broadcast RTM_DELNEIGH to rtnetlink listeners
 * and the netevent chain, then drop the table's reference (which may
 * free the entry via neigh_destroy()).
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
108
/*
 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really reasonable choice.
 */

/**
 * neigh_rand_reach_time - pick a randomized reachability timeout
 * @base: configured BASE_REACHABLE_TIME, in jiffies
 *
 * Returns a value uniformly distributed in [base/2, 3*base/2), or 0
 * when @base is 0.  prandom_u32_max() is used instead of
 * "prandom_u32() % base" to avoid the modulo bias the latter has when
 * @base is not a power of two.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? prandom_u32_max(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
120
121
/* Try to unlink @n from its hash chain; @np is the RCU-protected link
 * that currently points at @n.  Caller holds tbl->lock for writing.
 *
 * The entry is only removed when nobody else holds a reference and its
 * NUD state/flags match none of the bits in @state/@flags (callers use
 * these to exempt e.g. NUD_PERMANENT or externally learned entries).
 *
 * Returns true when the entry was unlinked and released.
 */
static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
		      struct neighbour __rcu **np, struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
	    !(n->flags & flags)) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		/* Mark dead while still holding n->lock so concurrent
		 * users see the entry as being torn down.
		 */
		n->dead = 1;
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
143
/* Remove @ndel from @tbl if it is still hashed.  Caller holds tbl->lock
 * for writing.  Walks the bucket computed from the entry's primary key
 * and device, and lets neigh_del() perform the actual unlink (which
 * only succeeds when no other references are held).
 */
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	/* The hash callback returns a full 32-bit value; the top
	 * hash_shift bits select the bucket.
	 */
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, 0, 0, np, tbl);
		np = &n->next;
	}
	return false;
}
166
/* Synchronous garbage collection, run from neigh_alloc() when the table
 * is over its gc_thresh2/gc_thresh3 watermarks.  Scans every bucket and
 * drops entries that are unreferenced, not NUD_PERMANENT and not
 * externally learned.  Returns 1 if at least one entry was freed.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
				      tbl)) {
				/* neigh_del() already advanced *np past
				 * the removed entry; don't skip one.
				 */
				shrunk = 1;
				continue;
			}
			np = &n->next;
		}
	}

	/* Record the sweep time so neigh_alloc() can rate-limit
	 * gc_thresh2-triggered runs to one every 5 seconds.
	 */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
204
/* Arm the entry's state machine timer for @when, taking a reference on
 * behalf of the timer.  mod_timer() returning nonzero means a timer was
 * already pending — a double add that would imbalance the refcount — so
 * it is logged with a stack dump.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
214
215 static int neigh_del_timer(struct neighbour *n)
216 {
217         if ((n->nud_state & NUD_IN_TIMER) &&
218             del_timer(&n->timer)) {
219                 neigh_release(n);
220                 return 1;
221         }
222         return 0;
223 }
224
225 static void pneigh_queue_purge(struct sk_buff_head *list)
226 {
227         struct sk_buff *skb;
228
229         while ((skb = skb_dequeue(list)) != NULL) {
230                 dev_put(skb->dev);
231                 kfree_skb(skb);
232         }
233 }
234
/* Unhash every entry belonging to @dev (all devices when @dev is NULL).
 * Caller holds tbl->lock for writing.  Entries still referenced by
 * others cannot be freed here; they are neutered instead: the arp_queue
 * is purged, output is pointed at neigh_blackhole() and the NUD state
 * is moved to NOARP/NONE so nothing new is transmitted through them.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* Unlink from the chain before touching the
			 * entry itself; RCU readers may still see it.
			 */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
284
/* Flush all cached entries for @dev; used when the device's link-layer
 * address changes so stale resolutions are discarded.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
292
/* Device is going down: flush its neighbour and proxy entries.  Note
 * the asymmetric locking — pneigh_ifdown_and_unlock() releases the
 * tbl->lock taken here.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown_and_unlock(tbl, dev);

	/* Kill the proxy delay timer and free anything still queued. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
304
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 * May trigger a synchronous forced GC when the table is above
 * gc_thresh2 (rate-limited to one run per 5 s) or gc_thresh3; returns
 * NULL if the table stays full or the allocation fails.
 *
 * The entry starts with dead == 1; __neigh_create() clears it once the
 * entry is actually hashed into the table.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

	/* Driver-private data (dev->neigh_priv_len bytes) is tacked on
	 * after the fixed-size entry.
	 */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
349
/* Generate a random hash salt.  The low bit is forced to 1 so the salt
 * is never zero — NOTE(review): presumably so the tbl->hash callbacks
 * never multiply by zero; confirm against the hash implementations.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}
354
/* Allocate a hash table with 2^shift buckets.  Bucket memory comes from
 * kzalloc() when it fits in one page, otherwise straight from the page
 * allocator; neigh_hash_free_rcu() mirrors this choice when freeing.
 * Each table gets fresh random hash salts.  Returns NULL on failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
381
/* RCU callback undoing neigh_hash_alloc() once no reader can still be
 * traversing the old table.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	/* Must match the allocation strategy in neigh_hash_alloc(). */
	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
396
/* Replace the hash table with one of 2^new_shift buckets, rehashing all
 * entries.  Caller holds tbl->lock for writing; RCU readers keep using
 * the old table until it is freed by the call_rcu() callback.  On
 * allocation failure the old table is kept and returned.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push the entry onto the head of its new
			 * bucket; rcu_assign_pointer() orders the
			 * writes for concurrent readers.
			 */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
437
/* Look up the entry for @pkey/@dev, taking a reference on success.
 * Runs under rcu_read_lock_bh(); an entry whose refcount has already
 * dropped to zero is treated as not found.  Returns NULL when no entry
 * exists.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
457
/* Device-agnostic lookup: find any entry in netns @net whose primary
 * key matches @pkey (the hash is computed with a NULL device).  Takes a
 * reference on success; an entry whose refcount already dropped to zero
 * is reported as not found.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
488
/* Allocate, construct and hash a new entry for @pkey/@dev, returning
 * either the new entry or an existing one for the same key discovered
 * during insertion.  Takes an extra reference for the caller when
 * @want_ref.  Returns ERR_PTR() on allocation/constructor failure or
 * when the parms block is already marked dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Start with "confirmed" backdated by 2*BASE_REACHABLE_TIME —
	 * the new entry begins life unconfirmed.
	 */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Grow the hash table once the load factor exceeds 1. */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Somebody may have raced us and already inserted this key. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
576
577 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
578 {
579         u32 hash_val = *(u32 *)(pkey + key_len - 4);
580         hash_val ^= (hash_val >> 16);
581         hash_val ^= hash_val >> 8;
582         hash_val ^= hash_val >> 4;
583         hash_val &= PNEIGH_HASHMASK;
584         return hash_val;
585 }
586
587 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
588                                               struct net *net,
589                                               const void *pkey,
590                                               unsigned int key_len,
591                                               struct net_device *dev)
592 {
593         while (n) {
594                 if (!memcmp(n->key, pkey, key_len) &&
595                     net_eq(pneigh_net(n), net) &&
596                     (n->dev == dev || !n->dev))
597                         return n;
598                 n = n->next;
599         }
600         return NULL;
601 }
602
603 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
604                 struct net *net, const void *pkey, struct net_device *dev)
605 {
606         unsigned int key_len = tbl->key_len;
607         u32 hash_val = pneigh_hash(pkey, key_len);
608
609         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
610                                  net, pkey, key_len, dev);
611 }
612 EXPORT_SYMBOL_GPL(__pneigh_lookup);
613
/* Find a proxy entry for @pkey/@dev in @net; when @creat is nonzero and
 * no entry exists, allocate and insert one.  Creation requires the RTNL
 * (ASSERT_RTNL), which serializes creators across the unlocked window
 * between the failed lookup and the insertion.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	/* Publish the new entry at the head of its hash chain. */
	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
658
659
/* Remove and free the proxy entry exactly matching @pkey/@dev/@net.
 * Note the exact n->dev == dev comparison: unlike the lookup helpers, a
 * NULL-device (wildcard) entry does not match an arbitrary @dev here.
 * Returns 0 on success, -ENOENT when no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			/* Run the destructor and free outside the lock. */
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
685
/* Unlink every proxy entry for @dev (all entries when @dev is NULL)
 * onto a private free list, then release tbl->lock — which the caller
 * acquired for writing — before running pdestructor()/dev_put()/kfree()
 * on the collected entries.
 */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
716
717 static void neigh_parms_destroy(struct neigh_parms *parms);
718
/* Drop a reference on a parms block, destroying it on the last put. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
724
/*
 *	neighbour must already be out of the table (n->dead set);
 *	called from neigh_release() when the last reference is dropped.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		/* Refcount bug somewhere: refuse to free a live entry. */
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A dead entry must not have a pending timer (the timer holds a
	 * reference, so we could not be here otherwise).
	 */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
761
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	/* Route all traffic through the generic ops->output slow path. */
	neigh->output = neigh->ops->output;
}
773
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	/* Switch to the protocol's connected_output fast path. */
	neigh->output = neigh->ops->connected_output;
}
785
/* Periodic GC worker: refreshes the randomized reachable_time of every
 * parms block every 300 s and, when the table holds at least gc_thresh1
 * entries, reaps entries that are unreferenced and either NUD_FAILED or
 * idle for longer than GC_STALETIME.  PERMANENT, in-timer and
 * externally learned entries are skipped.  tbl->lock is dropped between
 * buckets to bound lock hold time.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		/* The table may have been replaced while unlocked. */
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
867
868 static __inline__ int neigh_max_probes(struct neighbour *n)
869 {
870         struct neigh_parms *p = n->parms;
871         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
872                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
873                 NEIGH_VAR(p, MCAST_PROBES));
874 }
875
/* Handle a failed resolution: bump the res_failed statistic, report
 * every queued skb as unreachable via ops->error_report(), and flush
 * the arp_queue.  Called with neigh->lock write-held; the lock is
 * dropped around each error_report() call and re-taken afterwards.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	/* drop anything still queued, e.g. if the state changed while
	 * the lock was released above
	 */
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
900
/* Send one solicitation for the entry and bump its probe counter.
 * Entered with neigh->lock write-held; the lock is released before
 * invoking the solicit callback and is NOT re-taken.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	/* free the clone taken above; solicit() does not consume it */
	kfree_skb(skb);
}
914
/* Called when a timer expires for a neighbour entry. */

/* Run the NUD state machine for one entry.  REACHABLE may decay to
 * DELAY or STALE, DELAY either returns to REACHABLE (if confirmed in
 * the meantime) or escalates to PROBE, and INCOMPLETE/PROBE entries
 * are re-solicited until neigh_max_probes() is exceeded, at which
 * point the entry becomes FAILED and is invalidated.  The running
 * timer holds a reference on the entry; it is dropped at the end.
 */
static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;	/* fallback re-arm time */

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* confirmed recently enough: stay REACHABLE */
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* recently used but not confirmed: defer a probe */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			/* idle and unconfirmed: demote to STALE */
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* a confirmation arrived while delayed */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* no confirmation: begin active probing */
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		/* probe budget exhausted: give up on this entry */
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* never re-arm closer than HZ/2 to bound timer churn */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);	/* timer was idle: take a ref for it */
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* drops neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	neigh_release(neigh);	/* drop the reference held by this timer run */
}
1002
/* Kick address resolution for an entry that is not directly usable.
 *
 * Returns 0 when the caller may transmit now (entry already in
 * CONNECTED/DELAY/PROBE), 1 when the skb was consumed: either queued
 * on the arp_queue pending resolution, or freed because resolution
 * cannot be attempted.  Queueing while INCOMPLETE may evict older
 * packets once QUEUE_LEN_BYTES would be exceeded.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* begin resolution: preload the unicast probe budget */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* no probing configured at all: fail immediately */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* stale entry touched by traffic: schedule a delayed probe */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* make room: drop oldest packets above the byte limit */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops the plain lock itself, hence the split
	 * unlock + local_bh_enable() instead of write_unlock_bh()
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1080
1081 static void neigh_update_hhs(struct neighbour *neigh)
1082 {
1083         struct hh_cache *hh;
1084         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1085                 = NULL;
1086
1087         if (neigh->dev->header_ops)
1088                 update = neigh->dev->header_ops->cache_update;
1089
1090         if (update) {
1091                 hh = &neigh->hh;
1092                 if (hh->hh_len) {
1093                         write_seqlock_bh(&hh->hh_lock);
1094                         update(hh, neigh->dev, neigh->ha);
1095                         write_sequnlock_bh(&hh->hh_lock);
1096                 }
1097         }
1098 }
1099
1100
1101
1102 /* Generic update routine.
1103    -- lladdr is new lladdr or NULL, if it is not supplied.
1104    -- new    is new state.
1105    -- flags
1106         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1107                                 if it is different.
1108         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1109                                 lladdr instead of overriding it
1110                                 if it is different.
1111         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1112
1113         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1114                                 NTF_ROUTER flag.
1115         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1116                                 a router.
1117
1118    Caller MUST hold reference count on the entry.
1119  */
1120
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* NOARP/PERMANENT entries may only change administratively */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		/* Transition to an invalid state (e.g. FAILED): stop the
		 * timer, and flush the arp_queue if resolution was pending.
		 */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* keep the old address but stop trusting it */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		/* Address changed: publish it under the ha_lock seqlock and
		 * refresh the cached hardware headers.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* backdate 'confirmed' so a non-connected entry looks old */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1295
1296 /* Update the neigh to listen temporarily for probe responses, even if it is
1297  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1298  */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	/* only FAILED entries are resurrected into probing here */
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	/* pre-charge the counter so at most one probe round remains */
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
1312
/* Handle a received neighbour solicitation from @saddr: look up the
 * entry (the last __neigh_lookup() argument presumably controls
 * creation when a lladdr was supplied or the device needs none —
 * __neigh_lookup() is defined elsewhere) and mark it STALE with the
 * advertised link-layer address.  Returns the entry with a reference
 * held, or NULL.
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
1325
/* called with read_lock_bh(&n->lock); */
/* One-time fill of the hardware-header cache for entry @n through the
 * device's header_ops->cache callback; a non-zero hh_len marks the
 * cache as already initialized.
 */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1343
1344 /* Slow and careful. */
1345
/* Output path used while the neighbour may still need resolving.
 * If neigh_event_send() says the entry is usable, build the link-layer
 * header under the ha_lock seqlock and transmit; otherwise the skb was
 * consumed (queued or dropped) and rc stays 0.  Returns -EINVAL (skb
 * freed) when the header cannot be built.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* populate the hh cache once the entry is resolved */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		/* retry if neigh->ha changed under us */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1378
1379 /* As fast as possible without hh cache */
1380
/* Output path for an already-resolved neighbour: build the link-layer
 * header from neigh->ha (retrying under the ha_lock seqlock if the
 * address changes concurrently) and transmit.  Returns the
 * dev_queue_xmit() result, or -EINVAL (skb freed) when the header
 * cannot be built.
 */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);
1403
/* Output path needing no link-layer header work at all: hand the skb
 * straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1409
/* Timer handler for the table's proxy queue: replay (via proxy_redo)
 * every skb whose scheduled time has arrived, then re-arm the timer
 * for the earliest remaining skb, if any.
 */
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			/* due: replay it if the device is still running */
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);	/* ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1443
/* Queue an skb for delayed proxy processing: it is held for a random
 * delay of up to PROXY_DELAY jiffies and then handed to
 * tbl->proxy_redo by neigh_proxy_process().  The skb is dropped
 * outright when the proxy queue already exceeds PROXY_QLEN.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		/* keep the earlier of the pending and the new expiry */
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);	/* released in neigh_proxy_process() */
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1472
1473 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1474                                                       struct net *net, int ifindex)
1475 {
1476         struct neigh_parms *p;
1477
1478         list_for_each_entry(p, &tbl->parms_list, list) {
1479                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1480                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1481                         return p;
1482         }
1483
1484         return NULL;
1485 }
1486
/* Allocate a per-device copy of the table's default parameters.  The
 * new parms holds a reference on @dev and is linked onto the table's
 * parms list.  Returns NULL on allocation failure or when the
 * device's ndo_neigh_setup() callback rejects the parms.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl            = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			/* driver veto: undo the device hold and allocation */
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1520
/* RCU callback: drop the reference once all readers are done */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1528
/* Unlink @parms from the table and release it.  The reference drop is
 * deferred to an RCU callback so lockless readers that still see the
 * entry remain safe.  The table's built-in default parms are never
 * released this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1542
/* Release the storage backing a neigh_parms instance */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1547
1548 static struct lock_class_key neigh_table_proxy_queue_class;
1549
1550 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1551
/* One-time initialization of a protocol's neighbour table: default
 * parms, per-CPU statistics, the /proc entry, the main and proxy hash
 * tables, the periodic GC work and the proxy timer.  Allocation
 * failures are fatal (panic) since this runs at boot/module init.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	/* entries embed the key right after 'primary_key'; drivers may
	 * pre-set a larger entry_size, which must stay aligned
	 */
	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1602
/* Tear down a table registered with neigh_table_init(): stop the GC
 * work and the proxy timer, flush all entries, then free the hash
 * tables, /proc entry and statistics.  Complains if entries leaked.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* free the main hash only after an RCU grace period */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1629
1630 static struct neigh_table *neigh_find_table(int family)
1631 {
1632         struct neigh_table *tbl = NULL;
1633
1634         switch (family) {
1635         case AF_INET:
1636                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1637                 break;
1638         case AF_INET6:
1639                 tbl = neigh_tables[NEIGH_ND_TABLE];
1640                 break;
1641         case AF_DECnet:
1642                 tbl = neigh_tables[NEIGH_DN_TABLE];
1643                 break;
1644         }
1645
1646         return tbl;
1647 }
1648
/* RTM_DELNEIGH handler: remove the neighbour (or proxy) entry named
 * by the NDA_DST attribute on the device given by ndm_ifindex.  A
 * regular entry is administratively forced to NUD_FAILED via
 * neigh_update() and then unlinked from the hash under tbl->lock.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len)
		goto out;

	/* NTF_PROXY entries live in the separate pneigh table */
	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* invalidate first, then drop the lookup reference and unlink
	 * together under tbl->lock
	 */
	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
1710
/* RTM_NEWNEIGH handler: create or update a neighbour (or NTF_PROXY)
 * entry from the netlink message.  Runs under RTNL.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	/* Admin updates override existing state by default; OVERRIDE is
	 * cleared below for existing entries unless NLM_F_REPLACE was set.
	 */
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* a supplied link-layer address must fill dev->addr_len */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	/* Proxy entries go to the pneigh hash; pneigh_lookup() with
	 * creat=1 creates the entry on demand.
	 */
	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		/* no existing entry: only create when NLM_F_CREATE is set */
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		/* entry exists: honour NLM_F_EXCL / NLM_F_REPLACE */
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_USE) {
		/* NTF_USE: just kick the entry as if traffic used it */
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				   NETLINK_CB(skb).portid);
	neigh_release(neigh);

out:
	return err;
}
1806
1807 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1808 {
1809         struct nlattr *nest;
1810
1811         nest = nla_nest_start(skb, NDTA_PARMS);
1812         if (nest == NULL)
1813                 return -ENOBUFS;
1814
1815         if ((parms->dev &&
1816              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1817             nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1818             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1819                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1820             /* approximative value for deprecated QUEUE_LEN (in packets) */
1821             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1822                         NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1823             nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1824             nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1825             nla_put_u32(skb, NDTPA_UCAST_PROBES,
1826                         NEIGH_VAR(parms, UCAST_PROBES)) ||
1827             nla_put_u32(skb, NDTPA_MCAST_PROBES,
1828                         NEIGH_VAR(parms, MCAST_PROBES)) ||
1829             nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1830                         NEIGH_VAR(parms, MCAST_REPROBES)) ||
1831             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1832                           NDTPA_PAD) ||
1833             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1834                           NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1835             nla_put_msecs(skb, NDTPA_GC_STALETIME,
1836                           NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1837             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1838                           NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1839             nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1840                           NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1841             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1842                           NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1843             nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1844                           NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1845             nla_put_msecs(skb, NDTPA_LOCKTIME,
1846                           NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1847                 goto nla_put_failure;
1848         return nla_nest_end(skb, nest);
1849
1850 nla_put_failure:
1851         nla_nest_cancel(skb, nest);
1852         return -EMSGSIZE;
1853 }
1854
/* Fill one RTM_NEWNEIGHTBL message describing table-wide settings:
 * name, gc thresholds/interval, an NDTA_CONFIG snapshot, summed
 * NDTA_STATS and the default (device-less) parms.
 * Returns 0 or -EMSGSIZE if the skb ran out of room.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* hold tbl->lock so the dumped values are mutually consistent */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* hash_rnd/hash_mask come from the RCU-protected hash table */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* sum the per-CPU counters into one ndt_stats blob */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* tbl->parms is the default parameter set and must be device-less */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1943
1944 static int neightbl_fill_param_info(struct sk_buff *skb,
1945                                     struct neigh_table *tbl,
1946                                     struct neigh_parms *parms,
1947                                     u32 pid, u32 seq, int type,
1948                                     unsigned int flags)
1949 {
1950         struct ndtmsg *ndtmsg;
1951         struct nlmsghdr *nlh;
1952
1953         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1954         if (nlh == NULL)
1955                 return -EMSGSIZE;
1956
1957         ndtmsg = nlmsg_data(nlh);
1958
1959         read_lock_bh(&tbl->lock);
1960         ndtmsg->ndtm_family = tbl->family;
1961         ndtmsg->ndtm_pad1   = 0;
1962         ndtmsg->ndtm_pad2   = 0;
1963
1964         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1965             neightbl_fill_parms(skb, parms) < 0)
1966                 goto errout;
1967
1968         read_unlock_bh(&tbl->lock);
1969         nlmsg_end(skb, nlh);
1970         return 0;
1971 errout:
1972         read_unlock_bh(&tbl->lock);
1973         nlmsg_cancel(skb, nlh);
1974         return -EMSGSIZE;
1975 }
1976
/* Netlink attribute policy for table-level RTM_*NEIGHTBL attributes */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1985
/* Policy for attributes nested inside NDTA_PARMS (times are NLA_U64
 * millisecond values, see nla_get_msecs() users in neightbl_set()).
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
2002
/* RTM_SETNEIGHTBL handler: update tunables of the neighbour table
 * named by NDTA_NAME — per-device parms via a nested NDTA_PARMS, and
 * (init_net only) the gc thresholds/interval.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* locate the table by name, optionally narrowed by family */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* apply every parm attribute present in the request */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* deprecated packet count, converted to bytes */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* table-wide gc tunables may only be changed from init_net */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2163
/* RTM_GETNEIGHTBL dump: for every table (optionally filtered by
 * family), emit the table-level message followed by one message per
 * device parms set visible in this netns.  Resume state lives in
 * cb->args[0] (table index) and cb->args[1] (parms index).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		/* skip tbl->parms itself: it was covered by fill_info above */
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			/* fast-forward past entries dumped in earlier calls */
			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2216
/* Fill one RTM_NEWNEIGH message for a regular neighbour entry.
 * Returns 0 on success or -EMSGSIZE if the skb ran out of room.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* sample nud_state, lladdr and cacheinfo atomically under
	 * neigh->lock so the dumped entry is self-consistent
	 */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* don't count the reference neigh_lookup() gave the dumper */
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2269
2270 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2271                             u32 pid, u32 seq, int type, unsigned int flags,
2272                             struct neigh_table *tbl)
2273 {
2274         struct nlmsghdr *nlh;
2275         struct ndmsg *ndm;
2276
2277         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2278         if (nlh == NULL)
2279                 return -EMSGSIZE;
2280
2281         ndm = nlmsg_data(nlh);
2282         ndm->ndm_family  = tbl->family;
2283         ndm->ndm_pad1    = 0;
2284         ndm->ndm_pad2    = 0;
2285         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2286         ndm->ndm_type    = RTN_UNICAST;
2287         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2288         ndm->ndm_state   = NUD_NONE;
2289
2290         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2291                 goto nla_put_failure;
2292
2293         nlmsg_end(skb, nlh);
2294         return 0;
2295
2296 nla_put_failure:
2297         nlmsg_cancel(skb, nlh);
2298         return -EMSGSIZE;
2299 }
2300
/* Broadcast a neighbour change: run the netevent notifier chain,
 * then send an RTM_NEWNEIGH netlink notification.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2306
2307 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2308 {
2309         struct net_device *master;
2310
2311         if (!master_idx)
2312                 return false;
2313
2314         master = netdev_master_upper_dev_get(dev);
2315         if (!master || master->ifindex != master_idx)
2316                 return true;
2317
2318         return false;
2319 }
2320
2321 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2322 {
2323         if (filter_idx && dev->ifindex != filter_idx)
2324                 return true;
2325
2326         return false;
2327 }
2328
/* Dump one neighbour table's hash into skb, honouring optional
 * NDA_IFINDEX/NDA_MASTER filters from the request.  Walks the hash
 * under RCU; resume state is kept in cb->args[1] (bucket) and
 * cb->args[2] (index within bucket).
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tb[NDA_MAX + 1];
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	int filter_master_idx = 0, filter_idx = 0;
	unsigned int flags = NLM_F_MULTI;
	int err;

	/* best-effort parse of filter attributes; a parse failure just
	 * means no filtering (note: no policy is applied here)
	 */
	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
	if (!err) {
		if (tb[NDA_IFINDEX]) {
			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
				return -EINVAL;
			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
		}
		if (tb[NDA_MASTER]) {
			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
				return -EINVAL;
			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
		}
		if (filter_idx || filter_master_idx)
			flags |= NLM_F_DUMP_FILTERED;
	}

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* only the resumed bucket keeps its saved index */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
			    neigh_master_filtered(n->dev, filter_master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				/* skb full: save position and resume later */
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2391
/* Dump one table's proxy (pneigh) hash into skb under tbl->lock.
 * Resume state lives in cb->args[3] (bucket) and cb->args[4] (index);
 * note these differ from the slots used by neigh_dump_table().
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		/* only the resumed bucket keeps its saved index */
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) < 0) {
				/* skb full: save position and resume later */
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2429
/* RTM_GETNEIGH dump entry point: iterate all neighbour tables
 * (optionally filtered by family) and dump either regular entries or,
 * if the request has ndm_flags == NTF_PROXY, the proxy entries.
 * cb->args[0] holds the table resume index; args[1..] belong to the
 * per-table dumpers and are cleared when moving to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* fresh table: clear the per-table resume state */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
2469
2470 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2471 {
2472         int chain;
2473         struct neigh_hash_table *nht;
2474
2475         rcu_read_lock_bh();
2476         nht = rcu_dereference_bh(tbl->nht);
2477
2478         read_lock(&tbl->lock); /* avoid resizes */
2479         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2480                 struct neighbour *n;
2481
2482                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2483                      n != NULL;
2484                      n = rcu_dereference_bh(n->next))
2485                         cb(n, cookie);
2486         }
2487         read_unlock(&tbl->lock);
2488         rcu_read_unlock_bh();
2489 }
2490 EXPORT_SYMBOL(neigh_for_each);
2491
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Walk every hash chain and call cb(n) on each neighbour; a non-zero
 * return asks us to unlink the entry and release the table's reference.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		/* np always points at the link that references n, so the
		 * unlink below is a single pointer update.
		 */
		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Unlink n; concurrent RCU readers still see
				 * a valid ->next via rcu_assign_pointer().
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* Drop the reference outside n->lock. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2526
/* Transmit skb to link-layer destination addr through dev.
 *
 * index selects the resolving neigh table (< NEIGH_NR_TABLES), or
 * NEIGH_LINK_TABLE when addr is already a link-layer address needing no
 * resolution.  The skb is consumed (freed) on error paths.  Returns the
 * result of neigh->output()/dev_queue_xmit(), or a negative errno
 * (-EAFNOSUPPORT for an unknown index).
 */
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;
	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		/* Use a cached entry if present, otherwise create one. */
		neigh = __neigh_lookup_noref(tbl, addr, dev);
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	}
	else if (index == NEIGH_LINK_TABLE) {
		/* No resolution required: build the header and queue. */
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2564
2565 #ifdef CONFIG_PROC_FS
2566
/* Return the first neighbour (lowest bucket) visible in this netns that
 * passes the state-flags filter, recording its bucket in state->bucket.
 * Caller holds rcu_read_lock_bh() (taken in neigh_seq_start()).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	/* We are (re)starting on the main hash: clear the pneigh marker. */
	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* Protocol-specific sub-iterator may veto. */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			/* Skip NOARP-only entries when requested. */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2605
/* Advance from neighbour n to the next matching entry, spilling into
 * later buckets as needed.  When pos is non-NULL it is decremented for
 * every entry returned (used by neigh_get_idx() to seek).  Caller holds
 * rcu_read_lock_bh().
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		/* Let the protocol iterate within the current entry first. */
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			/* Skip NOARP-only entries when requested. */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* Current bucket exhausted: try the next one. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2653
2654 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2655 {
2656         struct neighbour *n = neigh_get_first(seq);
2657
2658         if (n) {
2659                 --(*pos);
2660                 while (*pos) {
2661                         n = neigh_get_next(seq, n, pos);
2662                         if (!n)
2663                                 break;
2664                 }
2665         }
2666         return *pos ? NULL : n;
2667 }
2668
2669 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2670 {
2671         struct neigh_seq_state *state = seq->private;
2672         struct net *net = seq_file_net(seq);
2673         struct neigh_table *tbl = state->tbl;
2674         struct pneigh_entry *pn = NULL;
2675         int bucket = state->bucket;
2676
2677         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2678         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2679                 pn = tbl->phash_buckets[bucket];
2680                 while (pn && !net_eq(pneigh_net(pn), net))
2681                         pn = pn->next;
2682                 if (pn)
2683                         break;
2684         }
2685         state->bucket = bucket;
2686
2687         return pn;
2688 }
2689
2690 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2691                                             struct pneigh_entry *pn,
2692                                             loff_t *pos)
2693 {
2694         struct neigh_seq_state *state = seq->private;
2695         struct net *net = seq_file_net(seq);
2696         struct neigh_table *tbl = state->tbl;
2697
2698         do {
2699                 pn = pn->next;
2700         } while (pn && !net_eq(pneigh_net(pn), net));
2701
2702         while (!pn) {
2703                 if (++state->bucket > PNEIGH_HASHMASK)
2704                         break;
2705                 pn = tbl->phash_buckets[state->bucket];
2706                 while (pn && !net_eq(pneigh_net(pn), net))
2707                         pn = pn->next;
2708                 if (pn)
2709                         break;
2710         }
2711
2712         if (pn && pos)
2713                 --(*pos);
2714
2715         return pn;
2716 }
2717
2718 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2719 {
2720         struct pneigh_entry *pn = pneigh_get_first(seq);
2721
2722         if (pn) {
2723                 --(*pos);
2724                 while (*pos) {
2725                         pn = pneigh_get_next(seq, pn, pos);
2726                         if (!pn)
2727                                 break;
2728                 }
2729         }
2730         return *pos ? NULL : pn;
2731 }
2732
2733 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2734 {
2735         struct neigh_seq_state *state = seq->private;
2736         void *rc;
2737         loff_t idxpos = *pos;
2738
2739         rc = neigh_get_idx(seq, &idxpos);
2740         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2741                 rc = pneigh_get_idx(seq, &idxpos);
2742
2743         return rc;
2744 }
2745
2746 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2747         __acquires(rcu_bh)
2748 {
2749         struct neigh_seq_state *state = seq->private;
2750
2751         state->tbl = tbl;
2752         state->bucket = 0;
2753         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2754
2755         rcu_read_lock_bh();
2756         state->nht = rcu_dereference_bh(tbl->nht);
2757
2758         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2759 }
2760 EXPORT_SYMBOL(neigh_seq_start);
2761
/* seq_file ->next for the combined neigh/pneigh walk.  v is the entry
 * returned previously (or SEQ_START_TOKEN right after the header).
 * The main table is exhausted first; the walk then switches to proxy
 * entries unless NEIGH_SEQ_NEIGH_ONLY is set.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		/* Header just emitted: hand out the first real entry. */
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* Main table done; optionally continue with proxies. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2788
/* seq_file ->stop: pairs with the rcu_read_lock_bh() taken in
 * neigh_seq_start().
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2795
2796 /* statistics via seq_file */
2797
2798 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2799 {
2800         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2801         int cpu;
2802
2803         if (*pos == 0)
2804                 return SEQ_START_TOKEN;
2805
2806         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2807                 if (!cpu_possible(cpu))
2808                         continue;
2809                 *pos = cpu+1;
2810                 return per_cpu_ptr(tbl->stats, cpu);
2811         }
2812         return NULL;
2813 }
2814
2815 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2816 {
2817         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2818         int cpu;
2819
2820         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2821                 if (!cpu_possible(cpu))
2822                         continue;
2823                 *pos = cpu+1;
2824                 return per_cpu_ptr(tbl->stats, cpu);
2825         }
2826         return NULL;
2827 }
2828
/* Nothing to release: the stats walk takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2833
/* Print one row of /proc/net/stat/<tbl>: the header for
 * SEQ_START_TOKEN, otherwise one possible CPU's counters (v is the
 * per-cpu neigh_statistics handed out by start/next).
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	/* First column is the table-wide entry count; the rest are the
	 * per-cpu counters in header order.
	 */
	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}
2868
/* seq_file operations for the per-table statistics files. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2875 #endif /* CONFIG_PROC_FS */
2876
2877 static inline size_t neigh_nlmsg_size(void)
2878 {
2879         return NLMSG_ALIGN(sizeof(struct ndmsg))
2880                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2881                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2882                + nla_total_size(sizeof(struct nda_cacheinfo))
2883                + nla_total_size(4); /* NDA_PROBES */
2884 }
2885
2886 static void __neigh_notify(struct neighbour *n, int type, int flags,
2887                            u32 pid)
2888 {
2889         struct net *net = dev_net(n->dev);
2890         struct sk_buff *skb;
2891         int err = -ENOBUFS;
2892
2893         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2894         if (skb == NULL)
2895                 goto errout;
2896
2897         err = neigh_fill_info(skb, n, pid, 0, type, flags);
2898         if (err < 0) {
2899                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2900                 WARN_ON(err == -EMSGSIZE);
2901                 kfree_skb(skb);
2902                 goto errout;
2903         }
2904         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2905         return;
2906 errout:
2907         if (err < 0)
2908                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2909 }
2910
/* Ask userspace resolvers (app_solicit) to resolve n by emitting an
 * RTM_GETNEIGH request to the RTNLGRP_NEIGH multicast group.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
2916
2917 #ifdef CONFIG_SYSCTL
2918 static int zero;
2919 static int int_max = INT_MAX;
2920 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2921
2922 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2923                            void __user *buffer, size_t *lenp, loff_t *ppos)
2924 {
2925         int size, ret;
2926         struct ctl_table tmp = *ctl;
2927
2928         tmp.extra1 = &zero;
2929         tmp.extra2 = &unres_qlen_max;
2930         tmp.data = &size;
2931
2932         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2933         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2934
2935         if (write && !ret)
2936                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2937         return ret;
2938 }
2939
2940 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2941                                                    int family)
2942 {
2943         switch (family) {
2944         case AF_INET:
2945                 return __in_dev_arp_parms_get_rcu(dev);
2946         case AF_INET6:
2947                 return __in6_dev_nd_parms_get_rcu(dev);
2948         }
2949         return NULL;
2950 }
2951
2952 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2953                                   int index)
2954 {
2955         struct net_device *dev;
2956         int family = neigh_parms_family(p);
2957
2958         rcu_read_lock();
2959         for_each_netdev_rcu(net, dev) {
2960                 struct neigh_parms *dst_p =
2961                                 neigh_get_dev_parms_rcu(dev, family);
2962
2963                 if (dst_p && !test_bit(index, dst_p->data_state))
2964                         dst_p->data[index] = p->data[index];
2965         }
2966         rcu_read_unlock();
2967 }
2968
2969 static void neigh_proc_update(struct ctl_table *ctl, int write)
2970 {
2971         struct net_device *dev = ctl->extra1;
2972         struct neigh_parms *p = ctl->extra2;
2973         struct net *net = neigh_parms_net(p);
2974         int index = (int *) ctl->data - p->data;
2975
2976         if (!write)
2977                 return;
2978
2979         set_bit(index, p->data_state);
2980         if (index == NEIGH_VAR_DELAY_PROBE_TIME)
2981                 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2982         if (!dev) /* NULL dev means this is default value */
2983                 neigh_copy_dflt_parms(net, p, index);
2984 }
2985
2986 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2987                                            void __user *buffer,
2988                                            size_t *lenp, loff_t *ppos)
2989 {
2990         struct ctl_table tmp = *ctl;
2991         int ret;
2992
2993         tmp.extra1 = &zero;
2994         tmp.extra2 = &int_max;
2995
2996         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2997         neigh_proc_update(ctl, write);
2998         return ret;
2999 }
3000
3001 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3002                         void __user *buffer, size_t *lenp, loff_t *ppos)
3003 {
3004         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3005
3006         neigh_proc_update(ctl, write);
3007         return ret;
3008 }
3009 EXPORT_SYMBOL(neigh_proc_dointvec);
3010
3011 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3012                                 void __user *buffer,
3013                                 size_t *lenp, loff_t *ppos)
3014 {
3015         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3016
3017         neigh_proc_update(ctl, write);
3018         return ret;
3019 }
3020 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3021
3022 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3023                                               void __user *buffer,
3024                                               size_t *lenp, loff_t *ppos)
3025 {
3026         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3027
3028         neigh_proc_update(ctl, write);
3029         return ret;
3030 }
3031
3032 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3033                                    void __user *buffer,
3034                                    size_t *lenp, loff_t *ppos)
3035 {
3036         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3037
3038         neigh_proc_update(ctl, write);
3039         return ret;
3040 }
3041 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3042
3043 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3044                                           void __user *buffer,
3045                                           size_t *lenp, loff_t *ppos)
3046 {
3047         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3048
3049         neigh_proc_update(ctl, write);
3050         return ret;
3051 }
3052
/* Handler shared by base_reachable_time and base_reachable_time_ms:
 * dispatch on the sysctl name, then refresh p->reachable_time so the
 * new base takes effect immediately rather than at the next
 * neigh_periodic_work() recomputation.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;	/* unexpected procname: reject */

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}
3077
/* Offset of data[index] within struct neigh_parms, encoded as a fake
 * pointer; neigh_sysctl_register() later adds the real parms address to
 * turn each ->data into a usable pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Build one ctl_table slot named 'name' at index NEIGH_VAR_<attr>,
 * backed by NEIGH_VAR_<data_attr> and handled by 'proc'.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* "_REUSED" variants expose a second sysctl name over the same backing
 * variable (e.g. retrans_time vs. retrans_time_ms).
 */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3107
/* Template sysctl table, copied (kmemdup) and patched per parms
 * instance by neigh_sysctl_register().  Per-parms knobs come first;
 * the table-wide gc_* knobs at the end are dropped for per-device
 * tables.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
3162
/* Register the /proc/sys/net/<ipv4|ipv6>/neigh/<dev|default> tree for
 * one parms instance.
 *
 * dev:     the device, or NULL for the family's "default" parms.
 * p:       the neigh_parms backing the knobs.
 * handler: optional protocol override for the (retrans|reachable) time
 *          entries (e.g. used by IPv6 ndisc).
 * Returns 0 on success, -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	/* Work on a private copy of the template so each parms instance
	 * gets its own ->data pointers.
	 */
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		/* Template ->data holds the field offset; add the parms
		 * base address to make it a real pointer.
		 */
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		/* Default parms also expose the table-wide gc knobs. */
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3251
3252 void neigh_sysctl_unregister(struct neigh_parms *p)
3253 {
3254         if (p->sysctl_table) {
3255                 struct neigh_sysctl_table *t = p->sysctl_table;
3256                 p->sysctl_table = NULL;
3257                 unregister_net_sysctl_table(t->sysctl_header);
3258                 kfree(t);
3259         }
3260 }
3261 EXPORT_SYMBOL(neigh_sysctl_unregister);
3262
3263 #endif  /* CONFIG_SYSCTL */
3264
/* Register the family-agnostic (PF_UNSPEC) rtnetlink handlers for
 * neighbour entries and neighbour-table tuning at boot.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	/* GETNEIGH is dump-only; doit stays NULL. */
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);