/*
 * net/core/neighbour.c
 * (snapshot taken from the net-next merge into linux-2.6-microblaze.git)
 */
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43
44 #define DEBUG
45 #define NEIGH_DEBUG 1
46 #define neigh_dbg(level, fmt, ...)              \
47 do {                                            \
48         if (level <= NEIGH_DEBUG)               \
49                 pr_debug(fmt, ##__VA_ARGS__);   \
50 } while (0)
51
52 #define PNEIGH_HASHMASK         0xF
53
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
56                            u32 pid);
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59                                     struct net_device *dev);
60
61 #ifdef CONFIG_PROC_FS
62 static const struct seq_operations neigh_stat_seq_ops;
63 #endif
64
65 /*
66    Neighbour hash table buckets are protected with rwlock tbl->lock.
67
68    - All the scans/updates to hash buckets MUST be made under this lock.
69    - NOTHING clever should be made under this lock: no callbacks
70      to protocol backends, no attempts to send something to network.
71      It will result in deadlocks, if backend/driver wants to use neighbour
72      cache.
73    - If the entry requires some non-trivial actions, increase
74      its reference count and release table lock.
75
76    Neighbour entries are protected:
77    - with reference count.
78    - with rwlock neigh->lock
79
80    Reference count prevents destruction.
81
82    neigh->lock mainly serializes ll address data and its validity state.
83    However, the same lock is used to protect another entry fields:
84     - timer
85     - resolution queue
86
87    Again, nothing clever shall be made under neigh->lock,
88    the most complicated procedure, which we allow is dev->hard_header.
89    It is supposed, that dev->hard_header is simplistic and does
90    not make callbacks to neighbour tables.
91  */
92
/* Output handler installed on dead/unresolvable entries: drop the
 * packet and tell the caller the neighbour is unreachable.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
98
/* Tear-down notification path for an entry that has been unlinked from
 * the table: run the optional per-parms cleanup hook, notify netlink
 * listeners and netevent subscribers, then drop the table's reference
 * (the final neigh_release() frees the entry via RCU).
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	/* optional protocol-specific cleanup hook */
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
108
109 /*
110  * It is random distribution in the interval (1/2)*base...(3/2)*base.
111  * It corresponds to default IPv6 settings and is not overridable,
112  * because it is really reasonable choice.
113  */
114
/* See the comment above: uniform random value in ((1/2)base, (3/2)base). */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long half_base;

	if (!base)
		return 0;

	half_base = base >> 1;
	return half_base + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
120
121
/* Conditionally unlink @n from its hash chain (@np is the link that
 * points at @n) and mark it dead.  The entry is removed only if nothing
 * but the table holds a reference and its NUD state does not intersect
 * @state (callers pass NUD_PERMANENT to spare permanent entries, or 0
 * to spare nothing).  Caller must hold tbl->lock for writing.
 * Returns true if the entry was unlinked and released.
 */
static bool neigh_del(struct neighbour *n, __u8 state,
		      struct neighbour __rcu **np, struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
		struct neighbour *neigh;

		/* splice n out; RCU readers may still see it until a
		 * grace period elapses, hence rcu_assign_pointer() */
		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		n->dead = 1;
		retval = true;
	}
	write_unlock(&n->lock);
	/* notify and drop the table reference outside n->lock */
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
142
/* Remove @ndel from @tbl if it is still unreferenced by anyone but the
 * table itself.  Caller holds tbl->lock for writing.  Returns true if
 * the entry was found and unlinked.
 */
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	/* recompute the bucket this entry hashes to */
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			/* state mask 0: remove regardless of NUD state */
			return neigh_del(n, 0, np, tbl);
		np = &n->next;
	}
	return false;
}
165
/* Synchronous emergency garbage collection, triggered from neigh_alloc()
 * when the table exceeds its gc thresholds.  Walks every bucket under
 * tbl->lock and frees entries that are unreferenced and not PERMANENT.
 * Returns 1 if at least one entry was freed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
				/* *np already advanced by the unlink */
				shrunk = 1;
				continue;
			}
			np = &n->next;
		}
	}

	/* throttles the next thresh2-triggered pass (see neigh_alloc()) */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
202
203 static void neigh_add_timer(struct neighbour *n, unsigned long when)
204 {
205         neigh_hold(n);
206         if (unlikely(mod_timer(&n->timer, when))) {
207                 printk("NEIGH: BUG, double timer add, state is %x\n",
208                        n->nud_state);
209                 dump_stack();
210         }
211 }
212
/* If a state-machine timer is pending on @n, cancel it and drop the
 * reference neigh_add_timer() took.  Returns 1 if a timer was actually
 * cancelled, 0 otherwise.
 */
static int neigh_del_timer(struct neighbour *n)
{
	/* del_timer() tells us whether the timer was still pending and
	 * therefore still owned a reference */
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}
222
/* Drain the proxy queue: free every queued skb and release the device
 * reference held on behalf of each one.
 */
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
232
/* Remove every entry for @dev (or every entry at all when @dev is NULL)
 * from the hash table.  Caller holds tbl->lock for writing.  Entries
 * still referenced elsewhere cannot be freed yet, so they are neutered
 * in place and freed later by the last neigh_release().
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* unlink for RCU readers before touching state */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			/* drops the table's reference */
			neigh_cleanup_and_release(n);
		}
	}
}
282
/* Flush all cached entries for @dev, e.g. after its link-layer address
 * changed and every cached resolution became stale.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
290
/* Tear down all table state for a device going down: flush its
 * neighbour entries, then its proxy entries.  Note the asymmetric
 * locking: pneigh_ifdown_and_unlock() releases tbl->lock taken here,
 * so the proxy destructors run without the table lock held.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
302
/* Allocate and initialize a new, not-yet-hashed neighbour entry.
 * If the table is over gc_thresh3, or over gc_thresh2 with no flush in
 * the last 5 seconds, a forced GC pass is attempted first; allocation
 * fails (NULL) if the table is still over gc_thresh3 afterwards.
 * The entry starts with dead == 1 until __neigh_create() inserts it.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* count ourselves in up front; undone on any failure path */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

	/* entry_size covers the protocol-specific tail; neigh_priv_len
	 * adds per-device private room */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;	/* not in the hash table yet */
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
347
/* Seed one hash randomizer word; OR-ing with 1 guarantees the value is
 * odd and therefore never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}
352
/* Allocate a zeroed hash table with 2^shift buckets and fresh hash
 * randomizers.  Small bucket arrays come from kzalloc, larger ones
 * from whole pages (neigh_hash_free_rcu() mirrors this choice).
 * GFP_ATOMIC because this can run under tbl->lock.  Returns NULL on
 * allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
379
/* RCU callback undoing neigh_hash_alloc(): free the bucket array via
 * the same allocator it came from (kzalloc vs whole pages), then the
 * table descriptor itself.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
394
/* Rehash the table into 2^new_shift buckets.  Caller holds tbl->lock
 * for writing.  Entries are moved by relinking (no copies); each chain
 * head is published with rcu_assign_pointer so concurrent RCU readers
 * always see a consistent chain.  The old table is freed after a grace
 * period.  On allocation failure the old table is kept and returned.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* new table has fresh hash_rnd, so rehash fully */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* push onto the head of the new chain */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
435
/* Look up a neighbour by protocol address and device, taking a
 * reference for the caller.  Returns NULL if no entry exists or the
 * found entry is already being torn down (refcount hit zero).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		/* NOTE(review): counted as a hit even when the refcount
		 * grab fails — long-standing behaviour */
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
455
/* Look up a neighbour by protocol address and netns only, with the hash
 * computed for a NULL device.  Takes a reference on success; returns
 * NULL if nothing matches or the match is being torn down.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
486
/* Create and hash a new neighbour entry for @pkey/@dev, unless one was
 * inserted concurrently — in that case the existing entry is returned
 * instead.  On success the entry is live (dead cleared) and published
 * in the hash table; with @want_ref an extra reference is taken for the
 * caller.  Returns ERR_PTR() on allocation or constructor failure.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* backdate confirmation so the fresh entry starts unconfirmed */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* parms being torn down under us — refuse the insert */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* lost a race against a concurrent create? reuse the winner */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	/* publish at the head of the bucket for RCU readers */
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
574
575 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
576 {
577         u32 hash_val = *(u32 *)(pkey + key_len - 4);
578         hash_val ^= (hash_val >> 16);
579         hash_val ^= hash_val >> 8;
580         hash_val ^= hash_val >> 4;
581         hash_val &= PNEIGH_HASHMASK;
582         return hash_val;
583 }
584
585 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
586                                               struct net *net,
587                                               const void *pkey,
588                                               unsigned int key_len,
589                                               struct net_device *dev)
590 {
591         while (n) {
592                 if (!memcmp(n->key, pkey, key_len) &&
593                     net_eq(pneigh_net(n), net) &&
594                     (n->dev == dev || !n->dev))
595                         return n;
596                 n = n->next;
597         }
598         return NULL;
599 }
600
/* Chain-scan-only variant of pneigh_lookup(): no locking, no creation.
 * NOTE(review): callers appear responsible for their own serialization
 * against table updates — confirm at call sites.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
611
/* Look up a proxy (pneigh) entry; with @creat nonzero, allocate and
 * insert one if none exists.  The creation path asserts RTNL, which is
 * what makes the lookup-then-insert below safe against duplicate
 * creators.  Returns the entry, or NULL on miss (!creat) or allocation/
 * constructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	/* optional per-protocol constructor; unwind fully on failure */
	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	/* publish at the head of the hash chain */
	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
656
657
/* Delete the proxy entry matching @pkey, @dev and @net exactly (no
 * wildcard-device match here, unlike lookup).  The entry is unlinked
 * under tbl->lock but destroyed after the lock is dropped.  Returns 0
 * on success, -ENOENT if nothing matched.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			/* destroy outside the lock */
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
683
/* Remove all proxy entries for @dev (or every entry when @dev is NULL).
 * Called with tbl->lock write-held and unconditionally RELEASES it:
 * matching entries are collected on a private freelist under the lock,
 * then destroyed after unlocking so the destructors run lock-free.
 * The return value is unused by callers (historically -ENOENT).
 */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
714
715 static void neigh_parms_destroy(struct neigh_parms *parms);
716
/* Drop a reference on @parms, destroying it on the final put. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
722
/*
 *	Final destruction, reached from neigh_release() when the refcount
 *	drops to zero.  The neighbour must already be out of the table
 *	(dead set, unlinked); a live entry here is a refcounting bug, so
 *	we warn and deliberately leak rather than corrupt the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* a pending timer would hold a reference — impossible at refcnt 0 */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
759
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	/* route traffic through the slow, validating output path */
	neigh->output = neigh->ops->output;
}
771
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	/* switch to the fast output path for validated entries */
	neigh->output = neigh->ops->connected_output;
}
783
784 static void neigh_periodic_work(struct work_struct *work)
785 {
786         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
787         struct neighbour *n;
788         struct neighbour __rcu **np;
789         unsigned int i;
790         struct neigh_hash_table *nht;
791
792         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
793
794         write_lock_bh(&tbl->lock);
795         nht = rcu_dereference_protected(tbl->nht,
796                                         lockdep_is_held(&tbl->lock));
797
798         /*
799          *      periodically recompute ReachableTime from random function
800          */
801
802         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
803                 struct neigh_parms *p;
804                 tbl->last_rand = jiffies;
805                 list_for_each_entry(p, &tbl->parms_list, list)
806                         p->reachable_time =
807                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
808         }
809
810         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
811                 goto out;
812
813         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
814                 np = &nht->hash_buckets[i];
815
816                 while ((n = rcu_dereference_protected(*np,
817                                 lockdep_is_held(&tbl->lock))) != NULL) {
818                         unsigned int state;
819
820                         write_lock(&n->lock);
821
822                         state = n->nud_state;
823                         if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
824                             (n->flags & NTF_EXT_LEARNED)) {
825                                 write_unlock(&n->lock);
826                                 goto next_elt;
827                         }
828
829                         if (time_before(n->used, n->confirmed))
830                                 n->used = n->confirmed;
831
832                         if (refcount_read(&n->refcnt) == 1 &&
833                             (state == NUD_FAILED ||
834                              time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
835                                 *np = n->next;
836                                 n->dead = 1;
837                                 write_unlock(&n->lock);
838                                 neigh_cleanup_and_release(n);
839                                 continue;
840                         }
841                         write_unlock(&n->lock);
842
843 next_elt:
844                         np = &n->next;
845                 }
846                 /*
847                  * It's fine to release lock here, even if hash table
848                  * grows while we are preempted.
849                  */
850                 write_unlock_bh(&tbl->lock);
851                 cond_resched();
852                 write_lock_bh(&tbl->lock);
853                 nht = rcu_dereference_protected(tbl->nht,
854                                                 lockdep_is_held(&tbl->lock));
855         }
856 out:
857         /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
858          * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
859          * BASE_REACHABLE_TIME.
860          */
861         queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
862                               NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
863         write_unlock_bh(&tbl->lock);
864 }
865
866 static __inline__ int neigh_max_probes(struct neighbour *n)
867 {
868         struct neigh_parms *p = n->parms;
869         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
870                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
871                 NEIGH_VAR(p, MCAST_PROBES));
872 }
873
/* Record a resolution failure on @neigh: bump the res_failed statistic
 * and report an error for every packet still parked on its arp_queue.
 * Called with neigh->lock held for writing; the lock is dropped around
 * each error_report() callback and re-taken afterwards.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		/* Drop the lock across the callback; the NUD_FAILED
		 * re-check above terminates the loop if the state was
		 * changed from under us while unlocked.
		 */
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	/* Discard any remainder and reset the queued-bytes accounting. */
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
898
/* Send one solicitation for @neigh and count it in neigh->probes.
 * Called with neigh->lock held for writing; the lock is released before
 * invoking the solicit callback and is NOT re-taken (note __releases).
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
912
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;	/* default: look again in one second */

	/* The entry may have left a timer-driven state before we got the
	 * lock; nothing to do then.
	 */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Confirmed recently enough: stay REACHABLE. */
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Used recently but unconfirmed: re-verify after a delay. */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			/* Idle and unconfirmed: demote to STALE (no timer runs
			 * in that state; notify listeners of the change).
			 */
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* A confirmation arrived during the delay window. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* Delay expired without confirmation: start probing. */
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	/* Probe budget exhausted: declare the neighbour failed. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never re-arm closer than HZ/2 to bound timer load. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	neigh_release(neigh);
}
1000
/* Kick off (or continue) address resolution for @neigh, queueing @skb
 * until the entry becomes valid.  Returns 0 when the caller may transmit
 * immediately, 1 when the skb was queued or dropped.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already resolved or being verified: caller can transmit. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* Start resolution.  Pre-charging probes with
			 * UCAST_PROBES leaves only the mcast/app share of
			 * neigh_max_probes() for the INCOMPLETE phase.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probes configured at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Use the stale entry but schedule a delayed re-probe. */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Make room within the byte budget, dropping the
			 * oldest queued packets first.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops neigh->lock itself (plain write_unlock),
	 * which is why the BH re-enable is split out below.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1078
1079 static void neigh_update_hhs(struct neighbour *neigh)
1080 {
1081         struct hh_cache *hh;
1082         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1083                 = NULL;
1084
1085         if (neigh->dev->header_ops)
1086                 update = neigh->dev->header_ops->cache_update;
1087
1088         if (update) {
1089                 hh = &neigh->hh;
1090                 if (hh->hh_len) {
1091                         write_seqlock_bh(&hh->hh_lock);
1092                         update(hh, neigh->dev, neigh->ha);
1093                         write_sequnlock_bh(&hh->hh_lock);
1094                 }
1095         }
1096 }
1097
1098
1099
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold a reference count on the entry.
 */
1118
/* Apply a state/address update to @neigh per the flags documented above.
 * Returns 0 on success, -EPERM when a non-admin update targets a
 * NOARP/PERMANENT entry, or -EINVAL when no lladdr is available.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	neigh_update_ext_learned(neigh, flags, &notify);

	/* Transition to an invalid state: tear down timers and, when
	 * leaving an unresolved state for FAILED, flush the arp_queue.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Weak override: keep the cached address but
				 * demote the entry to STALE for re-validation.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamps only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha) {
		if (new & NUD_CONNECTED)
			neigh->confirmed = jiffies;
		neigh->updated = jiffies;
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under the ha seqlock so lockless
		 * readers in the output paths see a consistent value.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate confirmed so the entry looks long-unconfirmed. */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1290
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	/* Re-arm as INCOMPLETE with the probe budget already exhausted:
	 * one RETRANS_TIME tick returns the entry to NUD_FAILED unless a
	 * confirmation arrives first (see neigh_timer_handler()).
	 */
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
1307
1308 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1309                                  u8 *lladdr, void *saddr,
1310                                  struct net_device *dev)
1311 {
1312         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1313                                                  lladdr || !dev->addr_len);
1314         if (neigh)
1315                 neigh_update(neigh, lladdr, NUD_STALE,
1316                              NEIGH_UPDATE_F_OVERRIDE, 0);
1317         return neigh;
1318 }
1319 EXPORT_SYMBOL(neigh_event_ns);
1320
1321 /* called with read_lock_bh(&n->lock); */
1322 static void neigh_hh_init(struct neighbour *n)
1323 {
1324         struct net_device *dev = n->dev;
1325         __be16 prot = n->tbl->protocol;
1326         struct hh_cache *hh = &n->hh;
1327
1328         write_lock_bh(&n->lock);
1329
1330         /* Only one thread can come in here and initialize the
1331          * hh_cache entry.
1332          */
1333         if (!hh->hh_len)
1334                 dev->header_ops->cache(n, hh, prot);
1335
1336         write_unlock_bh(&n->lock);
1337 }
1338
/* Slow and careful. */

/* Output path for entries that may still need resolution: trigger
 * resolution via neigh_event_send() (which may queue the skb), build the
 * hardware header under the ha seqlock, then transmit.  Returns 0 if the
 * skb was queued for resolution, -EINVAL on header failure, or the
 * dev_queue_xmit() result.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* Populate the hh cache on first use. */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		/* Retry until the header was built from a stable ha. */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1373
1374 /* As fast as possible without hh cache */
1375
1376 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1377 {
1378         struct net_device *dev = neigh->dev;
1379         unsigned int seq;
1380         int err;
1381
1382         do {
1383                 __skb_pull(skb, skb_network_offset(skb));
1384                 seq = read_seqbegin(&neigh->ha_lock);
1385                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1386                                       neigh->ha, NULL, skb->len);
1387         } while (read_seqretry(&neigh->ha_lock, seq));
1388
1389         if (err >= 0)
1390                 err = dev_queue_xmit(skb);
1391         else {
1392                 err = -EINVAL;
1393                 kfree_skb(skb);
1394         }
1395         return err;
1396 }
1397 EXPORT_SYMBOL(neigh_connected_output);
1398
/* Output path that needs no neighbour information at all: hand the skb
 * straight to the device queue; @neigh is unused.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1404
/* Proxy-queue timer handler: dispatch every queued proxy request whose
 * scheduled time has arrived and re-arm the timer for the earliest
 * remaining entry.
 */
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			/* Due: replay it through proxy_redo, or drop if
			 * there is no handler / the device went down.
			 */
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);	/* ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;	/* track earliest deadline */
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1438
/* Queue a proxy request for deferred handling, delayed by a random
 * amount bounded by PROXY_DELAY.  The skb is dropped when the queue
 * already exceeds PROXY_QLEN.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		/* Keep the earlier of the pending and new deadlines. */
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);	/* released in neigh_proxy_process() */
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1467
1468 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1469                                                       struct net *net, int ifindex)
1470 {
1471         struct neigh_parms *p;
1472
1473         list_for_each_entry(p, &tbl->parms_list, list) {
1474                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1475                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1476                         return p;
1477         }
1478
1479         return NULL;
1480 }
1481
1482 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1483                                       struct neigh_table *tbl)
1484 {
1485         struct neigh_parms *p;
1486         struct net *net = dev_net(dev);
1487         const struct net_device_ops *ops = dev->netdev_ops;
1488
1489         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1490         if (p) {
1491                 p->tbl            = tbl;
1492                 refcount_set(&p->refcnt, 1);
1493                 p->reachable_time =
1494                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1495                 dev_hold(dev);
1496                 p->dev = dev;
1497                 write_pnet(&p->net, net);
1498                 p->sysctl_table = NULL;
1499
1500                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1501                         dev_put(dev);
1502                         kfree(p);
1503                         return NULL;
1504                 }
1505
1506                 write_lock_bh(&tbl->lock);
1507                 list_add(&p->list, &tbl->parms.list);
1508                 write_unlock_bh(&tbl->lock);
1509
1510                 neigh_parms_data_state_cleanall(p);
1511         }
1512         return p;
1513 }
1514 EXPORT_SYMBOL(neigh_parms_alloc);
1515
/* RCU callback: drop the reference on a parms block whose grace period
 * (started by neigh_parms_release()) has elapsed.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1523
/* Unlink @parms from @tbl and schedule its reference drop after an RCU
 * grace period.  The table's built-in default parms is never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	/* Defer the refcount drop until concurrent RCU readers are done. */
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1537
/* Free a parms block once no references remain. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1542
/* Lockdep class key shared by every table's proxy_queue spinlock. */
static struct lock_class_key neigh_table_proxy_queue_class;

/* Registered neighbour tables, indexed by NEIGH_*_TABLE constants. */
static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1546
/* One-time initialization of a neighbour table @tbl registered at slot
 * @index: default parms, per-cpu stats, proc entry, hash tables, proxy
 * machinery, and the periodic GC work.  Panics on allocation failure.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Seed a small initial hash (shift 3); it grows on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	/* Tables may pre-set entry_size; otherwise derive it from the key
	 * length, keeping private data suitably aligned.
	 */
	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand  = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1597
/* Tear down a table set up by neigh_table_init(): stop the GC work and
 * proxy timer, flush all entries, and free the hashes, proc entry and
 * statistics.  Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* Free the hash only after any RCU readers are gone. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1624
1625 static struct neigh_table *neigh_find_table(int family)
1626 {
1627         struct neigh_table *tbl = NULL;
1628
1629         switch (family) {
1630         case AF_INET:
1631                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1632                 break;
1633         case AF_INET6:
1634                 tbl = neigh_tables[NEIGH_ND_TABLE];
1635                 break;
1636         case AF_DECnet:
1637                 tbl = neigh_tables[NEIGH_DN_TABLE];
1638                 break;
1639         }
1640
1641         return tbl;
1642 }
1643
/* RTM_DELNEIGH handler: delete the neighbour (or proxy) entry named by
 * the NDA_DST attribute on the device given by ndm_ifindex.  Runs under
 * RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len)
		goto out;

	/* Proxy entries are handled by the pneigh machinery instead. */
	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Invalidate first, then unlink under the table lock.
	 * NOTE(review): the lookup reference is dropped before
	 * neigh_remove_one() — presumably safe because the hash table
	 * still holds its own reference; confirm against neigh_remove_one.
	 */
	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
1705
/* RTM_NEWNEIGH handler: create or update a neighbour entry (or, with
 * NTF_PROXY, a proxy entry).
 *
 * Netlink flag semantics: NLM_F_CREATE permits creating a missing
 * entry, NLM_F_EXCL fails with -EEXIST on an existing one, and
 * NLM_F_REPLACE allows overriding an existing link-layer address.
 * Runs under RTNL.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied lladdr must be long enough for the device. */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	/* The supplied key must cover the table's full key length. */
	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		/* pneigh_lookup() with create == 1 adds the proxy entry
		 * when it does not exist yet.
		 */
		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		/* Without NLM_F_REPLACE an existing lladdr is kept. */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_USE) {
		/* NTF_USE only kicks off resolution; no state written. */
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				   NETLINK_CB(skb).portid);
	neigh_release(neigh);

out:
	return err;
}
1801
1802 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1803 {
1804         struct nlattr *nest;
1805
1806         nest = nla_nest_start(skb, NDTA_PARMS);
1807         if (nest == NULL)
1808                 return -ENOBUFS;
1809
1810         if ((parms->dev &&
1811              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1812             nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1813             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1814                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1815             /* approximative value for deprecated QUEUE_LEN (in packets) */
1816             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1817                         NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1818             nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1819             nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1820             nla_put_u32(skb, NDTPA_UCAST_PROBES,
1821                         NEIGH_VAR(parms, UCAST_PROBES)) ||
1822             nla_put_u32(skb, NDTPA_MCAST_PROBES,
1823                         NEIGH_VAR(parms, MCAST_PROBES)) ||
1824             nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1825                         NEIGH_VAR(parms, MCAST_REPROBES)) ||
1826             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1827                           NDTPA_PAD) ||
1828             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1829                           NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1830             nla_put_msecs(skb, NDTPA_GC_STALETIME,
1831                           NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1832             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1833                           NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1834             nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1835                           NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1836             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1837                           NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1838             nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1839                           NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1840             nla_put_msecs(skb, NDTPA_LOCKTIME,
1841                           NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1842                 goto nla_put_failure;
1843         return nla_nest_end(skb, nest);
1844
1845 nla_put_failure:
1846         nla_nest_cancel(skb, nest);
1847         return -EMSGSIZE;
1848 }
1849
/* Build a full RTM_NEWNEIGHTBL message for @tbl: name, GC thresholds,
 * runtime configuration (NDTA_CONFIG), per-CPU statistics summed into
 * NDTA_STATS, and the table's default parms (NDTA_PARMS).
 *
 * tbl->lock is held for reading across the fill so the values form a
 * consistent snapshot.  Returns 0 on success or -EMSGSIZE when the
 * skb runs out of room (the message is cancelled).
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		/* Runtime configuration snapshot (NDTA_CONFIG). */
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table is RCU-protected and may be resized;
		 * dereference it under rcu_read_lock_bh().
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		/* Sum the per-CPU statistics into one NDTA_STATS blob. */
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* The default parms must not be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1938
1939 static int neightbl_fill_param_info(struct sk_buff *skb,
1940                                     struct neigh_table *tbl,
1941                                     struct neigh_parms *parms,
1942                                     u32 pid, u32 seq, int type,
1943                                     unsigned int flags)
1944 {
1945         struct ndtmsg *ndtmsg;
1946         struct nlmsghdr *nlh;
1947
1948         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1949         if (nlh == NULL)
1950                 return -EMSGSIZE;
1951
1952         ndtmsg = nlmsg_data(nlh);
1953
1954         read_lock_bh(&tbl->lock);
1955         ndtmsg->ndtm_family = tbl->family;
1956         ndtmsg->ndtm_pad1   = 0;
1957         ndtmsg->ndtm_pad2   = 0;
1958
1959         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1960             neightbl_fill_parms(skb, parms) < 0)
1961                 goto errout;
1962
1963         read_unlock_bh(&tbl->lock);
1964         nlmsg_end(skb, nlh);
1965         return 0;
1966 errout:
1967         read_unlock_bh(&tbl->lock);
1968         nlmsg_cancel(skb, nlh);
1969         return -EMSGSIZE;
1970 }
1971
/* Validation policy for top-level RTM_*NEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1980
/* Validation policy for attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1997
/* RTM_SETNEIGHTBL handler: update a neighbour table's tunables.
 *
 * The table is selected by NDTA_NAME (and optionally ndtm_family).
 * Per-device or default parms can be changed through a nested
 * NDTA_PARMS block (NDTPA_IFINDEX selects which parms); the GC
 * thresholds and interval may only be changed from the initial
 * network namespace.  Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Find the table by name, optionally constrained by family. */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms. */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every attribute that was supplied. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count: convert to bytes. */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* let interested parties know about the change */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* Table-wide GC settings may only be changed from init_net. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2158
/* Netlink dump handler for RTM_GETNEIGHTBL: emit one table-info
 * message per table, followed by one parms-info message for each
 * per-device parms in the caller's netns.
 *
 * Resume state lives in cb->args[0] (table index) and cb->args[1]
 * (parms index within the table) so a partial dump can continue.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		/* Start after tbl->parms: the default parms were already
		 * included in the table message above.
		 */
		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2211
/* Fill one RTM_NEWNEIGH message for @neigh: ndmsg header, NDA_DST
 * key, NDA_LLADDR (only when the entry is NUD_VALID), probe count and
 * cache info.
 *
 * neigh->lock is held for reading while the volatile fields (state,
 * hardware address, timestamps, refcount) are sampled so they form a
 * consistent snapshot.  Returns 0 or -EMSGSIZE on overflow.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* Snapshot the hardware address under the lock so we
		 * never emit a half-updated address.
		 */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* Exclude the reference held by this dump. */
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2264
2265 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2266                             u32 pid, u32 seq, int type, unsigned int flags,
2267                             struct neigh_table *tbl)
2268 {
2269         struct nlmsghdr *nlh;
2270         struct ndmsg *ndm;
2271
2272         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2273         if (nlh == NULL)
2274                 return -EMSGSIZE;
2275
2276         ndm = nlmsg_data(nlh);
2277         ndm->ndm_family  = tbl->family;
2278         ndm->ndm_pad1    = 0;
2279         ndm->ndm_pad2    = 0;
2280         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2281         ndm->ndm_type    = RTN_UNICAST;
2282         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2283         ndm->ndm_state   = NUD_NONE;
2284
2285         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2286                 goto nla_put_failure;
2287
2288         nlmsg_end(skb, nlh);
2289         return 0;
2290
2291 nla_put_failure:
2292         nlmsg_cancel(skb, nlh);
2293         return -EMSGSIZE;
2294 }
2295
/* Announce a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH netlink notification.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2301
2302 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2303 {
2304         struct net_device *master;
2305
2306         if (!master_idx)
2307                 return false;
2308
2309         master = netdev_master_upper_dev_get(dev);
2310         if (!master || master->ifindex != master_idx)
2311                 return true;
2312
2313         return false;
2314 }
2315
2316 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2317 {
2318         if (filter_idx && dev->ifindex != filter_idx)
2319                 return true;
2320
2321         return false;
2322 }
2323
/* Dump the neighbour entries of @tbl into @skb, honouring optional
 * NDA_IFINDEX / NDA_MASTER dump filters from the request.
 *
 * The hash table is walked under rcu_read_lock_bh(); resume state is
 * kept in cb->args[1] (bucket) and cb->args[2] (index within bucket)
 * so a partial dump can continue.  Returns skb->len on a complete
 * pass or -1 when the skb filled up mid-dump.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tb[NDA_MAX + 1];
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	int filter_master_idx = 0, filter_idx = 0;
	unsigned int flags = NLM_F_MULTI;
	int err;

	/* Parse errors are deliberately ignored: an unfiltered dump is
	 * still performed when the request carries no usable filters.
	 */
	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
	if (!err) {
		if (tb[NDA_IFINDEX]) {
			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
				return -EINVAL;
			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
		}
		if (tb[NDA_MASTER]) {
			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
				return -EINVAL;
			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
		}
		if (filter_idx || filter_master_idx)
			flags |= NLM_F_DUMP_FILTERED;
	}

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* Only the resumed bucket keeps its saved index. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
			    neigh_master_filtered(n->dev, filter_master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2386
/* Dump the proxy entries of @tbl into @skb.
 *
 * tbl->lock is held for reading over the walk; resume state is kept
 * in cb->args[3] (bucket) and cb->args[4] (index within bucket).
 * Returns skb->len on a complete pass or -1 when the skb filled up.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		/* Only the resumed bucket keeps its saved index. */
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2424
/* Netlink dump handler for RTM_GETNEIGH: walk every neighbour table,
 * dumping either cached neighbours or (when the request's ndm_flags
 * equals NTF_PROXY) proxy entries.
 *
 * cb->args[0] holds the table index to resume from; the per-table
 * resume state in args[1..] is reset when moving to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Entering a new table: clear the per-table resume state. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
2464
/* Invoke @cb(entry, @cookie) for every neighbour entry in @tbl.
 *
 * Runs under rcu_read_lock_bh() plus the table read lock (the latter
 * held only to block hash-table resizes), so @cb must not sleep and
 * must not attempt to take tbl->lock itself.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
2485 EXPORT_SYMBOL(neigh_for_each);
2486
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Walk every hash chain and unlink each entry for which @cb returns
 * non-zero.  An unlinked entry is marked dead under its own lock and
 * then released via neigh_cleanup_and_release() once that lock is
 * dropped; surviving entries simply advance the link cursor.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* splice the entry out of the RCU chain */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
2520 EXPORT_SYMBOL(__neigh_for_each_release);
2521
2522 int neigh_xmit(int index, struct net_device *dev,
2523                const void *addr, struct sk_buff *skb)
2524 {
2525         int err = -EAFNOSUPPORT;
2526         if (likely(index < NEIGH_NR_TABLES)) {
2527                 struct neigh_table *tbl;
2528                 struct neighbour *neigh;
2529
2530                 tbl = neigh_tables[index];
2531                 if (!tbl)
2532                         goto out;
2533                 rcu_read_lock_bh();
2534                 neigh = __neigh_lookup_noref(tbl, addr, dev);
2535                 if (!neigh)
2536                         neigh = __neigh_create(tbl, addr, dev, false);
2537                 err = PTR_ERR(neigh);
2538                 if (IS_ERR(neigh)) {
2539                         rcu_read_unlock_bh();
2540                         goto out_kfree_skb;
2541                 }
2542                 err = neigh->output(neigh, skb);
2543                 rcu_read_unlock_bh();
2544         }
2545         else if (index == NEIGH_LINK_TABLE) {
2546                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2547                                       addr, NULL, skb->len);
2548                 if (err < 0)
2549                         goto out_kfree_skb;
2550                 err = dev_queue_xmit(skb);
2551         }
2552 out:
2553         return err;
2554 out_kfree_skb:
2555         kfree_skb(skb);
2556         goto out;
2557 }
2558 EXPORT_SYMBOL(neigh_xmit);
2559
2560 #ifdef CONFIG_PROC_FS
2561
2562 static struct neighbour *neigh_get_first(struct seq_file *seq)
2563 {
2564         struct neigh_seq_state *state = seq->private;
2565         struct net *net = seq_file_net(seq);
2566         struct neigh_hash_table *nht = state->nht;
2567         struct neighbour *n = NULL;
2568         int bucket = state->bucket;
2569
2570         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2571         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2572                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2573
2574                 while (n) {
2575                         if (!net_eq(dev_net(n->dev), net))
2576                                 goto next;
2577                         if (state->neigh_sub_iter) {
2578                                 loff_t fakep = 0;
2579                                 void *v;
2580
2581                                 v = state->neigh_sub_iter(state, n, &fakep);
2582                                 if (!v)
2583                                         goto next;
2584                         }
2585                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2586                                 break;
2587                         if (n->nud_state & ~NUD_NOARP)
2588                                 break;
2589 next:
2590                         n = rcu_dereference_bh(n->next);
2591                 }
2592
2593                 if (n)
2594                         break;
2595         }
2596         state->bucket = bucket;
2597
2598         return n;
2599 }
2600
/* Advance to the neighbour entry following @n, descending into the
 * optional per-entry sub-iterator, skipping foreign-namespace (and,
 * when requested, NOARP-only) entries, and moving into later hash
 * buckets as needed.  When @pos is non-NULL it is decremented once per
 * entry consumed, which lets neigh_get_idx() count down to a position.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;	/* more sub-entries under this entry */
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2648
2649 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2650 {
2651         struct neighbour *n = neigh_get_first(seq);
2652
2653         if (n) {
2654                 --(*pos);
2655                 while (*pos) {
2656                         n = neigh_get_next(seq, n, pos);
2657                         if (!n)
2658                                 break;
2659                 }
2660         }
2661         return *pos ? NULL : n;
2662 }
2663
2664 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2665 {
2666         struct neigh_seq_state *state = seq->private;
2667         struct net *net = seq_file_net(seq);
2668         struct neigh_table *tbl = state->tbl;
2669         struct pneigh_entry *pn = NULL;
2670         int bucket = state->bucket;
2671
2672         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2673         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2674                 pn = tbl->phash_buckets[bucket];
2675                 while (pn && !net_eq(pneigh_net(pn), net))
2676                         pn = pn->next;
2677                 if (pn)
2678                         break;
2679         }
2680         state->bucket = bucket;
2681
2682         return pn;
2683 }
2684
2685 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2686                                             struct pneigh_entry *pn,
2687                                             loff_t *pos)
2688 {
2689         struct neigh_seq_state *state = seq->private;
2690         struct net *net = seq_file_net(seq);
2691         struct neigh_table *tbl = state->tbl;
2692
2693         do {
2694                 pn = pn->next;
2695         } while (pn && !net_eq(pneigh_net(pn), net));
2696
2697         while (!pn) {
2698                 if (++state->bucket > PNEIGH_HASHMASK)
2699                         break;
2700                 pn = tbl->phash_buckets[state->bucket];
2701                 while (pn && !net_eq(pneigh_net(pn), net))
2702                         pn = pn->next;
2703                 if (pn)
2704                         break;
2705         }
2706
2707         if (pn && pos)
2708                 --(*pos);
2709
2710         return pn;
2711 }
2712
2713 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2714 {
2715         struct pneigh_entry *pn = pneigh_get_first(seq);
2716
2717         if (pn) {
2718                 --(*pos);
2719                 while (*pos) {
2720                         pn = pneigh_get_next(seq, pn, pos);
2721                         if (!pn)
2722                                 break;
2723                 }
2724         }
2725         return *pos ? NULL : pn;
2726 }
2727
2728 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2729 {
2730         struct neigh_seq_state *state = seq->private;
2731         void *rc;
2732         loff_t idxpos = *pos;
2733
2734         rc = neigh_get_idx(seq, &idxpos);
2735         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2736                 rc = pneigh_get_idx(seq, &idxpos);
2737
2738         return rc;
2739 }
2740
/* Common ->start helper for neighbour seq_files (e.g. /proc/net/arp).
 * Takes rcu_read_lock_bh() — released in neigh_seq_stop() — and
 * snapshots the hash table.  Position 0 yields the header token;
 * otherwise the entry at *pos is located across both the neighbour and
 * proxy tables.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is internal iterator state, never caller-supplied */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
2755 EXPORT_SYMBOL(neigh_seq_start);
2756
2757 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2758 {
2759         struct neigh_seq_state *state;
2760         void *rc;
2761
2762         if (v == SEQ_START_TOKEN) {
2763                 rc = neigh_get_first(seq);
2764                 goto out;
2765         }
2766
2767         state = seq->private;
2768         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2769                 rc = neigh_get_next(seq, v, NULL);
2770                 if (rc)
2771                         goto out;
2772                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2773                         rc = pneigh_get_first(seq);
2774         } else {
2775                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2776                 rc = pneigh_get_next(seq, v, NULL);
2777         }
2778 out:
2779         ++(*pos);
2780         return rc;
2781 }
2782 EXPORT_SYMBOL(neigh_seq_next);
2783
/* seq_file ->stop: drop the RCU read lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
2789 EXPORT_SYMBOL(neigh_seq_stop);
2790
2791 /* statistics via seq_file */
2792
2793 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2794 {
2795         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2796         int cpu;
2797
2798         if (*pos == 0)
2799                 return SEQ_START_TOKEN;
2800
2801         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2802                 if (!cpu_possible(cpu))
2803                         continue;
2804                 *pos = cpu+1;
2805                 return per_cpu_ptr(tbl->stats, cpu);
2806         }
2807         return NULL;
2808 }
2809
2810 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2811 {
2812         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2813         int cpu;
2814
2815         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2816                 if (!cpu_possible(cpu))
2817                         continue;
2818                 *pos = cpu+1;
2819                 return per_cpu_ptr(tbl->stats, cpu);
2820         }
2821         return NULL;
2822 }
2823
/* ->stop is intentionally empty: ->start takes no locks to release. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2828
/* Emit one line of the per-table statistics file: the column header
 * for the start token, otherwise one CPU's counters.  Field order and
 * formatting are userspace-visible (procfs ABI) — do not reorder.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}
2863
/* seq_operations for the per-table statistics file under /proc/net/stat */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2870 #endif /* CONFIG_PROC_FS */
2871
/* Worst-case netlink message size for a neighbour notification:
 * the ndmsg header plus the largest possible attribute payloads.
 */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
2880
/* Broadcast an RTM_* message about neighbour @n to RTNLGRP_NEIGH
 * listeners.  On allocation or fill failure the error is recorded on
 * the multicast group so userspace can detect that it missed events.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2905
/* Ask userspace resolvers (app probes) to resolve @n by emitting an
 * RTM_GETNEIGH request to the RTNLGRP_NEIGH group.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
2910 EXPORT_SYMBOL(neigh_app_ns);
2911
2912 #ifdef CONFIG_SYSCTL
static int zero;		/* shared lower bound for sysctl ranges */
static int int_max = INT_MAX;	/* shared upper bound for sysctl ranges */
/* cap for unres_qlen so the packets->bytes conversion cannot overflow */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2916
/* Handler for the legacy "unres_qlen" knob: the value is stored
 * internally in bytes (QUEUE_LEN_BYTES) but shown to userspace as a
 * packet count, using SKB_TRUESIZE(ETH_FRAME_LEN) as the per-packet
 * estimate for the conversion in both directions.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		/* store back in bytes */
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2934
2935 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2936                                                    int family)
2937 {
2938         switch (family) {
2939         case AF_INET:
2940                 return __in_dev_arp_parms_get_rcu(dev);
2941         case AF_INET6:
2942                 return __in6_dev_nd_parms_get_rcu(dev);
2943         }
2944         return NULL;
2945 }
2946
/* Propagate a change of the default neigh_parms value at @index to
 * every device in @net whose per-device value has not been explicitly
 * set (tracked via the data_state bitmap).
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}
2963
/* Common post-write bookkeeping for the per-parms sysctl handlers:
 * mark the value as explicitly set, fire the netevent consumed by
 * DELAY_PROBE_TIME listeners, and — for the "default" entry (no
 * device) — propagate the value to devices still using the default.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	/* recover which NEIGH_VAR_* slot this ctl_table entry targets */
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}
2980
2981 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2982                                            void __user *buffer,
2983                                            size_t *lenp, loff_t *ppos)
2984 {
2985         struct ctl_table tmp = *ctl;
2986         int ret;
2987
2988         tmp.extra1 = &zero;
2989         tmp.extra2 = &int_max;
2990
2991         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2992         neigh_proc_update(ctl, write);
2993         return ret;
2994 }
2995
2996 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2997                         void __user *buffer, size_t *lenp, loff_t *ppos)
2998 {
2999         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3000
3001         neigh_proc_update(ctl, write);
3002         return ret;
3003 }
3004 EXPORT_SYMBOL(neigh_proc_dointvec);
3005
3006 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3007                                 void __user *buffer,
3008                                 size_t *lenp, loff_t *ppos)
3009 {
3010         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3011
3012         neigh_proc_update(ctl, write);
3013         return ret;
3014 }
3015 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3016
3017 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3018                                               void __user *buffer,
3019                                               size_t *lenp, loff_t *ppos)
3020 {
3021         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3022
3023         neigh_proc_update(ctl, write);
3024         return ret;
3025 }
3026
3027 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3028                                    void __user *buffer,
3029                                    size_t *lenp, loff_t *ppos)
3030 {
3031         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3032
3033         neigh_proc_update(ctl, write);
3034         return ret;
3035 }
3036 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3037
3038 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3039                                           void __user *buffer,
3040                                           size_t *lenp, loff_t *ppos)
3041 {
3042         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3043
3044         neigh_proc_update(ctl, write);
3045         return ret;
3046 }
3047
/* Handler for base_reachable_time(_ms): besides storing the value it
 * refreshes p->reachable_time immediately, so the change takes effect
 * without waiting for neigh_periodic_work to recompute it.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}
3072
/* Placeholder "pointer": the offset of parms->data[index] relative to
 * a NULL base.  neigh_sysctl_register() later rebases it by adding the
 * address of the real neigh_parms.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Build one ctl_table slot for NEIGH_VAR_<attr>, exposing the storage
 * of NEIGH_VAR_<data_attr> (the same attr except for the legacy
 * "reused" aliases below) through the given proc handler.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Legacy aliases: a second sysctl name backed by another var's storage */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3102
/* Template cloned (kmemdup) for every registration.  The per-parms
 * entries hold NEIGH_PARMS_DATA_OFFSET placeholders until rebased in
 * neigh_sysctl_register(); the GC entries are per-table and are either
 * pointed at the table fields (default registration) or stripped
 * (per-device registration).
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
3157
/* Create the neighbour sysctl tree under net/<ipv4|ipv6>/neigh/<name>/.
 *
 * @dev: device the parms belong to, or NULL for the table defaults
 * @p: neigh_parms whose data[] backs the per-parms entries
 * @handler: optional protocol override for the retrans/reachable-time
 *	     handlers; when NULL the default handlers that refresh
 *	     p->reachable_time are installed
 *
 * Returns 0 on success or -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* rebase the template's placeholder data offsets onto @p */
	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
3245 EXPORT_SYMBOL(neigh_sysctl_register);
3246
3247 void neigh_sysctl_unregister(struct neigh_parms *p)
3248 {
3249         if (p->sysctl_table) {
3250                 struct neigh_sysctl_table *t = p->sysctl_table;
3251                 p->sysctl_table = NULL;
3252                 unregister_net_sysctl_table(t->sysctl_header);
3253                 kfree(t);
3254         }
3255 }
3256 EXPORT_SYMBOL(neigh_sysctl_unregister);
3257
3258 #endif  /* CONFIG_SYSCTL */
3259
/* Register the address-family-agnostic (PF_UNSPEC) rtnetlink handlers
 * for neighbour entries and neighbour tables at boot.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}
3272
3273 subsys_initcall(neigh_init);
3274