1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
25
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to three hash tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40    3. Hash table by (saddr,family) to find SAs by source address.  */
41
42 static DEFINE_SPINLOCK(xfrm_state_lock);
43
44 /* Hash table to find the appropriate SA towards a given target (endpoint
45  * of a tunnel or destination of transport mode) allowed by the selector.
46  *
47  * Main use is finding an SA after the policy has selected tunnel or
48  * transport mode.  Also used by the ah/esp icmp error handler to find
49  * the offending SA.  */
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
55 static unsigned int xfrm_state_num;
56 static unsigned int xfrm_state_genid;
57
58 static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
59 {
60         return ntohl(addr->a4);
61 }
62
63 static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
64 {
65         return ntohl(addr->a6[2]^addr->a6[3]);
66 }
67
68 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
69                                            u32 reqid, unsigned short family,
70                                            unsigned int hmask)
71 {
72         unsigned int h = family ^ reqid;
73         switch (family) {
74         case AF_INET:
75                 h ^= __xfrm4_addr_hash(addr);
76                 break;
77         case AF_INET6:
78                 h ^= __xfrm6_addr_hash(addr);
79                 break;
80         }
81         return (h ^ (h >> 16)) & hmask;
82 }
83
84 static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
85                                          unsigned short family)
86 {
87         return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
88 }
89
90 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
91                                        unsigned int hmask)
92 {
93         unsigned int h = family;
94         switch (family) {
95         case AF_INET:
96                 h ^= __xfrm4_addr_hash(addr);
97                 break;
98         case AF_INET6:
99                 h ^= __xfrm6_addr_hash(addr);
100                 break;
101         }
102         return (h ^ (h >> 16)) & hmask;
103 }
104
105 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
106 {
107         return __xfrm_src_hash(addr, family, xfrm_state_hmask);
108 }
109
110 static inline unsigned int
111 __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
112                 unsigned int hmask)
113 {
114         unsigned int h = spi ^ proto;
115         switch (family) {
116         case AF_INET:
117                 h ^= __xfrm4_addr_hash(addr);
118                 break;
119         case AF_INET6:
120                 h ^= __xfrm6_addr_hash(addr);
121                 break;
122         }
123         return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
124 }
125
126 static inline unsigned int
127 xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
128 {
129         return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
130 }
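/*
 * All three hash helpers above fold the high-order bits of h back into
 * the low-order bits before applying hmask, since the mask only keeps
 * the low bits of the value.  Illustrative bucket walk (hypothetical
 * variables, mirroring how the lookup functions below use them):
 *
 *	unsigned int h = xfrm_dst_hash(&daddr, reqid, AF_INET);
 *	hlist_for_each_entry(x, entry, xfrm_state_bydst + h, bydst)
 *		...;
 */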
131
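/*
 * Table allocation strategy: tables up to PAGE_SIZE come from kmalloc();
 * larger ones use __vmalloc() when hashdist is set (large hashes spread
 * via vmalloc on NUMA boxes), otherwise whole pages from the page
 * allocator.  xfrm_state_hash_free() must mirror the same choice.
 */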
132 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
133 {
134         struct hlist_head *n;
135
136         if (sz <= PAGE_SIZE)
137                 n = kmalloc(sz, GFP_KERNEL);
138         else if (hashdist)
139                 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
140         else
141                 n = (struct hlist_head *)
142                         __get_free_pages(GFP_KERNEL, get_order(sz));
143
144         if (n)
145                 memset(n, 0, sz);
146
147         return n;
148 }
149
150 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
151 {
152         if (sz <= PAGE_SIZE)
153                 kfree(n);
154         else if (hashdist)
155                 vfree(n);
156         else
157                 free_pages((unsigned long)n, get_order(sz));
158 }
159
160 static void xfrm_hash_transfer(struct hlist_head *list,
161                                struct hlist_head *ndsttable,
162                                struct hlist_head *nsrctable,
163                                struct hlist_head *nspitable,
164                                unsigned int nhashmask)
165 {
166         struct hlist_node *entry, *tmp;
167         struct xfrm_state *x;
168
169         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
170                 unsigned int h;
171
172                 h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
173                                     x->props.family, nhashmask);
174                 hlist_add_head(&x->bydst, ndsttable+h);
175
176                 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
177                                     nhashmask);
178                 hlist_add_head(&x->bysrc, nsrctable+h);
179
180                 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
181                                     x->props.family, nhashmask);
182                 hlist_add_head(&x->byspi, nspitable+h);
183         }
184 }
185
186 static unsigned long xfrm_hash_new_size(void)
187 {
188         return ((xfrm_state_hmask + 1) << 1) *
189                 sizeof(struct hlist_head);
190 }
191
192 static DEFINE_MUTEX(hash_resize_mutex);
193
194 static void xfrm_hash_resize(void *__unused)
195 {
196         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
197         unsigned long nsize, osize;
198         unsigned int nhashmask, ohashmask;
199         int i;
200
201         mutex_lock(&hash_resize_mutex);
202
203         nsize = xfrm_hash_new_size();
204         ndst = xfrm_state_hash_alloc(nsize);
205         if (!ndst)
206                 goto out_unlock;
207         nsrc = xfrm_state_hash_alloc(nsize);
208         if (!nsrc) {
209                 xfrm_state_hash_free(ndst, nsize);
210                 goto out_unlock;
211         }
212         nspi = xfrm_state_hash_alloc(nsize);
213         if (!nspi) {
214                 xfrm_state_hash_free(ndst, nsize);
215                 xfrm_state_hash_free(nsrc, nsize);
216                 goto out_unlock;
217         }
218
219         spin_lock_bh(&xfrm_state_lock);
220
221         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
222         for (i = xfrm_state_hmask; i >= 0; i--)
223                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
224                                    nhashmask);
225
226         odst = xfrm_state_bydst;
227         osrc = xfrm_state_bysrc;
228         ospi = xfrm_state_byspi;
229         ohashmask = xfrm_state_hmask;
230
231         xfrm_state_bydst = ndst;
232         xfrm_state_bysrc = nsrc;
233         xfrm_state_byspi = nspi;
234         xfrm_state_hmask = nhashmask;
235
236         spin_unlock_bh(&xfrm_state_lock);
237
238         osize = (ohashmask + 1) * sizeof(struct hlist_head);
239         xfrm_state_hash_free(odst, osize);
240         xfrm_state_hash_free(osrc, osize);
241         xfrm_state_hash_free(ospi, osize);
242
243 out_unlock:
244         mutex_unlock(&hash_resize_mutex);
245 }
246
247 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
248
249 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
250 EXPORT_SYMBOL(km_waitq);
251
252 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
253 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
254
255 static struct work_struct xfrm_state_gc_work;
256 static HLIST_HEAD(xfrm_state_gc_list);
257 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
258
259 int __xfrm_state_delete(struct xfrm_state *x);
260
261 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
262 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
263
264 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
265 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
266
267 static void xfrm_state_gc_destroy(struct xfrm_state *x)
268 {
269         del_timer_sync(&x->timer);
270         del_timer_sync(&x->rtimer);
271         kfree(x->aalg);
272         kfree(x->ealg);
273         kfree(x->calg);
274         kfree(x->encap);
275         kfree(x->coaddr);
276         if (x->mode)
277                 xfrm_put_mode(x->mode);
278         if (x->type) {
279                 x->type->destructor(x);
280                 xfrm_put_type(x->type);
281         }
282         security_xfrm_state_free(x);
283         kfree(x);
284 }
285
286 static void xfrm_state_gc_task(void *data)
287 {
288         struct xfrm_state *x;
289         struct hlist_node *entry, *tmp;
290         struct hlist_head gc_list;
291
292         spin_lock_bh(&xfrm_state_gc_lock);
293         gc_list.first = xfrm_state_gc_list.first;
294         INIT_HLIST_HEAD(&xfrm_state_gc_list);
295         spin_unlock_bh(&xfrm_state_gc_lock);
296
297         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
298                 xfrm_state_gc_destroy(x);
299
300         wake_up(&km_waitq);
301 }
302
303 static inline unsigned long make_jiffies(long secs)
304 {
305         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
306                 return MAX_SCHEDULE_TIMEOUT-1;
307         else
308                 return secs*HZ;
309 }
310
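/*
 * Per-state lifetime timer.  Hard add/use expiry deletes the state (an
 * SPI-less ACQ state is instead marked XFRM_STATE_EXPIRED and km_waitq
 * is woken); soft expiry only sets km.dying and notifies the key manager
 * once.  The timer is then re-armed for the nearest remaining deadline.
 */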
311 static void xfrm_timer_handler(unsigned long data)
312 {
313         struct xfrm_state *x = (struct xfrm_state*)data;
314         unsigned long now = (unsigned long)xtime.tv_sec;
315         long next = LONG_MAX;
316         int warn = 0;
317
318         spin_lock(&x->lock);
319         if (x->km.state == XFRM_STATE_DEAD)
320                 goto out;
321         if (x->km.state == XFRM_STATE_EXPIRED)
322                 goto expired;
323         if (x->lft.hard_add_expires_seconds) {
324                 long tmo = x->lft.hard_add_expires_seconds +
325                         x->curlft.add_time - now;
326                 if (tmo <= 0)
327                         goto expired;
328                 if (tmo < next)
329                         next = tmo;
330         }
331         if (x->lft.hard_use_expires_seconds) {
332                 long tmo = x->lft.hard_use_expires_seconds +
333                         (x->curlft.use_time ? : now) - now;
334                 if (tmo <= 0)
335                         goto expired;
336                 if (tmo < next)
337                         next = tmo;
338         }
339         if (x->km.dying)
340                 goto resched;
341         if (x->lft.soft_add_expires_seconds) {
342                 long tmo = x->lft.soft_add_expires_seconds +
343                         x->curlft.add_time - now;
344                 if (tmo <= 0)
345                         warn = 1;
346                 else if (tmo < next)
347                         next = tmo;
348         }
349         if (x->lft.soft_use_expires_seconds) {
350                 long tmo = x->lft.soft_use_expires_seconds +
351                         (x->curlft.use_time ? : now) - now;
352                 if (tmo <= 0)
353                         warn = 1;
354                 else if (tmo < next)
355                         next = tmo;
356         }
357
358         x->km.dying = warn;
359         if (warn)
360                 km_state_expired(x, 0, 0);
361 resched:
362         if (next != LONG_MAX)
363                 mod_timer(&x->timer, jiffies + make_jiffies(next));
364
365         goto out;
366
367 expired:
368         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
369                 x->km.state = XFRM_STATE_EXPIRED;
370                 wake_up(&km_waitq);
371                 next = 2;
372                 goto resched;
373         }
374         if (!__xfrm_state_delete(x) && x->id.spi)
375                 km_state_expired(x, 1, 0);
376
377 out:
378         spin_unlock(&x->lock);
379 }
380
381 static void xfrm_replay_timer_handler(unsigned long data);
382
383 struct xfrm_state *xfrm_state_alloc(void)
384 {
385         struct xfrm_state *x;
386
387         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
388
389         if (x) {
390                 atomic_set(&x->refcnt, 1);
391                 atomic_set(&x->tunnel_users, 0);
392                 INIT_HLIST_NODE(&x->bydst);
393                 INIT_HLIST_NODE(&x->bysrc);
394                 INIT_HLIST_NODE(&x->byspi);
395                 init_timer(&x->timer);
396                 x->timer.function = xfrm_timer_handler;
397                 x->timer.data     = (unsigned long)x;
398                 init_timer(&x->rtimer);
399                 x->rtimer.function = xfrm_replay_timer_handler;
400                 x->rtimer.data     = (unsigned long)x;
401                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
402                 x->lft.soft_byte_limit = XFRM_INF;
403                 x->lft.soft_packet_limit = XFRM_INF;
404                 x->lft.hard_byte_limit = XFRM_INF;
405                 x->lft.hard_packet_limit = XFRM_INF;
406                 x->replay_maxage = 0;
407                 x->replay_maxdiff = 0;
408                 spin_lock_init(&x->lock);
409         }
410         return x;
411 }
412 EXPORT_SYMBOL(xfrm_state_alloc);
413
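/*
 * Dead states are not torn down inline: __xfrm_state_destroy() chains
 * them (reusing the bydst node) onto xfrm_state_gc_list and schedules
 * xfrm_state_gc_task(), which runs from the workqueue and can safely
 * del_timer_sync() the timers and free the state.
 */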
414 void __xfrm_state_destroy(struct xfrm_state *x)
415 {
416         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
417
418         spin_lock_bh(&xfrm_state_gc_lock);
419         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
420         spin_unlock_bh(&xfrm_state_gc_lock);
421         schedule_work(&xfrm_state_gc_work);
422 }
423 EXPORT_SYMBOL(__xfrm_state_destroy);
424
425 int __xfrm_state_delete(struct xfrm_state *x)
426 {
427         int err = -ESRCH;
428
429         if (x->km.state != XFRM_STATE_DEAD) {
430                 x->km.state = XFRM_STATE_DEAD;
431                 spin_lock(&xfrm_state_lock);
432                 hlist_del(&x->bydst);
433                 hlist_del(&x->bysrc);
434                 if (x->id.spi)
435                         hlist_del(&x->byspi);
436                 xfrm_state_num--;
437                 spin_unlock(&xfrm_state_lock);
438
439                 /* All xfrm_state objects are created by xfrm_state_alloc.
440                  * The xfrm_state_alloc call gives a reference, and that
441                  * is what we are dropping here.
442                  */
443                 __xfrm_state_put(x);
444                 err = 0;
445         }
446
447         return err;
448 }
449 EXPORT_SYMBOL(__xfrm_state_delete);
450
451 int xfrm_state_delete(struct xfrm_state *x)
452 {
453         int err;
454
455         spin_lock_bh(&x->lock);
456         err = __xfrm_state_delete(x);
457         spin_unlock_bh(&x->lock);
458
459         return err;
460 }
461 EXPORT_SYMBOL(xfrm_state_delete);
462
463 void xfrm_state_flush(u8 proto)
464 {
465         int i;
466
467         spin_lock_bh(&xfrm_state_lock);
468         for (i = 0; i <= xfrm_state_hmask; i++) {
469                 struct hlist_node *entry;
470                 struct xfrm_state *x;
471 restart:
472                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
473                         if (!xfrm_state_kern(x) &&
474                             xfrm_id_proto_match(x->id.proto, proto)) {
475                                 xfrm_state_hold(x);
476                                 spin_unlock_bh(&xfrm_state_lock);
477
478                                 xfrm_state_delete(x);
479                                 xfrm_state_put(x);
480
481                                 spin_lock_bh(&xfrm_state_lock);
482                                 goto restart;
483                         }
484                 }
485         }
486         spin_unlock_bh(&xfrm_state_lock);
487         wake_up(&km_waitq);
488 }
489 EXPORT_SYMBOL(xfrm_state_flush);
490
491 static int
492 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
493                   struct xfrm_tmpl *tmpl,
494                   xfrm_address_t *daddr, xfrm_address_t *saddr,
495                   unsigned short family)
496 {
497         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
498         if (!afinfo)
499                 return -1;
500         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
501         xfrm_state_put_afinfo(afinfo);
502         return 0;
503 }
504
505 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
506 {
507         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
508         struct xfrm_state *x;
509         struct hlist_node *entry;
510
511         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
512                 if (x->props.family != family ||
513                     x->id.spi       != spi ||
514                     x->id.proto     != proto)
515                         continue;
516
517                 switch (family) {
518                 case AF_INET:
519                         if (x->id.daddr.a4 != daddr->a4)
520                                 continue;
521                         break;
522                 case AF_INET6:
523                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
524                                              (struct in6_addr *)
525                                              x->id.daddr.a6))
526                                 continue;
527                         break;
528                 }
529
530                 xfrm_state_hold(x);
531                 return x;
532         }
533
534         return NULL;
535 }
536
537 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
538 {
539         unsigned int h = xfrm_src_hash(saddr, family);
540         struct xfrm_state *x;
541         struct hlist_node *entry;
542
543         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
544                 if (x->props.family != family ||
545                     x->id.proto     != proto)
546                         continue;
547
548                 switch (family) {
549                 case AF_INET:
550                         if (x->id.daddr.a4 != daddr->a4 ||
551                             x->props.saddr.a4 != saddr->a4)
552                                 continue;
553                         break;
554                 case AF_INET6:
555                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
556                                              (struct in6_addr *)
557                                              x->id.daddr.a6) ||
558                             !ipv6_addr_equal((struct in6_addr *)saddr,
559                                              (struct in6_addr *)
560                                              x->props.saddr.a6))
561                                 continue;
562                         break;
563                 }
564
565                 xfrm_state_hold(x);
566                 return x;
567         }
568
569         return NULL;
570 }
571
572 static inline struct xfrm_state *
573 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
574 {
575         if (use_spi)
576                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
577                                            x->id.proto, family);
578         else
579                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
580                                                   &x->props.saddr,
581                                                   x->id.proto, family);
582 }
583
584 struct xfrm_state *
585 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
586                 struct flowi *fl, struct xfrm_tmpl *tmpl,
587                 struct xfrm_policy *pol, int *err,
588                 unsigned short family)
589 {
590         unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
591         struct hlist_node *entry;
592         struct xfrm_state *x, *x0;
593         int acquire_in_progress = 0;
594         int error = 0;
595         struct xfrm_state *best = NULL;
596         
597         spin_lock_bh(&xfrm_state_lock);
598         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
599                 if (x->props.family == family &&
600                     x->props.reqid == tmpl->reqid &&
601                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
602                     xfrm_state_addr_check(x, daddr, saddr, family) &&
603                     tmpl->mode == x->props.mode &&
604                     tmpl->id.proto == x->id.proto &&
605                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
606                         /* Resolution logic:
607                            1. There is a valid state with matching selector.
608                               Done.
609                            2. Valid state with inappropriate selector. Skip.
610
611                            Entering area of "sysdeps".
612
613                            3. If state is not valid, selector is temporary,
614                               it selects only session which triggered
615                               previous resolution. Key manager will do
616                               something to install a state with proper
617                               selector.
618                          */
619                         if (x->km.state == XFRM_STATE_VALID) {
620                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
621                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
622                                         continue;
623                                 if (!best ||
624                                     best->km.dying > x->km.dying ||
625                                     (best->km.dying == x->km.dying &&
626                                      best->curlft.add_time < x->curlft.add_time))
627                                         best = x;
628                         } else if (x->km.state == XFRM_STATE_ACQ) {
629                                 acquire_in_progress = 1;
630                         } else if (x->km.state == XFRM_STATE_ERROR ||
631                                    x->km.state == XFRM_STATE_EXPIRED) {
632                                 if (xfrm_selector_match(&x->sel, fl, family) &&
633                                     security_xfrm_state_pol_flow_match(x, pol, fl))
634                                         error = -ESRCH;
635                         }
636                 }
637         }
638
639         x = best;
640         if (!x && !error && !acquire_in_progress) {
641                 if (tmpl->id.spi &&
642                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
643                                               tmpl->id.proto, family)) != NULL) {
644                         xfrm_state_put(x0);
645                         error = -EEXIST;
646                         goto out;
647                 }
648                 x = xfrm_state_alloc();
649                 if (x == NULL) {
650                         error = -ENOMEM;
651                         goto out;
652                 }
653                 /* Initialize temporary selector matching only
654                  * to current session. */
655                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
656
657                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
658                 if (error) {
659                         x->km.state = XFRM_STATE_DEAD;
660                         xfrm_state_put(x);
661                         x = NULL;
662                         goto out;
663                 }
664
665                 if (km_query(x, tmpl, pol) == 0) {
666                         x->km.state = XFRM_STATE_ACQ;
667                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
668                         h = xfrm_src_hash(saddr, family);
669                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
670                         if (x->id.spi) {
671                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
672                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
673                         }
674                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
675                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
676                         add_timer(&x->timer);
677                 } else {
678                         x->km.state = XFRM_STATE_DEAD;
679                         xfrm_state_put(x);
680                         x = NULL;
681                         error = -ESRCH;
682                 }
683         }
684 out:
685         if (x)
686                 xfrm_state_hold(x);
687         else
688                 *err = acquire_in_progress ? -EAGAIN : error;
689         spin_unlock_bh(&xfrm_state_lock);
690         return x;
691 }
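/*
 * Typical use of xfrm_state_find() (a simplified sketch of the output
 * path; variable names are illustrative):
 *
 *	x = xfrm_state_find(daddr, saddr, fl, tmpl, policy, &err, family);
 *	if (!x && err == -EAGAIN)
 *		wait on km_waitq -- an ACQUIRE is already in progress;
 *
 * On success a reference is held on the returned state.
 */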
692
693 static void __xfrm_state_insert(struct xfrm_state *x)
694 {
695         unsigned int h;
696
697         x->genid = ++xfrm_state_genid;
698
699         h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
700         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
701
702         h = xfrm_src_hash(&x->props.saddr, x->props.family);
703         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
704
705         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
706                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
707                                   x->props.family);
708
709                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
710         }
711
712         mod_timer(&x->timer, jiffies + HZ);
713         if (x->replay_maxage)
714                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
715
716         wake_up(&km_waitq);
717
718         xfrm_state_num++;
719
720         if (x->bydst.next != NULL &&
721             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
722             xfrm_state_num > xfrm_state_hmask)
723                 schedule_work(&xfrm_hash_work);
724 }
725
726 /* xfrm_state_lock is held */
727 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
728 {
729         unsigned short family = xnew->props.family;
730         u32 reqid = xnew->props.reqid;
731         struct xfrm_state *x;
732         struct hlist_node *entry;
733         unsigned int h;
734
735         h = xfrm_dst_hash(&xnew->id.daddr, reqid, family);
736         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
737                 if (x->props.family     == family &&
738                     x->props.reqid      == reqid &&
739                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family))
740                         x->genid = xfrm_state_genid;
741         }
742 }
743
744 void xfrm_state_insert(struct xfrm_state *x)
745 {
746         spin_lock_bh(&xfrm_state_lock);
747         __xfrm_state_bump_genids(x);
748         __xfrm_state_insert(x);
749         spin_unlock_bh(&xfrm_state_lock);
750 }
751 EXPORT_SYMBOL(xfrm_state_insert);
752
753 /* xfrm_state_lock is held */
754 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
755 {
756         unsigned int h = xfrm_dst_hash(daddr, reqid, family);
757         struct hlist_node *entry;
758         struct xfrm_state *x;
759
760         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
761                 if (x->props.reqid  != reqid ||
762                     x->props.mode   != mode ||
763                     x->props.family != family ||
764                     x->km.state     != XFRM_STATE_ACQ ||
765                     x->id.spi       != 0)
766                         continue;
767
768                 switch (family) {
769                 case AF_INET:
770                         if (x->id.daddr.a4    != daddr->a4 ||
771                             x->props.saddr.a4 != saddr->a4)
772                                 continue;
773                         break;
774                 case AF_INET6:
775                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
776                                              (struct in6_addr *)daddr) ||
777                             !ipv6_addr_equal((struct in6_addr *)
778                                              x->props.saddr.a6,
779                                              (struct in6_addr *)saddr))
780                                 continue;
781                         break;
782                 }
783
784                 xfrm_state_hold(x);
785                 return x;
786         }
787
788         if (!create)
789                 return NULL;
790
791         x = xfrm_state_alloc();
792         if (likely(x)) {
793                 switch (family) {
794                 case AF_INET:
795                         x->sel.daddr.a4 = daddr->a4;
796                         x->sel.saddr.a4 = saddr->a4;
797                         x->sel.prefixlen_d = 32;
798                         x->sel.prefixlen_s = 32;
799                         x->props.saddr.a4 = saddr->a4;
800                         x->id.daddr.a4 = daddr->a4;
801                         break;
802
803                 case AF_INET6:
804                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
805                                        (struct in6_addr *)daddr);
806                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
807                                        (struct in6_addr *)saddr);
808                         x->sel.prefixlen_d = 128;
809                         x->sel.prefixlen_s = 128;
810                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
811                                        (struct in6_addr *)saddr);
812                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
813                                        (struct in6_addr *)daddr);
814                         break;
815                 }
816
817                 x->km.state = XFRM_STATE_ACQ;
818                 x->id.proto = proto;
819                 x->props.family = family;
820                 x->props.mode = mode;
821                 x->props.reqid = reqid;
822                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
823                 xfrm_state_hold(x);
824                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
825                 add_timer(&x->timer);
826                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
827                 h = xfrm_src_hash(saddr, family);
828                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
829                 wake_up(&km_waitq);
830         }
831
832         return x;
833 }
834
835 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
836
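/*
 * xfrm_state_add() inserts a fully specified SA.  If a matching larval
 * (XFRM_STATE_ACQ) state already exists -- found either by the km
 * sequence number or by (mode, reqid, proto, addresses) -- it is deleted
 * once the new state is in place, so a pending acquire is resolved
 * rather than duplicated.
 */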
837 int xfrm_state_add(struct xfrm_state *x)
838 {
839         struct xfrm_state *x1;
840         int family;
841         int err;
842         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
843
844         family = x->props.family;
845
846         spin_lock_bh(&xfrm_state_lock);
847
848         x1 = __xfrm_state_locate(x, use_spi, family);
849         if (x1) {
850                 xfrm_state_put(x1);
851                 x1 = NULL;
852                 err = -EEXIST;
853                 goto out;
854         }
855
856         if (use_spi && x->km.seq) {
857                 x1 = __xfrm_find_acq_byseq(x->km.seq);
858                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
859                         xfrm_state_put(x1);
860                         x1 = NULL;
861                 }
862         }
863
864         if (use_spi && !x1)
865                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
866                                      x->id.proto,
867                                      &x->id.daddr, &x->props.saddr, 0);
868
869         __xfrm_state_bump_genids(x);
870         __xfrm_state_insert(x);
871         err = 0;
872
873 out:
874         spin_unlock_bh(&xfrm_state_lock);
875
876         if (x1) {
877                 xfrm_state_delete(x1);
878                 xfrm_state_put(x1);
879         }
880
881         return err;
882 }
883 EXPORT_SYMBOL(xfrm_state_add);
884
885 int xfrm_state_update(struct xfrm_state *x)
886 {
887         struct xfrm_state *x1;
888         int err;
889         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
890
891         spin_lock_bh(&xfrm_state_lock);
892         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
893
894         err = -ESRCH;
895         if (!x1)
896                 goto out;
897
898         if (xfrm_state_kern(x1)) {
899                 xfrm_state_put(x1);
900                 err = -EEXIST;
901                 goto out;
902         }
903
904         if (x1->km.state == XFRM_STATE_ACQ) {
905                 __xfrm_state_insert(x);
906                 x = NULL;
907         }
908         err = 0;
909
910 out:
911         spin_unlock_bh(&xfrm_state_lock);
912
913         if (err)
914                 return err;
915
916         if (!x) {
917                 xfrm_state_delete(x1);
918                 xfrm_state_put(x1);
919                 return 0;
920         }
921
922         err = -EINVAL;
923         spin_lock_bh(&x1->lock);
924         if (likely(x1->km.state == XFRM_STATE_VALID)) {
925                 if (x->encap && x1->encap)
926                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
927                 if (x->coaddr && x1->coaddr) {
928                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
929                 }
930                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
931                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
932                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
933                 x1->km.dying = 0;
934
935                 mod_timer(&x1->timer, jiffies + HZ);
936                 if (x1->curlft.use_time)
937                         xfrm_state_check_expire(x1);
938
939                 err = 0;
940         }
941         spin_unlock_bh(&x1->lock);
942
943         xfrm_state_put(x1);
944
945         return err;
946 }
947 EXPORT_SYMBOL(xfrm_state_update);
948
949 int xfrm_state_check_expire(struct xfrm_state *x)
950 {
951         if (!x->curlft.use_time)
952                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
953
954         if (x->km.state != XFRM_STATE_VALID)
955                 return -EINVAL;
956
957         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
958             x->curlft.packets >= x->lft.hard_packet_limit) {
959                 x->km.state = XFRM_STATE_EXPIRED;
960                 mod_timer(&x->timer, jiffies);
961                 return -EINVAL;
962         }
963
964         if (!x->km.dying &&
965             (x->curlft.bytes >= x->lft.soft_byte_limit ||
966              x->curlft.packets >= x->lft.soft_packet_limit)) {
967                 x->km.dying = 1;
968                 km_state_expired(x, 0, 0);
969         }
970         return 0;
971 }
972 EXPORT_SYMBOL(xfrm_state_check_expire);
973
974 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
975 {
976         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
977                 - skb_headroom(skb);
978
979         if (nhead > 0)
980                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
981
982         /* Check tail too... */
983         return 0;
984 }
985
986 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
987 {
988         int err = xfrm_state_check_expire(x);
989         if (err < 0)
990                 goto err;
991         err = xfrm_state_check_space(x, skb);
992 err:
993         return err;
994 }
995 EXPORT_SYMBOL(xfrm_state_check);
996
997 struct xfrm_state *
998 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
999                   unsigned short family)
1000 {
1001         struct xfrm_state *x;
1002
1003         spin_lock_bh(&xfrm_state_lock);
1004         x = __xfrm_state_lookup(daddr, spi, proto, family);
1005         spin_unlock_bh(&xfrm_state_lock);
1006         return x;
1007 }
1008 EXPORT_SYMBOL(xfrm_state_lookup);
1009
1010 struct xfrm_state *
1011 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1012                          u8 proto, unsigned short family)
1013 {
1014         struct xfrm_state *x;
1015
1016         spin_lock_bh(&xfrm_state_lock);
1017         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1018         spin_unlock_bh(&xfrm_state_lock);
1019         return x;
1020 }
1021 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1022
1023 struct xfrm_state *
1024 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
1025               xfrm_address_t *daddr, xfrm_address_t *saddr, 
1026               int create, unsigned short family)
1027 {
1028         struct xfrm_state *x;
1029
1030         spin_lock_bh(&xfrm_state_lock);
1031         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1032         spin_unlock_bh(&xfrm_state_lock);
1033
1034         return x;
1035 }
1036 EXPORT_SYMBOL(xfrm_find_acq);
1037
1038 #ifdef CONFIG_XFRM_SUB_POLICY
1039 int
1040 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1041                unsigned short family)
1042 {
1043         int err = 0;
1044         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1045         if (!afinfo)
1046                 return -EAFNOSUPPORT;
1047
1048         spin_lock_bh(&xfrm_state_lock);
1049         if (afinfo->tmpl_sort)
1050                 err = afinfo->tmpl_sort(dst, src, n);
1051         spin_unlock_bh(&xfrm_state_lock);
1052         xfrm_state_put_afinfo(afinfo);
1053         return err;
1054 }
1055 EXPORT_SYMBOL(xfrm_tmpl_sort);
1056
1057 int
1058 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1059                 unsigned short family)
1060 {
1061         int err = 0;
1062         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1063         if (!afinfo)
1064                 return -EAFNOSUPPORT;
1065
1066         spin_lock_bh(&xfrm_state_lock);
1067         if (afinfo->state_sort)
1068                 err = afinfo->state_sort(dst, src, n);
1069         spin_unlock_bh(&xfrm_state_lock);
1070         xfrm_state_put_afinfo(afinfo);
1071         return err;
1072 }
1073 EXPORT_SYMBOL(xfrm_state_sort);
1074 #endif
1075
1076 /* Silly enough, but I'm too lazy to build a resolution list. */
1077
1078 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1079 {
1080         int i;
1081
1082         for (i = 0; i <= xfrm_state_hmask; i++) {
1083                 struct hlist_node *entry;
1084                 struct xfrm_state *x;
1085
1086                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1087                         if (x->km.seq == seq &&
1088                             x->km.state == XFRM_STATE_ACQ) {
1089                                 xfrm_state_hold(x);
1090                                 return x;
1091                         }
1092                 }
1093         }
1094         return NULL;
1095 }
1096
1097 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1098 {
1099         struct xfrm_state *x;
1100
1101         spin_lock_bh(&xfrm_state_lock);
1102         x = __xfrm_find_acq_byseq(seq);
1103         spin_unlock_bh(&xfrm_state_lock);
1104         return x;
1105 }
1106 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1107
1108 u32 xfrm_get_acqseq(void)
1109 {
1110         u32 res;
1111         static u32 acqseq;
1112         static DEFINE_SPINLOCK(acqseq_lock);
1113
1114         spin_lock_bh(&acqseq_lock);
1115         res = (++acqseq ? : ++acqseq);
1116         spin_unlock_bh(&acqseq_lock);
1117         return res;
1118 }
1119 EXPORT_SYMBOL(xfrm_get_acqseq);
1120
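/*
 * SPI selection: with minspi == maxspi the caller requests one exact
 * value; otherwise up to (maxspi - minspi + 1) random candidates in the
 * host-order range are probed against the existing states.  On success
 * the state is hashed into the byspi table and km_waitq is woken; on
 * failure x->id.spi is left at zero for the caller to notice.
 */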
1121 void
1122 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1123 {
1124         unsigned int h;
1125         struct xfrm_state *x0;
1126
1127         if (x->id.spi)
1128                 return;
1129
1130         if (minspi == maxspi) {
1131                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1132                 if (x0) {
1133                         xfrm_state_put(x0);
1134                         return;
1135                 }
1136                 x->id.spi = minspi;
1137         } else {
1138                 u32 spi = 0;
1139                 minspi = ntohl(minspi);
1140                 maxspi = ntohl(maxspi);
1141                 for (h=0; h<maxspi-minspi+1; h++) {
1142                         spi = minspi + net_random()%(maxspi-minspi+1);
1143                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1144                         if (x0 == NULL) {
1145                                 x->id.spi = htonl(spi);
1146                                 break;
1147                         }
1148                         xfrm_state_put(x0);
1149                 }
1150         }
1151         if (x->id.spi) {
1152                 spin_lock_bh(&xfrm_state_lock);
1153                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1154                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1155                 spin_unlock_bh(&xfrm_state_lock);
1156                 wake_up(&km_waitq);
1157         }
1158 }
1159 EXPORT_SYMBOL(xfrm_alloc_spi);
1160
1161 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1162                     void *data)
1163 {
1164         int i;
1165         struct xfrm_state *x;
1166         struct hlist_node *entry;
1167         int count = 0;
1168         int err = 0;
1169
1170         spin_lock_bh(&xfrm_state_lock);
1171         for (i = 0; i <= xfrm_state_hmask; i++) {
1172                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1173                         if (xfrm_id_proto_match(x->id.proto, proto))
1174                                 count++;
1175                 }
1176         }
1177         if (count == 0) {
1178                 err = -ENOENT;
1179                 goto out;
1180         }
1181
1182         for (i = 0; i <= xfrm_state_hmask; i++) {
1183                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1184                         if (!xfrm_id_proto_match(x->id.proto, proto))
1185                                 continue;
1186                         err = func(x, --count, data);
1187                         if (err)
1188                                 goto out;
1189                 }
1190         }
1191 out:
1192         spin_unlock_bh(&xfrm_state_lock);
1193         return err;
1194 }
1195 EXPORT_SYMBOL(xfrm_state_walk);
1196
1197
1198 void xfrm_replay_notify(struct xfrm_state *x, int event)
1199 {
1200         struct km_event c;
1201         /* We send notify messages in two cases:
1202          *  1. we updated one of the sequence numbers, and the seqno difference
1203          *     is at least x->replay_maxdiff; in this case we also update the
1204          *     timeout of our timer function
1205          *  2. x->replay_maxage has elapsed since the last update,
1206          *     and there were changes
1207          *
1208          *  The state structure must be locked!
1209          */
1210
1211         switch (event) {
1212         case XFRM_REPLAY_UPDATE:
1213                 if (x->replay_maxdiff &&
1214                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1215                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1216                         if (x->xflags & XFRM_TIME_DEFER)
1217                                 event = XFRM_REPLAY_TIMEOUT;
1218                         else
1219                                 return;
1220                 }
1221
1222                 break;
1223
1224         case XFRM_REPLAY_TIMEOUT:
1225                 if ((x->replay.seq == x->preplay.seq) &&
1226                     (x->replay.bitmap == x->preplay.bitmap) &&
1227                     (x->replay.oseq == x->preplay.oseq)) {
1228                         x->xflags |= XFRM_TIME_DEFER;
1229                         return;
1230                 }
1231
1232                 break;
1233         }
1234
1235         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1236         c.event = XFRM_MSG_NEWAE;
1237         c.data.aevent = event;
1238         km_state_notify(x, &c);
1239
1240         if (x->replay_maxage &&
1241             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1242                 x->xflags &= ~XFRM_TIME_DEFER;
1243 }
1244 EXPORT_SYMBOL(xfrm_replay_notify);
1245
1246 static void xfrm_replay_timer_handler(unsigned long data)
1247 {
1248         struct xfrm_state *x = (struct xfrm_state*)data;
1249
1250         spin_lock(&x->lock);
1251
1252         if (x->km.state == XFRM_STATE_VALID) {
1253                 if (xfrm_aevent_is_on())
1254                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1255                 else
1256                         x->xflags |= XFRM_TIME_DEFER;
1257         }
1258
1259         spin_unlock(&x->lock);
1260 }
1261
1262 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1263 {
1264         u32 diff;
1265
1266         seq = ntohl(seq);
1267
1268         if (unlikely(seq == 0))
1269                 return -EINVAL;
1270
1271         if (likely(seq > x->replay.seq))
1272                 return 0;
1273
1274         diff = x->replay.seq - seq;
1275         if (diff >= x->props.replay_window) {
1276                 x->stats.replay_window++;
1277                 return -EINVAL;
1278         }
1279
1280         if (x->replay.bitmap & (1U << diff)) {
1281                 x->stats.replay++;
1282                 return -EINVAL;
1283         }
1284         return 0;
1285 }
1286 EXPORT_SYMBOL(xfrm_replay_check);
1287
1288 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1289 {
1290         u32 diff;
1291
1292         seq = ntohl(seq);
1293
1294         if (seq > x->replay.seq) {
1295                 diff = seq - x->replay.seq;
1296                 if (diff < x->props.replay_window)
1297                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1298                 else
1299                         x->replay.bitmap = 1;
1300                 x->replay.seq = seq;
1301         } else {
1302                 diff = x->replay.seq - seq;
1303                 x->replay.bitmap |= (1U << diff);
1304         }
1305
1306         if (xfrm_aevent_is_on())
1307                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1308 }
1309 EXPORT_SYMBOL(xfrm_replay_advance);
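/*
 * Worked example of the replay window (illustrative numbers): with
 * props.replay_window = 32 and replay.seq = 100, an incoming seq 101
 * shifts the bitmap left by one and sets bit 0; an older seq 95 (diff 5)
 * sets bit 5; a duplicate, or anything with diff >= 32, is rejected by
 * xfrm_replay_check() before xfrm_replay_advance() is reached.
 */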
1310
1311 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1312 static DEFINE_RWLOCK(xfrm_km_lock);
1313
1314 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1315 {
1316         struct xfrm_mgr *km;
1317
1318         read_lock(&xfrm_km_lock);
1319         list_for_each_entry(km, &xfrm_km_list, list)
1320                 if (km->notify_policy)
1321                         km->notify_policy(xp, dir, c);
1322         read_unlock(&xfrm_km_lock);
1323 }
1324
1325 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1326 {
1327         struct xfrm_mgr *km;
1328         read_lock(&xfrm_km_lock);
1329         list_for_each_entry(km, &xfrm_km_list, list)
1330                 if (km->notify)
1331                         km->notify(x, c);
1332         read_unlock(&xfrm_km_lock);
1333 }
1334
1335 EXPORT_SYMBOL(km_policy_notify);
1336 EXPORT_SYMBOL(km_state_notify);
1337
1338 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1339 {
1340         struct km_event c;
1341
1342         c.data.hard = hard;
1343         c.pid = pid;
1344         c.event = XFRM_MSG_EXPIRE;
1345         km_state_notify(x, &c);
1346
1347         if (hard)
1348                 wake_up(&km_waitq);
1349 }
1350
1351 EXPORT_SYMBOL(km_state_expired);
1352 /*
1353  * We send to all registered managers regardless of failure;
1354  * we are happy with one success.
1355  */
1356 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1357 {
1358         int err = -EINVAL, acqret;
1359         struct xfrm_mgr *km;
1360
1361         read_lock(&xfrm_km_lock);
1362         list_for_each_entry(km, &xfrm_km_list, list) {
1363                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1364                 if (!acqret)
1365                         err = acqret;
1366         }
1367         read_unlock(&xfrm_km_lock);
1368         return err;
1369 }
1370 EXPORT_SYMBOL(km_query);
1371
1372 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1373 {
1374         int err = -EINVAL;
1375         struct xfrm_mgr *km;
1376
1377         read_lock(&xfrm_km_lock);
1378         list_for_each_entry(km, &xfrm_km_list, list) {
1379                 if (km->new_mapping)
1380                         err = km->new_mapping(x, ipaddr, sport);
1381                 if (!err)
1382                         break;
1383         }
1384         read_unlock(&xfrm_km_lock);
1385         return err;
1386 }
1387 EXPORT_SYMBOL(km_new_mapping);
1388
1389 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1390 {
1391         struct km_event c;
1392
1393         c.data.hard = hard;
1394         c.pid = pid;
1395         c.event = XFRM_MSG_POLEXPIRE;
1396         km_policy_notify(pol, dir, &c);
1397
1398         if (hard)
1399                 wake_up(&km_waitq);
1400 }
1401 EXPORT_SYMBOL(km_policy_expired);
1402
1403 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1404 {
1405         int err = -EINVAL;
1406         int ret;
1407         struct xfrm_mgr *km;
1408
1409         read_lock(&xfrm_km_lock);
1410         list_for_each_entry(km, &xfrm_km_list, list) {
1411                 if (km->report) {
1412                         ret = km->report(proto, sel, addr);
1413                         if (!ret)
1414                                 err = ret;
1415                 }
1416         }
1417         read_unlock(&xfrm_km_lock);
1418         return err;
1419 }
1420 EXPORT_SYMBOL(km_report);
1421
1422 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1423 {
1424         int err;
1425         u8 *data;
1426         struct xfrm_mgr *km;
1427         struct xfrm_policy *pol = NULL;
1428
1429         if (optlen <= 0 || optlen > PAGE_SIZE)
1430                 return -EMSGSIZE;
1431
1432         data = kmalloc(optlen, GFP_KERNEL);
1433         if (!data)
1434                 return -ENOMEM;
1435
1436         err = -EFAULT;
1437         if (copy_from_user(data, optval, optlen))
1438                 goto out;
1439
1440         err = -EINVAL;
1441         read_lock(&xfrm_km_lock);
1442         list_for_each_entry(km, &xfrm_km_list, list) {
1443                 pol = km->compile_policy(sk, optname, data,
1444                                          optlen, &err);
1445                 if (err >= 0)
1446                         break;
1447         }
1448         read_unlock(&xfrm_km_lock);
1449
1450         if (err >= 0) {
1451                 xfrm_sk_policy_insert(sk, err, pol);
1452                 xfrm_pol_put(pol);
1453                 err = 0;
1454         }
1455
1456 out:
1457         kfree(data);
1458         return err;
1459 }
1460 EXPORT_SYMBOL(xfrm_user_policy);
1461
1462 int xfrm_register_km(struct xfrm_mgr *km)
1463 {
1464         write_lock_bh(&xfrm_km_lock);
1465         list_add_tail(&km->list, &xfrm_km_list);
1466         write_unlock_bh(&xfrm_km_lock);
1467         return 0;
1468 }
1469 EXPORT_SYMBOL(xfrm_register_km);
1470
1471 int xfrm_unregister_km(struct xfrm_mgr *km)
1472 {
1473         write_lock_bh(&xfrm_km_lock);
1474         list_del(&km->list);
1475         write_unlock_bh(&xfrm_km_lock);
1476         return 0;
1477 }
1478 EXPORT_SYMBOL(xfrm_unregister_km);
1479
1480 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1481 {
1482         int err = 0;
1483         if (unlikely(afinfo == NULL))
1484                 return -EINVAL;
1485         if (unlikely(afinfo->family >= NPROTO))
1486                 return -EAFNOSUPPORT;
1487         write_lock_bh(&xfrm_state_afinfo_lock);
1488         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1489                 err = -ENOBUFS;
1490         else
1491                 xfrm_state_afinfo[afinfo->family] = afinfo;
1492         write_unlock_bh(&xfrm_state_afinfo_lock);
1493         return err;
1494 }
1495 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1496
1497 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1498 {
1499         int err = 0;
1500         if (unlikely(afinfo == NULL))
1501                 return -EINVAL;
1502         if (unlikely(afinfo->family >= NPROTO))
1503                 return -EAFNOSUPPORT;
1504         write_lock_bh(&xfrm_state_afinfo_lock);
1505         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1506                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1507                         err = -EINVAL;
1508                 else
1509                         xfrm_state_afinfo[afinfo->family] = NULL;
1510         }
1511         write_unlock_bh(&xfrm_state_afinfo_lock);
1512         return err;
1513 }
1514 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1515
1516 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1517 {
1518         struct xfrm_state_afinfo *afinfo;
1519         if (unlikely(family >= NPROTO))
1520                 return NULL;
1521         read_lock(&xfrm_state_afinfo_lock);
1522         afinfo = xfrm_state_afinfo[family];
1523         if (unlikely(!afinfo))
1524                 read_unlock(&xfrm_state_afinfo_lock);
1525         return afinfo;
1526 }
1527
1528 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1529 {
1530         read_unlock(&xfrm_state_afinfo_lock);
1531 }
1532
1533 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1534 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1535 {
1536         if (x->tunnel) {
1537                 struct xfrm_state *t = x->tunnel;
1538
1539                 if (atomic_read(&t->tunnel_users) == 2)
1540                         xfrm_state_delete(t);
1541                 atomic_dec(&t->tunnel_users);
1542                 xfrm_state_put(t);
1543                 x->tunnel = NULL;
1544         }
1545 }
1546 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1547
1548 /*
1549  * This function is NOT optimal.  For example, with ESP it will give an
1550  * MTU that's usually two bytes short of being optimal.  However, it will
1551  * usually give an answer that's a multiple of 4 provided the input is
1552  * also a multiple of 4.
1553  */
1554 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1555 {
1556         int res = mtu;
1557
1558         res -= x->props.header_len;
1559
1560         for (;;) {
1561                 int m = res;
1562
1563                 if (m < 68)
1564                         return 68;
1565
1566                 spin_lock_bh(&x->lock);
1567                 if (x->km.state == XFRM_STATE_VALID &&
1568                     x->type && x->type->get_max_size)
1569                         m = x->type->get_max_size(x, m);
1570                 else
1571                         m += x->props.header_len;
1572                 spin_unlock_bh(&x->lock);
1573
1574                 if (m <= mtu)
1575                         break;
1576                 res -= (m - mtu);
1577         }
1578
1579         return res;
1580 }
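/*
 * Sketch of the iteration above, assuming a transform such as ESP whose
 * ->get_max_size() rounds the payload up to its block size: start from
 * res = mtu - header_len, ask the type how large the resulting packet
 * would be, and shrink res by the overshoot until the answer fits in
 * mtu.  The 68-byte floor is the IPv4 minimum MTU.
 */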
1581
1582 int xfrm_init_state(struct xfrm_state *x)
1583 {
1584         struct xfrm_state_afinfo *afinfo;
1585         int family = x->props.family;
1586         int err;
1587
1588         err = -EAFNOSUPPORT;
1589         afinfo = xfrm_state_get_afinfo(family);
1590         if (!afinfo)
1591                 goto error;
1592
1593         err = 0;
1594         if (afinfo->init_flags)
1595                 err = afinfo->init_flags(x);
1596
1597         xfrm_state_put_afinfo(afinfo);
1598
1599         if (err)
1600                 goto error;
1601
1602         err = -EPROTONOSUPPORT;
1603         x->type = xfrm_get_type(x->id.proto, family);
1604         if (x->type == NULL)
1605                 goto error;
1606
1607         err = x->type->init_state(x);
1608         if (err)
1609                 goto error;
1610
1611         x->mode = xfrm_get_mode(x->props.mode, family);
1612         if (x->mode == NULL)
1613                 goto error;
1614
1615         x->km.state = XFRM_STATE_VALID;
1616
1617 error:
1618         return err;
1619 }
1620
1621 EXPORT_SYMBOL(xfrm_init_state);
1622  
1623 void __init xfrm_state_init(void)
1624 {
1625         unsigned int sz;
1626
1627         sz = sizeof(struct hlist_head) * 8;
1628
1629         xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1630         xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1631         xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1632         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1633                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1634         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1635
1636         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1637 }
1638