[XFRM]: Do not flush all bundles on SA insert.
[linux-2.6-microblaze.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
25
/* Netlink socket used by the xfrm key-manager interface. */
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
/* sysctl default for the async-event timer (initialised from XFRM_AE_ETIME). */
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
/* sysctl default replay-sequence threshold (from XFRM_AE_SEQT_SIZE). */
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to two tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40  */
41
/* Protects the three state hash tables and the counters below. */
42 static DEFINE_SPINLOCK(xfrm_state_lock);
43
44 /* Hash table to find appropriate SA towards given target (endpoint
45  * of tunnel or destination of transport mode) allowed by selector.
46  *
47  * Main use is finding SA after policy selected tunnel or transport mode.
48  * Also, it can be used by ah/esp icmp error handler to find offending SA.
49  */
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Current bucket mask (bucket count - 1) and the growth ceiling. */
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Number of installed states; compared to hmask to trigger a resize. */
55 static unsigned int xfrm_state_num;
/* Generation counter bumped on insert (see __xfrm_state_insert). */
56 static unsigned int xfrm_state_genid;
57
58 static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
59 {
60         return ntohl(addr->a4);
61 }
62
63 static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
64 {
65         return ntohl(addr->a6[2]^addr->a6[3]);
66 }
67
68 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
69                                            u32 reqid, unsigned short family,
70                                            unsigned int hmask)
71 {
72         unsigned int h = family ^ reqid;
73         switch (family) {
74         case AF_INET:
75                 h ^= __xfrm4_addr_hash(addr);
76                 break;
77         case AF_INET6:
78                 h ^= __xfrm6_addr_hash(addr);
79                 break;
80         };
81         return (h ^ (h >> 16)) & hmask;
82 }
83
84 static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
85                                          unsigned short family)
86 {
87         return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
88 }
89
90 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
91                                        unsigned int hmask)
92 {
93         unsigned int h = family;
94         switch (family) {
95         case AF_INET:
96                 h ^= __xfrm4_addr_hash(addr);
97                 break;
98         case AF_INET6:
99                 h ^= __xfrm6_addr_hash(addr);
100                 break;
101         };
102         return (h ^ (h >> 16)) & hmask;
103 }
104
105 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
106 {
107         return __xfrm_src_hash(addr, family, xfrm_state_hmask);
108 }
109
110 static inline unsigned int
111 __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
112                 unsigned int hmask)
113 {
114         unsigned int h = spi ^ proto;
115         switch (family) {
116         case AF_INET:
117                 h ^= __xfrm4_addr_hash(addr);
118                 break;
119         case AF_INET6:
120                 h ^= __xfrm6_addr_hash(addr);
121                 break;
122         }
123         return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
124 }
125
126 static inline unsigned int
127 xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
128 {
129         return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
130 }
131
132 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
133 {
134         struct hlist_head *n;
135
136         if (sz <= PAGE_SIZE)
137                 n = kmalloc(sz, GFP_KERNEL);
138         else if (hashdist)
139                 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
140         else
141                 n = (struct hlist_head *)
142                         __get_free_pages(GFP_KERNEL, get_order(sz));
143
144         if (n)
145                 memset(n, 0, sz);
146
147         return n;
148 }
149
150 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
151 {
152         if (sz <= PAGE_SIZE)
153                 kfree(n);
154         else if (hashdist)
155                 vfree(n);
156         else
157                 free_pages((unsigned long)n, get_order(sz));
158 }
159
/* Re-hash every state on the old bucket @list into the three new tables
 * using @nhashmask.  Called under xfrm_state_lock from xfrm_hash_resize.
 * hlist_add_head() re-links x->bydst, so the _safe iterator is required.
 *
 * NOTE(review): x->byspi is re-linked unconditionally here, while
 * __xfrm_state_insert only hashes by SPI for matching protocols and
 * __xfrm_state_delete only unlinks byspi when x->id.spi != 0 — verify
 * that zero-SPI states cannot be left on a byspi chain after a resize.
 */
160 static void xfrm_hash_transfer(struct hlist_head *list,
161                                struct hlist_head *ndsttable,
162                                struct hlist_head *nsrctable,
163                                struct hlist_head *nspitable,
164                                unsigned int nhashmask)
165 {
166         struct hlist_node *entry, *tmp;
167         struct xfrm_state *x;
168
169         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
170                 unsigned int h;
171
                        /* Re-link on the by-destination chain. */
172                 h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
173                                     x->props.family, nhashmask);
174                 hlist_add_head(&x->bydst, ndsttable+h);
175
                        /* Re-link on the by-source chain. */
176                 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
177                                     nhashmask);
178                 hlist_add_head(&x->bysrc, nsrctable+h);
179
                        /* Re-link on the by-SPI chain. */
180                 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
181                                     x->props.family, nhashmask);
182                 hlist_add_head(&x->byspi, nspitable+h);
183         }
184 }
185
186 static unsigned long xfrm_hash_new_size(void)
187 {
188         return ((xfrm_state_hmask + 1) << 1) *
189                 sizeof(struct hlist_head);
190 }
191
/* Serializes concurrent resize requests scheduled via xfrm_hash_work. */
192 static DEFINE_MUTEX(hash_resize_mutex);
193
/* Workqueue handler: grow all three state hash tables to the next size.
 * New tables are allocated outside xfrm_state_lock; the transfer and the
 * pointer/mask switch happen under it; the old tables are freed after
 * the lock is dropped.  On any allocation failure everything allocated
 * so far is freed and the tables are left untouched.
 */
194 static void xfrm_hash_resize(void *__unused)
195 {
196         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
197         unsigned long nsize, osize;
198         unsigned int nhashmask, ohashmask;
199         int i;
200
201         mutex_lock(&hash_resize_mutex);
202
203         nsize = xfrm_hash_new_size();
204         ndst = xfrm_state_hash_alloc(nsize);
205         if (!ndst)
206                 goto out_unlock;
207         nsrc = xfrm_state_hash_alloc(nsize);
208         if (!nsrc) {
209                 xfrm_state_hash_free(ndst, nsize);
210                 goto out_unlock;
211         }
212         nspi = xfrm_state_hash_alloc(nsize);
213         if (!nspi) {
214                 xfrm_state_hash_free(ndst, nsize);
215                 xfrm_state_hash_free(nsrc, nsize);
216                 goto out_unlock;
217         }
218
219         spin_lock_bh(&xfrm_state_lock);
220
                /* nsize is a power-of-two multiple of the bucket size. */
221         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
                /* Walk all hmask+1 old buckets and move their states. */
222         for (i = xfrm_state_hmask; i >= 0; i--)
223                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
224                                    nhashmask);
225
226         odst = xfrm_state_bydst;
227         osrc = xfrm_state_bysrc;
228         ospi = xfrm_state_byspi;
229         ohashmask = xfrm_state_hmask;
230
231         xfrm_state_bydst = ndst;
232         xfrm_state_bysrc = nsrc;
233         xfrm_state_byspi = nspi;
234         xfrm_state_hmask = nhashmask;
235
236         spin_unlock_bh(&xfrm_state_lock);
237
                /* Old tables are now unreachable; free them off-lock. */
238         osize = (ohashmask + 1) * sizeof(struct hlist_head);
239         xfrm_state_hash_free(odst, osize);
240         xfrm_state_hash_free(osrc, osize);
241         xfrm_state_hash_free(ospi, osize);
242
243 out_unlock:
244         mutex_unlock(&hash_resize_mutex);
245 }
246
/* Deferred hash-table growth, scheduled from __xfrm_state_insert. */
247 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
248
/* Key managers sleep here; woken on state insert/expire/gc. */
249 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
250 EXPORT_SYMBOL(km_waitq);
251
/* Protects the per-protocol address-family ops table below. */
252 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
253 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
254
/* Deferred destruction: dead states are queued on xfrm_state_gc_list
 * (linked through their bydst node) and freed by xfrm_state_gc_work. */
255 static struct work_struct xfrm_state_gc_work;
256 static HLIST_HEAD(xfrm_state_gc_list);
257 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
258
/* Set by __xfrm_state_delete when dst bundles may still reference a
 * deleted state; consumed (and cleared) by xfrm_state_gc_task. */
259 static int xfrm_state_gc_flush_bundles;
260
261 int __xfrm_state_delete(struct xfrm_state *x);
262
263 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
264 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
265
266 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
267 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
/* Final teardown of a dead state, called from the gc work task only.
 * Both timers must already have been stopped by __xfrm_state_delete;
 * a still-pending timer here means a refcount bug, hence BUG().
 */
269 static void xfrm_state_gc_destroy(struct xfrm_state *x)
270 {
271         if (del_timer(&x->timer))
272                 BUG();
273         if (del_timer(&x->rtimer))
274                 BUG();
                /* Free the algorithm/encap state; kfree(NULL) is a no-op. */
275         kfree(x->aalg);
276         kfree(x->ealg);
277         kfree(x->calg);
278         kfree(x->encap);
279         kfree(x->coaddr);
                /* Drop module references on the mode and type ops. */
280         if (x->mode)
281                 xfrm_put_mode(x->mode);
282         if (x->type) {
283                 x->type->destructor(x);
284                 xfrm_put_type(x->type);
285         }
286         security_xfrm_state_free(x);
287         kfree(x);
288 }
289
/* Workqueue handler: flush stale dst bundles if requested, then destroy
 * every state queued on xfrm_state_gc_list.  The list is spliced to a
 * local head under the gc lock so destruction runs without it.
 */
290 static void xfrm_state_gc_task(void *data)
291 {
292         struct xfrm_state *x;
293         struct hlist_node *entry, *tmp;
294         struct hlist_head gc_list;
295
296         if (xfrm_state_gc_flush_bundles) {
297                 xfrm_state_gc_flush_bundles = 0;
298                 xfrm_flush_bundles();
299         }
300
301         spin_lock_bh(&xfrm_state_gc_lock);
302         gc_list.first = xfrm_state_gc_list.first;
303         INIT_HLIST_HEAD(&xfrm_state_gc_list);
304         spin_unlock_bh(&xfrm_state_gc_lock);
305
                /* Queued states are linked through their (reused) bydst node. */
306         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
307                 xfrm_state_gc_destroy(x);
308
309         wake_up(&km_waitq);
310 }
311
312 static inline unsigned long make_jiffies(long secs)
313 {
314         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
315                 return MAX_SCHEDULE_TIMEOUT-1;
316         else
317                 return secs*HZ;
318 }
319
/* Per-state lifetime timer.  Checks hard limits (expire the state),
 * then soft limits (warn the key manager once via km.dying), and
 * re-arms itself for the nearest future deadline.  Runs holding
 * x->lock; drops the timer's state reference on exit unless the timer
 * was successfully re-armed (mod_timer returning 0 means it was not
 * pending, so a new reference is taken).
 */
320 static void xfrm_timer_handler(unsigned long data)
321 {
322         struct xfrm_state *x = (struct xfrm_state*)data;
323         unsigned long now = (unsigned long)xtime.tv_sec;
324         long next = LONG_MAX;
325         int warn = 0;
326
327         spin_lock(&x->lock);
328         if (x->km.state == XFRM_STATE_DEAD)
329                 goto out;
330         if (x->km.state == XFRM_STATE_EXPIRED)
331                 goto expired;
                /* Hard lifetimes: crossing one kills the state now. */
332         if (x->lft.hard_add_expires_seconds) {
333                 long tmo = x->lft.hard_add_expires_seconds +
334                         x->curlft.add_time - now;
335                 if (tmo <= 0)
336                         goto expired;
337                 if (tmo < next)
338                         next = tmo;
339         }
340         if (x->lft.hard_use_expires_seconds) {
                        /* use_time == 0 means never used; count from now. */
341                 long tmo = x->lft.hard_use_expires_seconds +
342                         (x->curlft.use_time ? : now) - now;
343                 if (tmo <= 0)
344                         goto expired;
345                 if (tmo < next)
346                         next = tmo;
347         }
                /* Soft warning already sent; don't repeat it. */
348         if (x->km.dying)
349                 goto resched;
350         if (x->lft.soft_add_expires_seconds) {
351                 long tmo = x->lft.soft_add_expires_seconds +
352                         x->curlft.add_time - now;
353                 if (tmo <= 0)
354                         warn = 1;
355                 else if (tmo < next)
356                         next = tmo;
357         }
358         if (x->lft.soft_use_expires_seconds) {
359                 long tmo = x->lft.soft_use_expires_seconds +
360                         (x->curlft.use_time ? : now) - now;
361                 if (tmo <= 0)
362                         warn = 1;
363                 else if (tmo < next)
364                         next = tmo;
365         }
366
367         x->km.dying = warn;
368         if (warn)
369                 km_state_expired(x, 0, 0);
370 resched:
                /* mod_timer() == 0: timer was idle, take a fresh reference. */
371         if (next != LONG_MAX &&
372             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
373                 xfrm_state_hold(x);
374         goto out;
375
376 expired:
                /* A larval (ACQ, no SPI) state lingers briefly so the key
                 * manager can observe the expiry before deletion. */
377         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
378                 x->km.state = XFRM_STATE_EXPIRED;
379                 wake_up(&km_waitq);
380                 next = 2;
381                 goto resched;
382         }
383         if (!__xfrm_state_delete(x) && x->id.spi)
384                 km_state_expired(x, 1, 0);
385
386 out:
387         spin_unlock(&x->lock);
                /* Drop the reference held for this timer invocation. */
388         xfrm_state_put(x);
389 }
390
391 static void xfrm_replay_timer_handler(unsigned long data);
392
/* Allocate and minimally initialise a new xfrm_state.
 * The caller receives the initial (refcnt == 1) reference.  Byte and
 * packet lifetime limits default to unlimited (XFRM_INF); both timers
 * are set up but not armed.  Returns NULL on allocation failure.
 * GFP_ATOMIC because callers may hold spinlocks (e.g. xfrm_state_find).
 */
393 struct xfrm_state *xfrm_state_alloc(void)
394 {
395         struct xfrm_state *x;
396
397         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
398
399         if (x) {
400                 atomic_set(&x->refcnt, 1);
401                 atomic_set(&x->tunnel_users, 0);
402                 INIT_HLIST_NODE(&x->bydst);
403                 INIT_HLIST_NODE(&x->bysrc);
404                 INIT_HLIST_NODE(&x->byspi);
405                 init_timer(&x->timer);
406                 x->timer.function = xfrm_timer_handler;
407                 x->timer.data     = (unsigned long)x;
408                 init_timer(&x->rtimer);
409                 x->rtimer.function = xfrm_replay_timer_handler;
410                 x->rtimer.data     = (unsigned long)x;
411                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
412                 x->lft.soft_byte_limit = XFRM_INF;
413                 x->lft.soft_packet_limit = XFRM_INF;
414                 x->lft.hard_byte_limit = XFRM_INF;
415                 x->lft.hard_packet_limit = XFRM_INF;
416                 x->replay_maxage = 0;
417                 x->replay_maxdiff = 0;
418                 spin_lock_init(&x->lock);
419         }
420         return x;
421 }
422 EXPORT_SYMBOL(xfrm_state_alloc);
423
/* Queue a dead state for deferred destruction by the gc work task.
 * Called when the last reference is dropped; the state must already be
 * DEAD (i.e. unhashed by __xfrm_state_delete), which is why its bydst
 * node can be safely reused here as the gc-list linkage.
 */
424 void __xfrm_state_destroy(struct xfrm_state *x)
425 {
426         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
427
428         spin_lock_bh(&xfrm_state_gc_lock);
429         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
430         spin_unlock_bh(&xfrm_state_gc_lock);
431         schedule_work(&xfrm_state_gc_work);
432 }
433 EXPORT_SYMBOL(__xfrm_state_destroy);
434
/* Mark @x DEAD and unhash it from all three tables, dropping the table
 * references as each link is removed, plus the timer references for any
 * pending timers.  Caller holds x->lock and its own reference.
 * Returns 0 on success, -ESRCH if the state was already dead.
 */
435 int __xfrm_state_delete(struct xfrm_state *x)
436 {
437         int err = -ESRCH;
438
439         if (x->km.state != XFRM_STATE_DEAD) {
440                 x->km.state = XFRM_STATE_DEAD;
441                 spin_lock(&xfrm_state_lock);
442                 hlist_del(&x->bydst);
443                 __xfrm_state_put(x);
444                 hlist_del(&x->bysrc);
445                 __xfrm_state_put(x);
446                 if (x->id.spi) {
447                         hlist_del(&x->byspi);
448                         __xfrm_state_put(x);
449                 }
450                 xfrm_state_num--;
451                 spin_unlock(&xfrm_state_lock);
                        /* del_timer() != 0: the timer was pending and owned
                         * a reference; release it. */
452                 if (del_timer(&x->timer))
453                         __xfrm_state_put(x);
454                 if (del_timer(&x->rtimer))
455                         __xfrm_state_put(x);
456
457                 /* The number two in this test is the reference
458                  * mentioned in the comment below plus the reference
459                  * our caller holds.  A larger value means that
460                  * there are DSTs attached to this xfrm_state.
461                  */
462                 if (atomic_read(&x->refcnt) > 2) {
463                         xfrm_state_gc_flush_bundles = 1;
464                         schedule_work(&xfrm_state_gc_work);
465                 }
466
467                 /* All xfrm_state objects are created by xfrm_state_alloc.
468                  * The xfrm_state_alloc call gives a reference, and that
469                  * is what we are dropping here.
470                  */
471                 __xfrm_state_put(x);
472                 err = 0;
473         }
474
475         return err;
476 }
477 EXPORT_SYMBOL(__xfrm_state_delete);
478
479 int xfrm_state_delete(struct xfrm_state *x)
480 {
481         int err;
482
483         spin_lock_bh(&x->lock);
484         err = __xfrm_state_delete(x);
485         spin_unlock_bh(&x->lock);
486
487         return err;
488 }
489 EXPORT_SYMBOL(xfrm_state_delete);
490
491 void xfrm_state_flush(u8 proto)
492 {
493         int i;
494
495         spin_lock_bh(&xfrm_state_lock);
496         for (i = 0; i < xfrm_state_hmask; i++) {
497                 struct hlist_node *entry;
498                 struct xfrm_state *x;
499 restart:
500                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
501                         if (!xfrm_state_kern(x) &&
502                             xfrm_id_proto_match(x->id.proto, proto)) {
503                                 xfrm_state_hold(x);
504                                 spin_unlock_bh(&xfrm_state_lock);
505
506                                 xfrm_state_delete(x);
507                                 xfrm_state_put(x);
508
509                                 spin_lock_bh(&xfrm_state_lock);
510                                 goto restart;
511                         }
512                 }
513         }
514         spin_unlock_bh(&xfrm_state_lock);
515         wake_up(&km_waitq);
516 }
517 EXPORT_SYMBOL(xfrm_state_flush);
518
519 static int
520 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
521                   struct xfrm_tmpl *tmpl,
522                   xfrm_address_t *daddr, xfrm_address_t *saddr,
523                   unsigned short family)
524 {
525         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
526         if (!afinfo)
527                 return -1;
528         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
529         xfrm_state_put_afinfo(afinfo);
530         return 0;
531 }
532
/* Look up a state by (daddr, spi, proto, family) on the by-SPI table.
 * Caller holds xfrm_state_lock.  On a hit, a reference is taken and
 * the state returned; NULL if nothing matches.
 */
533 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
534 {
535         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
536         struct xfrm_state *x;
537         struct hlist_node *entry;
538
539         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
540                 if (x->props.family != family ||
541                     x->id.spi       != spi ||
542                     x->id.proto     != proto)
543                         continue;
544
                        /* Cheap fields matched; now compare the address. */
545                 switch (family) {
546                 case AF_INET:
547                         if (x->id.daddr.a4 != daddr->a4)
548                                 continue;
549                         break;
550                 case AF_INET6:
551                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
552                                              (struct in6_addr *)
553                                              x->id.daddr.a6))
554                                 continue;
555                         break;
556                 };
557
558                 xfrm_state_hold(x);
559                 return x;
560         }
561
562         return NULL;
563 }
564
/* Look up a state by (daddr, saddr, proto, family) on the by-source
 * table (hashed by saddr only).  Caller holds xfrm_state_lock.  On a
 * hit, a reference is taken and the state returned; NULL otherwise.
 */
565 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
566 {
567         unsigned int h = xfrm_src_hash(saddr, family);
568         struct xfrm_state *x;
569         struct hlist_node *entry;
570
571         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
572                 if (x->props.family != family ||
573                     x->id.proto     != proto)
574                         continue;
575
                        /* Both endpoint addresses must match. */
576                 switch (family) {
577                 case AF_INET:
578                         if (x->id.daddr.a4 != daddr->a4 ||
579                             x->props.saddr.a4 != saddr->a4)
580                                 continue;
581                         break;
582                 case AF_INET6:
583                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
584                                              (struct in6_addr *)
585                                              x->id.daddr.a6) ||
586                             !ipv6_addr_equal((struct in6_addr *)saddr,
587                                              (struct in6_addr *)
588                                              x->props.saddr.a6))
589                                 continue;
590                         break;
591                 };
592
593                 xfrm_state_hold(x);
594                 return x;
595         }
596
597         return NULL;
598 }
599
600 static inline struct xfrm_state *
601 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
602 {
603         if (use_spi)
604                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
605                                            x->id.proto, family);
606         else
607                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
608                                                   &x->props.saddr,
609                                                   x->id.proto, family);
610 }
611
/* Resolve the SA to use for flow @fl against template @tmpl/policy @pol.
 * Prefers the best matching VALID state (least dying, then newest); if
 * none exists and no ACQUIRE is already pending, creates a larval ACQ
 * state and asks the key manager to negotiate a real one (km_query).
 * Returns a held state, or NULL with *err set (-EAGAIN while an acquire
 * is in progress, -EEXIST/-ENOMEM/-ESRCH on the failure paths).
 */
612 struct xfrm_state *
613 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
614                 struct flowi *fl, struct xfrm_tmpl *tmpl,
615                 struct xfrm_policy *pol, int *err,
616                 unsigned short family)
617 {
618         unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
619         struct hlist_node *entry;
620         struct xfrm_state *x, *x0;
621         int acquire_in_progress = 0;
622         int error = 0;
623         struct xfrm_state *best = NULL;
624         
625         spin_lock_bh(&xfrm_state_lock);
626         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
627                 if (x->props.family == family &&
628                     x->props.reqid == tmpl->reqid &&
629                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
630                     xfrm_state_addr_check(x, daddr, saddr, family) &&
631                     tmpl->mode == x->props.mode &&
632                     tmpl->id.proto == x->id.proto &&
633                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
634                         /* Resolution logic:
635                            1. There is a valid state with matching selector.
636                               Done.
637                            2. Valid state with inappropriate selector. Skip.
638
639                            Entering area of "sysdeps".
640
641                            3. If state is not valid, selector is temporary,
642                               it selects only session which triggered
643                               previous resolution. Key manager will do
644                               something to install a state with proper
645                               selector.
646                          */
647                         if (x->km.state == XFRM_STATE_VALID) {
648                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
649                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
650                                         continue;
                                        /* Prefer non-dying, then most recently added. */
651                                 if (!best ||
652                                     best->km.dying > x->km.dying ||
653                                     (best->km.dying == x->km.dying &&
654                                      best->curlft.add_time < x->curlft.add_time))
655                                         best = x;
656                         } else if (x->km.state == XFRM_STATE_ACQ) {
657                                 acquire_in_progress = 1;
658                         } else if (x->km.state == XFRM_STATE_ERROR ||
659                                    x->km.state == XFRM_STATE_EXPIRED) {
660                                 if (xfrm_selector_match(&x->sel, fl, family) &&
661                                     security_xfrm_state_pol_flow_match(x, pol, fl))
662                                         error = -ESRCH;
663                         }
664                 }
665         }
666
667         x = best;
668         if (!x && !error && !acquire_in_progress) {
                        /* Template pins an SPI that already exists under a
                         * different reqid/mode: refuse rather than alias it. */
669                 if (tmpl->id.spi &&
670                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
671                                               tmpl->id.proto, family)) != NULL) {
672                         xfrm_state_put(x0);
673                         error = -EEXIST;
674                         goto out;
675                 }
676                 x = xfrm_state_alloc();
677                 if (x == NULL) {
678                         error = -ENOMEM;
679                         goto out;
680                 }
681                 /* Initialize temporary selector matching only
682                  * to current session. */
683                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
684
685                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
686                 if (error) {
687                         x->km.state = XFRM_STATE_DEAD;
688                         xfrm_state_put(x);
689                         x = NULL;
690                         goto out;
691                 }
692
693                 if (km_query(x, tmpl, pol) == 0) {
                                /* Hash in the larval state; each table link
                                 * owns a reference. */
694                         x->km.state = XFRM_STATE_ACQ;
695                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
696                         xfrm_state_hold(x);
697                         h = xfrm_src_hash(saddr, family);
698                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
699                         xfrm_state_hold(x);
700                         if (x->id.spi) {
701                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
702                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
703                                 xfrm_state_hold(x);
704                         }
                                /* Arm the expiry timer; it owns a reference too. */
705                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
706                         xfrm_state_hold(x);
707                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
708                         add_timer(&x->timer);
709                 } else {
710                         x->km.state = XFRM_STATE_DEAD;
711                         xfrm_state_put(x);
712                         x = NULL;
713                         error = -ESRCH;
714                 }
715         }
716 out:
717         if (x)
718                 xfrm_state_hold(x);
719         else
720                 *err = acquire_in_progress ? -EAGAIN : error;
721         spin_unlock_bh(&xfrm_state_lock);
722         return x;
723 }
724
/* Hash a fully constructed state into all three tables; each table link
 * takes its own reference, as do the armed timers.  Caller holds
 * xfrm_state_lock.  Assigns the state a fresh generation id — presumably
 * so cached bundles built against older states are detected as stale;
 * the check itself lives outside this file (NOTE: verify against the
 * bundle code).  Schedules a table resize when the load factor warrants.
 */
725 static void __xfrm_state_insert(struct xfrm_state *x)
726 {
727         unsigned int h;
728
729         x->genid = ++xfrm_state_genid;
730
731         h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
732         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
733         xfrm_state_hold(x);
734
735         h = xfrm_src_hash(&x->props.saddr, x->props.family);
736         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
737         xfrm_state_hold(x);
738
                /* Only SPI-bearing protocols are hashed by SPI. */
739         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
740                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
741                                   x->props.family);
742
743                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
744                 xfrm_state_hold(x);
745         }
746
                /* Kick the lifetime timer soon; a 0 return from mod_timer
                 * means it was idle, so take the timer's reference. */
747         if (!mod_timer(&x->timer, jiffies + HZ))
748                 xfrm_state_hold(x);
749
750         if (x->replay_maxage &&
751             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
752                 xfrm_state_hold(x);
753
754         wake_up(&km_waitq);
755
756         xfrm_state_num++;
757
                /* Chain already had an entry and we are above one state per
                 * bucket on average: grow the tables asynchronously. */
758         if (x->bydst.next != NULL &&
759             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
760             xfrm_state_num > xfrm_state_hmask)
761                 schedule_work(&xfrm_hash_work);
762 }
763
764 /* xfrm_state_lock is held */
/* Refresh the generation id of every existing state sharing @xnew's
 * (daddr, reqid, family) so only those specific states' cached bundles
 * are invalidated on insert — not every bundle in the system.
 */
765 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
766 {
767         unsigned short family = xnew->props.family;
768         u32 reqid = xnew->props.reqid;
769         struct xfrm_state *x;
770         struct hlist_node *entry;
771         unsigned int h;
772
773         h = xfrm_dst_hash(&xnew->id.daddr, reqid, family);
774         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
775                 if (x->props.family     == family &&
776                     x->props.reqid      == reqid &&
777                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family))
778                         x->genid = xfrm_state_genid;
779         }
780 }
781
/* Insert @x into the state tables under xfrm_state_lock, first bumping
 * the generation ids of colliding older states (see
 * __xfrm_state_bump_genids).
 */
782 void xfrm_state_insert(struct xfrm_state *x)
783 {
784         spin_lock_bh(&xfrm_state_lock);
785         __xfrm_state_bump_genids(x);
786         __xfrm_state_insert(x);
787         spin_unlock_bh(&xfrm_state_lock);
788 }
789 EXPORT_SYMBOL(xfrm_state_insert);
790
791 /* xfrm_state_lock is held */
/* Find a pending (larval) ACQ state matching the given key, or, when
 * @create is set, build and hash a new one.  A larval state has
 * spi == 0 and a host-exact temporary selector.  Returns a held state
 * (existing hit) or the newly created one (holding its initial
 * allocation reference), NULL when not found and !create or on OOM.
 */
792 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
793 {
794         unsigned int h = xfrm_dst_hash(daddr, reqid, family);
795         struct hlist_node *entry;
796         struct xfrm_state *x;
797
798         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
799                 if (x->props.reqid  != reqid ||
800                     x->props.mode   != mode ||
801                     x->props.family != family ||
802                     x->km.state     != XFRM_STATE_ACQ ||
803                     x->id.spi       != 0)
804                         continue;
805
806                 switch (family) {
807                 case AF_INET:
808                         if (x->id.daddr.a4    != daddr->a4 ||
809                             x->props.saddr.a4 != saddr->a4)
810                                 continue;
811                         break;
812                 case AF_INET6:
813                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
814                                              (struct in6_addr *)daddr) ||
815                             !ipv6_addr_equal((struct in6_addr *)
816                                              x->props.saddr.a6,
817                                              (struct in6_addr *)saddr))
818                                 continue;
819                         break;
820                 };
821
822                 xfrm_state_hold(x);
823                 return x;
824         }
825
826         if (!create)
827                 return NULL;
828
829         x = xfrm_state_alloc();
830         if (likely(x)) {
                        /* Temporary selector: exact-host match on both ends. */
831                 switch (family) {
832                 case AF_INET:
833                         x->sel.daddr.a4 = daddr->a4;
834                         x->sel.saddr.a4 = saddr->a4;
835                         x->sel.prefixlen_d = 32;
836                         x->sel.prefixlen_s = 32;
837                         x->props.saddr.a4 = saddr->a4;
838                         x->id.daddr.a4 = daddr->a4;
839                         break;
840
841                 case AF_INET6:
842                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
843                                        (struct in6_addr *)daddr);
844                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
845                                        (struct in6_addr *)saddr);
846                         x->sel.prefixlen_d = 128;
847                         x->sel.prefixlen_s = 128;
848                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
849                                        (struct in6_addr *)saddr);
850                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
851                                        (struct in6_addr *)daddr);
852                         break;
853                 };
854
855                 x->km.state = XFRM_STATE_ACQ;
856                 x->id.proto = proto;
857                 x->props.family = family;
858                 x->props.mode = mode;
859                 x->props.reqid = reqid;
                        /* Expiry timer and both table links each own a reference. */
860                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
861                 xfrm_state_hold(x);
862                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
863                 add_timer(&x->timer);
864                 xfrm_state_hold(x);
865                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
866                 h = xfrm_src_hash(saddr, family);
867                 xfrm_state_hold(x);
868                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
869                 wake_up(&km_waitq);
870         }
871
872         return x;
873 }
874
875 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
876
/* Insert a fully-specified SA into the state tables.
 *
 * Fails with -EEXIST if an equivalent state is already installed.
 * Before inserting, try to locate a matching larval (XFRM_STATE_ACQ)
 * placeholder -- first by the acquire sequence number, then by
 * (reqid, mode, daddr, saddr) -- so it can be retired now that the
 * real SA has arrived.
 *
 * Returns 0 on success or a negative errno.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		/* Duplicate: drop the reference taken by the lookup. */
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		/* Match the larval state by acquire sequence number;
		 * discard the match if its destination differs. */
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (x1) {
		/* Retire the larval placeholder outside the table lock. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
924
/* Refresh an installed state from @x (lifetimes, NAT-T encapsulation,
 * care-of address, and -- for non-SPI protocols -- the selector).
 *
 * If the matching installed state is still larval (XFRM_STATE_ACQ),
 * @x is inserted in its place and the larval state is deleted.
 * Otherwise selected fields of the installed state are overwritten
 * from @x under the state's own lock.
 *
 * Returns 0 on success, -ESRCH if no matching state exists, -EEXIST
 * for kernel-internal states, -EINVAL if the target is not VALID.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		/* Kernel-internal states may not be replaced. */
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Larval placeholder: install @x in its place.  x is
		 * cleared so the second phase deletes x1 instead of
		 * copying into it. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		/* @x replaced the larval x1; retire x1. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the lifetime timer; take a reference if the
		 * timer was not already pending. */
		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
989
990 int xfrm_state_check_expire(struct xfrm_state *x)
991 {
992         if (!x->curlft.use_time)
993                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
994
995         if (x->km.state != XFRM_STATE_VALID)
996                 return -EINVAL;
997
998         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
999             x->curlft.packets >= x->lft.hard_packet_limit) {
1000                 x->km.state = XFRM_STATE_EXPIRED;
1001                 if (!mod_timer(&x->timer, jiffies))
1002                         xfrm_state_hold(x);
1003                 return -EINVAL;
1004         }
1005
1006         if (!x->km.dying &&
1007             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1008              x->curlft.packets >= x->lft.soft_packet_limit)) {
1009                 x->km.dying = 1;
1010                 km_state_expired(x, 0, 0);
1011         }
1012         return 0;
1013 }
1014 EXPORT_SYMBOL(xfrm_state_check_expire);
1015
1016 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1017 {
1018         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1019                 - skb_headroom(skb);
1020
1021         if (nhead > 0)
1022                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1023
1024         /* Check tail too... */
1025         return 0;
1026 }
1027
/* Validate @x for output use on @skb: first enforce lifetime limits,
 * then make sure the skb has room for the transform's header. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int ret = xfrm_state_check_expire(x);

	if (ret < 0)
		return ret;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
1038
1039 struct xfrm_state *
1040 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1041                   unsigned short family)
1042 {
1043         struct xfrm_state *x;
1044
1045         spin_lock_bh(&xfrm_state_lock);
1046         x = __xfrm_state_lookup(daddr, spi, proto, family);
1047         spin_unlock_bh(&xfrm_state_lock);
1048         return x;
1049 }
1050 EXPORT_SYMBOL(xfrm_state_lookup);
1051
1052 struct xfrm_state *
1053 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1054                          u8 proto, unsigned short family)
1055 {
1056         struct xfrm_state *x;
1057
1058         spin_lock_bh(&xfrm_state_lock);
1059         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1060         spin_unlock_bh(&xfrm_state_lock);
1061         return x;
1062 }
1063 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1064
1065 struct xfrm_state *
1066 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
1067               xfrm_address_t *daddr, xfrm_address_t *saddr, 
1068               int create, unsigned short family)
1069 {
1070         struct xfrm_state *x;
1071
1072         spin_lock_bh(&xfrm_state_lock);
1073         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1074         spin_unlock_bh(&xfrm_state_lock);
1075
1076         return x;
1077 }
1078 EXPORT_SYMBOL(xfrm_find_acq);
1079
1080 #ifdef CONFIG_XFRM_SUB_POLICY
1081 int
1082 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1083                unsigned short family)
1084 {
1085         int err = 0;
1086         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1087         if (!afinfo)
1088                 return -EAFNOSUPPORT;
1089
1090         spin_lock_bh(&xfrm_state_lock);
1091         if (afinfo->tmpl_sort)
1092                 err = afinfo->tmpl_sort(dst, src, n);
1093         spin_unlock_bh(&xfrm_state_lock);
1094         xfrm_state_put_afinfo(afinfo);
1095         return err;
1096 }
1097 EXPORT_SYMBOL(xfrm_tmpl_sort);
1098
1099 int
1100 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1101                 unsigned short family)
1102 {
1103         int err = 0;
1104         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1105         if (!afinfo)
1106                 return -EAFNOSUPPORT;
1107
1108         spin_lock_bh(&xfrm_state_lock);
1109         if (afinfo->state_sort)
1110                 err = afinfo->state_sort(dst, src, n);
1111         spin_unlock_bh(&xfrm_state_lock);
1112         xfrm_state_put_afinfo(afinfo);
1113         return err;
1114 }
1115 EXPORT_SYMBOL(xfrm_state_sort);
1116 #endif
1117
1118 /* Silly enough, but I'm lazy to build resolution list */
1119
1120 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1121 {
1122         int i;
1123
1124         for (i = 0; i <= xfrm_state_hmask; i++) {
1125                 struct hlist_node *entry;
1126                 struct xfrm_state *x;
1127
1128                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1129                         if (x->km.seq == seq &&
1130                             x->km.state == XFRM_STATE_ACQ) {
1131                                 xfrm_state_hold(x);
1132                                 return x;
1133                         }
1134                 }
1135         }
1136         return NULL;
1137 }
1138
1139 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1140 {
1141         struct xfrm_state *x;
1142
1143         spin_lock_bh(&xfrm_state_lock);
1144         x = __xfrm_find_acq_byseq(seq);
1145         spin_unlock_bh(&xfrm_state_lock);
1146         return x;
1147 }
1148 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1149
1150 u32 xfrm_get_acqseq(void)
1151 {
1152         u32 res;
1153         static u32 acqseq;
1154         static DEFINE_SPINLOCK(acqseq_lock);
1155
1156         spin_lock_bh(&acqseq_lock);
1157         res = (++acqseq ? : ++acqseq);
1158         spin_unlock_bh(&acqseq_lock);
1159         return res;
1160 }
1161 EXPORT_SYMBOL(xfrm_get_acqseq);
1162
/* Assign an SPI to @x from the range [minspi, maxspi] and, on success,
 * link the state into the byspi hash.
 *
 * If minspi == maxspi the exact value is used (only if free); otherwise
 * random probing within the range is tried, one attempt per value in
 * the range.  NOTE(review): the random probes may repeat values, so a
 * free SPI can be missed even when one exists -- best effort only.
 * A no-op if @x already has an SPI; failure leaves x->id.spi == 0.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		/* Caller requested one specific SPI: take it only if
		 * no existing state already uses it. */
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		/* Probe in host byte order, convert back on store. */
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* SPI assigned: hash the state by SPI and take the
		 * reference the byspi chain owns. */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1203
/* Invoke @func on every state whose protocol matches @proto.
 *
 * A first pass counts the matches so that (a) an empty result can be
 * reported as -ENOENT and (b) @func receives a countdown as its second
 * argument: the final invocation gets 0, letting the callback detect
 * the last entry.  The whole walk runs under xfrm_state_lock, so
 * @func must not sleep or re-enter the state tables.
 *
 * Returns 0, -ENOENT, or the first non-zero value returned by @func
 * (which aborts the walk).
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	/* Pass 1: count matching states. */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto))
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	/* Pass 2: deliver each match with the remaining count. */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1239
1240
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated on of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		/* Too small a change to report yet -- unless a timeout
		 * was previously deferred (XFRM_TIME_DEFER), in which
		 * case deliver it now as a timeout event. */
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		/* Nothing changed since the last notification: defer
		 * until the next update instead of sending an empty
		 * aevent. */
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	/* Snapshot the reported replay position and notify the KMs. */
	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	/* Re-arm the replay timer; take a reference if it was idle. */
	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
		xfrm_state_hold(x);
		x->xflags &= ~XFRM_TIME_DEFER;
	}
}
EXPORT_SYMBOL(xfrm_replay_notify);
1290
/* Replay timer callback: periodically push replay-window updates to
 * the key managers (or defer if no KM is listening for aevents).
 * Drops the state reference taken when the timer was armed. */
static void xfrm_replay_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;

	spin_lock(&x->lock);

	if (x->km.state == XFRM_STATE_VALID) {
		if (xfrm_aevent_is_on())
			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
		else
			/* Nobody listening: remember to report later. */
			x->xflags |= XFRM_TIME_DEFER;
	}

	spin_unlock(&x->lock);
	xfrm_state_put(x);
}
1307
1308 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1309 {
1310         u32 diff;
1311
1312         seq = ntohl(seq);
1313
1314         if (unlikely(seq == 0))
1315                 return -EINVAL;
1316
1317         if (likely(seq > x->replay.seq))
1318                 return 0;
1319
1320         diff = x->replay.seq - seq;
1321         if (diff >= x->props.replay_window) {
1322                 x->stats.replay_window++;
1323                 return -EINVAL;
1324         }
1325
1326         if (x->replay.bitmap & (1U << diff)) {
1327                 x->stats.replay++;
1328                 return -EINVAL;
1329         }
1330         return 0;
1331 }
1332 EXPORT_SYMBOL(xfrm_replay_check);
1333
1334 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1335 {
1336         u32 diff;
1337
1338         seq = ntohl(seq);
1339
1340         if (seq > x->replay.seq) {
1341                 diff = seq - x->replay.seq;
1342                 if (diff < x->props.replay_window)
1343                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1344                 else
1345                         x->replay.bitmap = 1;
1346                 x->replay.seq = seq;
1347         } else {
1348                 diff = x->replay.seq - seq;
1349                 x->replay.bitmap |= (1U << diff);
1350         }
1351
1352         if (xfrm_aevent_is_on())
1353                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1354 }
1355 EXPORT_SYMBOL(xfrm_replay_advance);
1356
1357 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1358 static DEFINE_RWLOCK(xfrm_km_lock);
1359
1360 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1361 {
1362         struct xfrm_mgr *km;
1363
1364         read_lock(&xfrm_km_lock);
1365         list_for_each_entry(km, &xfrm_km_list, list)
1366                 if (km->notify_policy)
1367                         km->notify_policy(xp, dir, c);
1368         read_unlock(&xfrm_km_lock);
1369 }
1370
1371 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1372 {
1373         struct xfrm_mgr *km;
1374         read_lock(&xfrm_km_lock);
1375         list_for_each_entry(km, &xfrm_km_list, list)
1376                 if (km->notify)
1377                         km->notify(x, c);
1378         read_unlock(&xfrm_km_lock);
1379 }
1380
1381 EXPORT_SYMBOL(km_policy_notify);
1382 EXPORT_SYMBOL(km_state_notify);
1383
1384 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1385 {
1386         struct km_event c;
1387
1388         c.data.hard = hard;
1389         c.pid = pid;
1390         c.event = XFRM_MSG_EXPIRE;
1391         km_state_notify(x, &c);
1392
1393         if (hard)
1394                 wake_up(&km_waitq);
1395 }
1396
1397 EXPORT_SYMBOL(km_state_expired);
1398 /*
1399  * We send to all registered managers regardless of failure
1400  * We are happy with one success
1401 */
1402 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1403 {
1404         int err = -EINVAL, acqret;
1405         struct xfrm_mgr *km;
1406
1407         read_lock(&xfrm_km_lock);
1408         list_for_each_entry(km, &xfrm_km_list, list) {
1409                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1410                 if (!acqret)
1411                         err = acqret;
1412         }
1413         read_unlock(&xfrm_km_lock);
1414         return err;
1415 }
1416 EXPORT_SYMBOL(km_query);
1417
1418 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1419 {
1420         int err = -EINVAL;
1421         struct xfrm_mgr *km;
1422
1423         read_lock(&xfrm_km_lock);
1424         list_for_each_entry(km, &xfrm_km_list, list) {
1425                 if (km->new_mapping)
1426                         err = km->new_mapping(x, ipaddr, sport);
1427                 if (!err)
1428                         break;
1429         }
1430         read_unlock(&xfrm_km_lock);
1431         return err;
1432 }
1433 EXPORT_SYMBOL(km_new_mapping);
1434
1435 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1436 {
1437         struct km_event c;
1438
1439         c.data.hard = hard;
1440         c.pid = pid;
1441         c.event = XFRM_MSG_POLEXPIRE;
1442         km_policy_notify(pol, dir, &c);
1443
1444         if (hard)
1445                 wake_up(&km_waitq);
1446 }
1447 EXPORT_SYMBOL(km_policy_expired);
1448
1449 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1450 {
1451         int err = -EINVAL;
1452         int ret;
1453         struct xfrm_mgr *km;
1454
1455         read_lock(&xfrm_km_lock);
1456         list_for_each_entry(km, &xfrm_km_list, list) {
1457                 if (km->report) {
1458                         ret = km->report(proto, sel, addr);
1459                         if (!ret)
1460                                 err = ret;
1461                 }
1462         }
1463         read_unlock(&xfrm_km_lock);
1464         return err;
1465 }
1466 EXPORT_SYMBOL(km_report);
1467
/* Install a per-socket policy supplied by userspace via setsockopt.
 *
 * The raw option buffer is copied in and offered to each registered
 * key manager's compile_policy hook until one parses it.  On success
 * the compiled policy is attached to the socket.
 *
 * NOTE(review): a non-negative err from compile_policy appears to
 * carry the policy direction -- it is passed as the second argument
 * to xfrm_sk_policy_insert(); confirm against the KM implementations.
 *
 * Returns 0 on success or a negative errno.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	/* First key manager that understands the format wins. */
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1507
1508 int xfrm_register_km(struct xfrm_mgr *km)
1509 {
1510         write_lock_bh(&xfrm_km_lock);
1511         list_add_tail(&km->list, &xfrm_km_list);
1512         write_unlock_bh(&xfrm_km_lock);
1513         return 0;
1514 }
1515 EXPORT_SYMBOL(xfrm_register_km);
1516
1517 int xfrm_unregister_km(struct xfrm_mgr *km)
1518 {
1519         write_lock_bh(&xfrm_km_lock);
1520         list_del(&km->list);
1521         write_unlock_bh(&xfrm_km_lock);
1522         return 0;
1523 }
1524 EXPORT_SYMBOL(xfrm_unregister_km);
1525
1526 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1527 {
1528         int err = 0;
1529         if (unlikely(afinfo == NULL))
1530                 return -EINVAL;
1531         if (unlikely(afinfo->family >= NPROTO))
1532                 return -EAFNOSUPPORT;
1533         write_lock_bh(&xfrm_state_afinfo_lock);
1534         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1535                 err = -ENOBUFS;
1536         else
1537                 xfrm_state_afinfo[afinfo->family] = afinfo;
1538         write_unlock_bh(&xfrm_state_afinfo_lock);
1539         return err;
1540 }
1541 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1542
1543 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1544 {
1545         int err = 0;
1546         if (unlikely(afinfo == NULL))
1547                 return -EINVAL;
1548         if (unlikely(afinfo->family >= NPROTO))
1549                 return -EAFNOSUPPORT;
1550         write_lock_bh(&xfrm_state_afinfo_lock);
1551         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1552                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1553                         err = -EINVAL;
1554                 else
1555                         xfrm_state_afinfo[afinfo->family] = NULL;
1556         }
1557         write_unlock_bh(&xfrm_state_afinfo_lock);
1558         return err;
1559 }
1560 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1561
/* Look up the afinfo for @family.
 *
 * NOTE: on success this returns with xfrm_state_afinfo_lock held for
 * reading; the caller MUST release it via xfrm_state_put_afinfo().
 * On failure (unknown family or unregistered) the lock is already
 * dropped and NULL is returned.
 */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1573
/* Release the read lock taken by a successful xfrm_state_get_afinfo().
 * The afinfo argument itself is unused; it only documents the pairing. */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1578
1579 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1580 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1581 {
1582         if (x->tunnel) {
1583                 struct xfrm_state *t = x->tunnel;
1584
1585                 if (atomic_read(&t->tunnel_users) == 2)
1586                         xfrm_state_delete(t);
1587                 atomic_dec(&t->tunnel_users);
1588                 xfrm_state_put(t);
1589                 x->tunnel = NULL;
1590         }
1591 }
1592 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1593
1594 /*
1595  * This function is NOT optimal.  For example, with ESP it will give an
1596  * MTU that's usually two bytes short of being optimal.  However, it will
1597  * usually give an answer that's a multiple of 4 provided the input is
1598  * also a multiple of 4.
1599  */
1600 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1601 {
1602         int res = mtu;
1603
1604         res -= x->props.header_len;
1605
1606         for (;;) {
1607                 int m = res;
1608
1609                 if (m < 68)
1610                         return 68;
1611
1612                 spin_lock_bh(&x->lock);
1613                 if (x->km.state == XFRM_STATE_VALID &&
1614                     x->type && x->type->get_max_size)
1615                         m = x->type->get_max_size(x, m);
1616                 else
1617                         m += x->props.header_len;
1618                 spin_unlock_bh(&x->lock);
1619
1620                 if (m <= mtu)
1621                         break;
1622                 res -= (m - mtu);
1623         }
1624
1625         return res;
1626 }
1627
1628 int xfrm_init_state(struct xfrm_state *x)
1629 {
1630         struct xfrm_state_afinfo *afinfo;
1631         int family = x->props.family;
1632         int err;
1633
1634         err = -EAFNOSUPPORT;
1635         afinfo = xfrm_state_get_afinfo(family);
1636         if (!afinfo)
1637                 goto error;
1638
1639         err = 0;
1640         if (afinfo->init_flags)
1641                 err = afinfo->init_flags(x);
1642
1643         xfrm_state_put_afinfo(afinfo);
1644
1645         if (err)
1646                 goto error;
1647
1648         err = -EPROTONOSUPPORT;
1649         x->type = xfrm_get_type(x->id.proto, family);
1650         if (x->type == NULL)
1651                 goto error;
1652
1653         err = x->type->init_state(x);
1654         if (err)
1655                 goto error;
1656
1657         x->mode = xfrm_get_mode(x->props.mode, family);
1658         if (x->mode == NULL)
1659                 goto error;
1660
1661         x->km.state = XFRM_STATE_VALID;
1662
1663 error:
1664         return err;
1665 }
1666
1667 EXPORT_SYMBOL(xfrm_init_state);
1668  
1669 void __init xfrm_state_init(void)
1670 {
1671         unsigned int sz;
1672
1673         sz = sizeof(struct hlist_head) * 8;
1674
1675         xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1676         xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1677         xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1678         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1679                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1680         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1681
1682         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1683 }
1684