6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
27 EXPORT_SYMBOL(xfrm_nl);
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35 /* Each xfrm_state may be linked to two tables:
37 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38 2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39 destination/tunnel endpoint. (output)
42 static DEFINE_SPINLOCK(xfrm_state_lock);
44 /* Hash table to find appropriate SA towards given target (endpoint
45 * of tunnel or destination of transport mode) allowed by selector.
47 * Main use is finding SA after policy selected tunnel or transport mode.
48 * Also, it can be used by ah/esp icmp error handler to find offending SA.
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
55 static unsigned int xfrm_state_num;
56 static unsigned int xfrm_state_genid;
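/*
 * In addition to the two tables described above, every state is also
 * chained into xfrm_state_bysrc, keyed on (saddr, family), which backs
 * the address-based lookups further down.  All three tables share
 * xfrm_state_hmask, so with the initial eight-bucket tables set up in
 * xfrm_state_init() a chain is picked roughly like this:
 *
 *	unsigned int hmask  = 8 - 1;		eight buckets, hmask == 7
 *	unsigned int bucket = hash & hmask;	any hash lands in 0..7
 */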
58 static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
60 return ntohl(addr->a4);
63 static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
65 return ntohl(addr->a6[2] ^ addr->a6[3]);
68 static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
70 return ntohl(daddr->a4 ^ saddr->a4);
73 static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
75 return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
76 saddr->a6[2] ^ saddr->a6[3]);
79 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr,
80 xfrm_address_t *saddr,
81 u32 reqid, unsigned short family,
84 unsigned int h = family ^ reqid;
87 h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
90 h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
93 return (h ^ (h >> 16)) & hmask;
96 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
97 xfrm_address_t *saddr,
99 unsigned short family)
101 return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
104 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
107 unsigned int h = family;
110 h ^= __xfrm4_addr_hash(addr);
113 h ^= __xfrm6_addr_hash(addr);
116 return (h ^ (h >> 16)) & hmask;
119 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
121 return __xfrm_src_hash(addr, family, xfrm_state_hmask);
124 static inline unsigned int
125 __xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto,
126 unsigned short family, unsigned int hmask)
128 unsigned int h = spi ^ proto;
131 h ^= __xfrm4_addr_hash(daddr);
134 h ^= __xfrm6_addr_hash(daddr);
137 return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
140 static inline unsigned int
141 xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
143 return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
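/*
 * Worked example of the fold above, with purely illustrative values:
 * IPv4 daddr 10.0.0.1, spi 0x1234, proto IPPROTO_ESP (50) and hmask 7.
 *
 *	unsigned int h = 0x1234 ^ 50;		spi ^ proto
 *	h ^= ntohl(daddr->a4);			mix in 10.0.0.1
 *	h = (h ^ (h >> 10) ^ (h >> 20)) & 7;	fold and mask to a bucket
 *
 * The shifts fold the upper bits of the 32-bit value down so that even
 * a small hmask still sees entropy from the whole address and SPI.
 */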
146 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
148 struct hlist_head *n;
151 n = kmalloc(sz, GFP_KERNEL);
153 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
155 n = (struct hlist_head *)
156 __get_free_pages(GFP_KERNEL, get_order(sz));
164 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
171 free_pages((unsigned long)n, get_order(sz));
174 static void xfrm_hash_transfer(struct hlist_head *list,
175 struct hlist_head *ndsttable,
176 struct hlist_head *nsrctable,
177 struct hlist_head *nspitable,
178 unsigned int nhashmask)
180 struct hlist_node *entry, *tmp;
181 struct xfrm_state *x;
183 hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
186 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
187 x->props.reqid, x->props.family,
189 hlist_add_head(&x->bydst, ndsttable+h);
191 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
193 hlist_add_head(&x->bysrc, nsrctable+h);
195 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
196 x->props.family, nhashmask);
197 hlist_add_head(&x->byspi, nspitable+h);
201 static unsigned long xfrm_hash_new_size(void)
203 return ((xfrm_state_hmask + 1) << 1) *
204 sizeof(struct hlist_head);
207 static DEFINE_MUTEX(hash_resize_mutex);
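/*
 * Summary of the resize below: allocate three new tables of double the
 * current size, walk every old bydst chain and rehash each state into
 * all three new tables with xfrm_hash_transfer() under xfrm_state_lock,
 * swap the table pointers and the hmask, then free the old tables once
 * the lock is dropped.  hash_resize_mutex only serialises concurrent
 * resize work items.
 */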
209 static void xfrm_hash_resize(void *__unused)
211 struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
212 unsigned long nsize, osize;
213 unsigned int nhashmask, ohashmask;
216 mutex_lock(&hash_resize_mutex);
218 nsize = xfrm_hash_new_size();
219 ndst = xfrm_state_hash_alloc(nsize);
222 nsrc = xfrm_state_hash_alloc(nsize);
224 xfrm_state_hash_free(ndst, nsize);
227 nspi = xfrm_state_hash_alloc(nsize);
229 xfrm_state_hash_free(ndst, nsize);
230 xfrm_state_hash_free(nsrc, nsize);
234 spin_lock_bh(&xfrm_state_lock);
236 nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
237 for (i = xfrm_state_hmask; i >= 0; i--)
238 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
241 odst = xfrm_state_bydst;
242 osrc = xfrm_state_bysrc;
243 ospi = xfrm_state_byspi;
244 ohashmask = xfrm_state_hmask;
246 xfrm_state_bydst = ndst;
247 xfrm_state_bysrc = nsrc;
248 xfrm_state_byspi = nspi;
249 xfrm_state_hmask = nhashmask;
251 spin_unlock_bh(&xfrm_state_lock);
253 osize = (ohashmask + 1) * sizeof(struct hlist_head);
254 xfrm_state_hash_free(odst, osize);
255 xfrm_state_hash_free(osrc, osize);
256 xfrm_state_hash_free(ospi, osize);
259 mutex_unlock(&hash_resize_mutex);
262 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
264 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
265 EXPORT_SYMBOL(km_waitq);
267 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
268 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
270 static struct work_struct xfrm_state_gc_work;
271 static HLIST_HEAD(xfrm_state_gc_list);
272 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
274 int __xfrm_state_delete(struct xfrm_state *x);
276 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
277 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
279 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
280 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
282 static void xfrm_state_gc_destroy(struct xfrm_state *x)
284 del_timer_sync(&x->timer);
285 del_timer_sync(&x->rtimer);
292 xfrm_put_mode(x->mode);
294 x->type->destructor(x);
295 xfrm_put_type(x->type);
297 security_xfrm_state_free(x);
301 static void xfrm_state_gc_task(void *data)
303 struct xfrm_state *x;
304 struct hlist_node *entry, *tmp;
305 struct hlist_head gc_list;
307 spin_lock_bh(&xfrm_state_gc_lock);
308 gc_list.first = xfrm_state_gc_list.first;
309 INIT_HLIST_HEAD(&xfrm_state_gc_list);
310 spin_unlock_bh(&xfrm_state_gc_lock);
312 hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
313 xfrm_state_gc_destroy(x);
318 static inline unsigned long make_jiffies(long secs)
320 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
321 return MAX_SCHEDULE_TIMEOUT-1;
326 static void xfrm_timer_handler(unsigned long data)
328 struct xfrm_state *x = (struct xfrm_state*)data;
329 unsigned long now = (unsigned long)xtime.tv_sec;
330 long next = LONG_MAX;
334 if (x->km.state == XFRM_STATE_DEAD)
336 if (x->km.state == XFRM_STATE_EXPIRED)
338 if (x->lft.hard_add_expires_seconds) {
339 long tmo = x->lft.hard_add_expires_seconds +
340 x->curlft.add_time - now;
346 if (x->lft.hard_use_expires_seconds) {
347 long tmo = x->lft.hard_use_expires_seconds +
348 (x->curlft.use_time ? : now) - now;
356 if (x->lft.soft_add_expires_seconds) {
357 long tmo = x->lft.soft_add_expires_seconds +
358 x->curlft.add_time - now;
364 if (x->lft.soft_use_expires_seconds) {
365 long tmo = x->lft.soft_use_expires_seconds +
366 (x->curlft.use_time ? : now) - now;
375 km_state_expired(x, 0, 0);
377 if (next != LONG_MAX)
378 mod_timer(&x->timer, jiffies + make_jiffies(next));
383 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
384 x->km.state = XFRM_STATE_EXPIRED;
389 if (!__xfrm_state_delete(x) && x->id.spi)
390 km_state_expired(x, 1, 0);
393 spin_unlock(&x->lock);
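/*
 * Rough lifetime arithmetic, with illustrative numbers: a state added at
 * add_time 1000 with hard_add_expires_seconds 3600 hard-expires once
 * xtime reaches 4600; at now == 2000 the handler computes tmo == 2600
 * and re-arms the timer for the smallest pending tmo via make_jiffies().
 * Soft limits only call km_state_expired(x, 0, 0) so the key manager can
 * renegotiate early; hard limits delete the state, notifying with
 * hard == 1 when it already has an SPI.
 */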
396 static void xfrm_replay_timer_handler(unsigned long data);
398 struct xfrm_state *xfrm_state_alloc(void)
400 struct xfrm_state *x;
402 x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
405 atomic_set(&x->refcnt, 1);
406 atomic_set(&x->tunnel_users, 0);
407 INIT_HLIST_NODE(&x->bydst);
408 INIT_HLIST_NODE(&x->bysrc);
409 INIT_HLIST_NODE(&x->byspi);
410 init_timer(&x->timer);
411 x->timer.function = xfrm_timer_handler;
412 x->timer.data = (unsigned long)x;
413 init_timer(&x->rtimer);
414 x->rtimer.function = xfrm_replay_timer_handler;
415 x->rtimer.data = (unsigned long)x;
416 x->curlft.add_time = (unsigned long)xtime.tv_sec;
417 x->lft.soft_byte_limit = XFRM_INF;
418 x->lft.soft_packet_limit = XFRM_INF;
419 x->lft.hard_byte_limit = XFRM_INF;
420 x->lft.hard_packet_limit = XFRM_INF;
421 x->replay_maxage = 0;
422 x->replay_maxdiff = 0;
423 spin_lock_init(&x->lock);
427 EXPORT_SYMBOL(xfrm_state_alloc);
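/*
 * Rough usage sketch (this is what a key manager such as xfrm_user or
 * af_key does, give or take error handling):
 *
 *	struct xfrm_state *x = xfrm_state_alloc();
 *	if (!x)
 *		return -ENOMEM;
 *	... fill in x->id, x->props, x->sel, x->lft, algorithms ...
 *	if (xfrm_init_state(x) == 0 && xfrm_state_add(x) == 0)
 *		return 0;	the state is now live in the hash tables
 *	x->km.state = XFRM_STATE_DEAD;
 *	xfrm_state_put(x);	drops the reference taken at allocation
 *	return -EINVAL;
 */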
429 void __xfrm_state_destroy(struct xfrm_state *x)
431 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
433 spin_lock_bh(&xfrm_state_gc_lock);
434 hlist_add_head(&x->bydst, &xfrm_state_gc_list);
435 spin_unlock_bh(&xfrm_state_gc_lock);
436 schedule_work(&xfrm_state_gc_work);
438 EXPORT_SYMBOL(__xfrm_state_destroy);
440 int __xfrm_state_delete(struct xfrm_state *x)
444 if (x->km.state != XFRM_STATE_DEAD) {
445 x->km.state = XFRM_STATE_DEAD;
446 spin_lock(&xfrm_state_lock);
447 hlist_del(&x->bydst);
448 hlist_del(&x->bysrc);
450 hlist_del(&x->byspi);
452 spin_unlock(&xfrm_state_lock);
454 /* All xfrm_state objects are created by xfrm_state_alloc.
455 * The xfrm_state_alloc call gives a reference, and that
456 * is what we are dropping here.
464 EXPORT_SYMBOL(__xfrm_state_delete);
466 int xfrm_state_delete(struct xfrm_state *x)
470 spin_lock_bh(&x->lock);
471 err = __xfrm_state_delete(x);
472 spin_unlock_bh(&x->lock);
476 EXPORT_SYMBOL(xfrm_state_delete);
478 void xfrm_state_flush(u8 proto)
482 spin_lock_bh(&xfrm_state_lock);
483 for (i = 0; i < xfrm_state_hmask; i++) {
484 struct hlist_node *entry;
485 struct xfrm_state *x;
487 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
488 if (!xfrm_state_kern(x) &&
489 xfrm_id_proto_match(x->id.proto, proto)) {
491 spin_unlock_bh(&xfrm_state_lock);
493 xfrm_state_delete(x);
496 spin_lock_bh(&xfrm_state_lock);
501 spin_unlock_bh(&xfrm_state_lock);
504 EXPORT_SYMBOL(xfrm_state_flush);
507 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
508 struct xfrm_tmpl *tmpl,
509 xfrm_address_t *daddr, xfrm_address_t *saddr,
510 unsigned short family)
512 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
515 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
516 xfrm_state_put_afinfo(afinfo);
520 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
522 unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
523 struct xfrm_state *x;
524 struct hlist_node *entry;
526 hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
527 if (x->props.family != family ||
529 x->id.proto != proto)
534 if (x->id.daddr.a4 != daddr->a4)
538 if (!ipv6_addr_equal((struct in6_addr *)daddr,
552 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
554 unsigned int h = xfrm_src_hash(saddr, family);
555 struct xfrm_state *x;
556 struct hlist_node *entry;
558 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
559 if (x->props.family != family ||
560 x->id.proto != proto)
565 if (x->id.daddr.a4 != daddr->a4 ||
566 x->props.saddr.a4 != saddr->a4)
570 if (!ipv6_addr_equal((struct in6_addr *)daddr,
573 !ipv6_addr_equal((struct in6_addr *)saddr,
587 static inline struct xfrm_state *
588 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
591 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
592 x->id.proto, family);
594 return __xfrm_state_lookup_byaddr(&x->id.daddr,
596 x->id.proto, family);
600 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
601 struct flowi *fl, struct xfrm_tmpl *tmpl,
602 struct xfrm_policy *pol, int *err,
603 unsigned short family)
605 unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
606 struct hlist_node *entry;
607 struct xfrm_state *x, *x0;
608 int acquire_in_progress = 0;
610 struct xfrm_state *best = NULL;
612 spin_lock_bh(&xfrm_state_lock);
613 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
614 if (x->props.family == family &&
615 x->props.reqid == tmpl->reqid &&
616 !(x->props.flags & XFRM_STATE_WILDRECV) &&
617 xfrm_state_addr_check(x, daddr, saddr, family) &&
618 tmpl->mode == x->props.mode &&
619 tmpl->id.proto == x->id.proto &&
620 (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
622 1. There is a valid state with matching selector.
624 2. Valid state with inappropriate selector. Skip.
626 Entering area of "sysdeps".
628 3. If the state is not valid, its selector is temporary:
629 it matches only the session which triggered the
630 previous resolution. The key manager will then do
631 something to install a state with a proper
634 if (x->km.state == XFRM_STATE_VALID) {
635 if (!xfrm_selector_match(&x->sel, fl, family) ||
636 !security_xfrm_state_pol_flow_match(x, pol, fl))
639 best->km.dying > x->km.dying ||
640 (best->km.dying == x->km.dying &&
641 best->curlft.add_time < x->curlft.add_time))
643 } else if (x->km.state == XFRM_STATE_ACQ) {
644 acquire_in_progress = 1;
645 } else if (x->km.state == XFRM_STATE_ERROR ||
646 x->km.state == XFRM_STATE_EXPIRED) {
647 if (xfrm_selector_match(&x->sel, fl, family) &&
648 security_xfrm_state_pol_flow_match(x, pol, fl))
655 if (!x && !error && !acquire_in_progress) {
657 (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
658 tmpl->id.proto, family)) != NULL) {
663 x = xfrm_state_alloc();
668 /* Initialize temporary selector matching only
669 * to current session. */
670 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
672 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
674 x->km.state = XFRM_STATE_DEAD;
680 if (km_query(x, tmpl, pol) == 0) {
681 x->km.state = XFRM_STATE_ACQ;
682 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
683 h = xfrm_src_hash(saddr, family);
684 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
686 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
687 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
689 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
690 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
691 add_timer(&x->timer);
693 x->km.state = XFRM_STATE_DEAD;
703 *err = acquire_in_progress ? -EAGAIN : error;
704 spin_unlock_bh(&xfrm_state_lock);
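/*
 * Summary of the resolution outcomes above: a usable XFRM_STATE_VALID
 * match is returned with a reference held; a pending XFRM_STATE_ACQ
 * entry with no valid match makes the lookup fail with -EAGAIN; and when
 * nothing matches, a larval ACQ state is created with a temporary
 * selector, km_query() asks the key managers to negotiate it, and the
 * larval entry times out after XFRM_ACQ_EXPIRES seconds if nobody does.
 */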
708 static void __xfrm_state_insert(struct xfrm_state *x)
712 x->genid = ++xfrm_state_genid;
714 h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
715 x->props.reqid, x->props.family);
716 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
718 h = xfrm_src_hash(&x->props.saddr, x->props.family);
719 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
721 if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
722 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
725 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
728 mod_timer(&x->timer, jiffies + HZ);
729 if (x->replay_maxage)
730 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
736 if (x->bydst.next != NULL &&
737 (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
738 xfrm_state_num > xfrm_state_hmask)
739 schedule_work(&xfrm_hash_work);
742 /* xfrm_state_lock is held */
743 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
745 unsigned short family = xnew->props.family;
746 u32 reqid = xnew->props.reqid;
747 struct xfrm_state *x;
748 struct hlist_node *entry;
751 h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
752 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
753 if (x->props.family == family &&
754 x->props.reqid == reqid &&
755 !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
756 !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
757 x->genid = xfrm_state_genid;
761 void xfrm_state_insert(struct xfrm_state *x)
763 spin_lock_bh(&xfrm_state_lock);
764 __xfrm_state_bump_genids(x);
765 __xfrm_state_insert(x);
766 spin_unlock_bh(&xfrm_state_lock);
768 EXPORT_SYMBOL(xfrm_state_insert);
770 /* xfrm_state_lock is held */
771 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
773 unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
774 struct hlist_node *entry;
775 struct xfrm_state *x;
777 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
778 if (x->props.reqid != reqid ||
779 x->props.mode != mode ||
780 x->props.family != family ||
781 x->km.state != XFRM_STATE_ACQ ||
787 if (x->id.daddr.a4 != daddr->a4 ||
788 x->props.saddr.a4 != saddr->a4)
792 if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
793 (struct in6_addr *)daddr) ||
794 !ipv6_addr_equal((struct in6_addr *)
796 (struct in6_addr *)saddr))
808 x = xfrm_state_alloc();
812 x->sel.daddr.a4 = daddr->a4;
813 x->sel.saddr.a4 = saddr->a4;
814 x->sel.prefixlen_d = 32;
815 x->sel.prefixlen_s = 32;
816 x->props.saddr.a4 = saddr->a4;
817 x->id.daddr.a4 = daddr->a4;
821 ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
822 (struct in6_addr *)daddr);
823 ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
824 (struct in6_addr *)saddr);
825 x->sel.prefixlen_d = 128;
826 x->sel.prefixlen_s = 128;
827 ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
828 (struct in6_addr *)saddr);
829 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
830 (struct in6_addr *)daddr);
834 x->km.state = XFRM_STATE_ACQ;
836 x->props.family = family;
837 x->props.mode = mode;
838 x->props.reqid = reqid;
839 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
841 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
842 add_timer(&x->timer);
843 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
844 h = xfrm_src_hash(saddr, family);
845 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
852 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
854 int xfrm_state_add(struct xfrm_state *x)
856 struct xfrm_state *x1;
859 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
861 family = x->props.family;
863 spin_lock_bh(&xfrm_state_lock);
865 x1 = __xfrm_state_locate(x, use_spi, family);
873 if (use_spi && x->km.seq) {
874 x1 = __xfrm_find_acq_byseq(x->km.seq);
875 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
882 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
884 &x->id.daddr, &x->props.saddr, 0);
886 __xfrm_state_bump_genids(x);
887 __xfrm_state_insert(x);
891 spin_unlock_bh(&xfrm_state_lock);
894 xfrm_state_delete(x1);
900 EXPORT_SYMBOL(xfrm_state_add);
902 int xfrm_state_update(struct xfrm_state *x)
904 struct xfrm_state *x1;
906 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
908 spin_lock_bh(&xfrm_state_lock);
909 x1 = __xfrm_state_locate(x, use_spi, x->props.family);
915 if (xfrm_state_kern(x1)) {
921 if (x1->km.state == XFRM_STATE_ACQ) {
922 __xfrm_state_insert(x);
928 spin_unlock_bh(&xfrm_state_lock);
934 xfrm_state_delete(x1);
940 spin_lock_bh(&x1->lock);
941 if (likely(x1->km.state == XFRM_STATE_VALID)) {
942 if (x->encap && x1->encap)
943 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
944 if (x->coaddr && x1->coaddr) {
945 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
947 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
948 memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
949 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
952 mod_timer(&x1->timer, jiffies + HZ);
953 if (x1->curlft.use_time)
954 xfrm_state_check_expire(x1);
958 spin_unlock_bh(&x1->lock);
964 EXPORT_SYMBOL(xfrm_state_update);
966 int xfrm_state_check_expire(struct xfrm_state *x)
968 if (!x->curlft.use_time)
969 x->curlft.use_time = (unsigned long)xtime.tv_sec;
971 if (x->km.state != XFRM_STATE_VALID)
974 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
975 x->curlft.packets >= x->lft.hard_packet_limit) {
976 x->km.state = XFRM_STATE_EXPIRED;
977 mod_timer(&x->timer, jiffies);
982 (x->curlft.bytes >= x->lft.soft_byte_limit ||
983 x->curlft.packets >= x->lft.soft_packet_limit)) {
985 km_state_expired(x, 0, 0);
989 EXPORT_SYMBOL(xfrm_state_check_expire);
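/*
 * For example, with lft.hard_byte_limit == 1 GB and soft_byte_limit ==
 * 900 MB: crossing 900 MB triggers km_state_expired(x, 0, 0) so a
 * replacement SA can be negotiated while traffic continues; crossing
 * 1 GB flips the state to XFRM_STATE_EXPIRED and fires the timer
 * immediately, after which further use of the state is refused.
 */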
991 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
993 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
997 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
999 /* Check tail too... */
1003 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
1005 int err = xfrm_state_check_expire(x);
1008 err = xfrm_state_check_space(x, skb);
1012 EXPORT_SYMBOL(xfrm_state_check);
1015 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1016 unsigned short family)
1018 struct xfrm_state *x;
1020 spin_lock_bh(&xfrm_state_lock);
1021 x = __xfrm_state_lookup(daddr, spi, proto, family);
1022 spin_unlock_bh(&xfrm_state_lock);
1025 EXPORT_SYMBOL(xfrm_state_lookup);
1028 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1029 u8 proto, unsigned short family)
1031 struct xfrm_state *x;
1033 spin_lock_bh(&xfrm_state_lock);
1034 x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1035 spin_unlock_bh(&xfrm_state_lock);
1038 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1041 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1042 xfrm_address_t *daddr, xfrm_address_t *saddr,
1043 int create, unsigned short family)
1045 struct xfrm_state *x;
1047 spin_lock_bh(&xfrm_state_lock);
1048 x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1049 spin_unlock_bh(&xfrm_state_lock);
1053 EXPORT_SYMBOL(xfrm_find_acq);
1055 #ifdef CONFIG_XFRM_SUB_POLICY
1057 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1058 unsigned short family)
1061 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1063 return -EAFNOSUPPORT;
1065 spin_lock_bh(&xfrm_state_lock);
1066 if (afinfo->tmpl_sort)
1067 err = afinfo->tmpl_sort(dst, src, n);
1068 spin_unlock_bh(&xfrm_state_lock);
1069 xfrm_state_put_afinfo(afinfo);
1072 EXPORT_SYMBOL(xfrm_tmpl_sort);
1075 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1076 unsigned short family)
1079 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1081 return -EAFNOSUPPORT;
1083 spin_lock_bh(&xfrm_state_lock);
1084 if (afinfo->state_sort)
1085 err = afinfo->state_sort(dst, src, n);
1086 spin_unlock_bh(&xfrm_state_lock);
1087 xfrm_state_put_afinfo(afinfo);
1090 EXPORT_SYMBOL(xfrm_state_sort);
1093 /* Silly enough, but I'm too lazy to build a resolution list */
1095 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1099 for (i = 0; i <= xfrm_state_hmask; i++) {
1100 struct hlist_node *entry;
1101 struct xfrm_state *x;
1103 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1104 if (x->km.seq == seq &&
1105 x->km.state == XFRM_STATE_ACQ) {
1114 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1116 struct xfrm_state *x;
1118 spin_lock_bh(&xfrm_state_lock);
1119 x = __xfrm_find_acq_byseq(seq);
1120 spin_unlock_bh(&xfrm_state_lock);
1123 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1125 u32 xfrm_get_acqseq(void)
1129 static DEFINE_SPINLOCK(acqseq_lock);
1131 spin_lock_bh(&acqseq_lock);
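/*
 * The "?:" form below is the GCC conditional with the middle operand
 * omitted: if ++acqseq wrapped around to zero it is incremented once
 * more, so zero is never handed out as an acquire sequence number.
 */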
1132 res = (++acqseq ? : ++acqseq);
1133 spin_unlock_bh(&acqseq_lock);
1136 EXPORT_SYMBOL(xfrm_get_acqseq);
1139 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1142 struct xfrm_state *x0;
1147 if (minspi == maxspi) {
1148 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1156 minspi = ntohl(minspi);
1157 maxspi = ntohl(maxspi);
1158 for (h=0; h<maxspi-minspi+1; h++) {
1159 spi = minspi + net_random()%(maxspi-minspi+1);
1160 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1162 x->id.spi = htonl(spi);
1169 spin_lock_bh(&xfrm_state_lock);
1170 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1171 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1172 spin_unlock_bh(&xfrm_state_lock);
1176 EXPORT_SYMBOL(xfrm_alloc_spi);
1178 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1182 struct xfrm_state *x;
1183 struct hlist_node *entry;
1187 spin_lock_bh(&xfrm_state_lock);
1188 for (i = 0; i <= xfrm_state_hmask; i++) {
1189 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1190 if (xfrm_id_proto_match(x->id.proto, proto))
1199 for (i = 0; i <= xfrm_state_hmask; i++) {
1200 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1201 if (!xfrm_id_proto_match(x->id.proto, proto))
1203 err = func(x, --count, data);
1209 spin_unlock_bh(&xfrm_state_lock);
1212 EXPORT_SYMBOL(xfrm_state_walk);
1215 void xfrm_replay_notify(struct xfrm_state *x, int event)
1218 /* we send notify messages in case
1219 * 1. we updated one of the sequence numbers, and the seqno difference
1220 * is at least x->replay_maxdiff, in this case we also update the
1221 * timeout of our timer function
1222 * 2. if x->replay_maxage has elapsed since last update,
1223 * and there were changes
1225 * The state structure must be locked!
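 *
 *  For example, with replay_maxdiff == 32: while both replay.seq and
 *  replay.oseq have advanced fewer than 32 packets past the last
 *  notified (preplay) values, an XFRM_REPLAY_UPDATE event is swallowed,
 *  or turned into a TIMEOUT when XFRM_TIME_DEFER is pending.  Once the
 *  threshold is reached, XFRM_MSG_NEWAE goes out via km_state_notify()
 *  and preplay is re-synchronised to replay.  A TIMEOUT event with no
 *  changes at all only sets XFRM_TIME_DEFER.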
1229 case XFRM_REPLAY_UPDATE:
1230 if (x->replay_maxdiff &&
1231 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1232 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1233 if (x->xflags & XFRM_TIME_DEFER)
1234 event = XFRM_REPLAY_TIMEOUT;
1241 case XFRM_REPLAY_TIMEOUT:
1242 if ((x->replay.seq == x->preplay.seq) &&
1243 (x->replay.bitmap == x->preplay.bitmap) &&
1244 (x->replay.oseq == x->preplay.oseq)) {
1245 x->xflags |= XFRM_TIME_DEFER;
1252 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1253 c.event = XFRM_MSG_NEWAE;
1254 c.data.aevent = event;
1255 km_state_notify(x, &c);
1257 if (x->replay_maxage &&
1258 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1259 x->xflags &= ~XFRM_TIME_DEFER;
1261 EXPORT_SYMBOL(xfrm_replay_notify);
1263 static void xfrm_replay_timer_handler(unsigned long data)
1265 struct xfrm_state *x = (struct xfrm_state*)data;
1267 spin_lock(&x->lock);
1269 if (x->km.state == XFRM_STATE_VALID) {
1270 if (xfrm_aevent_is_on())
1271 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1273 x->xflags |= XFRM_TIME_DEFER;
1276 spin_unlock(&x->lock);
1279 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1285 if (unlikely(seq == 0))
1288 if (likely(seq > x->replay.seq))
1291 diff = x->replay.seq - seq;
1292 if (diff >= x->props.replay_window) {
1293 x->stats.replay_window++;
1297 if (x->replay.bitmap & (1U << diff)) {
1303 EXPORT_SYMBOL(xfrm_replay_check);
1305 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1311 if (seq > x->replay.seq) {
1312 diff = seq - x->replay.seq;
1313 if (diff < x->props.replay_window)
1314 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1316 x->replay.bitmap = 1;
1317 x->replay.seq = seq;
1319 diff = x->replay.seq - seq;
1320 x->replay.bitmap |= (1U << diff);
1323 if (xfrm_aevent_is_on())
1324 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1326 EXPORT_SYMBOL(xfrm_replay_advance);
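/*
 * Replay window example, assuming props.replay_window == 32 and
 * replay.seq == 200: an incoming seq of 205 passes xfrm_replay_check()
 * and xfrm_replay_advance() shifts the bitmap left by 5 and sets bit 0;
 * seq 190 (diff 10) passes only if bit 10 is still clear, and that bit
 * is then set; seq 150 (diff 50 >= 32) is rejected and counted in
 * stats.replay_window; seq 0 is always rejected.
 */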
1328 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1329 static DEFINE_RWLOCK(xfrm_km_lock);
1331 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1333 struct xfrm_mgr *km;
1335 read_lock(&xfrm_km_lock);
1336 list_for_each_entry(km, &xfrm_km_list, list)
1337 if (km->notify_policy)
1338 km->notify_policy(xp, dir, c);
1339 read_unlock(&xfrm_km_lock);
1342 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1344 struct xfrm_mgr *km;
1345 read_lock(&xfrm_km_lock);
1346 list_for_each_entry(km, &xfrm_km_list, list)
1349 read_unlock(&xfrm_km_lock);
1352 EXPORT_SYMBOL(km_policy_notify);
1353 EXPORT_SYMBOL(km_state_notify);
1355 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1361 c.event = XFRM_MSG_EXPIRE;
1362 km_state_notify(x, &c);
1368 EXPORT_SYMBOL(km_state_expired);
1370 * We send to all registered managers regardless of failure;
1371 * we are happy with one success.
1373 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1375 int err = -EINVAL, acqret;
1376 struct xfrm_mgr *km;
1378 read_lock(&xfrm_km_lock);
1379 list_for_each_entry(km, &xfrm_km_list, list) {
1380 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1384 read_unlock(&xfrm_km_lock);
1387 EXPORT_SYMBOL(km_query);
1389 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1392 struct xfrm_mgr *km;
1394 read_lock(&xfrm_km_lock);
1395 list_for_each_entry(km, &xfrm_km_list, list) {
1396 if (km->new_mapping)
1397 err = km->new_mapping(x, ipaddr, sport);
1401 read_unlock(&xfrm_km_lock);
1404 EXPORT_SYMBOL(km_new_mapping);
1406 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1412 c.event = XFRM_MSG_POLEXPIRE;
1413 km_policy_notify(pol, dir, &c);
1418 EXPORT_SYMBOL(km_policy_expired);
1420 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1424 struct xfrm_mgr *km;
1426 read_lock(&xfrm_km_lock);
1427 list_for_each_entry(km, &xfrm_km_list, list) {
1429 ret = km->report(proto, sel, addr);
1434 read_unlock(&xfrm_km_lock);
1437 EXPORT_SYMBOL(km_report);
1439 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1443 struct xfrm_mgr *km;
1444 struct xfrm_policy *pol = NULL;
1446 if (optlen <= 0 || optlen > PAGE_SIZE)
1449 data = kmalloc(optlen, GFP_KERNEL);
1454 if (copy_from_user(data, optval, optlen))
1458 read_lock(&xfrm_km_lock);
1459 list_for_each_entry(km, &xfrm_km_list, list) {
1460 pol = km->compile_policy(sk, optname, data,
1465 read_unlock(&xfrm_km_lock);
1468 xfrm_sk_policy_insert(sk, err, pol);
1477 EXPORT_SYMBOL(xfrm_user_policy);
1479 int xfrm_register_km(struct xfrm_mgr *km)
1481 write_lock_bh(&xfrm_km_lock);
1482 list_add_tail(&km->list, &xfrm_km_list);
1483 write_unlock_bh(&xfrm_km_lock);
1486 EXPORT_SYMBOL(xfrm_register_km);
1488 int xfrm_unregister_km(struct xfrm_mgr *km)
1490 write_lock_bh(&xfrm_km_lock);
1491 list_del(&km->list);
1492 write_unlock_bh(&xfrm_km_lock);
1495 EXPORT_SYMBOL(xfrm_unregister_km);
1497 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1500 if (unlikely(afinfo == NULL))
1502 if (unlikely(afinfo->family >= NPROTO))
1503 return -EAFNOSUPPORT;
1504 write_lock_bh(&xfrm_state_afinfo_lock);
1505 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1508 xfrm_state_afinfo[afinfo->family] = afinfo;
1509 write_unlock_bh(&xfrm_state_afinfo_lock);
1512 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1514 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1517 if (unlikely(afinfo == NULL))
1519 if (unlikely(afinfo->family >= NPROTO))
1520 return -EAFNOSUPPORT;
1521 write_lock_bh(&xfrm_state_afinfo_lock);
1522 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1523 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1526 xfrm_state_afinfo[afinfo->family] = NULL;
1528 write_unlock_bh(&xfrm_state_afinfo_lock);
1531 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1533 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1535 struct xfrm_state_afinfo *afinfo;
1536 if (unlikely(family >= NPROTO))
1538 read_lock(&xfrm_state_afinfo_lock);
1539 afinfo = xfrm_state_afinfo[family];
1540 if (unlikely(!afinfo))
1541 read_unlock(&xfrm_state_afinfo_lock);
1545 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1547 read_unlock(&xfrm_state_afinfo_lock);
1550 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1551 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1554 struct xfrm_state *t = x->tunnel;
1556 if (atomic_read(&t->tunnel_users) == 2)
1557 xfrm_state_delete(t);
1558 atomic_dec(&t->tunnel_users);
1563 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1566 * This function is NOT optimal. For example, with ESP it will give an
1567 * MTU that's usually two bytes short of being optimal. However, it will
1568 * usually give an answer that's a multiple of 4 provided the input is
1569 * also a multiple of 4.
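 *
 *  A rough example: on a 1500-byte device with a hypothetical
 *  props.header_len of 32, the naive answer would be 1468 bytes of
 *  payload; when the transform supplies a get_max_size() hook (ESP
 *  does, so that padding, the trailer and the ICV are accounted for)
 *  that hook is asked for the real maximum instead, which is why the
 *  result can come out slightly conservative.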
1571 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1575 res -= x->props.header_len;
1583 spin_lock_bh(&x->lock);
1584 if (x->km.state == XFRM_STATE_VALID &&
1585 x->type && x->type->get_max_size)
1586 m = x->type->get_max_size(x, m);
1588 m += x->props.header_len;
1589 spin_unlock_bh(&x->lock);
1599 int xfrm_init_state(struct xfrm_state *x)
1601 struct xfrm_state_afinfo *afinfo;
1602 int family = x->props.family;
1605 err = -EAFNOSUPPORT;
1606 afinfo = xfrm_state_get_afinfo(family);
1611 if (afinfo->init_flags)
1612 err = afinfo->init_flags(x);
1614 xfrm_state_put_afinfo(afinfo);
1619 err = -EPROTONOSUPPORT;
1620 x->type = xfrm_get_type(x->id.proto, family);
1621 if (x->type == NULL)
1624 err = x->type->init_state(x);
1628 x->mode = xfrm_get_mode(x->props.mode, family);
1629 if (x->mode == NULL)
1632 x->km.state = XFRM_STATE_VALID;
1638 EXPORT_SYMBOL(xfrm_init_state);
1640 void __init xfrm_state_init(void)
1644 sz = sizeof(struct hlist_head) * 8;
1646 xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1647 xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1648 xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1649 if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1650 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1651 xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1653 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
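/*
 * With the initial allocation above, sz is 8 * sizeof(struct hlist_head)
 * (64 bytes on a 64-bit machine), so all three tables start with eight
 * buckets and xfrm_state_hmask == 7.  Once xfrm_state_num outgrows the
 * mask, __xfrm_state_insert() schedules xfrm_hash_work and the tables
 * double in size, up to xfrm_state_hashmax buckets.
 */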