6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
27 EXPORT_SYMBOL(xfrm_nl);
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35 /* Each xfrm_state may be linked to two tables:
37 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38 2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39 destination/tunnel endpoint. (output)
42 static DEFINE_SPINLOCK(xfrm_state_lock);
44 /* Hash table to find appropriate SA towards given target (endpoint
45 * of tunnel or destination of transport mode) allowed by selector.
47 * Main use is finding SA after policy selected tunnel or transport mode.
48 * Also, it can be used by ah/esp icmp error handler to find offending SA.
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
55 static unsigned int xfrm_state_num;
56 static unsigned int xfrm_state_genid;
58 static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
60 return ntohl(addr->a4);
63 static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
65 return ntohl(addr->a6[2]^addr->a6[3]);
68 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
69 u32 reqid, unsigned short family,
72 unsigned int h = family ^ reqid;
75 h ^= __xfrm4_addr_hash(addr);
78 h ^= __xfrm6_addr_hash(addr);
81 return (h ^ (h >> 16)) & hmask;
84 static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
85 unsigned short family)
87 return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
90 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
93 unsigned int h = family;
96 h ^= __xfrm4_addr_hash(addr);
99 h ^= __xfrm6_addr_hash(addr);
102 return (h ^ (h >> 16)) & hmask;
105 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
107 return __xfrm_src_hash(addr, family, xfrm_state_hmask);
110 static inline unsigned int
111 __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
114 unsigned int h = spi ^ proto;
117 h ^= __xfrm4_addr_hash(addr);
120 h ^= __xfrm6_addr_hash(addr);
123 return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
126 static inline unsigned int
127 xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
129 return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
132 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
134 struct hlist_head *n;
137 n = kmalloc(sz, GFP_KERNEL);
139 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
141 n = (struct hlist_head *)
142 __get_free_pages(GFP_KERNEL, get_order(sz));
150 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
157 free_pages((unsigned long)n, get_order(sz));
160 static void xfrm_hash_transfer(struct hlist_head *list,
161 struct hlist_head *ndsttable,
162 struct hlist_head *nsrctable,
163 struct hlist_head *nspitable,
164 unsigned int nhashmask)
166 struct hlist_node *entry, *tmp;
167 struct xfrm_state *x;
169 hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
172 h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
173 x->props.family, nhashmask);
174 hlist_add_head(&x->bydst, ndsttable+h);
176 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
178 hlist_add_head(&x->bysrc, nsrctable+h);
180 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
181 x->props.family, nhashmask);
182 hlist_add_head(&x->byspi, nspitable+h);
186 static unsigned long xfrm_hash_new_size(void)
188 return ((xfrm_state_hmask + 1) << 1) *
189 sizeof(struct hlist_head);
192 static DEFINE_MUTEX(hash_resize_mutex);
194 static void xfrm_hash_resize(void *__unused)
196 struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
197 unsigned long nsize, osize;
198 unsigned int nhashmask, ohashmask;
201 mutex_lock(&hash_resize_mutex);
203 nsize = xfrm_hash_new_size();
204 ndst = xfrm_state_hash_alloc(nsize);
207 nsrc = xfrm_state_hash_alloc(nsize);
209 xfrm_state_hash_free(ndst, nsize);
212 nspi = xfrm_state_hash_alloc(nsize);
214 xfrm_state_hash_free(ndst, nsize);
215 xfrm_state_hash_free(nsrc, nsize);
219 spin_lock_bh(&xfrm_state_lock);
221 nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
222 for (i = xfrm_state_hmask; i >= 0; i--)
223 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
226 odst = xfrm_state_bydst;
227 osrc = xfrm_state_bysrc;
228 ospi = xfrm_state_byspi;
229 ohashmask = xfrm_state_hmask;
231 xfrm_state_bydst = ndst;
232 xfrm_state_bysrc = nsrc;
233 xfrm_state_byspi = nspi;
234 xfrm_state_hmask = nhashmask;
236 spin_unlock_bh(&xfrm_state_lock);
238 osize = (ohashmask + 1) * sizeof(struct hlist_head);
239 xfrm_state_hash_free(odst, osize);
240 xfrm_state_hash_free(osrc, osize);
241 xfrm_state_hash_free(ospi, osize);
244 mutex_unlock(&hash_resize_mutex);
247 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
249 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
250 EXPORT_SYMBOL(km_waitq);
252 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
253 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
255 static struct work_struct xfrm_state_gc_work;
256 static HLIST_HEAD(xfrm_state_gc_list);
257 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
259 static int xfrm_state_gc_flush_bundles;
261 int __xfrm_state_delete(struct xfrm_state *x);
263 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
264 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
266 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
267 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
269 static void xfrm_state_gc_destroy(struct xfrm_state *x)
271 if (del_timer(&x->timer))
273 if (del_timer(&x->rtimer))
281 xfrm_put_mode(x->mode);
283 x->type->destructor(x);
284 xfrm_put_type(x->type);
286 security_xfrm_state_free(x);
290 static void xfrm_state_gc_task(void *data)
292 struct xfrm_state *x;
293 struct hlist_node *entry, *tmp;
294 struct hlist_head gc_list;
296 if (xfrm_state_gc_flush_bundles) {
297 xfrm_state_gc_flush_bundles = 0;
298 xfrm_flush_bundles();
301 spin_lock_bh(&xfrm_state_gc_lock);
302 gc_list.first = xfrm_state_gc_list.first;
303 INIT_HLIST_HEAD(&xfrm_state_gc_list);
304 spin_unlock_bh(&xfrm_state_gc_lock);
306 hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
307 xfrm_state_gc_destroy(x);
312 static inline unsigned long make_jiffies(long secs)
314 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
315 return MAX_SCHEDULE_TIMEOUT-1;
320 static void xfrm_timer_handler(unsigned long data)
322 struct xfrm_state *x = (struct xfrm_state*)data;
323 unsigned long now = (unsigned long)xtime.tv_sec;
324 long next = LONG_MAX;
328 if (x->km.state == XFRM_STATE_DEAD)
330 if (x->km.state == XFRM_STATE_EXPIRED)
332 if (x->lft.hard_add_expires_seconds) {
333 long tmo = x->lft.hard_add_expires_seconds +
334 x->curlft.add_time - now;
340 if (x->lft.hard_use_expires_seconds) {
341 long tmo = x->lft.hard_use_expires_seconds +
342 (x->curlft.use_time ? : now) - now;
350 if (x->lft.soft_add_expires_seconds) {
351 long tmo = x->lft.soft_add_expires_seconds +
352 x->curlft.add_time - now;
358 if (x->lft.soft_use_expires_seconds) {
359 long tmo = x->lft.soft_use_expires_seconds +
360 (x->curlft.use_time ? : now) - now;
369 km_state_expired(x, 0, 0);
371 if (next != LONG_MAX &&
372 !mod_timer(&x->timer, jiffies + make_jiffies(next)))
377 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
378 x->km.state = XFRM_STATE_EXPIRED;
383 if (!__xfrm_state_delete(x) && x->id.spi)
384 km_state_expired(x, 1, 0);
387 spin_unlock(&x->lock);
391 static void xfrm_replay_timer_handler(unsigned long data);
393 struct xfrm_state *xfrm_state_alloc(void)
395 struct xfrm_state *x;
397 x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
400 atomic_set(&x->refcnt, 1);
401 atomic_set(&x->tunnel_users, 0);
402 INIT_HLIST_NODE(&x->bydst);
403 INIT_HLIST_NODE(&x->bysrc);
404 INIT_HLIST_NODE(&x->byspi);
405 init_timer(&x->timer);
406 x->timer.function = xfrm_timer_handler;
407 x->timer.data = (unsigned long)x;
408 init_timer(&x->rtimer);
409 x->rtimer.function = xfrm_replay_timer_handler;
410 x->rtimer.data = (unsigned long)x;
411 x->curlft.add_time = (unsigned long)xtime.tv_sec;
412 x->lft.soft_byte_limit = XFRM_INF;
413 x->lft.soft_packet_limit = XFRM_INF;
414 x->lft.hard_byte_limit = XFRM_INF;
415 x->lft.hard_packet_limit = XFRM_INF;
416 x->replay_maxage = 0;
417 x->replay_maxdiff = 0;
418 spin_lock_init(&x->lock);
422 EXPORT_SYMBOL(xfrm_state_alloc);
424 void __xfrm_state_destroy(struct xfrm_state *x)
426 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
428 spin_lock_bh(&xfrm_state_gc_lock);
429 hlist_add_head(&x->bydst, &xfrm_state_gc_list);
430 spin_unlock_bh(&xfrm_state_gc_lock);
431 schedule_work(&xfrm_state_gc_work);
433 EXPORT_SYMBOL(__xfrm_state_destroy);
435 int __xfrm_state_delete(struct xfrm_state *x)
439 if (x->km.state != XFRM_STATE_DEAD) {
440 x->km.state = XFRM_STATE_DEAD;
441 spin_lock(&xfrm_state_lock);
442 hlist_del(&x->bydst);
444 hlist_del(&x->bysrc);
447 hlist_del(&x->byspi);
451 spin_unlock(&xfrm_state_lock);
452 if (del_timer(&x->timer))
454 if (del_timer(&x->rtimer))
457 /* The number two in this test is the reference
458 * mentioned in the comment below plus the reference
459 * our caller holds. A larger value means that
460 * there are DSTs attached to this xfrm_state.
462 if (atomic_read(&x->refcnt) > 2) {
463 xfrm_state_gc_flush_bundles = 1;
464 schedule_work(&xfrm_state_gc_work);
467 /* All xfrm_state objects are created by xfrm_state_alloc.
468 * The xfrm_state_alloc call gives a reference, and that
469 * is what we are dropping here.
477 EXPORT_SYMBOL(__xfrm_state_delete);
479 int xfrm_state_delete(struct xfrm_state *x)
483 spin_lock_bh(&x->lock);
484 err = __xfrm_state_delete(x);
485 spin_unlock_bh(&x->lock);
489 EXPORT_SYMBOL(xfrm_state_delete);
491 void xfrm_state_flush(u8 proto)
495 spin_lock_bh(&xfrm_state_lock);
496 for (i = 0; i < xfrm_state_hmask; i++) {
497 struct hlist_node *entry;
498 struct xfrm_state *x;
500 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
501 if (!xfrm_state_kern(x) &&
502 xfrm_id_proto_match(x->id.proto, proto)) {
504 spin_unlock_bh(&xfrm_state_lock);
506 xfrm_state_delete(x);
509 spin_lock_bh(&xfrm_state_lock);
514 spin_unlock_bh(&xfrm_state_lock);
517 EXPORT_SYMBOL(xfrm_state_flush);
520 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
521 struct xfrm_tmpl *tmpl,
522 xfrm_address_t *daddr, xfrm_address_t *saddr,
523 unsigned short family)
525 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
528 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
529 xfrm_state_put_afinfo(afinfo);
533 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
535 unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
536 struct xfrm_state *x;
537 struct hlist_node *entry;
539 hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
540 if (x->props.family != family ||
542 x->id.proto != proto)
547 if (x->id.daddr.a4 != daddr->a4)
551 if (!ipv6_addr_equal((struct in6_addr *)daddr,
565 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
567 unsigned int h = xfrm_src_hash(saddr, family);
568 struct xfrm_state *x;
569 struct hlist_node *entry;
571 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
572 if (x->props.family != family ||
573 x->id.proto != proto)
578 if (x->id.daddr.a4 != daddr->a4 ||
579 x->props.saddr.a4 != saddr->a4)
583 if (!ipv6_addr_equal((struct in6_addr *)daddr,
586 !ipv6_addr_equal((struct in6_addr *)saddr,
600 static inline struct xfrm_state *
601 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
604 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
605 x->id.proto, family);
607 return __xfrm_state_lookup_byaddr(&x->id.daddr,
609 x->id.proto, family);
613 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
614 struct flowi *fl, struct xfrm_tmpl *tmpl,
615 struct xfrm_policy *pol, int *err,
616 unsigned short family)
618 unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
619 struct hlist_node *entry;
620 struct xfrm_state *x, *x0;
621 int acquire_in_progress = 0;
623 struct xfrm_state *best = NULL;
625 spin_lock_bh(&xfrm_state_lock);
626 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
627 if (x->props.family == family &&
628 x->props.reqid == tmpl->reqid &&
629 !(x->props.flags & XFRM_STATE_WILDRECV) &&
630 xfrm_state_addr_check(x, daddr, saddr, family) &&
631 tmpl->mode == x->props.mode &&
632 tmpl->id.proto == x->id.proto &&
633 (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
635 1. There is a valid state with matching selector.
637 2. Valid state with inappropriate selector. Skip.
639 Entering area of "sysdeps".
641 3. If state is not valid, selector is temporary,
642 it selects only session which triggered
643 previous resolution. Key manager will do
644 something to install a state with proper
647 if (x->km.state == XFRM_STATE_VALID) {
648 if (!xfrm_selector_match(&x->sel, fl, family) ||
649 !security_xfrm_state_pol_flow_match(x, pol, fl))
652 best->km.dying > x->km.dying ||
653 (best->km.dying == x->km.dying &&
654 best->curlft.add_time < x->curlft.add_time))
656 } else if (x->km.state == XFRM_STATE_ACQ) {
657 acquire_in_progress = 1;
658 } else if (x->km.state == XFRM_STATE_ERROR ||
659 x->km.state == XFRM_STATE_EXPIRED) {
660 if (xfrm_selector_match(&x->sel, fl, family) &&
661 security_xfrm_state_pol_flow_match(x, pol, fl))
668 if (!x && !error && !acquire_in_progress) {
670 (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
671 tmpl->id.proto, family)) != NULL) {
676 x = xfrm_state_alloc();
681 /* Initialize temporary selector matching only
682 * to current session. */
683 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
685 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
687 x->km.state = XFRM_STATE_DEAD;
693 if (km_query(x, tmpl, pol) == 0) {
694 x->km.state = XFRM_STATE_ACQ;
695 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
697 h = xfrm_src_hash(saddr, family);
698 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
701 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
702 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
705 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
707 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
708 add_timer(&x->timer);
710 x->km.state = XFRM_STATE_DEAD;
720 *err = acquire_in_progress ? -EAGAIN : error;
721 spin_unlock_bh(&xfrm_state_lock);
725 static void __xfrm_state_insert(struct xfrm_state *x)
729 x->genid = ++xfrm_state_genid;
731 h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
732 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
735 h = xfrm_src_hash(&x->props.saddr, x->props.family);
736 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
739 if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
740 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
743 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
747 if (!mod_timer(&x->timer, jiffies + HZ))
750 if (x->replay_maxage &&
751 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
758 if (x->bydst.next != NULL &&
759 (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
760 xfrm_state_num > xfrm_state_hmask)
761 schedule_work(&xfrm_hash_work);
764 /* xfrm_state_lock is held */
765 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
767 unsigned short family = xnew->props.family;
768 u32 reqid = xnew->props.reqid;
769 struct xfrm_state *x;
770 struct hlist_node *entry;
773 h = xfrm_dst_hash(&xnew->id.daddr, reqid, family);
774 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
775 if (x->props.family == family &&
776 x->props.reqid == reqid &&
777 !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family))
778 x->genid = xfrm_state_genid;
782 void xfrm_state_insert(struct xfrm_state *x)
784 spin_lock_bh(&xfrm_state_lock);
785 __xfrm_state_bump_genids(x);
786 __xfrm_state_insert(x);
787 spin_unlock_bh(&xfrm_state_lock);
789 EXPORT_SYMBOL(xfrm_state_insert);
791 /* xfrm_state_lock is held */
792 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
794 unsigned int h = xfrm_dst_hash(daddr, reqid, family);
795 struct hlist_node *entry;
796 struct xfrm_state *x;
798 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
799 if (x->props.reqid != reqid ||
800 x->props.mode != mode ||
801 x->props.family != family ||
802 x->km.state != XFRM_STATE_ACQ ||
808 if (x->id.daddr.a4 != daddr->a4 ||
809 x->props.saddr.a4 != saddr->a4)
813 if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
814 (struct in6_addr *)daddr) ||
815 !ipv6_addr_equal((struct in6_addr *)
817 (struct in6_addr *)saddr))
829 x = xfrm_state_alloc();
833 x->sel.daddr.a4 = daddr->a4;
834 x->sel.saddr.a4 = saddr->a4;
835 x->sel.prefixlen_d = 32;
836 x->sel.prefixlen_s = 32;
837 x->props.saddr.a4 = saddr->a4;
838 x->id.daddr.a4 = daddr->a4;
842 ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
843 (struct in6_addr *)daddr);
844 ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
845 (struct in6_addr *)saddr);
846 x->sel.prefixlen_d = 128;
847 x->sel.prefixlen_s = 128;
848 ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
849 (struct in6_addr *)saddr);
850 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
851 (struct in6_addr *)daddr);
855 x->km.state = XFRM_STATE_ACQ;
857 x->props.family = family;
858 x->props.mode = mode;
859 x->props.reqid = reqid;
860 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
862 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
863 add_timer(&x->timer);
865 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
866 h = xfrm_src_hash(saddr, family);
868 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
875 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
877 int xfrm_state_add(struct xfrm_state *x)
879 struct xfrm_state *x1;
882 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
884 family = x->props.family;
886 spin_lock_bh(&xfrm_state_lock);
888 x1 = __xfrm_state_locate(x, use_spi, family);
896 if (use_spi && x->km.seq) {
897 x1 = __xfrm_find_acq_byseq(x->km.seq);
898 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
905 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
907 &x->id.daddr, &x->props.saddr, 0);
909 __xfrm_state_bump_genids(x);
910 __xfrm_state_insert(x);
914 spin_unlock_bh(&xfrm_state_lock);
917 xfrm_state_delete(x1);
923 EXPORT_SYMBOL(xfrm_state_add);
925 int xfrm_state_update(struct xfrm_state *x)
927 struct xfrm_state *x1;
929 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
931 spin_lock_bh(&xfrm_state_lock);
932 x1 = __xfrm_state_locate(x, use_spi, x->props.family);
938 if (xfrm_state_kern(x1)) {
944 if (x1->km.state == XFRM_STATE_ACQ) {
945 __xfrm_state_insert(x);
951 spin_unlock_bh(&xfrm_state_lock);
957 xfrm_state_delete(x1);
963 spin_lock_bh(&x1->lock);
964 if (likely(x1->km.state == XFRM_STATE_VALID)) {
965 if (x->encap && x1->encap)
966 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
967 if (x->coaddr && x1->coaddr) {
968 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
970 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
971 memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
972 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
975 if (!mod_timer(&x1->timer, jiffies + HZ))
977 if (x1->curlft.use_time)
978 xfrm_state_check_expire(x1);
982 spin_unlock_bh(&x1->lock);
988 EXPORT_SYMBOL(xfrm_state_update);
990 int xfrm_state_check_expire(struct xfrm_state *x)
992 if (!x->curlft.use_time)
993 x->curlft.use_time = (unsigned long)xtime.tv_sec;
995 if (x->km.state != XFRM_STATE_VALID)
998 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
999 x->curlft.packets >= x->lft.hard_packet_limit) {
1000 x->km.state = XFRM_STATE_EXPIRED;
1001 if (!mod_timer(&x->timer, jiffies))
1007 (x->curlft.bytes >= x->lft.soft_byte_limit ||
1008 x->curlft.packets >= x->lft.soft_packet_limit)) {
1010 km_state_expired(x, 0, 0);
1014 EXPORT_SYMBOL(xfrm_state_check_expire);
1016 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1018 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1019 - skb_headroom(skb);
1022 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1024 /* Check tail too... */
/* Pre-output check: state not expired and skb has room for the header. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
        int err = xfrm_state_check_expire(x);
        if (err < 0)
                goto err;
        err = xfrm_state_check_space(x, skb);
err:
        return err;
}
EXPORT_SYMBOL(xfrm_state_check);
1040 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1041 unsigned short family)
1043 struct xfrm_state *x;
1045 spin_lock_bh(&xfrm_state_lock);
1046 x = __xfrm_state_lookup(daddr, spi, proto, family);
1047 spin_unlock_bh(&xfrm_state_lock);
1050 EXPORT_SYMBOL(xfrm_state_lookup);
1053 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1054 u8 proto, unsigned short family)
1056 struct xfrm_state *x;
1058 spin_lock_bh(&xfrm_state_lock);
1059 x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1060 spin_unlock_bh(&xfrm_state_lock);
1063 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1066 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1067 xfrm_address_t *daddr, xfrm_address_t *saddr,
1068 int create, unsigned short family)
1070 struct xfrm_state *x;
1072 spin_lock_bh(&xfrm_state_lock);
1073 x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1074 spin_unlock_bh(&xfrm_state_lock);
1078 EXPORT_SYMBOL(xfrm_find_acq);
1080 #ifdef CONFIG_XFRM_SUB_POLICY
1082 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1083 unsigned short family)
1086 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1088 return -EAFNOSUPPORT;
1090 spin_lock_bh(&xfrm_state_lock);
1091 if (afinfo->tmpl_sort)
1092 err = afinfo->tmpl_sort(dst, src, n);
1093 spin_unlock_bh(&xfrm_state_lock);
1094 xfrm_state_put_afinfo(afinfo);
1097 EXPORT_SYMBOL(xfrm_tmpl_sort);
1100 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1101 unsigned short family)
1104 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1106 return -EAFNOSUPPORT;
1108 spin_lock_bh(&xfrm_state_lock);
1109 if (afinfo->state_sort)
1110 err = afinfo->state_sort(dst, src, n);
1111 spin_unlock_bh(&xfrm_state_lock);
1112 xfrm_state_put_afinfo(afinfo);
1115 EXPORT_SYMBOL(xfrm_state_sort);
1118 /* Silly enough, but I'm lazy to build resolution list */
1120 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1124 for (i = 0; i <= xfrm_state_hmask; i++) {
1125 struct hlist_node *entry;
1126 struct xfrm_state *x;
1128 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1129 if (x->km.seq == seq &&
1130 x->km.state == XFRM_STATE_ACQ) {
1139 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1141 struct xfrm_state *x;
1143 spin_lock_bh(&xfrm_state_lock);
1144 x = __xfrm_find_acq_byseq(seq);
1145 spin_unlock_bh(&xfrm_state_lock);
1148 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1150 u32 xfrm_get_acqseq(void)
1154 static DEFINE_SPINLOCK(acqseq_lock);
1156 spin_lock_bh(&acqseq_lock);
1157 res = (++acqseq ? : ++acqseq);
1158 spin_unlock_bh(&acqseq_lock);
1161 EXPORT_SYMBOL(xfrm_get_acqseq);
1164 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1167 struct xfrm_state *x0;
1172 if (minspi == maxspi) {
1173 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1181 minspi = ntohl(minspi);
1182 maxspi = ntohl(maxspi);
1183 for (h=0; h<maxspi-minspi+1; h++) {
1184 spi = minspi + net_random()%(maxspi-minspi+1);
1185 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1187 x->id.spi = htonl(spi);
1194 spin_lock_bh(&xfrm_state_lock);
1195 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1196 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1198 spin_unlock_bh(&xfrm_state_lock);
1202 EXPORT_SYMBOL(xfrm_alloc_spi);
1204 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1208 struct xfrm_state *x;
1209 struct hlist_node *entry;
1213 spin_lock_bh(&xfrm_state_lock);
1214 for (i = 0; i <= xfrm_state_hmask; i++) {
1215 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1216 if (xfrm_id_proto_match(x->id.proto, proto))
1225 for (i = 0; i <= xfrm_state_hmask; i++) {
1226 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1227 if (!xfrm_id_proto_match(x->id.proto, proto))
1229 err = func(x, --count, data);
1235 spin_unlock_bh(&xfrm_state_lock);
1238 EXPORT_SYMBOL(xfrm_state_walk);
1241 void xfrm_replay_notify(struct xfrm_state *x, int event)
1244 /* we send notify messages in case
1245 * 1. we updated on of the sequence numbers, and the seqno difference
1246 * is at least x->replay_maxdiff, in this case we also update the
1247 * timeout of our timer function
1248 * 2. if x->replay_maxage has elapsed since last update,
1249 * and there were changes
1251 * The state structure must be locked!
1255 case XFRM_REPLAY_UPDATE:
1256 if (x->replay_maxdiff &&
1257 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1258 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1259 if (x->xflags & XFRM_TIME_DEFER)
1260 event = XFRM_REPLAY_TIMEOUT;
1267 case XFRM_REPLAY_TIMEOUT:
1268 if ((x->replay.seq == x->preplay.seq) &&
1269 (x->replay.bitmap == x->preplay.bitmap) &&
1270 (x->replay.oseq == x->preplay.oseq)) {
1271 x->xflags |= XFRM_TIME_DEFER;
1278 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1279 c.event = XFRM_MSG_NEWAE;
1280 c.data.aevent = event;
1281 km_state_notify(x, &c);
1283 if (x->replay_maxage &&
1284 !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
1286 x->xflags &= ~XFRM_TIME_DEFER;
1289 EXPORT_SYMBOL(xfrm_replay_notify);
1291 static void xfrm_replay_timer_handler(unsigned long data)
1293 struct xfrm_state *x = (struct xfrm_state*)data;
1295 spin_lock(&x->lock);
1297 if (x->km.state == XFRM_STATE_VALID) {
1298 if (xfrm_aevent_is_on())
1299 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1301 x->xflags |= XFRM_TIME_DEFER;
1304 spin_unlock(&x->lock);
1308 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1314 if (unlikely(seq == 0))
1317 if (likely(seq > x->replay.seq))
1320 diff = x->replay.seq - seq;
1321 if (diff >= x->props.replay_window) {
1322 x->stats.replay_window++;
1326 if (x->replay.bitmap & (1U << diff)) {
1332 EXPORT_SYMBOL(xfrm_replay_check);
1334 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1340 if (seq > x->replay.seq) {
1341 diff = seq - x->replay.seq;
1342 if (diff < x->props.replay_window)
1343 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1345 x->replay.bitmap = 1;
1346 x->replay.seq = seq;
1348 diff = x->replay.seq - seq;
1349 x->replay.bitmap |= (1U << diff);
1352 if (xfrm_aevent_is_on())
1353 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1355 EXPORT_SYMBOL(xfrm_replay_advance);
1357 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1358 static DEFINE_RWLOCK(xfrm_km_lock);
1360 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1362 struct xfrm_mgr *km;
1364 read_lock(&xfrm_km_lock);
1365 list_for_each_entry(km, &xfrm_km_list, list)
1366 if (km->notify_policy)
1367 km->notify_policy(xp, dir, c);
1368 read_unlock(&xfrm_km_lock);
1371 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1373 struct xfrm_mgr *km;
1374 read_lock(&xfrm_km_lock);
1375 list_for_each_entry(km, &xfrm_km_list, list)
1378 read_unlock(&xfrm_km_lock);
1381 EXPORT_SYMBOL(km_policy_notify);
1382 EXPORT_SYMBOL(km_state_notify);
1384 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1390 c.event = XFRM_MSG_EXPIRE;
1391 km_state_notify(x, &c);
1397 EXPORT_SYMBOL(km_state_expired);
1399 * We send to all registered managers regardless of failure
1400 * We are happy with one success
1402 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1404 int err = -EINVAL, acqret;
1405 struct xfrm_mgr *km;
1407 read_lock(&xfrm_km_lock);
1408 list_for_each_entry(km, &xfrm_km_list, list) {
1409 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1413 read_unlock(&xfrm_km_lock);
1416 EXPORT_SYMBOL(km_query);
1418 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1421 struct xfrm_mgr *km;
1423 read_lock(&xfrm_km_lock);
1424 list_for_each_entry(km, &xfrm_km_list, list) {
1425 if (km->new_mapping)
1426 err = km->new_mapping(x, ipaddr, sport);
1430 read_unlock(&xfrm_km_lock);
1433 EXPORT_SYMBOL(km_new_mapping);
1435 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1441 c.event = XFRM_MSG_POLEXPIRE;
1442 km_policy_notify(pol, dir, &c);
1447 EXPORT_SYMBOL(km_policy_expired);
1449 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1453 struct xfrm_mgr *km;
1455 read_lock(&xfrm_km_lock);
1456 list_for_each_entry(km, &xfrm_km_list, list) {
1458 ret = km->report(proto, sel, addr);
1463 read_unlock(&xfrm_km_lock);
1466 EXPORT_SYMBOL(km_report);
1468 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1472 struct xfrm_mgr *km;
1473 struct xfrm_policy *pol = NULL;
1475 if (optlen <= 0 || optlen > PAGE_SIZE)
1478 data = kmalloc(optlen, GFP_KERNEL);
1483 if (copy_from_user(data, optval, optlen))
1487 read_lock(&xfrm_km_lock);
1488 list_for_each_entry(km, &xfrm_km_list, list) {
1489 pol = km->compile_policy(sk, optname, data,
1494 read_unlock(&xfrm_km_lock);
1497 xfrm_sk_policy_insert(sk, err, pol);
1506 EXPORT_SYMBOL(xfrm_user_policy);
1508 int xfrm_register_km(struct xfrm_mgr *km)
1510 write_lock_bh(&xfrm_km_lock);
1511 list_add_tail(&km->list, &xfrm_km_list);
1512 write_unlock_bh(&xfrm_km_lock);
1515 EXPORT_SYMBOL(xfrm_register_km);
1517 int xfrm_unregister_km(struct xfrm_mgr *km)
1519 write_lock_bh(&xfrm_km_lock);
1520 list_del(&km->list);
1521 write_unlock_bh(&xfrm_km_lock);
1524 EXPORT_SYMBOL(xfrm_unregister_km);
1526 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1529 if (unlikely(afinfo == NULL))
1531 if (unlikely(afinfo->family >= NPROTO))
1532 return -EAFNOSUPPORT;
1533 write_lock_bh(&xfrm_state_afinfo_lock);
1534 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1537 xfrm_state_afinfo[afinfo->family] = afinfo;
1538 write_unlock_bh(&xfrm_state_afinfo_lock);
1541 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1543 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1546 if (unlikely(afinfo == NULL))
1548 if (unlikely(afinfo->family >= NPROTO))
1549 return -EAFNOSUPPORT;
1550 write_lock_bh(&xfrm_state_afinfo_lock);
1551 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1552 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1555 xfrm_state_afinfo[afinfo->family] = NULL;
1557 write_unlock_bh(&xfrm_state_afinfo_lock);
1560 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1562 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1564 struct xfrm_state_afinfo *afinfo;
1565 if (unlikely(family >= NPROTO))
1567 read_lock(&xfrm_state_afinfo_lock);
1568 afinfo = xfrm_state_afinfo[family];
1569 if (unlikely(!afinfo))
1570 read_unlock(&xfrm_state_afinfo_lock);
1574 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1576 read_unlock(&xfrm_state_afinfo_lock);
1579 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1580 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1583 struct xfrm_state *t = x->tunnel;
1585 if (atomic_read(&t->tunnel_users) == 2)
1586 xfrm_state_delete(t);
1587 atomic_dec(&t->tunnel_users);
1592 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1595 * This function is NOT optimal. For example, with ESP it will give an
1596 * MTU that's usually two bytes short of being optimal. However, it will
1597 * usually give an answer that's a multiple of 4 provided the input is
1598 * also a multiple of 4.
1600 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1604 res -= x->props.header_len;
1612 spin_lock_bh(&x->lock);
1613 if (x->km.state == XFRM_STATE_VALID &&
1614 x->type && x->type->get_max_size)
1615 m = x->type->get_max_size(x, m);
1617 m += x->props.header_len;
1618 spin_unlock_bh(&x->lock);
1628 int xfrm_init_state(struct xfrm_state *x)
1630 struct xfrm_state_afinfo *afinfo;
1631 int family = x->props.family;
1634 err = -EAFNOSUPPORT;
1635 afinfo = xfrm_state_get_afinfo(family);
1640 if (afinfo->init_flags)
1641 err = afinfo->init_flags(x);
1643 xfrm_state_put_afinfo(afinfo);
1648 err = -EPROTONOSUPPORT;
1649 x->type = xfrm_get_type(x->id.proto, family);
1650 if (x->type == NULL)
1653 err = x->type->init_state(x);
1657 x->mode = xfrm_get_mode(x->props.mode, family);
1658 if (x->mode == NULL)
1661 x->km.state = XFRM_STATE_VALID;
1667 EXPORT_SYMBOL(xfrm_init_state);
1669 void __init xfrm_state_init(void)
1673 sz = sizeof(struct hlist_head) * 8;
1675 xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1676 xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1677 xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1678 if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1679 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1680 xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1682 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);