[XFRM] STATE: Introduce care-of address.
[linux-2.6-microblaze.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22
/* Exported socket pointer; presumably the xfrm netlink socket (it is only
 * declared here, not assigned) -- TODO confirm against the netlink code. */
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

/* Exported tunable, initialised to XFRM_AE_ETIME.  The name suggests a
 * sysctl-controlled async-event timer value -- verify against its users. */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

/* Exported tunable, initialised to XFRM_AE_SEQT_SIZE.  The name suggests a
 * sysctl-controlled replay sequence threshold -- verify against its users. */
u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
31
/* Each xfrm_state may be linked to three tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
   3. Hash table by saddr, used to find an SA by its source address.
 */
38
/* Taken (with BHs disabled) around every walk or modification of the
 * bydst/bysrc/byspi hash tables declared below. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 *
 * bysrc is bucketed by source address and byspi by (daddr, spi, proto);
 * all three tables share the same number of buckets.
 */
static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE];
static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];

/* Wait queue woken whenever states are inserted, flushed, expire,
 * get an SPI assigned, or are garbage-collected. */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

/* Per-address-family operations, indexed by family.  The rwlock presumably
 * guards this array; its users are not in this part of the file. */
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: dead states are queued on xfrm_state_gc_list
 * (under xfrm_state_gc_lock) and xfrm_state_gc_work is scheduled to
 * free them later. */
static struct work_struct xfrm_state_gc_work;
static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* Set by __xfrm_state_delete() when a deleted state still has extra
 * references; makes the GC task call xfrm_flush_bundles() once. */
static int xfrm_state_gc_flush_bundles;

/* Forward declarations for functions defined later or in other files. */
int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
70
71 static void xfrm_state_gc_destroy(struct xfrm_state *x)
72 {
73         if (del_timer(&x->timer))
74                 BUG();
75         if (del_timer(&x->rtimer))
76                 BUG();
77         kfree(x->aalg);
78         kfree(x->ealg);
79         kfree(x->calg);
80         kfree(x->encap);
81         kfree(x->coaddr);
82         if (x->mode)
83                 xfrm_put_mode(x->mode);
84         if (x->type) {
85                 x->type->destructor(x);
86                 xfrm_put_type(x->type);
87         }
88         security_xfrm_state_free(x);
89         kfree(x);
90 }
91
92 static void xfrm_state_gc_task(void *data)
93 {
94         struct xfrm_state *x;
95         struct list_head *entry, *tmp;
96         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
97
98         if (xfrm_state_gc_flush_bundles) {
99                 xfrm_state_gc_flush_bundles = 0;
100                 xfrm_flush_bundles();
101         }
102
103         spin_lock_bh(&xfrm_state_gc_lock);
104         list_splice_init(&xfrm_state_gc_list, &gc_list);
105         spin_unlock_bh(&xfrm_state_gc_lock);
106
107         list_for_each_safe(entry, tmp, &gc_list) {
108                 x = list_entry(entry, struct xfrm_state, bydst);
109                 xfrm_state_gc_destroy(x);
110         }
111         wake_up(&km_waitq);
112 }
113
114 static inline unsigned long make_jiffies(long secs)
115 {
116         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
117                 return MAX_SCHEDULE_TIMEOUT-1;
118         else
119                 return secs*HZ;
120 }
121
122 static void xfrm_timer_handler(unsigned long data)
123 {
124         struct xfrm_state *x = (struct xfrm_state*)data;
125         unsigned long now = (unsigned long)xtime.tv_sec;
126         long next = LONG_MAX;
127         int warn = 0;
128
129         spin_lock(&x->lock);
130         if (x->km.state == XFRM_STATE_DEAD)
131                 goto out;
132         if (x->km.state == XFRM_STATE_EXPIRED)
133                 goto expired;
134         if (x->lft.hard_add_expires_seconds) {
135                 long tmo = x->lft.hard_add_expires_seconds +
136                         x->curlft.add_time - now;
137                 if (tmo <= 0)
138                         goto expired;
139                 if (tmo < next)
140                         next = tmo;
141         }
142         if (x->lft.hard_use_expires_seconds) {
143                 long tmo = x->lft.hard_use_expires_seconds +
144                         (x->curlft.use_time ? : now) - now;
145                 if (tmo <= 0)
146                         goto expired;
147                 if (tmo < next)
148                         next = tmo;
149         }
150         if (x->km.dying)
151                 goto resched;
152         if (x->lft.soft_add_expires_seconds) {
153                 long tmo = x->lft.soft_add_expires_seconds +
154                         x->curlft.add_time - now;
155                 if (tmo <= 0)
156                         warn = 1;
157                 else if (tmo < next)
158                         next = tmo;
159         }
160         if (x->lft.soft_use_expires_seconds) {
161                 long tmo = x->lft.soft_use_expires_seconds +
162                         (x->curlft.use_time ? : now) - now;
163                 if (tmo <= 0)
164                         warn = 1;
165                 else if (tmo < next)
166                         next = tmo;
167         }
168
169         x->km.dying = warn;
170         if (warn)
171                 km_state_expired(x, 0, 0);
172 resched:
173         if (next != LONG_MAX &&
174             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
175                 xfrm_state_hold(x);
176         goto out;
177
178 expired:
179         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
180                 x->km.state = XFRM_STATE_EXPIRED;
181                 wake_up(&km_waitq);
182                 next = 2;
183                 goto resched;
184         }
185         if (!__xfrm_state_delete(x) && x->id.spi)
186                 km_state_expired(x, 1, 0);
187
188 out:
189         spin_unlock(&x->lock);
190         xfrm_state_put(x);
191 }
192
193 static void xfrm_replay_timer_handler(unsigned long data);
194
195 struct xfrm_state *xfrm_state_alloc(void)
196 {
197         struct xfrm_state *x;
198
199         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
200
201         if (x) {
202                 atomic_set(&x->refcnt, 1);
203                 atomic_set(&x->tunnel_users, 0);
204                 INIT_LIST_HEAD(&x->bydst);
205                 INIT_LIST_HEAD(&x->bysrc);
206                 INIT_LIST_HEAD(&x->byspi);
207                 init_timer(&x->timer);
208                 x->timer.function = xfrm_timer_handler;
209                 x->timer.data     = (unsigned long)x;
210                 init_timer(&x->rtimer);
211                 x->rtimer.function = xfrm_replay_timer_handler;
212                 x->rtimer.data     = (unsigned long)x;
213                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
214                 x->lft.soft_byte_limit = XFRM_INF;
215                 x->lft.soft_packet_limit = XFRM_INF;
216                 x->lft.hard_byte_limit = XFRM_INF;
217                 x->lft.hard_packet_limit = XFRM_INF;
218                 x->replay_maxage = 0;
219                 x->replay_maxdiff = 0;
220                 spin_lock_init(&x->lock);
221         }
222         return x;
223 }
224 EXPORT_SYMBOL(xfrm_state_alloc);
225
226 void __xfrm_state_destroy(struct xfrm_state *x)
227 {
228         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
229
230         spin_lock_bh(&xfrm_state_gc_lock);
231         list_add(&x->bydst, &xfrm_state_gc_list);
232         spin_unlock_bh(&xfrm_state_gc_lock);
233         schedule_work(&xfrm_state_gc_work);
234 }
235 EXPORT_SYMBOL(__xfrm_state_destroy);
236
237 int __xfrm_state_delete(struct xfrm_state *x)
238 {
239         int err = -ESRCH;
240
241         if (x->km.state != XFRM_STATE_DEAD) {
242                 x->km.state = XFRM_STATE_DEAD;
243                 spin_lock(&xfrm_state_lock);
244                 list_del(&x->bydst);
245                 __xfrm_state_put(x);
246                 list_del(&x->bysrc);
247                 __xfrm_state_put(x);
248                 if (x->id.spi) {
249                         list_del(&x->byspi);
250                         __xfrm_state_put(x);
251                 }
252                 spin_unlock(&xfrm_state_lock);
253                 if (del_timer(&x->timer))
254                         __xfrm_state_put(x);
255                 if (del_timer(&x->rtimer))
256                         __xfrm_state_put(x);
257
258                 /* The number two in this test is the reference
259                  * mentioned in the comment below plus the reference
260                  * our caller holds.  A larger value means that
261                  * there are DSTs attached to this xfrm_state.
262                  */
263                 if (atomic_read(&x->refcnt) > 2) {
264                         xfrm_state_gc_flush_bundles = 1;
265                         schedule_work(&xfrm_state_gc_work);
266                 }
267
268                 /* All xfrm_state objects are created by xfrm_state_alloc.
269                  * The xfrm_state_alloc call gives a reference, and that
270                  * is what we are dropping here.
271                  */
272                 __xfrm_state_put(x);
273                 err = 0;
274         }
275
276         return err;
277 }
278 EXPORT_SYMBOL(__xfrm_state_delete);
279
280 int xfrm_state_delete(struct xfrm_state *x)
281 {
282         int err;
283
284         spin_lock_bh(&x->lock);
285         err = __xfrm_state_delete(x);
286         spin_unlock_bh(&x->lock);
287
288         return err;
289 }
290 EXPORT_SYMBOL(xfrm_state_delete);
291
292 void xfrm_state_flush(u8 proto)
293 {
294         int i;
295         struct xfrm_state *x;
296
297         spin_lock_bh(&xfrm_state_lock);
298         for (i = 0; i < XFRM_DST_HSIZE; i++) {
299 restart:
300                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
301                         if (!xfrm_state_kern(x) &&
302                             xfrm_id_proto_match(x->id.proto, proto)) {
303                                 xfrm_state_hold(x);
304                                 spin_unlock_bh(&xfrm_state_lock);
305
306                                 xfrm_state_delete(x);
307                                 xfrm_state_put(x);
308
309                                 spin_lock_bh(&xfrm_state_lock);
310                                 goto restart;
311                         }
312                 }
313         }
314         spin_unlock_bh(&xfrm_state_lock);
315         wake_up(&km_waitq);
316 }
317 EXPORT_SYMBOL(xfrm_state_flush);
318
319 static int
320 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
321                   struct xfrm_tmpl *tmpl,
322                   xfrm_address_t *daddr, xfrm_address_t *saddr,
323                   unsigned short family)
324 {
325         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
326         if (!afinfo)
327                 return -1;
328         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
329         xfrm_state_put_afinfo(afinfo);
330         return 0;
331 }
332
333 struct xfrm_state *
334 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
335                 struct flowi *fl, struct xfrm_tmpl *tmpl,
336                 struct xfrm_policy *pol, int *err,
337                 unsigned short family)
338 {
339         unsigned h = xfrm_dst_hash(daddr, family);
340         struct xfrm_state *x, *x0;
341         int acquire_in_progress = 0;
342         int error = 0;
343         struct xfrm_state *best = NULL;
344         struct xfrm_state_afinfo *afinfo;
345         
346         afinfo = xfrm_state_get_afinfo(family);
347         if (afinfo == NULL) {
348                 *err = -EAFNOSUPPORT;
349                 return NULL;
350         }
351
352         spin_lock_bh(&xfrm_state_lock);
353         list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
354                 if (x->props.family == family &&
355                     x->props.reqid == tmpl->reqid &&
356                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
357                     xfrm_state_addr_check(x, daddr, saddr, family) &&
358                     tmpl->mode == x->props.mode &&
359                     tmpl->id.proto == x->id.proto &&
360                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
361                         /* Resolution logic:
362                            1. There is a valid state with matching selector.
363                               Done.
364                            2. Valid state with inappropriate selector. Skip.
365
366                            Entering area of "sysdeps".
367
368                            3. If state is not valid, selector is temporary,
369                               it selects only session which triggered
370                               previous resolution. Key manager will do
371                               something to install a state with proper
372                               selector.
373                          */
374                         if (x->km.state == XFRM_STATE_VALID) {
375                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
376                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
377                                         continue;
378                                 if (!best ||
379                                     best->km.dying > x->km.dying ||
380                                     (best->km.dying == x->km.dying &&
381                                      best->curlft.add_time < x->curlft.add_time))
382                                         best = x;
383                         } else if (x->km.state == XFRM_STATE_ACQ) {
384                                 acquire_in_progress = 1;
385                         } else if (x->km.state == XFRM_STATE_ERROR ||
386                                    x->km.state == XFRM_STATE_EXPIRED) {
387                                 if (xfrm_selector_match(&x->sel, fl, family) &&
388                                     security_xfrm_state_pol_flow_match(x, pol, fl))
389                                         error = -ESRCH;
390                         }
391                 }
392         }
393
394         x = best;
395         if (!x && !error && !acquire_in_progress) {
396                 if (tmpl->id.spi &&
397                     (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
398                                                tmpl->id.proto)) != NULL) {
399                         xfrm_state_put(x0);
400                         error = -EEXIST;
401                         goto out;
402                 }
403                 x = xfrm_state_alloc();
404                 if (x == NULL) {
405                         error = -ENOMEM;
406                         goto out;
407                 }
408                 /* Initialize temporary selector matching only
409                  * to current session. */
410                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
411
412                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
413                 if (error) {
414                         x->km.state = XFRM_STATE_DEAD;
415                         xfrm_state_put(x);
416                         x = NULL;
417                         goto out;
418                 }
419
420                 if (km_query(x, tmpl, pol) == 0) {
421                         x->km.state = XFRM_STATE_ACQ;
422                         list_add_tail(&x->bydst, xfrm_state_bydst+h);
423                         xfrm_state_hold(x);
424                         list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
425                         xfrm_state_hold(x);
426                         if (x->id.spi) {
427                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
428                                 list_add(&x->byspi, xfrm_state_byspi+h);
429                                 xfrm_state_hold(x);
430                         }
431                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
432                         xfrm_state_hold(x);
433                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
434                         add_timer(&x->timer);
435                 } else {
436                         x->km.state = XFRM_STATE_DEAD;
437                         xfrm_state_put(x);
438                         x = NULL;
439                         error = -ESRCH;
440                 }
441         }
442 out:
443         if (x)
444                 xfrm_state_hold(x);
445         else
446                 *err = acquire_in_progress ? -EAGAIN : error;
447         spin_unlock_bh(&xfrm_state_lock);
448         xfrm_state_put_afinfo(afinfo);
449         return x;
450 }
451
452 static void __xfrm_state_insert(struct xfrm_state *x)
453 {
454         unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
455
456         list_add(&x->bydst, xfrm_state_bydst+h);
457         xfrm_state_hold(x);
458
459         h = xfrm_src_hash(&x->props.saddr, x->props.family);
460
461         list_add(&x->bysrc, xfrm_state_bysrc+h);
462         xfrm_state_hold(x);
463
464         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
465                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
466                                   x->props.family);
467
468                 list_add(&x->byspi, xfrm_state_byspi+h);
469                 xfrm_state_hold(x);
470         }
471
472         if (!mod_timer(&x->timer, jiffies + HZ))
473                 xfrm_state_hold(x);
474
475         if (x->replay_maxage &&
476             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
477                 xfrm_state_hold(x);
478
479         wake_up(&km_waitq);
480 }
481
482 void xfrm_state_insert(struct xfrm_state *x)
483 {
484         spin_lock_bh(&xfrm_state_lock);
485         __xfrm_state_insert(x);
486         spin_unlock_bh(&xfrm_state_lock);
487
488         xfrm_flush_all_bundles();
489 }
490 EXPORT_SYMBOL(xfrm_state_insert);
491
492 static inline struct xfrm_state *
493 __xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x,
494                     int use_spi)
495 {
496         if (use_spi)
497                 return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
498         else
499                 return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto);
500 }
501
502 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
503
504 int xfrm_state_add(struct xfrm_state *x)
505 {
506         struct xfrm_state_afinfo *afinfo;
507         struct xfrm_state *x1;
508         int family;
509         int err;
510         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
511
512         family = x->props.family;
513         afinfo = xfrm_state_get_afinfo(family);
514         if (unlikely(afinfo == NULL))
515                 return -EAFNOSUPPORT;
516
517         spin_lock_bh(&xfrm_state_lock);
518
519         x1 = __xfrm_state_locate(afinfo, x, use_spi);
520         if (x1) {
521                 xfrm_state_put(x1);
522                 x1 = NULL;
523                 err = -EEXIST;
524                 goto out;
525         }
526
527         if (use_spi && x->km.seq) {
528                 x1 = __xfrm_find_acq_byseq(x->km.seq);
529                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
530                         xfrm_state_put(x1);
531                         x1 = NULL;
532                 }
533         }
534
535         if (use_spi && !x1)
536                 x1 = afinfo->find_acq(
537                         x->props.mode, x->props.reqid, x->id.proto,
538                         &x->id.daddr, &x->props.saddr, 0);
539
540         __xfrm_state_insert(x);
541         err = 0;
542
543 out:
544         spin_unlock_bh(&xfrm_state_lock);
545         xfrm_state_put_afinfo(afinfo);
546
547         if (!err)
548                 xfrm_flush_all_bundles();
549
550         if (x1) {
551                 xfrm_state_delete(x1);
552                 xfrm_state_put(x1);
553         }
554
555         return err;
556 }
557 EXPORT_SYMBOL(xfrm_state_add);
558
559 int xfrm_state_update(struct xfrm_state *x)
560 {
561         struct xfrm_state_afinfo *afinfo;
562         struct xfrm_state *x1;
563         int err;
564         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
565
566         afinfo = xfrm_state_get_afinfo(x->props.family);
567         if (unlikely(afinfo == NULL))
568                 return -EAFNOSUPPORT;
569
570         spin_lock_bh(&xfrm_state_lock);
571         x1 = __xfrm_state_locate(afinfo, x, use_spi);
572
573         err = -ESRCH;
574         if (!x1)
575                 goto out;
576
577         if (xfrm_state_kern(x1)) {
578                 xfrm_state_put(x1);
579                 err = -EEXIST;
580                 goto out;
581         }
582
583         if (x1->km.state == XFRM_STATE_ACQ) {
584                 __xfrm_state_insert(x);
585                 x = NULL;
586         }
587         err = 0;
588
589 out:
590         spin_unlock_bh(&xfrm_state_lock);
591         xfrm_state_put_afinfo(afinfo);
592
593         if (err)
594                 return err;
595
596         if (!x) {
597                 xfrm_state_delete(x1);
598                 xfrm_state_put(x1);
599                 return 0;
600         }
601
602         err = -EINVAL;
603         spin_lock_bh(&x1->lock);
604         if (likely(x1->km.state == XFRM_STATE_VALID)) {
605                 if (x->encap && x1->encap)
606                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
607                 if (x->coaddr && x1->coaddr) {
608                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
609                 }
610                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
611                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
612                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
613                 x1->km.dying = 0;
614
615                 if (!mod_timer(&x1->timer, jiffies + HZ))
616                         xfrm_state_hold(x1);
617                 if (x1->curlft.use_time)
618                         xfrm_state_check_expire(x1);
619
620                 err = 0;
621         }
622         spin_unlock_bh(&x1->lock);
623
624         xfrm_state_put(x1);
625
626         return err;
627 }
628 EXPORT_SYMBOL(xfrm_state_update);
629
630 int xfrm_state_check_expire(struct xfrm_state *x)
631 {
632         if (!x->curlft.use_time)
633                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
634
635         if (x->km.state != XFRM_STATE_VALID)
636                 return -EINVAL;
637
638         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
639             x->curlft.packets >= x->lft.hard_packet_limit) {
640                 x->km.state = XFRM_STATE_EXPIRED;
641                 if (!mod_timer(&x->timer, jiffies))
642                         xfrm_state_hold(x);
643                 return -EINVAL;
644         }
645
646         if (!x->km.dying &&
647             (x->curlft.bytes >= x->lft.soft_byte_limit ||
648              x->curlft.packets >= x->lft.soft_packet_limit)) {
649                 x->km.dying = 1;
650                 km_state_expired(x, 0, 0);
651         }
652         return 0;
653 }
654 EXPORT_SYMBOL(xfrm_state_check_expire);
655
656 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
657 {
658         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
659                 - skb_headroom(skb);
660
661         if (nhead > 0)
662                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
663
664         /* Check tail too... */
665         return 0;
666 }
667
/*
 * Combined check before using @x on @skb: first the lifetime limits, then
 * (only if those pass) the available headroom.
 *
 * Returns 0 on success or the negative error from the failing check.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err >= 0)
		err = xfrm_state_check_space(x, skb);

	return err;
}
EXPORT_SYMBOL(xfrm_state_check);
678
679 struct xfrm_state *
680 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
681                   unsigned short family)
682 {
683         struct xfrm_state *x;
684         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
685         if (!afinfo)
686                 return NULL;
687
688         spin_lock_bh(&xfrm_state_lock);
689         x = afinfo->state_lookup(daddr, spi, proto);
690         spin_unlock_bh(&xfrm_state_lock);
691         xfrm_state_put_afinfo(afinfo);
692         return x;
693 }
694 EXPORT_SYMBOL(xfrm_state_lookup);
695
696 struct xfrm_state *
697 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
698                          u8 proto, unsigned short family)
699 {
700         struct xfrm_state *x;
701         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
702         if (!afinfo)
703                 return NULL;
704
705         spin_lock_bh(&xfrm_state_lock);
706         x = afinfo->state_lookup_byaddr(daddr, saddr, proto);
707         spin_unlock_bh(&xfrm_state_lock);
708         xfrm_state_put_afinfo(afinfo);
709         return x;
710 }
711 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
712
713 struct xfrm_state *
714 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
715               xfrm_address_t *daddr, xfrm_address_t *saddr, 
716               int create, unsigned short family)
717 {
718         struct xfrm_state *x;
719         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
720         if (!afinfo)
721                 return NULL;
722
723         spin_lock_bh(&xfrm_state_lock);
724         x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
725         spin_unlock_bh(&xfrm_state_lock);
726         xfrm_state_put_afinfo(afinfo);
727         return x;
728 }
729 EXPORT_SYMBOL(xfrm_find_acq);
730
731 /* Silly enough, but I'm lazy to build resolution list */
732
733 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
734 {
735         int i;
736         struct xfrm_state *x;
737
738         for (i = 0; i < XFRM_DST_HSIZE; i++) {
739                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
740                         if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
741                                 xfrm_state_hold(x);
742                                 return x;
743                         }
744                 }
745         }
746         return NULL;
747 }
748
749 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
750 {
751         struct xfrm_state *x;
752
753         spin_lock_bh(&xfrm_state_lock);
754         x = __xfrm_find_acq_byseq(seq);
755         spin_unlock_bh(&xfrm_state_lock);
756         return x;
757 }
758 EXPORT_SYMBOL(xfrm_find_acq_byseq);
759
760 u32 xfrm_get_acqseq(void)
761 {
762         u32 res;
763         static u32 acqseq;
764         static DEFINE_SPINLOCK(acqseq_lock);
765
766         spin_lock_bh(&acqseq_lock);
767         res = (++acqseq ? : ++acqseq);
768         spin_unlock_bh(&acqseq_lock);
769         return res;
770 }
771 EXPORT_SYMBOL(xfrm_get_acqseq);
772
773 void
774 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
775 {
776         u32 h;
777         struct xfrm_state *x0;
778
779         if (x->id.spi)
780                 return;
781
782         if (minspi == maxspi) {
783                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
784                 if (x0) {
785                         xfrm_state_put(x0);
786                         return;
787                 }
788                 x->id.spi = minspi;
789         } else {
790                 u32 spi = 0;
791                 minspi = ntohl(minspi);
792                 maxspi = ntohl(maxspi);
793                 for (h=0; h<maxspi-minspi+1; h++) {
794                         spi = minspi + net_random()%(maxspi-minspi+1);
795                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
796                         if (x0 == NULL) {
797                                 x->id.spi = htonl(spi);
798                                 break;
799                         }
800                         xfrm_state_put(x0);
801                 }
802         }
803         if (x->id.spi) {
804                 spin_lock_bh(&xfrm_state_lock);
805                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
806                 list_add(&x->byspi, xfrm_state_byspi+h);
807                 xfrm_state_hold(x);
808                 spin_unlock_bh(&xfrm_state_lock);
809                 wake_up(&km_waitq);
810         }
811 }
812 EXPORT_SYMBOL(xfrm_alloc_spi);
813
814 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
815                     void *data)
816 {
817         int i;
818         struct xfrm_state *x;
819         int count = 0;
820         int err = 0;
821
822         spin_lock_bh(&xfrm_state_lock);
823         for (i = 0; i < XFRM_DST_HSIZE; i++) {
824                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
825                         if (xfrm_id_proto_match(x->id.proto, proto))
826                                 count++;
827                 }
828         }
829         if (count == 0) {
830                 err = -ENOENT;
831                 goto out;
832         }
833
834         for (i = 0; i < XFRM_DST_HSIZE; i++) {
835                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
836                         if (!xfrm_id_proto_match(x->id.proto, proto))
837                                 continue;
838                         err = func(x, --count, data);
839                         if (err)
840                                 goto out;
841                 }
842         }
843 out:
844         spin_unlock_bh(&xfrm_state_lock);
845         return err;
846 }
847 EXPORT_SYMBOL(xfrm_state_walk);
848
849
850 void xfrm_replay_notify(struct xfrm_state *x, int event)
851 {
852         struct km_event c;
853         /* we send notify messages in case
854          *  1. we updated on of the sequence numbers, and the seqno difference
855          *     is at least x->replay_maxdiff, in this case we also update the
856          *     timeout of our timer function
857          *  2. if x->replay_maxage has elapsed since last update,
858          *     and there were changes
859          *
860          *  The state structure must be locked!
861          */
862
863         switch (event) {
864         case XFRM_REPLAY_UPDATE:
865                 if (x->replay_maxdiff &&
866                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
867                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
868                         if (x->xflags & XFRM_TIME_DEFER)
869                                 event = XFRM_REPLAY_TIMEOUT;
870                         else
871                                 return;
872                 }
873
874                 break;
875
876         case XFRM_REPLAY_TIMEOUT:
877                 if ((x->replay.seq == x->preplay.seq) &&
878                     (x->replay.bitmap == x->preplay.bitmap) &&
879                     (x->replay.oseq == x->preplay.oseq)) {
880                         x->xflags |= XFRM_TIME_DEFER;
881                         return;
882                 }
883
884                 break;
885         }
886
887         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
888         c.event = XFRM_MSG_NEWAE;
889         c.data.aevent = event;
890         km_state_notify(x, &c);
891
892         if (x->replay_maxage &&
893             !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
894                 xfrm_state_hold(x);
895                 x->xflags &= ~XFRM_TIME_DEFER;
896         }
897 }
898 EXPORT_SYMBOL(xfrm_replay_notify);
899
900 static void xfrm_replay_timer_handler(unsigned long data)
901 {
902         struct xfrm_state *x = (struct xfrm_state*)data;
903
904         spin_lock(&x->lock);
905
906         if (x->km.state == XFRM_STATE_VALID) {
907                 if (xfrm_aevent_is_on())
908                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
909                 else
910                         x->xflags |= XFRM_TIME_DEFER;
911         }
912
913         spin_unlock(&x->lock);
914         xfrm_state_put(x);
915 }
916
917 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
918 {
919         u32 diff;
920
921         seq = ntohl(seq);
922
923         if (unlikely(seq == 0))
924                 return -EINVAL;
925
926         if (likely(seq > x->replay.seq))
927                 return 0;
928
929         diff = x->replay.seq - seq;
930         if (diff >= x->props.replay_window) {
931                 x->stats.replay_window++;
932                 return -EINVAL;
933         }
934
935         if (x->replay.bitmap & (1U << diff)) {
936                 x->stats.replay++;
937                 return -EINVAL;
938         }
939         return 0;
940 }
941 EXPORT_SYMBOL(xfrm_replay_check);
942
943 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
944 {
945         u32 diff;
946
947         seq = ntohl(seq);
948
949         if (seq > x->replay.seq) {
950                 diff = seq - x->replay.seq;
951                 if (diff < x->props.replay_window)
952                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
953                 else
954                         x->replay.bitmap = 1;
955                 x->replay.seq = seq;
956         } else {
957                 diff = x->replay.seq - seq;
958                 x->replay.bitmap |= (1U << diff);
959         }
960
961         if (xfrm_aevent_is_on())
962                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
963 }
964 EXPORT_SYMBOL(xfrm_replay_advance);
965
966 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
967 static DEFINE_RWLOCK(xfrm_km_lock);
968
969 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
970 {
971         struct xfrm_mgr *km;
972
973         read_lock(&xfrm_km_lock);
974         list_for_each_entry(km, &xfrm_km_list, list)
975                 if (km->notify_policy)
976                         km->notify_policy(xp, dir, c);
977         read_unlock(&xfrm_km_lock);
978 }
979
980 void km_state_notify(struct xfrm_state *x, struct km_event *c)
981 {
982         struct xfrm_mgr *km;
983         read_lock(&xfrm_km_lock);
984         list_for_each_entry(km, &xfrm_km_list, list)
985                 if (km->notify)
986                         km->notify(x, c);
987         read_unlock(&xfrm_km_lock);
988 }
989
990 EXPORT_SYMBOL(km_policy_notify);
991 EXPORT_SYMBOL(km_state_notify);
992
993 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
994 {
995         struct km_event c;
996
997         c.data.hard = hard;
998         c.pid = pid;
999         c.event = XFRM_MSG_EXPIRE;
1000         km_state_notify(x, &c);
1001
1002         if (hard)
1003                 wake_up(&km_waitq);
1004 }
1005
1006 EXPORT_SYMBOL(km_state_expired);
1007 /*
1008  * We send to all registered managers regardless of failure
1009  * We are happy with one success
1010 */
1011 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1012 {
1013         int err = -EINVAL, acqret;
1014         struct xfrm_mgr *km;
1015
1016         read_lock(&xfrm_km_lock);
1017         list_for_each_entry(km, &xfrm_km_list, list) {
1018                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1019                 if (!acqret)
1020                         err = acqret;
1021         }
1022         read_unlock(&xfrm_km_lock);
1023         return err;
1024 }
1025 EXPORT_SYMBOL(km_query);
1026
1027 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1028 {
1029         int err = -EINVAL;
1030         struct xfrm_mgr *km;
1031
1032         read_lock(&xfrm_km_lock);
1033         list_for_each_entry(km, &xfrm_km_list, list) {
1034                 if (km->new_mapping)
1035                         err = km->new_mapping(x, ipaddr, sport);
1036                 if (!err)
1037                         break;
1038         }
1039         read_unlock(&xfrm_km_lock);
1040         return err;
1041 }
1042 EXPORT_SYMBOL(km_new_mapping);
1043
1044 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1045 {
1046         struct km_event c;
1047
1048         c.data.hard = hard;
1049         c.pid = pid;
1050         c.event = XFRM_MSG_POLEXPIRE;
1051         km_policy_notify(pol, dir, &c);
1052
1053         if (hard)
1054                 wake_up(&km_waitq);
1055 }
1056 EXPORT_SYMBOL(km_policy_expired);
1057
1058 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1059 {
1060         int err;
1061         u8 *data;
1062         struct xfrm_mgr *km;
1063         struct xfrm_policy *pol = NULL;
1064
1065         if (optlen <= 0 || optlen > PAGE_SIZE)
1066                 return -EMSGSIZE;
1067
1068         data = kmalloc(optlen, GFP_KERNEL);
1069         if (!data)
1070                 return -ENOMEM;
1071
1072         err = -EFAULT;
1073         if (copy_from_user(data, optval, optlen))
1074                 goto out;
1075
1076         err = -EINVAL;
1077         read_lock(&xfrm_km_lock);
1078         list_for_each_entry(km, &xfrm_km_list, list) {
1079                 pol = km->compile_policy(sk, optname, data,
1080                                          optlen, &err);
1081                 if (err >= 0)
1082                         break;
1083         }
1084         read_unlock(&xfrm_km_lock);
1085
1086         if (err >= 0) {
1087                 xfrm_sk_policy_insert(sk, err, pol);
1088                 xfrm_pol_put(pol);
1089                 err = 0;
1090         }
1091
1092 out:
1093         kfree(data);
1094         return err;
1095 }
1096 EXPORT_SYMBOL(xfrm_user_policy);
1097
1098 int xfrm_register_km(struct xfrm_mgr *km)
1099 {
1100         write_lock_bh(&xfrm_km_lock);
1101         list_add_tail(&km->list, &xfrm_km_list);
1102         write_unlock_bh(&xfrm_km_lock);
1103         return 0;
1104 }
1105 EXPORT_SYMBOL(xfrm_register_km);
1106
1107 int xfrm_unregister_km(struct xfrm_mgr *km)
1108 {
1109         write_lock_bh(&xfrm_km_lock);
1110         list_del(&km->list);
1111         write_unlock_bh(&xfrm_km_lock);
1112         return 0;
1113 }
1114 EXPORT_SYMBOL(xfrm_unregister_km);
1115
1116 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1117 {
1118         int err = 0;
1119         if (unlikely(afinfo == NULL))
1120                 return -EINVAL;
1121         if (unlikely(afinfo->family >= NPROTO))
1122                 return -EAFNOSUPPORT;
1123         write_lock_bh(&xfrm_state_afinfo_lock);
1124         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1125                 err = -ENOBUFS;
1126         else {
1127                 afinfo->state_bydst = xfrm_state_bydst;
1128                 afinfo->state_bysrc = xfrm_state_bysrc;
1129                 afinfo->state_byspi = xfrm_state_byspi;
1130                 xfrm_state_afinfo[afinfo->family] = afinfo;
1131         }
1132         write_unlock_bh(&xfrm_state_afinfo_lock);
1133         return err;
1134 }
1135 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1136
1137 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1138 {
1139         int err = 0;
1140         if (unlikely(afinfo == NULL))
1141                 return -EINVAL;
1142         if (unlikely(afinfo->family >= NPROTO))
1143                 return -EAFNOSUPPORT;
1144         write_lock_bh(&xfrm_state_afinfo_lock);
1145         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1146                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1147                         err = -EINVAL;
1148                 else {
1149                         xfrm_state_afinfo[afinfo->family] = NULL;
1150                         afinfo->state_byspi = NULL;
1151                         afinfo->state_bysrc = NULL;
1152                         afinfo->state_bydst = NULL;
1153                 }
1154         }
1155         write_unlock_bh(&xfrm_state_afinfo_lock);
1156         return err;
1157 }
1158 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1159
1160 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1161 {
1162         struct xfrm_state_afinfo *afinfo;
1163         if (unlikely(family >= NPROTO))
1164                 return NULL;
1165         read_lock(&xfrm_state_afinfo_lock);
1166         afinfo = xfrm_state_afinfo[family];
1167         if (unlikely(!afinfo))
1168                 read_unlock(&xfrm_state_afinfo_lock);
1169         return afinfo;
1170 }
1171
1172 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1173 {
1174         read_unlock(&xfrm_state_afinfo_lock);
1175 }
1176
1177 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1178 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1179 {
1180         if (x->tunnel) {
1181                 struct xfrm_state *t = x->tunnel;
1182
1183                 if (atomic_read(&t->tunnel_users) == 2)
1184                         xfrm_state_delete(t);
1185                 atomic_dec(&t->tunnel_users);
1186                 xfrm_state_put(t);
1187                 x->tunnel = NULL;
1188         }
1189 }
1190 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1191
1192 /*
1193  * This function is NOT optimal.  For example, with ESP it will give an
1194  * MTU that's usually two bytes short of being optimal.  However, it will
1195  * usually give an answer that's a multiple of 4 provided the input is
1196  * also a multiple of 4.
1197  */
1198 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1199 {
1200         int res = mtu;
1201
1202         res -= x->props.header_len;
1203
1204         for (;;) {
1205                 int m = res;
1206
1207                 if (m < 68)
1208                         return 68;
1209
1210                 spin_lock_bh(&x->lock);
1211                 if (x->km.state == XFRM_STATE_VALID &&
1212                     x->type && x->type->get_max_size)
1213                         m = x->type->get_max_size(x, m);
1214                 else
1215                         m += x->props.header_len;
1216                 spin_unlock_bh(&x->lock);
1217
1218                 if (m <= mtu)
1219                         break;
1220                 res -= (m - mtu);
1221         }
1222
1223         return res;
1224 }
1225
1226 int xfrm_init_state(struct xfrm_state *x)
1227 {
1228         struct xfrm_state_afinfo *afinfo;
1229         int family = x->props.family;
1230         int err;
1231
1232         err = -EAFNOSUPPORT;
1233         afinfo = xfrm_state_get_afinfo(family);
1234         if (!afinfo)
1235                 goto error;
1236
1237         err = 0;
1238         if (afinfo->init_flags)
1239                 err = afinfo->init_flags(x);
1240
1241         xfrm_state_put_afinfo(afinfo);
1242
1243         if (err)
1244                 goto error;
1245
1246         err = -EPROTONOSUPPORT;
1247         x->type = xfrm_get_type(x->id.proto, family);
1248         if (x->type == NULL)
1249                 goto error;
1250
1251         err = x->type->init_state(x);
1252         if (err)
1253                 goto error;
1254
1255         x->mode = xfrm_get_mode(x->props.mode, family);
1256         if (x->mode == NULL)
1257                 goto error;
1258
1259         x->km.state = XFRM_STATE_VALID;
1260
1261 error:
1262         return err;
1263 }
1264
1265 EXPORT_SYMBOL(xfrm_init_state);
1266  
1267 void __init xfrm_state_init(void)
1268 {
1269         int i;
1270
1271         for (i=0; i<XFRM_DST_HSIZE; i++) {
1272                 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1273                 INIT_LIST_HEAD(&xfrm_state_bysrc[i]);
1274                 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1275         }
1276         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1277 }
1278