media: imx274: get rid of mode_index
[linux-2.6-microblaze.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/inet_common.h>
28 #include <net/sock.h>
29 #include <net/xfrm.h>
30
31 #include <asm/ioctls.h>
32 #include <linux/spinlock.h>
33 #include <linux/timer.h>
34 #include <linux/delay.h>
35 #include <linux/poll.h>
36
37 #include "ccid.h"
38 #include "dccp.h"
39 #include "feat.h"
40
41 #define CREATE_TRACE_POINTS
42 #include "trace.h"
43
/* Per-CPU DCCP MIB counters (incremented via DCCP_INC_STATS and friends) */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* NOTE(review): presumably counts sockets closed while data was pending,
 * mirroring TCP's orphan accounting — maintained outside this chunk.
 */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Connection hash tables shared by the DCCP IPv4/IPv6 protocol modules */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
56
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * dccp_state_name - map a DCCP socket state to a printable name
 * @state: one of the DCCP_* socket states
 *
 * Debug-only helper for dccp_pr_debug(). Rejects negative values (the
 * parameter is a plain int) and guards against gaps in the designated
 * initializer table, which would otherwise hand a NULL pointer to printk.
 */
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state < 0 || state >= DCCP_MAX_STATES ||
	    dccp_state_names[state] == NULL)
		return "INVALID STATE!";
	return dccp_state_names[state];
}
#endif
80
/*
 * dccp_set_state - transition a socket to a new DCCP state
 * @sk:    socket being transitioned
 * @state: target DCCP_* state
 *
 * Keeps the SNMP MIB counters (CurrEstab, EstabResets) in sync with the
 * transition and, when entering DCCP_CLOSED, unhashes the socket and
 * releases its bound port. The state field itself is written last so a
 * closed socket is never visible in the hash tables.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);	/* callers must only request real transitions */

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		/* Only transitions out of an established state count as resets */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		/* Release the port unless the user explicitly locked it */
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);
120
121 static void dccp_finish_passive_close(struct sock *sk)
122 {
123         switch (sk->sk_state) {
124         case DCCP_PASSIVE_CLOSE:
125                 /* Node (client or server) has received Close packet. */
126                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
127                 dccp_set_state(sk, DCCP_CLOSED);
128                 break;
129         case DCCP_PASSIVE_CLOSEREQ:
130                 /*
131                  * Client received CloseReq. We set the `active' flag so that
132                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
133                  */
134                 dccp_send_close(sk, 1);
135                 dccp_set_state(sk, DCCP_CLOSING);
136         }
137 }
138
139 void dccp_done(struct sock *sk)
140 {
141         dccp_set_state(sk, DCCP_CLOSED);
142         dccp_clear_xmit_timers(sk);
143
144         sk->sk_shutdown = SHUTDOWN_MASK;
145
146         if (!sock_flag(sk, SOCK_DEAD))
147                 sk->sk_state_change(sk);
148         else
149                 inet_csk_destroy_sock(sk);
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_done);
153
154 const char *dccp_packet_name(const int type)
155 {
156         static const char *const dccp_packet_names[] = {
157                 [DCCP_PKT_REQUEST]  = "REQUEST",
158                 [DCCP_PKT_RESPONSE] = "RESPONSE",
159                 [DCCP_PKT_DATA]     = "DATA",
160                 [DCCP_PKT_ACK]      = "ACK",
161                 [DCCP_PKT_DATAACK]  = "DATAACK",
162                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
163                 [DCCP_PKT_CLOSE]    = "CLOSE",
164                 [DCCP_PKT_RESET]    = "RESET",
165                 [DCCP_PKT_SYNC]     = "SYNC",
166                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
167         };
168
169         if (type >= DCCP_NR_PKT_TYPES)
170                 return "INVALID";
171         else
172                 return dccp_packet_names[type];
173 }
174
175 EXPORT_SYMBOL_GPL(dccp_packet_name);
176
177 static void dccp_sk_destruct(struct sock *sk)
178 {
179         struct dccp_sock *dp = dccp_sk(sk);
180
181         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
182         dp->dccps_hc_tx_ccid = NULL;
183         inet_sock_destruct(sk);
184 }
185
/*
 * dccp_init_sock - initialise a freshly created DCCP socket
 * @sk:                   socket to initialise
 * @ctl_sock_initialized: non-zero once the per-protocol control socket
 *                        exists; the control socket itself skips feature
 *                        negotiation setup
 *
 * Sets timer, MSS, role and service defaults and arms the transmit
 * timers. Returns 0, or the error from dccp_feat_init().
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative default, cf. RFC 1122 minimum MTU */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
213
/*
 * dccp_destroy_sock - release per-socket DCCP resources
 * @sk: socket being destroyed
 *
 * Frees queued skbs, the bound port, the service list, the ack vector
 * and the RX CCID. The TX CCID is intentionally NOT freed here — that
 * happens in dccp_sk_destruct() when the last socket reference drops.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
243
244 static inline int dccp_listen_start(struct sock *sk, int backlog)
245 {
246         struct dccp_sock *dp = dccp_sk(sk);
247
248         dp->dccps_role = DCCP_ROLE_LISTEN;
249         /* do not start to listen if feature negotiation setup fails */
250         if (dccp_feat_finalise_settings(dp))
251                 return -EPROTO;
252         return inet_csk_listen_start(sk, backlog);
253 }
254
255 static inline int dccp_need_reset(int state)
256 {
257         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
258                state != DCCP_REQUESTING;
259 }
260
/*
 * dccp_disconnect - abort the connection and reset the socket to CLOSED
 * @sk:    socket to disconnect (caller holds the socket lock)
 * @flags: unused here; kept for the proto->disconnect signature
 *
 * Implements the ABORT function of RFC 793 sec. 3.8 for DCCP: sends a
 * Reset (Code 2, "Aborted") when the state requires it, tears down
 * timers, CCID state and queued packets, and clears addressing so the
 * socket can be reused. Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	/* Drop RX CCID state; a subsequent connect() negotiates it anew */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	/* Forget the source address unless the user bound it explicitly */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A bound socket must still reference its bind bucket */
	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
314
/*
 * dccp_poll_mask - compute the poll/epoll event mask for a DCCP socket
 * @sock:   user socket
 * @events: events the caller is interested in (unused; full mask returned)
 *
 * Runs without the socket lock: poll's own serialisation makes races
 * with state changes benign (a stale mask merely causes a re-poll).
 */
__poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
{
	__poll_t mask;
	struct sock *sk = sock->sk;

	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = EPOLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected? (i.e. not in the REQUESTING/RESPOND handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll_mask);
362
363 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
364 {
365         int rc = -ENOTCONN;
366
367         lock_sock(sk);
368
369         if (sk->sk_state == DCCP_LISTEN)
370                 goto out;
371
372         switch (cmd) {
373         case SIOCINQ: {
374                 struct sk_buff *skb;
375                 unsigned long amount = 0;
376
377                 skb = skb_peek(&sk->sk_receive_queue);
378                 if (skb != NULL) {
379                         /*
380                          * We will only return the amount of this packet since
381                          * that is all that will be read.
382                          */
383                         amount = skb->len;
384                 }
385                 rc = put_user(amount, (int __user *)arg);
386         }
387                 break;
388         default:
389                 rc = -ENOIOCTLCMD;
390                 break;
391         }
392 out:
393         release_sock(sk);
394         return rc;
395 }
396
397 EXPORT_SYMBOL_GPL(dccp_ioctl);
398
/*
 * dccp_setsockopt_service - install the service code (+ optional list)
 * @sk:      socket to configure
 * @service: primary service code (already copied in by the caller)
 * @optval:  user buffer; additional codes follow the first one
 * @optlen:  total option length in bytes
 *
 * The user copy is done *before* taking the socket lock, since
 * copy_from_user() may sleep/fault. Returns 0, -EINVAL, -ENOMEM or
 * -EFAULT.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	/* More than one code supplied: build the auxiliary list */
	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		/* The invalid value may not appear anywhere in the list */
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
433
434 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
435 {
436         u8 *list, len;
437         int i, rc;
438
439         if (cscov < 0 || cscov > 15)
440                 return -EINVAL;
441         /*
442          * Populate a list of permissible values, in the range cscov...15. This
443          * is necessary since feature negotiation of single values only works if
444          * both sides incidentally choose the same value. Since the list starts
445          * lowest-value first, negotiation will pick the smallest shared value.
446          */
447         if (cscov == 0)
448                 return 0;
449         len = 16 - cscov;
450
451         list = kmalloc(len, GFP_KERNEL);
452         if (list == NULL)
453                 return -ENOBUFS;
454
455         for (i = 0; i < len; i++)
456                 list[i] = cscov++;
457
458         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
459
460         if (rc == 0) {
461                 if (rx)
462                         dccp_sk(sk)->dccps_pcrlen = cscov;
463                 else
464                         dccp_sk(sk)->dccps_pcslen = cscov;
465         }
466         kfree(list);
467         return rc;
468 }
469
470 static int dccp_setsockopt_ccid(struct sock *sk, int type,
471                                 char __user *optval, unsigned int optlen)
472 {
473         u8 *val;
474         int rc = 0;
475
476         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
477                 return -EINVAL;
478
479         val = memdup_user(optval, optlen);
480         if (IS_ERR(val))
481                 return PTR_ERR(val);
482
483         lock_sock(sk);
484         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
485                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
486
487         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
488                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
489         release_sock(sk);
490
491         kfree(val);
492         return rc;
493 }
494
/*
 * do_dccp_setsockopt - handle SOL_DCCP-level setsockopt
 *
 * Options that parse optval themselves (CCID selection, SERVICE) are
 * dispatched before the generic "read one int" step; the remainder take
 * the socket lock and operate on the copied-in integer.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	/* Everything below expects at least an int's worth of option data */
	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* SERVICE re-reads optval itself (may carry a list of codes) */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		/* The queuing policy can only be chosen before connecting */
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
560
561 int dccp_setsockopt(struct sock *sk, int level, int optname,
562                     char __user *optval, unsigned int optlen)
563 {
564         if (level != SOL_DCCP)
565                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
566                                                              optname, optval,
567                                                              optlen);
568         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
569 }
570
571 EXPORT_SYMBOL_GPL(dccp_setsockopt);
572
#ifdef CONFIG_COMPAT
/* 32-bit compat entry point; SOL_DCCP options need no translation. */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
585
586 static int dccp_getsockopt_service(struct sock *sk, int len,
587                                    __be32 __user *optval,
588                                    int __user *optlen)
589 {
590         const struct dccp_sock *dp = dccp_sk(sk);
591         const struct dccp_service_list *sl;
592         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
593
594         lock_sock(sk);
595         if ((sl = dp->dccps_service_list) != NULL) {
596                 slen = sl->dccpsl_nr * sizeof(u32);
597                 total_len += slen;
598         }
599
600         err = -EINVAL;
601         if (total_len > len)
602                 goto out;
603
604         err = 0;
605         if (put_user(total_len, optlen) ||
606             put_user(dp->dccps_service, optval) ||
607             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
608                 err = -EFAULT;
609 out:
610         release_sock(sk);
611         return err;
612 }
613
/*
 * do_dccp_getsockopt - handle SOL_DCCP-level getsockopt
 *
 * Integer-valued options fall through to the common put_user() at the
 * bottom; options with their own wire format (SERVICE, AVAILABLE_CCIDS,
 * CCID-private ranges) return directly. Option numbers 128..191 are
 * reserved for the RX CCID, 192..255 for the TX CCID.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)	/* no TX CCID negotiated yet */
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)	/* no RX CCID negotiated yet */
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
681
682 int dccp_getsockopt(struct sock *sk, int level, int optname,
683                     char __user *optval, int __user *optlen)
684 {
685         if (level != SOL_DCCP)
686                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
687                                                              optname, optval,
688                                                              optlen);
689         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
690 }
691
692 EXPORT_SYMBOL_GPL(dccp_getsockopt);
693
#ifdef CONFIG_COMPAT
/* 32-bit compat entry point; SOL_DCCP options need no translation. */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
706
/*
 * dccp_msghdr_parse - extract SOL_DCCP control messages from a sendmsg call
 * @msg: message header supplied by the user
 * @skb: outgoing packet; receives the parsed priority
 *
 * Returns 0 on success, -EINVAL on malformed cmsgs or when the active
 * queuing policy does not accept the given parameter.
 */
static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		/* Ignore control messages meant for other levels (e.g. IP) */
		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		/* Reject qpolicy parameters the current policy can't handle */
		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
746
/*
 * dccp_sendmsg - queue one datagram for transmission
 * @sk:  connected (or connecting) DCCP socket
 * @msg: user message; must fit into a single packet (<= current MSS)
 * @len: payload length
 *
 * Returns @len on success or a negative errno. Messages larger than the
 * cached MSS are rejected with -EMSGSIZE rather than fragmented.
 */
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	trace_dccp_probe(sk, len);

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* The TX queue policy may bound the number of queued packets */
	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process (which
	 * relies on sk_write_pending) works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the lock around the (possibly sleeping) allocation, then
	 * re-validate the socket state afterwards.
	 */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	if (sk->sk_state == DCCP_CLOSED) {
		rc = -ENOTCONN;
		goto out_discard;
	}

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits to release further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
817
/*
 * dccp_recvmsg - receive one datagram
 * @sk:       socket to read from
 * @msg:      destination message header
 * @len:      user buffer size; excess packet data sets MSG_TRUNC
 * @nonblock: non-zero to return -EAGAIN instead of sleeping
 * @flags:    MSG_PEEK / MSG_TRUNC etc.
 * @addr_len: unused here (connection-oriented socket)
 *
 * Returns the number of bytes copied, 0 on orderly shutdown/close
 * packets, or a negative errno. Non-data packets at the queue head
 * (Close, CloseReq, Reset) terminate the read with length 0; other
 * non-data types are silently consumed and the wait resumes.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* Peeking must not trigger the close handshake */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Any other packet type is dropped from the queue */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Sleep until data arrives or the timeout elapses */
		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		/* MSG_TRUNC requests the full packet length as return value */
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
925
926 int inet_dccp_listen(struct socket *sock, int backlog)
927 {
928         struct sock *sk = sock->sk;
929         unsigned char old_state;
930         int err;
931
932         lock_sock(sk);
933
934         err = -EINVAL;
935         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
936                 goto out;
937
938         old_state = sk->sk_state;
939         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
940                 goto out;
941
942         /* Really, if the socket is already in listen state
943          * we can only allow the backlog to be adjusted.
944          */
945         if (old_state != DCCP_LISTEN) {
946                 /*
947                  * FIXME: here it probably should be sk->sk_prot->listen_start
948                  * see tcp_listen_start
949                  */
950                 err = dccp_listen_start(sk, backlog);
951                 if (err)
952                         goto out;
953         }
954         sk->sk_max_ack_backlog = backlog;
955         err = 0;
956
957 out:
958         release_sock(sk);
959         return err;
960 }
961
962 EXPORT_SYMBOL_GPL(inet_dccp_listen);
963
/*
 * dccp_terminate_connection - begin terminating an established connection
 * @sk: socket being closed (caller holds the socket lock)
 *
 * Picks the next protocol state based on the current one and, for
 * OPEN/PARTOPEN sockets, transmits a Close packet.  Note the two
 * deliberate switch fall-throughs: PARTOPEN continues into OPEN, and
 * OPEN continues into the default case that commits @next_state.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		/* Peer initiated the close; complete it locally. */
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		/* Half-open client: cancel the delayed-ACK timer first. */
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		/* Active close: send our Close/CloseReq packet. */
		dccp_send_close(sk, 1);

		/*
		 * A server that does not opt into the timewait side goes to
		 * ACTIVE_CLOSEREQ; everyone else waits in CLOSING.
		 */
		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
990
/*
 * dccp_close - close(2) handler for DCCP sockets
 * @sk:      socket being closed
 * @timeout: linger time (jiffies) to wait for queued data to drain
 *
 * Flushes the receive queue, terminates or resets the connection as
 * appropriate, then orphans the socket and hands the remainder of the
 * teardown to the protocol (or destroys it immediately if already
 * CLOSED).  Lock ordering here is delicate: the socket lock is dropped
 * before re-acquiring it as a BH lock -- do not reorder.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: listeners have no data path to unwind. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/* No more CCID-scheduled transmissions from this point on. */
	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before orphaning; used for the race check below. */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);		/* drop the reference taken by sock_hold() above */
}
1087
1088 EXPORT_SYMBOL_GPL(dccp_close);
1089
/*
 * dccp_shutdown - shutdown(2) handler for DCCP sockets
 * @sk:  socket being shut down
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR selector from userspace
 *
 * Currently only logs the request; no socket state is changed here.
 * NOTE(review): looks like an intentional stub -- confirm whether
 * receive-side shutdown needs real handling before relying on it.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1094
1095 EXPORT_SYMBOL_GPL(dccp_shutdown);
1096
1097 static inline int __init dccp_mib_init(void)
1098 {
1099         dccp_statistics = alloc_percpu(struct dccp_mib);
1100         if (!dccp_statistics)
1101                 return -ENOMEM;
1102         return 0;
1103 }
1104
/* Release the per-CPU MIB counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}
1109
/* Optional override for the established-hash size (0 = auto-size by RAM). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime toggle for dccp_pr_debug() output; writable via sysfs (0644). */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1121
/*
 * dccp_init - module init: allocate hash tables and per-CPU state
 *
 * Sets up, in order: the orphan counter, the bind-bucket slab cache, the
 * established (ehash) and bind (bhash) hash tables sized from available
 * RAM (or the thash_entries parameter), the MIB counters, ack-vector
 * support, sysctls and the built-in CCIDs.  On failure, the goto labels
 * unwind exactly in reverse order of the allocations above them --
 * keep label order in sync with allocation order when editing.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	/* dccp_skb_cb is stored in skb->cb[]; it must fit. */
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

	/* thash_entries, if given, overrides the RAM-based sizing. */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Try progressively smaller orders until the allocation succeeds. */
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		/* Round the bucket count down to a power of two for ehash_mask. */
		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* Start bhash at the same order as ehash, shrinking on failure. */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		/* Cap the bind table at 64K buckets unless already at order 0. */
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	/* Clear pointers so dccp_fini() and others see a clean failure state. */
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}
1244
/*
 * dccp_fini - module exit: tear down everything dccp_init() set up,
 * in reverse order of initialization.  The hash-table page orders are
 * recomputed from bhash_size/ehash_mask rather than stored, so they
 * must match the sizes chosen in dccp_init().
 */
static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}
1261
1262 module_init(dccp_init);
1263 module_exit(dccp_fini);
1264
1265 MODULE_LICENSE("GPL");
1266 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1267 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");