Merge tag 'mvebu-fixes-4.17-1' of git://git.infradead.org/linux-mvebu into fixes
[linux-2.6-microblaze.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/inet_common.h>
28 #include <net/sock.h>
29 #include <net/xfrm.h>
30
31 #include <asm/ioctls.h>
32 #include <linux/spinlock.h>
33 #include <linux/timer.h>
34 #include <linux/delay.h>
35 #include <linux/poll.h>
36
37 #include "ccid.h"
38 #include "dccp.h"
39 #include "feat.h"
40
41 #define CREATE_TRACE_POINTS
42 #include "trace.h"
43
/*
 * DCCP MIB counter storage.
 * NOTE(review): presumably what DCCP_INC_STATS()/DCCP_DEC_STATS() update;
 * confirm against dccp.h.
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Per-CPU counter exported to the other DCCP modules; not used in this part of the file. */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* inet hash tables instance exported for DCCP sockets. */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets. 0 is no limit.
 * dccp_init_sock() copies this into each new socket's dccps_tx_qlen.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
56
57 #ifdef CONFIG_IP_DCCP_DEBUG
58 static const char *dccp_state_name(const int state)
59 {
60         static const char *const dccp_state_names[] = {
61         [DCCP_OPEN]             = "OPEN",
62         [DCCP_REQUESTING]       = "REQUESTING",
63         [DCCP_PARTOPEN]         = "PARTOPEN",
64         [DCCP_LISTEN]           = "LISTEN",
65         [DCCP_RESPOND]          = "RESPOND",
66         [DCCP_CLOSING]          = "CLOSING",
67         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
68         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
69         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
70         [DCCP_TIME_WAIT]        = "TIME_WAIT",
71         [DCCP_CLOSED]           = "CLOSED",
72         };
73
74         if (state >= DCCP_MAX_STATES)
75                 return "INVALID STATE!";
76         else
77                 return dccp_state_names[state];
78 }
79 #endif
80
81 void dccp_set_state(struct sock *sk, const int state)
82 {
83         const int oldstate = sk->sk_state;
84
85         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
86                       dccp_state_name(oldstate), dccp_state_name(state));
87         WARN_ON(state == oldstate);
88
89         switch (state) {
90         case DCCP_OPEN:
91                 if (oldstate != DCCP_OPEN)
92                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
93                 /* Client retransmits all Confirm options until entering OPEN */
94                 if (oldstate == DCCP_PARTOPEN)
95                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
96                 break;
97
98         case DCCP_CLOSED:
99                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
100                     oldstate == DCCP_CLOSING)
101                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
102
103                 sk->sk_prot->unhash(sk);
104                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
105                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
106                         inet_put_port(sk);
107                 /* fall through */
108         default:
109                 if (oldstate == DCCP_OPEN)
110                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
111         }
112
113         /* Change state AFTER socket is unhashed to avoid closed
114          * socket sitting in hash tables.
115          */
116         inet_sk_set_state(sk, state);
117 }
118
119 EXPORT_SYMBOL_GPL(dccp_set_state);
120
121 static void dccp_finish_passive_close(struct sock *sk)
122 {
123         switch (sk->sk_state) {
124         case DCCP_PASSIVE_CLOSE:
125                 /* Node (client or server) has received Close packet. */
126                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
127                 dccp_set_state(sk, DCCP_CLOSED);
128                 break;
129         case DCCP_PASSIVE_CLOSEREQ:
130                 /*
131                  * Client received CloseReq. We set the `active' flag so that
132                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
133                  */
134                 dccp_send_close(sk, 1);
135                 dccp_set_state(sk, DCCP_CLOSING);
136         }
137 }
138
139 void dccp_done(struct sock *sk)
140 {
141         dccp_set_state(sk, DCCP_CLOSED);
142         dccp_clear_xmit_timers(sk);
143
144         sk->sk_shutdown = SHUTDOWN_MASK;
145
146         if (!sock_flag(sk, SOCK_DEAD))
147                 sk->sk_state_change(sk);
148         else
149                 inet_csk_destroy_sock(sk);
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_done);
153
154 const char *dccp_packet_name(const int type)
155 {
156         static const char *const dccp_packet_names[] = {
157                 [DCCP_PKT_REQUEST]  = "REQUEST",
158                 [DCCP_PKT_RESPONSE] = "RESPONSE",
159                 [DCCP_PKT_DATA]     = "DATA",
160                 [DCCP_PKT_ACK]      = "ACK",
161                 [DCCP_PKT_DATAACK]  = "DATAACK",
162                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
163                 [DCCP_PKT_CLOSE]    = "CLOSE",
164                 [DCCP_PKT_RESET]    = "RESET",
165                 [DCCP_PKT_SYNC]     = "SYNC",
166                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
167         };
168
169         if (type >= DCCP_NR_PKT_TYPES)
170                 return "INVALID";
171         else
172                 return dccp_packet_names[type];
173 }
174
175 EXPORT_SYMBOL_GPL(dccp_packet_name);
176
177 static void dccp_sk_destruct(struct sock *sk)
178 {
179         struct dccp_sock *dp = dccp_sk(sk);
180
181         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
182         dp->dccps_hc_tx_ccid = NULL;
183         inet_sock_destruct(sk);
184 }
185
186 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
187 {
188         struct dccp_sock *dp = dccp_sk(sk);
189         struct inet_connection_sock *icsk = inet_csk(sk);
190
191         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
192         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
193         sk->sk_state            = DCCP_CLOSED;
194         sk->sk_write_space      = dccp_write_space;
195         sk->sk_destruct         = dccp_sk_destruct;
196         icsk->icsk_sync_mss     = dccp_sync_mss;
197         dp->dccps_mss_cache     = 536;
198         dp->dccps_rate_last     = jiffies;
199         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
200         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
201         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
202
203         dccp_init_xmit_timers(sk);
204
205         INIT_LIST_HEAD(&dp->dccps_featneg);
206         /* control socket doesn't need feat nego */
207         if (likely(ctl_sock_initialized))
208                 return dccp_feat_init(sk);
209         return 0;
210 }
211
212 EXPORT_SYMBOL_GPL(dccp_init_sock);
213
214 void dccp_destroy_sock(struct sock *sk)
215 {
216         struct dccp_sock *dp = dccp_sk(sk);
217
218         __skb_queue_purge(&sk->sk_write_queue);
219         if (sk->sk_send_head != NULL) {
220                 kfree_skb(sk->sk_send_head);
221                 sk->sk_send_head = NULL;
222         }
223
224         /* Clean up a referenced DCCP bind bucket. */
225         if (inet_csk(sk)->icsk_bind_hash != NULL)
226                 inet_put_port(sk);
227
228         kfree(dp->dccps_service_list);
229         dp->dccps_service_list = NULL;
230
231         if (dp->dccps_hc_rx_ackvec != NULL) {
232                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
233                 dp->dccps_hc_rx_ackvec = NULL;
234         }
235         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
236         dp->dccps_hc_rx_ccid = NULL;
237
238         /* clean up feature negotiation state */
239         dccp_feat_list_purge(&dp->dccps_featneg);
240 }
241
242 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
243
244 static inline int dccp_listen_start(struct sock *sk, int backlog)
245 {
246         struct dccp_sock *dp = dccp_sk(sk);
247
248         dp->dccps_role = DCCP_ROLE_LISTEN;
249         /* do not start to listen if feature negotiation setup fails */
250         if (dccp_feat_finalise_settings(dp))
251                 return -EPROTO;
252         return inet_csk_listen_start(sk, backlog);
253 }
254
255 static inline int dccp_need_reset(int state)
256 {
257         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
258                state != DCCP_REQUESTING;
259 }
260
261 int dccp_disconnect(struct sock *sk, int flags)
262 {
263         struct inet_connection_sock *icsk = inet_csk(sk);
264         struct inet_sock *inet = inet_sk(sk);
265         struct dccp_sock *dp = dccp_sk(sk);
266         int err = 0;
267         const int old_state = sk->sk_state;
268
269         if (old_state != DCCP_CLOSED)
270                 dccp_set_state(sk, DCCP_CLOSED);
271
272         /*
273          * This corresponds to the ABORT function of RFC793, sec. 3.8
274          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
275          */
276         if (old_state == DCCP_LISTEN) {
277                 inet_csk_listen_stop(sk);
278         } else if (dccp_need_reset(old_state)) {
279                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
280                 sk->sk_err = ECONNRESET;
281         } else if (old_state == DCCP_REQUESTING)
282                 sk->sk_err = ECONNRESET;
283
284         dccp_clear_xmit_timers(sk);
285         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
286         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
287         dp->dccps_hc_rx_ccid = NULL;
288         dp->dccps_hc_tx_ccid = NULL;
289
290         __skb_queue_purge(&sk->sk_receive_queue);
291         __skb_queue_purge(&sk->sk_write_queue);
292         if (sk->sk_send_head != NULL) {
293                 __kfree_skb(sk->sk_send_head);
294                 sk->sk_send_head = NULL;
295         }
296
297         inet->inet_dport = 0;
298
299         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
300                 inet_reset_saddr(sk);
301
302         sk->sk_shutdown = 0;
303         sock_reset_flag(sk, SOCK_DONE);
304
305         icsk->icsk_backoff = 0;
306         inet_csk_delack_init(sk);
307         __sk_dst_reset(sk);
308
309         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
310
311         sk->sk_error_report(sk);
312         return err;
313 }
314
315 EXPORT_SYMBOL_GPL(dccp_disconnect);
316
317 /*
318  *      Wait for a DCCP event.
319  *
320  *      Note that we don't need to lock the socket, as the upper poll layers
321  *      take care of normal races (between the test and the event) and we don't
322  *      go look at any of the socket buffers directly.
323  */
324 __poll_t dccp_poll(struct file *file, struct socket *sock,
325                        poll_table *wait)
326 {
327         __poll_t mask;
328         struct sock *sk = sock->sk;
329
330         sock_poll_wait(file, sk_sleep(sk), wait);
331         if (sk->sk_state == DCCP_LISTEN)
332                 return inet_csk_listen_poll(sk);
333
334         /* Socket is not locked. We are protected from async events
335            by poll logic and correct handling of state changes
336            made by another threads is impossible in any case.
337          */
338
339         mask = 0;
340         if (sk->sk_err)
341                 mask = EPOLLERR;
342
343         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
344                 mask |= EPOLLHUP;
345         if (sk->sk_shutdown & RCV_SHUTDOWN)
346                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
347
348         /* Connected? */
349         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
350                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
351                         mask |= EPOLLIN | EPOLLRDNORM;
352
353                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
354                         if (sk_stream_is_writeable(sk)) {
355                                 mask |= EPOLLOUT | EPOLLWRNORM;
356                         } else {  /* send SIGIO later */
357                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
358                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
359
360                                 /* Race breaker. If space is freed after
361                                  * wspace test but before the flags are set,
362                                  * IO signal will be lost.
363                                  */
364                                 if (sk_stream_is_writeable(sk))
365                                         mask |= EPOLLOUT | EPOLLWRNORM;
366                         }
367                 }
368         }
369         return mask;
370 }
371
372 EXPORT_SYMBOL_GPL(dccp_poll);
373
374 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
375 {
376         int rc = -ENOTCONN;
377
378         lock_sock(sk);
379
380         if (sk->sk_state == DCCP_LISTEN)
381                 goto out;
382
383         switch (cmd) {
384         case SIOCINQ: {
385                 struct sk_buff *skb;
386                 unsigned long amount = 0;
387
388                 skb = skb_peek(&sk->sk_receive_queue);
389                 if (skb != NULL) {
390                         /*
391                          * We will only return the amount of this packet since
392                          * that is all that will be read.
393                          */
394                         amount = skb->len;
395                 }
396                 rc = put_user(amount, (int __user *)arg);
397         }
398                 break;
399         default:
400                 rc = -ENOIOCTLCMD;
401                 break;
402         }
403 out:
404         release_sock(sk);
405         return rc;
406 }
407
408 EXPORT_SYMBOL_GPL(dccp_ioctl);
409
410 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
411                                    char __user *optval, unsigned int optlen)
412 {
413         struct dccp_sock *dp = dccp_sk(sk);
414         struct dccp_service_list *sl = NULL;
415
416         if (service == DCCP_SERVICE_INVALID_VALUE ||
417             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
418                 return -EINVAL;
419
420         if (optlen > sizeof(service)) {
421                 sl = kmalloc(optlen, GFP_KERNEL);
422                 if (sl == NULL)
423                         return -ENOMEM;
424
425                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
426                 if (copy_from_user(sl->dccpsl_list,
427                                    optval + sizeof(service),
428                                    optlen - sizeof(service)) ||
429                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
430                         kfree(sl);
431                         return -EFAULT;
432                 }
433         }
434
435         lock_sock(sk);
436         dp->dccps_service = service;
437
438         kfree(dp->dccps_service_list);
439
440         dp->dccps_service_list = sl;
441         release_sock(sk);
442         return 0;
443 }
444
445 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
446 {
447         u8 *list, len;
448         int i, rc;
449
450         if (cscov < 0 || cscov > 15)
451                 return -EINVAL;
452         /*
453          * Populate a list of permissible values, in the range cscov...15. This
454          * is necessary since feature negotiation of single values only works if
455          * both sides incidentally choose the same value. Since the list starts
456          * lowest-value first, negotiation will pick the smallest shared value.
457          */
458         if (cscov == 0)
459                 return 0;
460         len = 16 - cscov;
461
462         list = kmalloc(len, GFP_KERNEL);
463         if (list == NULL)
464                 return -ENOBUFS;
465
466         for (i = 0; i < len; i++)
467                 list[i] = cscov++;
468
469         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
470
471         if (rc == 0) {
472                 if (rx)
473                         dccp_sk(sk)->dccps_pcrlen = cscov;
474                 else
475                         dccp_sk(sk)->dccps_pcslen = cscov;
476         }
477         kfree(list);
478         return rc;
479 }
480
481 static int dccp_setsockopt_ccid(struct sock *sk, int type,
482                                 char __user *optval, unsigned int optlen)
483 {
484         u8 *val;
485         int rc = 0;
486
487         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
488                 return -EINVAL;
489
490         val = memdup_user(optval, optlen);
491         if (IS_ERR(val))
492                 return PTR_ERR(val);
493
494         lock_sock(sk);
495         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
496                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
497
498         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
499                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
500         release_sock(sk);
501
502         kfree(val);
503         return rc;
504 }
505
506 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
507                 char __user *optval, unsigned int optlen)
508 {
509         struct dccp_sock *dp = dccp_sk(sk);
510         int val, err = 0;
511
512         switch (optname) {
513         case DCCP_SOCKOPT_PACKET_SIZE:
514                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
515                 return 0;
516         case DCCP_SOCKOPT_CHANGE_L:
517         case DCCP_SOCKOPT_CHANGE_R:
518                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
519                 return 0;
520         case DCCP_SOCKOPT_CCID:
521         case DCCP_SOCKOPT_RX_CCID:
522         case DCCP_SOCKOPT_TX_CCID:
523                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
524         }
525
526         if (optlen < (int)sizeof(int))
527                 return -EINVAL;
528
529         if (get_user(val, (int __user *)optval))
530                 return -EFAULT;
531
532         if (optname == DCCP_SOCKOPT_SERVICE)
533                 return dccp_setsockopt_service(sk, val, optval, optlen);
534
535         lock_sock(sk);
536         switch (optname) {
537         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
538                 if (dp->dccps_role != DCCP_ROLE_SERVER)
539                         err = -EOPNOTSUPP;
540                 else
541                         dp->dccps_server_timewait = (val != 0);
542                 break;
543         case DCCP_SOCKOPT_SEND_CSCOV:
544                 err = dccp_setsockopt_cscov(sk, val, false);
545                 break;
546         case DCCP_SOCKOPT_RECV_CSCOV:
547                 err = dccp_setsockopt_cscov(sk, val, true);
548                 break;
549         case DCCP_SOCKOPT_QPOLICY_ID:
550                 if (sk->sk_state != DCCP_CLOSED)
551                         err = -EISCONN;
552                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
553                         err = -EINVAL;
554                 else
555                         dp->dccps_qpolicy = val;
556                 break;
557         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
558                 if (val < 0)
559                         err = -EINVAL;
560                 else
561                         dp->dccps_tx_qlen = val;
562                 break;
563         default:
564                 err = -ENOPROTOOPT;
565                 break;
566         }
567         release_sock(sk);
568
569         return err;
570 }
571
572 int dccp_setsockopt(struct sock *sk, int level, int optname,
573                     char __user *optval, unsigned int optlen)
574 {
575         if (level != SOL_DCCP)
576                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
577                                                              optname, optval,
578                                                              optlen);
579         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
580 }
581
582 EXPORT_SYMBOL_GPL(dccp_setsockopt);
583
584 #ifdef CONFIG_COMPAT
585 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
586                            char __user *optval, unsigned int optlen)
587 {
588         if (level != SOL_DCCP)
589                 return inet_csk_compat_setsockopt(sk, level, optname,
590                                                   optval, optlen);
591         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
592 }
593
594 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
595 #endif
596
597 static int dccp_getsockopt_service(struct sock *sk, int len,
598                                    __be32 __user *optval,
599                                    int __user *optlen)
600 {
601         const struct dccp_sock *dp = dccp_sk(sk);
602         const struct dccp_service_list *sl;
603         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
604
605         lock_sock(sk);
606         if ((sl = dp->dccps_service_list) != NULL) {
607                 slen = sl->dccpsl_nr * sizeof(u32);
608                 total_len += slen;
609         }
610
611         err = -EINVAL;
612         if (total_len > len)
613                 goto out;
614
615         err = 0;
616         if (put_user(total_len, optlen) ||
617             put_user(dp->dccps_service, optval) ||
618             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
619                 err = -EFAULT;
620 out:
621         release_sock(sk);
622         return err;
623 }
624
625 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
626                     char __user *optval, int __user *optlen)
627 {
628         struct dccp_sock *dp;
629         int val, len;
630
631         if (get_user(len, optlen))
632                 return -EFAULT;
633
634         if (len < (int)sizeof(int))
635                 return -EINVAL;
636
637         dp = dccp_sk(sk);
638
639         switch (optname) {
640         case DCCP_SOCKOPT_PACKET_SIZE:
641                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
642                 return 0;
643         case DCCP_SOCKOPT_SERVICE:
644                 return dccp_getsockopt_service(sk, len,
645                                                (__be32 __user *)optval, optlen);
646         case DCCP_SOCKOPT_GET_CUR_MPS:
647                 val = dp->dccps_mss_cache;
648                 break;
649         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
650                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
651         case DCCP_SOCKOPT_TX_CCID:
652                 val = ccid_get_current_tx_ccid(dp);
653                 if (val < 0)
654                         return -ENOPROTOOPT;
655                 break;
656         case DCCP_SOCKOPT_RX_CCID:
657                 val = ccid_get_current_rx_ccid(dp);
658                 if (val < 0)
659                         return -ENOPROTOOPT;
660                 break;
661         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
662                 val = dp->dccps_server_timewait;
663                 break;
664         case DCCP_SOCKOPT_SEND_CSCOV:
665                 val = dp->dccps_pcslen;
666                 break;
667         case DCCP_SOCKOPT_RECV_CSCOV:
668                 val = dp->dccps_pcrlen;
669                 break;
670         case DCCP_SOCKOPT_QPOLICY_ID:
671                 val = dp->dccps_qpolicy;
672                 break;
673         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
674                 val = dp->dccps_tx_qlen;
675                 break;
676         case 128 ... 191:
677                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
678                                              len, (u32 __user *)optval, optlen);
679         case 192 ... 255:
680                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
681                                              len, (u32 __user *)optval, optlen);
682         default:
683                 return -ENOPROTOOPT;
684         }
685
686         len = sizeof(val);
687         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
688                 return -EFAULT;
689
690         return 0;
691 }
692
693 int dccp_getsockopt(struct sock *sk, int level, int optname,
694                     char __user *optval, int __user *optlen)
695 {
696         if (level != SOL_DCCP)
697                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
698                                                              optname, optval,
699                                                              optlen);
700         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
701 }
702
703 EXPORT_SYMBOL_GPL(dccp_getsockopt);
704
705 #ifdef CONFIG_COMPAT
706 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
707                            char __user *optval, int __user *optlen)
708 {
709         if (level != SOL_DCCP)
710                 return inet_csk_compat_getsockopt(sk, level, optname,
711                                                   optval, optlen);
712         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
713 }
714
715 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
716 #endif
717
718 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
719 {
720         struct cmsghdr *cmsg;
721
722         /*
723          * Assign an (opaque) qpolicy priority value to skb->priority.
724          *
725          * We are overloading this skb field for use with the qpolicy subystem.
726          * The skb->priority is normally used for the SO_PRIORITY option, which
727          * is initialised from sk_priority. Since the assignment of sk_priority
728          * to skb->priority happens later (on layer 3), we overload this field
729          * for use with queueing priorities as long as the skb is on layer 4.
730          * The default priority value (if nothing is set) is 0.
731          */
732         skb->priority = 0;
733
734         for_each_cmsghdr(cmsg, msg) {
735                 if (!CMSG_OK(msg, cmsg))
736                         return -EINVAL;
737
738                 if (cmsg->cmsg_level != SOL_DCCP)
739                         continue;
740
741                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
742                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
743                         return -EINVAL;
744
745                 switch (cmsg->cmsg_type) {
746                 case DCCP_SCM_PRIORITY:
747                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
748                                 return -EINVAL;
749                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
750                         break;
751                 default:
752                         return -EINVAL;
753                 }
754         }
755         return 0;
756 }
757
758 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
759 {
760         const struct dccp_sock *dp = dccp_sk(sk);
761         const int flags = msg->msg_flags;
762         const int noblock = flags & MSG_DONTWAIT;
763         struct sk_buff *skb;
764         int rc, size;
765         long timeo;
766
767         trace_dccp_probe(sk, len);
768
769         if (len > dp->dccps_mss_cache)
770                 return -EMSGSIZE;
771
772         lock_sock(sk);
773
774         if (dccp_qpolicy_full(sk)) {
775                 rc = -EAGAIN;
776                 goto out_release;
777         }
778
779         timeo = sock_sndtimeo(sk, noblock);
780
781         /*
782          * We have to use sk_stream_wait_connect here to set sk_write_pending,
783          * so that the trick in dccp_rcv_request_sent_state_process.
784          */
785         /* Wait for a connection to finish. */
786         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
787                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
788                         goto out_release;
789
790         size = sk->sk_prot->max_header + len;
791         release_sock(sk);
792         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
793         lock_sock(sk);
794         if (skb == NULL)
795                 goto out_release;
796
797         if (sk->sk_state == DCCP_CLOSED) {
798                 rc = -ENOTCONN;
799                 goto out_discard;
800         }
801
802         skb_reserve(skb, sk->sk_prot->max_header);
803         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
804         if (rc != 0)
805                 goto out_discard;
806
807         rc = dccp_msghdr_parse(msg, skb);
808         if (rc != 0)
809                 goto out_discard;
810
811         dccp_qpolicy_push(sk, skb);
812         /*
813          * The xmit_timer is set if the TX CCID is rate-based and will expire
814          * when congestion control permits to release further packets into the
815          * network. Window-based CCIDs do not use this timer.
816          */
817         if (!timer_pending(&dp->dccps_xmit_timer))
818                 dccp_write_xmit(sk);
819 out_release:
820         release_sock(sk);
821         return rc ? : len;
822 out_discard:
823         kfree_skb(skb);
824         goto out_release;
825 }
826
827 EXPORT_SYMBOL_GPL(dccp_sendmsg);
828
829 int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
830                  int flags, int *addr_len)
831 {
832         const struct dccp_hdr *dh;
833         long timeo;
834
835         lock_sock(sk);
836
837         if (sk->sk_state == DCCP_LISTEN) {
838                 len = -ENOTCONN;
839                 goto out;
840         }
841
842         timeo = sock_rcvtimeo(sk, nonblock);
843
844         do {
845                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
846
847                 if (skb == NULL)
848                         goto verify_sock_status;
849
850                 dh = dccp_hdr(skb);
851
852                 switch (dh->dccph_type) {
853                 case DCCP_PKT_DATA:
854                 case DCCP_PKT_DATAACK:
855                         goto found_ok_skb;
856
857                 case DCCP_PKT_CLOSE:
858                 case DCCP_PKT_CLOSEREQ:
859                         if (!(flags & MSG_PEEK))
860                                 dccp_finish_passive_close(sk);
861                         /* fall through */
862                 case DCCP_PKT_RESET:
863                         dccp_pr_debug("found fin (%s) ok!\n",
864                                       dccp_packet_name(dh->dccph_type));
865                         len = 0;
866                         goto found_fin_ok;
867                 default:
868                         dccp_pr_debug("packet_type=%s\n",
869                                       dccp_packet_name(dh->dccph_type));
870                         sk_eat_skb(sk, skb);
871                 }
872 verify_sock_status:
873                 if (sock_flag(sk, SOCK_DONE)) {
874                         len = 0;
875                         break;
876                 }
877
878                 if (sk->sk_err) {
879                         len = sock_error(sk);
880                         break;
881                 }
882
883                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
884                         len = 0;
885                         break;
886                 }
887
888                 if (sk->sk_state == DCCP_CLOSED) {
889                         if (!sock_flag(sk, SOCK_DONE)) {
890                                 /* This occurs when user tries to read
891                                  * from never connected socket.
892                                  */
893                                 len = -ENOTCONN;
894                                 break;
895                         }
896                         len = 0;
897                         break;
898                 }
899
900                 if (!timeo) {
901                         len = -EAGAIN;
902                         break;
903                 }
904
905                 if (signal_pending(current)) {
906                         len = sock_intr_errno(timeo);
907                         break;
908                 }
909
910                 sk_wait_data(sk, &timeo, NULL);
911                 continue;
912         found_ok_skb:
913                 if (len > skb->len)
914                         len = skb->len;
915                 else if (len < skb->len)
916                         msg->msg_flags |= MSG_TRUNC;
917
918                 if (skb_copy_datagram_msg(skb, 0, msg, len)) {
919                         /* Exception. Bailout! */
920                         len = -EFAULT;
921                         break;
922                 }
923                 if (flags & MSG_TRUNC)
924                         len = skb->len;
925         found_fin_ok:
926                 if (!(flags & MSG_PEEK))
927                         sk_eat_skb(sk, skb);
928                 break;
929         } while (1);
930 out:
931         release_sock(sk);
932         return len;
933 }
934
935 EXPORT_SYMBOL_GPL(dccp_recvmsg);
936
937 int inet_dccp_listen(struct socket *sock, int backlog)
938 {
939         struct sock *sk = sock->sk;
940         unsigned char old_state;
941         int err;
942
943         lock_sock(sk);
944
945         err = -EINVAL;
946         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
947                 goto out;
948
949         old_state = sk->sk_state;
950         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
951                 goto out;
952
953         /* Really, if the socket is already in listen state
954          * we can only allow the backlog to be adjusted.
955          */
956         if (old_state != DCCP_LISTEN) {
957                 /*
958                  * FIXME: here it probably should be sk->sk_prot->listen_start
959                  * see tcp_listen_start
960                  */
961                 err = dccp_listen_start(sk, backlog);
962                 if (err)
963                         goto out;
964         }
965         sk->sk_max_ack_backlog = backlog;
966         err = 0;
967
968 out:
969         release_sock(sk);
970         return err;
971 }
972
973 EXPORT_SYMBOL_GPL(inet_dccp_listen);
974
975 static void dccp_terminate_connection(struct sock *sk)
976 {
977         u8 next_state = DCCP_CLOSED;
978
979         switch (sk->sk_state) {
980         case DCCP_PASSIVE_CLOSE:
981         case DCCP_PASSIVE_CLOSEREQ:
982                 dccp_finish_passive_close(sk);
983                 break;
984         case DCCP_PARTOPEN:
985                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
986                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
987                 /* fall through */
988         case DCCP_OPEN:
989                 dccp_send_close(sk, 1);
990
991                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
992                     !dccp_sk(sk)->dccps_server_timewait)
993                         next_state = DCCP_ACTIVE_CLOSEREQ;
994                 else
995                         next_state = DCCP_CLOSING;
996                 /* fall through */
997         default:
998                 dccp_set_state(sk, next_state);
999         }
1000 }
1001
1002 void dccp_close(struct sock *sk, long timeout)
1003 {
1004         struct dccp_sock *dp = dccp_sk(sk);
1005         struct sk_buff *skb;
1006         u32 data_was_unread = 0;
1007         int state;
1008
1009         lock_sock(sk);
1010
1011         sk->sk_shutdown = SHUTDOWN_MASK;
1012
1013         if (sk->sk_state == DCCP_LISTEN) {
1014                 dccp_set_state(sk, DCCP_CLOSED);
1015
1016                 /* Special case. */
1017                 inet_csk_listen_stop(sk);
1018
1019                 goto adjudge_to_death;
1020         }
1021
1022         sk_stop_timer(sk, &dp->dccps_xmit_timer);
1023
1024         /*
1025          * We need to flush the recv. buffs.  We do this only on the
1026          * descriptor close, not protocol-sourced closes, because the
1027           *reader process may not have drained the data yet!
1028          */
1029         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1030                 data_was_unread += skb->len;
1031                 __kfree_skb(skb);
1032         }
1033
1034         /* If socket has been already reset kill it. */
1035         if (sk->sk_state == DCCP_CLOSED)
1036                 goto adjudge_to_death;
1037
1038         if (data_was_unread) {
1039                 /* Unread data was tossed, send an appropriate Reset Code */
1040                 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1041                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1042                 dccp_set_state(sk, DCCP_CLOSED);
1043         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1044                 /* Check zero linger _after_ checking for unread data. */
1045                 sk->sk_prot->disconnect(sk, 0);
1046         } else if (sk->sk_state != DCCP_CLOSED) {
1047                 /*
1048                  * Normal connection termination. May need to wait if there are
1049                  * still packets in the TX queue that are delayed by the CCID.
1050                  */
1051                 dccp_flush_write_queue(sk, &timeout);
1052                 dccp_terminate_connection(sk);
1053         }
1054
1055         /*
1056          * Flush write queue. This may be necessary in several cases:
1057          * - we have been closed by the peer but still have application data;
1058          * - abortive termination (unread data or zero linger time),
1059          * - normal termination but queue could not be flushed within time limit
1060          */
1061         __skb_queue_purge(&sk->sk_write_queue);
1062
1063         sk_stream_wait_close(sk, timeout);
1064
1065 adjudge_to_death:
1066         state = sk->sk_state;
1067         sock_hold(sk);
1068         sock_orphan(sk);
1069
1070         /*
1071          * It is the last release_sock in its life. It will remove backlog.
1072          */
1073         release_sock(sk);
1074         /*
1075          * Now socket is owned by kernel and we acquire BH lock
1076          * to finish close. No need to check for user refs.
1077          */
1078         local_bh_disable();
1079         bh_lock_sock(sk);
1080         WARN_ON(sock_owned_by_user(sk));
1081
1082         percpu_counter_inc(sk->sk_prot->orphan_count);
1083
1084         /* Have we already been destroyed by a softirq or backlog? */
1085         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1086                 goto out;
1087
1088         if (sk->sk_state == DCCP_CLOSED)
1089                 inet_csk_destroy_sock(sk);
1090
1091         /* Otherwise, socket is reprieved until protocol close. */
1092
1093 out:
1094         bh_unlock_sock(sk);
1095         local_bh_enable();
1096         sock_put(sk);
1097 }
1098
1099 EXPORT_SYMBOL_GPL(dccp_close);
1100
/*
 * dccp_shutdown - shutdown() handler for DCCP sockets
 * @sk:  socket the request was issued on (unused)
 * @how: shutdown mode requested by the caller
 *
 * Currently a stub: the request is only logged through dccp_pr_debug()
 * and no socket state is changed.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1107
1108 static inline int __init dccp_mib_init(void)
1109 {
1110         dccp_statistics = alloc_percpu(struct dccp_mib);
1111         if (!dccp_statistics)
1112                 return -ENOMEM;
1113         return 0;
1114 }
1115
1116 static inline void dccp_mib_exit(void)
1117 {
1118         free_percpu(dccp_statistics);
1119 }
1120
1121 static int thash_entries;
1122 module_param(thash_entries, int, 0444);
1123 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1124
1125 #ifdef CONFIG_IP_DCCP_DEBUG
1126 bool dccp_debug;
1127 module_param(dccp_debug, bool, 0644);
1128 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1129
1130 EXPORT_SYMBOL_GPL(dccp_debug);
1131 #endif
1132
1133 static int __init dccp_init(void)
1134 {
1135         unsigned long goal;
1136         int ehash_order, bhash_order, i;
1137         int rc;
1138
1139         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1140                      FIELD_SIZEOF(struct sk_buff, cb));
1141         rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
1142         if (rc)
1143                 goto out_fail;
1144         rc = -ENOBUFS;
1145         inet_hashinfo_init(&dccp_hashinfo);
1146         dccp_hashinfo.bind_bucket_cachep =
1147                 kmem_cache_create("dccp_bind_bucket",
1148                                   sizeof(struct inet_bind_bucket), 0,
1149                                   SLAB_HWCACHE_ALIGN, NULL);
1150         if (!dccp_hashinfo.bind_bucket_cachep)
1151                 goto out_free_percpu;
1152
1153         /*
1154          * Size and allocate the main established and bind bucket
1155          * hash tables.
1156          *
1157          * The methodology is similar to that of the buffer cache.
1158          */
1159         if (totalram_pages >= (128 * 1024))
1160                 goal = totalram_pages >> (21 - PAGE_SHIFT);
1161         else
1162                 goal = totalram_pages >> (23 - PAGE_SHIFT);
1163
1164         if (thash_entries)
1165                 goal = (thash_entries *
1166                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1167         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1168                 ;
1169         do {
1170                 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1171                                         sizeof(struct inet_ehash_bucket);
1172
1173                 while (hash_size & (hash_size - 1))
1174                         hash_size--;
1175                 dccp_hashinfo.ehash_mask = hash_size - 1;
1176                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1177                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1178         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1179
1180         if (!dccp_hashinfo.ehash) {
1181                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1182                 goto out_free_bind_bucket_cachep;
1183         }
1184
1185         for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1186                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1187
1188         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1189                         goto out_free_dccp_ehash;
1190
1191         bhash_order = ehash_order;
1192
1193         do {
1194                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1195                                         sizeof(struct inet_bind_hashbucket);
1196                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1197                     bhash_order > 0)
1198                         continue;
1199                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1200                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1201         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1202
1203         if (!dccp_hashinfo.bhash) {
1204                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1205                 goto out_free_dccp_locks;
1206         }
1207
1208         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1209                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1210                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1211         }
1212
1213         rc = dccp_mib_init();
1214         if (rc)
1215                 goto out_free_dccp_bhash;
1216
1217         rc = dccp_ackvec_init();
1218         if (rc)
1219                 goto out_free_dccp_mib;
1220
1221         rc = dccp_sysctl_init();
1222         if (rc)
1223                 goto out_ackvec_exit;
1224
1225         rc = ccid_initialize_builtins();
1226         if (rc)
1227                 goto out_sysctl_exit;
1228
1229         dccp_timestamping_init();
1230
1231         return 0;
1232
1233 out_sysctl_exit:
1234         dccp_sysctl_exit();
1235 out_ackvec_exit:
1236         dccp_ackvec_exit();
1237 out_free_dccp_mib:
1238         dccp_mib_exit();
1239 out_free_dccp_bhash:
1240         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1241 out_free_dccp_locks:
1242         inet_ehash_locks_free(&dccp_hashinfo);
1243 out_free_dccp_ehash:
1244         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1245 out_free_bind_bucket_cachep:
1246         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1247 out_free_percpu:
1248         percpu_counter_destroy(&dccp_orphan_count);
1249 out_fail:
1250         dccp_hashinfo.bhash = NULL;
1251         dccp_hashinfo.ehash = NULL;
1252         dccp_hashinfo.bind_bucket_cachep = NULL;
1253         return rc;
1254 }
1255
1256 static void __exit dccp_fini(void)
1257 {
1258         ccid_cleanup_builtins();
1259         dccp_mib_exit();
1260         free_pages((unsigned long)dccp_hashinfo.bhash,
1261                    get_order(dccp_hashinfo.bhash_size *
1262                              sizeof(struct inet_bind_hashbucket)));
1263         free_pages((unsigned long)dccp_hashinfo.ehash,
1264                    get_order((dccp_hashinfo.ehash_mask + 1) *
1265                              sizeof(struct inet_ehash_bucket)));
1266         inet_ehash_locks_free(&dccp_hashinfo);
1267         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1268         dccp_ackvec_exit();
1269         dccp_sysctl_exit();
1270         percpu_counter_destroy(&dccp_orphan_count);
1271 }
1272
1273 module_init(dccp_init);
1274 module_exit(dccp_fini);
1275
1276 MODULE_LICENSE("GPL");
1277 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1278 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");