Merge tag 'nds32-for-linus-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-microblaze.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/inet_common.h>
28 #include <net/sock.h>
29 #include <net/xfrm.h>
30
31 #include <asm/ioctls.h>
32 #include <linux/spinlock.h>
33 #include <linux/timer.h>
34 #include <linux/delay.h>
35 #include <linux/poll.h>
36
37 #include "ccid.h"
38 #include "dccp.h"
39 #include "feat.h"
40
41 #define CREATE_TRACE_POINTS
42 #include "trace.h"
43
/* Per-CPU DCCP MIB counters, updated via DCCP_INC_STATS()/DCCP_DEC_STATS(). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* NOTE(review): presumably counts orphaned (user-closed but not yet
 * destroyed) DCCP sockets, mirroring TCP's orphan accounting — verify
 * against users outside this file.
 */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Global bind/established hash tables shared by DCCP over IPv4/IPv6. */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
56
#ifdef CONFIG_IP_DCCP_DEBUG
/* Map a DCCP socket state to a human-readable name (debug builds only). */
static const char *dccp_state_name(const int state)
{
	static const char *const state_names[] = {
	[DCCP_OPEN]             = "OPEN",
	[DCCP_REQUESTING]       = "REQUESTING",
	[DCCP_PARTOPEN]         = "PARTOPEN",
	[DCCP_LISTEN]           = "LISTEN",
	[DCCP_RESPOND]          = "RESPOND",
	[DCCP_CLOSING]          = "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]        = "TIME_WAIT",
	[DCCP_CLOSED]           = "CLOSED",
	};

	if (state < DCCP_MAX_STATES)
		return state_names[state];
	return "INVALID STATE!";
}
#endif
80
/*
 * dccp_set_state  -  Transition socket @sk into @state
 *
 * Updates the CURRESTAB/ESTABRESETS MIB counters for transitions into and
 * out of OPEN, and on entry to DCCP_CLOSED unhashes the socket and releases
 * its local port (unless the user pinned it with SOCK_BINDPORT_LOCK).
 * NOTE(review): callers in this file hold the socket lock — confirm that
 * is a requirement for all external callers as well.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through: CLOSED also needs the CURRESTAB decrement below */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);
120
/*
 * dccp_finish_passive_close  -  Complete a close initiated by the peer
 *
 * Invoked (e.g. from dccp_recvmsg) once the user has consumed the packet
 * that put the socket into a passive-close state; in any other state this
 * function is deliberately a no-op, hence no default case.
 */
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}
138
/*
 * dccp_done  -  Final transition to DCCP_CLOSED
 *
 * Stops all transmit timers and marks both directions shut down. If the
 * socket still has a user, wake it via sk_state_change(); otherwise the
 * socket is already dead and can be destroyed immediately.
 */
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);
153
154 const char *dccp_packet_name(const int type)
155 {
156         static const char *const dccp_packet_names[] = {
157                 [DCCP_PKT_REQUEST]  = "REQUEST",
158                 [DCCP_PKT_RESPONSE] = "RESPONSE",
159                 [DCCP_PKT_DATA]     = "DATA",
160                 [DCCP_PKT_ACK]      = "ACK",
161                 [DCCP_PKT_DATAACK]  = "DATAACK",
162                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
163                 [DCCP_PKT_CLOSE]    = "CLOSE",
164                 [DCCP_PKT_RESET]    = "RESET",
165                 [DCCP_PKT_SYNC]     = "SYNC",
166                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
167         };
168
169         if (type >= DCCP_NR_PKT_TYPES)
170                 return "INVALID";
171         else
172                 return dccp_packet_names[type];
173 }
174
175 EXPORT_SYMBOL_GPL(dccp_packet_name);
176
/*
 * Socket destructor: release the TX CCID state before handing off to the
 * generic inet destructor. The pointer is cleared so a late reference
 * cannot reach freed CCID state.
 */
static void dccp_sk_destruct(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_tx_ccid = NULL;
	inet_sock_destruct(sk);
}
185
/*
 * dccp_init_sock  -  Initialise per-socket DCCP state
 * @sk: socket being set up
 * @ctl_sock_initialized: false only while creating the per-namespace control
 *	socket, which does not take part in feature negotiation
 *
 * Returns 0, or the result of dccp_feat_init() for regular sockets.
 * NOTE(review): 536 looks like a conservative initial MSS (classic IPv4
 * minimum-reassembly-derived value) — confirm against dccp_sync_mss().
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
213
/*
 * dccp_destroy_sock  -  Free all protocol-private resources of @sk
 *
 * Purges queued output, drops the bind bucket reference, and releases the
 * service list, ack vector, RX CCID and feature-negotiation state. The TX
 * CCID is intentionally not freed here — that happens in dccp_sk_destruct().
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
243
/*
 * dccp_listen_start  -  Put @sk into the listening state
 *
 * Finalises the socket's feature-negotiation settings first, so a socket
 * with inconsistent features never starts accepting connections.
 * Returns 0 on success, -EPROTO on feature setup failure, or the result
 * of inet_csk_listen_start().
 */
static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}
254
255 static inline int dccp_need_reset(int state)
256 {
257         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
258                state != DCCP_REQUESTING;
259 }
260
/*
 * dccp_disconnect  -  Abort the connection and return @sk to CLOSED
 * @flags: currently unused
 *
 * Sends a Reset (Code 2, "Aborted") when the old state requires one, then
 * tears down timers, the RX CCID, all queued data and the destination
 * cache, leaving the socket reusable. Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	/* Drop the RX CCID so no half-torn-down CCID state can be reused. */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	/* Forget the source address unless the user explicitly bound it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
314
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
__poll_t dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	__poll_t mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = EPOLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected? (i.e. not in the client/server handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
371
/*
 * dccp_ioctl  -  Protocol-level ioctl handler
 *
 * Supports SIOCINQ only: reports the length of the *first* queued packet,
 * since DCCP is datagram-oriented and a read consumes at most one packet.
 * Returns -ENOTCONN on listening sockets, -ENOIOCTLCMD for unknown cmds.
 * NOTE(review): @amount is unsigned long but is stored through an
 * int __user pointer — harmless for skb lengths, but worth confirming.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
407
/*
 * dccp_setsockopt_service  -  Install the service code (and optional list)
 * @service: primary service code, already copied in by the caller
 * @optval/@optlen: full user buffer; any bytes beyond the first code form
 *	an additional list of service codes
 *
 * The struct dccp_service_list layout (u32 count + flexible array) matches
 * a kmalloc of @optlen bytes: 4 bytes header + (optlen - 4) list bytes.
 * NOTE(review): @optlen is not checked to be a multiple of sizeof(u32);
 * dccpsl_nr truncates, so trailing partial words are copied but never
 * counted — confirm this is intended.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* Replace any previously installed list. */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
442
/*
 * dccp_setsockopt_cscov  -  Register a minimum checksum-coverage value
 * @cscov: requested coverage, 0..15 (0 means "no preference", a no-op)
 * @rx: true to set the receive-side coverage, false for the send side
 *
 * Returns 0 on success; -EINVAL for out-of-range values, -ENOBUFS on
 * allocation failure, or the error from dccp_feat_register_sp().
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		/* Cache the requested value locally; cscov was advanced above,
		 * so re-read it from the first list entry's original value.
		 */
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
478
/*
 * dccp_setsockopt_ccid  -  Register a user-supplied CCID preference list
 * @type: DCCP_SOCKOPT_{CCID,TX_CCID,RX_CCID}; plain CCID sets both halves
 *
 * Copies the list of candidate CCIDs from userspace and registers it for
 * the TX and/or RX half-connection. Returns 0 or a negative errno.
 */
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_user(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	/* Only attempt the RX half if the TX registration succeeded. */
	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}
503
/*
 * do_dccp_setsockopt  -  Handle all SOL_DCCP-level setsockopt options
 *
 * Deprecated and CCID options are dispatched before the generic "copy one
 * int" step, because they interpret @optval/@optlen themselves.
 * DCCP_SOCKOPT_SERVICE likewise gets the raw buffer, since its payload may
 * extend beyond a single integer.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		/* Only the server side may hold the connection in TIMEWAIT. */
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		/* Queue policy can only change before a connection exists. */
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
569
570 int dccp_setsockopt(struct sock *sk, int level, int optname,
571                     char __user *optval, unsigned int optlen)
572 {
573         if (level != SOL_DCCP)
574                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
575                                                              optname, optval,
576                                                              optlen);
577         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
578 }
579
580 EXPORT_SYMBOL_GPL(dccp_setsockopt);
581
#ifdef CONFIG_COMPAT
/* 32-bit-compat variant of dccp_setsockopt(); only the non-DCCP path
 * differs (it must go through the compat inet_csk helper).
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
594
595 static int dccp_getsockopt_service(struct sock *sk, int len,
596                                    __be32 __user *optval,
597                                    int __user *optlen)
598 {
599         const struct dccp_sock *dp = dccp_sk(sk);
600         const struct dccp_service_list *sl;
601         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
602
603         lock_sock(sk);
604         if ((sl = dp->dccps_service_list) != NULL) {
605                 slen = sl->dccpsl_nr * sizeof(u32);
606                 total_len += slen;
607         }
608
609         err = -EINVAL;
610         if (total_len > len)
611                 goto out;
612
613         err = 0;
614         if (put_user(total_len, optlen) ||
615             put_user(dp->dccps_service, optval) ||
616             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
617                 err = -EFAULT;
618 out:
619         release_sock(sk);
620         return err;
621 }
622
/*
 * do_dccp_getsockopt  -  Handle all SOL_DCCP-level getsockopt options
 *
 * Most options return a single int via the common copy-out at the bottom;
 * SERVICE and AVAILABLE_CCIDS manage their own (variable-length) copies.
 * Option numbers 128..191 and 192..255 are forwarded to the RX and TX
 * CCID modules respectively.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	/* Common copy-out for the single-integer options above. */
	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
690
691 int dccp_getsockopt(struct sock *sk, int level, int optname,
692                     char __user *optval, int __user *optlen)
693 {
694         if (level != SOL_DCCP)
695                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
696                                                              optname, optval,
697                                                              optlen);
698         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
699 }
700
701 EXPORT_SYMBOL_GPL(dccp_getsockopt);
702
#ifdef CONFIG_COMPAT
/* 32-bit-compat variant of dccp_getsockopt(); only the non-DCCP path
 * differs (it must go through the compat inet_csk helper).
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
715
/*
 * dccp_msghdr_parse  -  Extract qpolicy control data from a sendmsg header
 *
 * Only SOL_DCCP cmsgs are interpreted; others are skipped. The sole
 * supported type is DCCP_SCM_PRIORITY, whose u32 payload is stashed in
 * skb->priority (see comment below). Returns 0 or -EINVAL on malformed
 * or unsupported control messages.
 */
static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		/* Reject qpolicy parameters the socket's policy can't use. */
		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
755
/*
 * dccp_sendmsg  -  Queue one datagram of at most the cached MSS for transmit
 *
 * Waits for the handshake to finish if necessary, allocates an skb (with
 * the socket lock dropped, so allocation may sleep without blocking other
 * socket users), copies in the payload and control data, and hands the skb
 * to the queueing policy. Returns @len on success or a negative errno.
 */
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	trace_dccp_probe(sk, len);

	/* DCCP never fragments: a message must fit into one packet. */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	/* State may have changed while the lock was dropped for allocation. */
	if (sk->sk_state == DCCP_CLOSED) {
		rc = -ENOTCONN;
		goto out_discard;
	}

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits to release further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;	/* rc == 0 means success: report bytes queued */
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
826
/*
 * dccp_recvmsg  -  Receive one datagram (or connection-termination event)
 *
 * Loops over the receive queue: data packets are delivered to the user,
 * Close/CloseReq trigger the passive-close path and report EOF (0), and
 * other packet types are silently eaten. The local variable @len doubles
 * as the return value: bytes copied, 0 at EOF, or a negative errno.
 * Blocks according to the receive timeout unless @nonblock is set.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* Only act on the close when actually consuming it. */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Non-data packet: discard and keep scanning. */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Sleep until data arrives (releases/reacquires the lock). */
		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		/* Deliver at most one packet; excess is truncated. */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
934
935 int inet_dccp_listen(struct socket *sock, int backlog)
936 {
937         struct sock *sk = sock->sk;
938         unsigned char old_state;
939         int err;
940
941         lock_sock(sk);
942
943         err = -EINVAL;
944         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
945                 goto out;
946
947         old_state = sk->sk_state;
948         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
949                 goto out;
950
951         /* Really, if the socket is already in listen state
952          * we can only allow the backlog to be adjusted.
953          */
954         if (old_state != DCCP_LISTEN) {
955                 /*
956                  * FIXME: here it probably should be sk->sk_prot->listen_start
957                  * see tcp_listen_start
958                  */
959                 err = dccp_listen_start(sk, backlog);
960                 if (err)
961                         goto out;
962         }
963         sk->sk_max_ack_backlog = backlog;
964         err = 0;
965
966 out:
967         release_sock(sk);
968         return err;
969 }
970
971 EXPORT_SYMBOL_GPL(inet_dccp_listen);
972
/*
 * Drive an established (or partially established) connection towards
 * termination as part of close().  The resulting state depends on the
 * current one; unless a case below overrides it, the socket goes
 * straight to DCCP_CLOSED.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		/* Peer initiated the close; just complete our side of it. */
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		/* Actively close: send a Close (or CloseReq) to the peer. */
		dccp_send_close(sk, 1);

		/*
		 * A server that does not hold TIMEWAIT state itself moves to
		 * ACTIVE_CLOSEREQ (the client will enter TIMEWAIT); otherwise
		 * we wait in CLOSING for the peer's Reset.
		 */
		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
999
/*
 * Close a DCCP socket: flush queues, terminate the connection (or abort
 * it with a Reset if unread data remains), orphan the socket and, once
 * it has reached DCCP_CLOSED, destroy it.  @timeout bounds how long we
 * wait for the TX queue to drain and for the close handshake.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: a listener has no connection to terminate,
		 * only pending child requests to drop. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before orphaning; softirqs may advance it once
	 * we drop the socket lock below. */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
1096
1097 EXPORT_SYMBOL_GPL(dccp_close);
1098
/*
 * Socket-level shutdown() handler.  Apart from the debug trace this is
 * a no-op: connection teardown is driven by dccp_close() and the packet
 * state machine.  NOTE(review): @how is ignored entirely -- presumably
 * deliberate, but confirm that DCCP is not expected to honour
 * directional shutdown here.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1103
1104 EXPORT_SYMBOL_GPL(dccp_shutdown);
1105
1106 static inline int __init dccp_mib_init(void)
1107 {
1108         dccp_statistics = alloc_percpu(struct dccp_mib);
1109         if (!dccp_statistics)
1110                 return -ENOMEM;
1111         return 0;
1112 }
1113
/* Release the per-CPU MIB counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}
1118
1119 static int thash_entries;
1120 module_param(thash_entries, int, 0444);
1121 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1122
1123 #ifdef CONFIG_IP_DCCP_DEBUG
1124 bool dccp_debug;
1125 module_param(dccp_debug, bool, 0644);
1126 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1127
1128 EXPORT_SYMBOL_GPL(dccp_debug);
1129 #endif
1130
/*
 * Module/boot-time initialisation: set up the orphan counter, the bind
 * bucket slab cache, the established ("ehash") and bind ("bhash") hash
 * tables, the MIB counters, ack-vector support, sysctls and the
 * built-in CCIDs.  On any failure, everything initialised so far is
 * torn down in reverse order via the goto-cleanup chain at the bottom.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	/* dccp_skb_cb must fit in the skb control block. */
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

	/* The thash_entries module parameter overrides the RAM heuristic. */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Try progressively smaller orders until the allocation succeeds. */
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		/* Round down to a power of two so ehash_mask works as a mask.
		 * NOTE(review): after this round-down, (ehash_mask + 1) *
		 * sizeof(bucket) can be smaller than the (1 << ehash_order)
		 * pages actually allocated below -- see the matching concern
		 * in dccp_fini(). */
		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	/* Size the bind hash like the ehash, but cap it at 64K buckets. */
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	/* Null out the table pointers so a later dccp_fini() (or a retry)
	 * does not free garbage. */
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}
1253
/*
 * Module unload: tear everything down in (roughly) the reverse order of
 * dccp_init().
 */
static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	/*
	 * NOTE(review): dccp_init() allocates the ehash with
	 * __get_free_pages(ehash_order) and only afterwards rounds the
	 * bucket count down to a power of two, so (ehash_mask + 1) *
	 * sizeof(bucket) can be smaller than the span actually allocated.
	 * Recomputing the order from the rounded-down size here may then
	 * free fewer pages than were allocated (a leak on unload).
	 * Confirm, and consider recording the allocation orders at init
	 * time instead -- fixing it needs state this function cannot
	 * recompute locally.
	 */
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}
1270
1271 module_init(dccp_init);
1272 module_exit(dccp_fini);
1273
1274 MODULE_LICENSE("GPL");
1275 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1276 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");