// SPDX-License-Identifier: GPL-2.0-only
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
        static const char *const dccp_state_names[] = {
        [DCCP_OPEN]             = "OPEN",
        [DCCP_REQUESTING]       = "REQUESTING",
        [DCCP_PARTOPEN]         = "PARTOPEN",
        [DCCP_LISTEN]           = "LISTEN",
        [DCCP_RESPOND]          = "RESPOND",
        [DCCP_CLOSING]          = "CLOSING",
        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
        [DCCP_TIME_WAIT]        = "TIME_WAIT",
        [DCCP_CLOSED]           = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}
#endif

void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                /* Client retransmits all Confirm options until entering OPEN */
                if (oldstate == DCCP_PARTOPEN)
                        dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(sk);
                fallthrough;
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
        }
}

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *const dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

void dccp_destruct_common(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

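        /* Only the TX CCID is released here at destruct time; the RX CCID
         * is torn down earlier, in dccp_destroy_sock().
         */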
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_tx_ccid = NULL;
}
EXPORT_SYMBOL_GPL(dccp_destruct_common);

static void dccp_sk_destruct(struct sock *sk)
{
        dccp_destruct_common(sk);
        inet_sock_destruct(sk);
}

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, "
                     "please contact the netdev mailing list\n");

        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        sk->sk_destruct         = dccp_sk_destruct;
        icsk->icsk_sync_mss     = dccp_sync_mss;
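        /* Conservative initial MPS; dccp_sync_mss() refreshes this once the
         * route (and hence the path MTU) is known.
         */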
        dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;

        dccp_init_xmit_timers(sk);

        INIT_LIST_HEAD(&dp->dccps_featneg);
        /* control socket doesn't need feat nego */
        if (likely(ctl_sock_initialized))
                return dccp_feat_init(sk);
        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dp->dccps_hc_rx_ackvec != NULL) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        dp->dccps_hc_rx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_need_reset(int state)
{
        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
               state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct dccp_sock *dp = dccp_sk(sk);
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        dp->dccps_hc_rx_ccid = NULL;

        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

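        /* Forget the peer: clear the destination port and the bound
         * source-address bookkeeping so the socket can be reused.
         */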
        inet->inet_dport = 0;

        inet_bhash2_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

        sk_error_report(sk);
        return 0;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
__poll_t dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        struct sock *sk = sock->sk;
        __poll_t mask;
        u8 shutdown;
        int state;

        sock_poll_wait(file, sock, wait);

        state = inet_sk_state_load(sk);
        if (state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events by the
         * poll logic, and correct handling of state changes made by other
         * threads is impossible in any case.
         */

        mask = 0;
        if (READ_ONCE(sk->sk_err))
                mask = EPOLLERR;
        shutdown = READ_ONCE(sk->sk_shutdown);

        if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
                mask |= EPOLLHUP;
        if (shutdown & RCV_SHUTDOWN)
                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

        /* Connected? The DCCPF_* values are (1 << state) masks, so this
         * matches every state except REQUESTING and RESPOND, i.e. any
         * connection that may carry data.
         */
        if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= EPOLLIN | EPOLLRDNORM;

                if (!(shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_is_writeable(sk)) {
                                mask |= EPOLLOUT | EPOLLWRNORM;
                        } else {  /* send SIGIO later */
                                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_is_writeable(sk))
                                        mask |= EPOLLOUT | EPOLLWRNORM;
                        }
                }
        }
        return mask;
}
EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, int *karg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCOUTQ: {
                *karg = sk_wmem_alloc_get(sk);
                /* Using sk_wmem_alloc here because sk_wmem_queued is not
                 * used by DCCP and is always 0, as in UDP.
                 */

                rc = 0;
        }
                break;
        case SIOCINQ: {
                struct sk_buff *skb;
                *karg = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
                        /*
                         * We will only return the amount of this packet since
                         * that is all that will be read.
                         */
                        *karg = skb->len;
                }
                rc = 0;
        }
                break;
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   sockptr_t optval, unsigned int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
                                sizeof(service), optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
        u8 *list, len;
        int i, rc;

        if (cscov < 0 || cscov > 15)
                return -EINVAL;
        /*
         * Populate a list of permissible values, in the range cscov...15. This
         * is necessary since feature negotiation of single values only works if
         * both sides incidentally choose the same value. Since the list starts
         * lowest-value first, negotiation will pick the smallest shared value.
         */
        if (cscov == 0)
                return 0;
        len = 16 - cscov;

        list = kmalloc(len, GFP_KERNEL);
        if (list == NULL)
                return -ENOBUFS;

        for (i = 0; i < len; i++)
                list[i] = cscov++;

        rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

        if (rc == 0) {
                if (rx)
                        dccp_sk(sk)->dccps_pcrlen = cscov;
                else
                        dccp_sk(sk)->dccps_pcslen = cscov;
        }
        kfree(list);
        return rc;
}

static int dccp_setsockopt_ccid(struct sock *sk, int type,
                                sockptr_t optval, unsigned int optlen)
{
        u8 *val;
        int rc = 0;

        if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
                return -EINVAL;

        val = memdup_sockptr(optval, optlen);
        if (IS_ERR(val))
                return PTR_ERR(val);

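        /* DCCP_SOCKOPT_CCID registers the preference list for both halves
         * of the connection; the _TX_/_RX_ variants register only one
         * direction.
         */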
        lock_sock(sk);
        if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

        if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
        release_sock(sk);

        kfree(val);
        return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                sockptr_t optval, unsigned int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_CHANGE_L:
        case DCCP_SOCKOPT_CHANGE_R:
                DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_CCID:
        case DCCP_SOCKOPT_RX_CCID:
        case DCCP_SOCKOPT_TX_CCID:
                return dccp_setsockopt_ccid(sk, optname, optval, optlen);
        }

        if (optlen < (int)sizeof(int))
                return -EINVAL;

        if (copy_from_sockptr(&val, optval, sizeof(int)))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, false);
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, true);
                break;
        case DCCP_SOCKOPT_QPOLICY_ID:
                if (sk->sk_state != DCCP_CLOSED)
                        err = -EISCONN;
                else if (val < 0 || val >= DCCPQ_POLICY_MAX)
                        err = -EINVAL;
                else
                        dp->dccps_qpolicy = val;
                break;
        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
                if (val < 0)
                        err = -EINVAL;
                else
                        dp->dccps_tx_qlen = val;
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);

        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
                    unsigned int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = READ_ONCE(dp->dccps_mss_cache);
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
        case DCCP_SOCKOPT_TX_CCID:
                val = ccid_get_current_tx_ccid(dp);
                if (val < 0)
                        return -ENOPROTOOPT;
                break;
        case DCCP_SOCKOPT_RX_CCID:
                val = ccid_get_current_rx_ccid(dp);
                if (val < 0)
                        return -ENOPROTOOPT;
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case DCCP_SOCKOPT_QPOLICY_ID:
                val = dp->dccps_qpolicy;
                break;
        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
                val = dp->dccps_tx_qlen;
                break;
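        /* Option numbers 128..191 are dispatched to the RX CCID and
         * 192..255 to the TX CCID of this half-connection.
         */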
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
        struct cmsghdr *cmsg;

        /*
         * Assign an (opaque) qpolicy priority value to skb->priority.
         *
         * We are overloading this skb field for use with the qpolicy subsystem.
         * The skb->priority is normally used for the SO_PRIORITY option, which
         * is initialised from sk_priority. Since the assignment of sk_priority
         * to skb->priority happens later (on layer 3), we overload this field
         * for use with queueing priorities as long as the skb is on layer 4.
         * The default priority value (if nothing is set) is 0.
         */
        skb->priority = 0;

        for_each_cmsghdr(cmsg, msg) {
                if (!CMSG_OK(msg, cmsg))
                        return -EINVAL;

                if (cmsg->cmsg_level != SOL_DCCP)
                        continue;

                if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
                    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
                        return -EINVAL;

                switch (cmsg->cmsg_type) {
                case DCCP_SCM_PRIORITY:
                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
                                return -EINVAL;
                        skb->priority = *(__u32 *)CMSG_DATA(cmsg);
                        break;
                default:
                        return -EINVAL;
                }
        }
        return 0;
}

int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        trace_dccp_probe(sk, len);

        if (len > READ_ONCE(dp->dccps_mss_cache))
                return -EMSGSIZE;

        lock_sock(sk);

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

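        /* Drop the socket lock around the allocation: sock_alloc_send_skb()
         * may sleep waiting for write memory. The socket state is
         * re-validated below, once the lock has been re-acquired.
         */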
        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        if (dccp_qpolicy_full(sk)) {
                rc = -EAGAIN;
                goto out_discard;
        }

        if (sk->sk_state == DCCP_CLOSED) {
                rc = -ENOTCONN;
                goto out_discard;
        }

        /* We need to check dccps_mss_cache after socket is locked. */
        if (len > dp->dccps_mss_cache) {
                rc = -EMSGSIZE;
                goto out_discard;
        }

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_from_msg(skb_put(skb, len), msg, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_msghdr_parse(msg, skb);
        if (rc != 0)
                goto out_discard;

        dccp_qpolicy_push(sk, skb);
        /*
         * The xmit_timer is set if the TX CCID is rate-based and will expire
         * when congestion control permits to release further packets into the
         * network. Window-based CCIDs do not use this timer.
         */
        if (!timer_pending(&dp->dccps_xmit_timer))
                dccp_write_xmit(sk);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
                 int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        fallthrough;
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

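                /* Nothing usable is queued: sleep until data arrives or the
                 * timeout expires, then retry from the top of the loop.
                 */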
                sk_wait_data(sk, &timeo, NULL);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_msg(skb, 0, msg, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
                if (flags & MSG_TRUNC)
                        len = skb->len;
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                struct dccp_sock *dp = dccp_sk(sk);

                dp->dccps_role = DCCP_ROLE_LISTEN;

                /* do not start to listen if feature negotiation setup fails */
                if (dccp_feat_finalise_settings(dp)) {
                        err = -EPROTO;
                        goto out;
                }

                err = inet_csk_listen_start(sk);
                if (err)
                        goto out;
        }
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

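        /* The cases below deliberately fall through: PARTOPEN stops its
         * delayed-ack timer and then behaves like OPEN; OPEN sends a Close
         * and selects the follow-up state for the final dccp_set_state().
         */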
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                fallthrough;
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                fallthrough;
        default:
                dccp_set_state(sk, next_state);
        }
}

void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        /* If socket has been already reset kill it. */
        if (sk->sk_state == DCCP_CLOSED)
                goto adjudge_to_death;

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                /*
                 * Normal connection termination. May need to wait if there are
                 * still packets in the TX queue that are delayed by the CCID.
                 */
                dccp_flush_write_queue(sk, &timeout);
                dccp_terminate_connection(sk);
        }

        /*
         * Flush write queue. This may be necessary in several cases:
         * - we have been closed by the peer but still have application data;
         * - abortive termination (unread data or zero linger time),
         * - normal termination but queue could not be flushed within time limit
         */
        __skb_queue_purge(&sk->sk_write_queue);

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
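        /* Hold an extra reference so the socket outlives sock_orphan() and
         * the bottom-half teardown below; it is dropped by the final
         * sock_put().
         */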
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        this_cpu_inc(dccp_orphan_count);

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int __init dccp_mib_init(void)
{
        dccp_statistics = alloc_percpu(struct dccp_mib);
        if (!dccp_statistics)
                return -ENOMEM;
        return 0;
}

static inline void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        unsigned long nr_pages = totalram_pages();
        int ehash_order, bhash_order, i;
        int rc;

        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     sizeof_field(struct sk_buff, cb));
        rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
        if (rc)
                goto out_fail;
        rc = -ENOBUFS;
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_free_hashinfo2;
        dccp_hashinfo.bind2_bucket_cachep =
                kmem_cache_create("dccp_bind2_bucket",
                                  sizeof(struct inet_bind2_bucket), 0,
                                  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
        if (!dccp_hashinfo.bind2_bucket_cachep)
                goto out_free_bind_bucket_cachep;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (nr_pages >= (128 * 1024))
                goal = nr_pages >> (21 - PAGE_SHIFT);
        else
                goal = nr_pages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);

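                /* Round hash_size down to a power of two so that
                 * ehash_mask below works as a plain bitmask.
                 */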
                while (hash_size & (hash_size - 1))
                        hash_size--;
                dccp_hashinfo.ehash_mask = hash_size - 1;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind2_bucket_cachep;
        }

        for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

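        /* Cap the bind hash at 64K buckets: 'continue' re-evaluates the
         * loop condition, shrinking bhash_order until the table is small
         * enough or order 0 is reached.
         */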
        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
                __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);

        if (!dccp_hashinfo.bhash2) {
                DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
                goto out_free_dccp_bhash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
                spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
        }

        dccp_hashinfo.pernet = false;

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash2;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        rc = ccid_initialize_builtins();
        if (rc)
                goto out_sysctl_exit;

        dccp_timestamping_init();

        return 0;

out_sysctl_exit:
        dccp_sysctl_exit();
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash2:
        free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind2_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
        inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
        dccp_hashinfo.bhash = NULL;
        dccp_hashinfo.bhash2 = NULL;
        dccp_hashinfo.ehash = NULL;
        dccp_hashinfo.bind_bucket_cachep = NULL;
        dccp_hashinfo.bind2_bucket_cachep = NULL;
        return rc;
}

static void __exit dccp_fini(void)
{
        int bhash_order = get_order(dccp_hashinfo.bhash_size *
                                    sizeof(struct inet_bind_hashbucket));

        ccid_cleanup_builtins();
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order((dccp_hashinfo.ehash_mask + 1) *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
        inet_hashinfo2_free_mod(&dccp_hashinfo);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");