// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	mptcp_token_destroy_request(req);
	tcp_request_sock_ops.destructor(req);
}

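/* compute an HMAC-SHA256 over the two 32-bit nonces, keyed by the pair of
 * 64-bit keys; callers use only the leading bytes of the digest, as the
 * truncated MPTCP HMAC
 */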
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
	return mptcp_is_fully_established((void *)msk) &&
	       READ_ONCE(msk->pm.accept_subflow);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
	return msk;
}

static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return -EINVAL;
#endif

	return 0;
}

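/* parse the MPTCP options on the incoming SYN and set up the request sock
 * state for passive MP_CAPABLE and MP_JOIN handling
 */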
static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int ret;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	ret = __subflow_init_req(req, sk_listener);
	if (ret)
		return;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err, retries = 4;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		if (unlikely(req->syncookie)) {
			mptcp_crypto_key_sha(subflow_req->local_key,
					     &subflow_req->token,
					     &subflow_req->idsn);
			if (mptcp_token_exists(subflow_req->token)) {
				if (retries-- > 0)
					goto again;
			} else {
				subflow_req->mp_capable = 1;
			}
			return;
		}

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;

	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req, skb);

		if (unlikely(req->syncookie) && subflow_req->msk) {
			if (mptcp_can_accept_new_subflow(subflow_req->msk))
				subflow_init_req_cookie_join_save(subflow_req, skb);
		}

		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}
}

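/* rebuild the request sock MPTCP state for a SYN that was answered via a
 * TCP syncookie, re-parsing the MPTCP options carried by the ACK
 */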
int mptcp_subflow_init_cookie_req(struct request_sock *req,
				  const struct sock *sk_listener,
				  struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int err;

	err = __subflow_init_req(req, sk_listener);
	if (err)
		return err;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable && mp_opt.mp_join)
		return -EINVAL;

	if (mp_opt.mp_capable && listener->request_mptcp) {
		if (mp_opt.sndr_key == 0)
			return -EINVAL;

		subflow_req->local_key = mp_opt.rcvr_key;
		err = mptcp_token_new_request(req);
		if (err)
			return err;

		subflow_req->mp_capable = 1;
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
			return -EINVAL;

		if (mptcp_can_accept_new_subflow(subflow_req->msk))
			subflow_req->mp_join = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);

static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

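/* handle the incoming SYN-ACK on an active subflow: complete either the
 * MP_CAPABLE or the MP_JOIN handshake, or fall back to plain TCP
 */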
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		if (!mp_opt.mp_capable) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(mptcp_sk(subflow->conn));
			goto fallback;
		}

		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
		mptcp_finish_connect(sk);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		if (!mp_opt.mp_join)
			goto do_reset;

		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
	return;

do_reset:
	tcp_send_active_reset(sk, GFP_ATOMIC);
	tcp_done(sk);
}

struct request_sock_ops mptcp_subflow_request_sock_ops;
EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if the new mptcp socket isn't accepted, it is freed
	 * from the tcp listener socket's request queue, linked
	 * from req->sk.  The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will not be in
	 * SYN_RECV state and doesn't have the SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */

	if (sk->sk_state == TCP_SYN_RECV) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_token_destroy(mptcp_sk(sk));
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	subflow->remote_key = mp_opt->sndr_key;
	subflow->fully_established = 1;
	subflow->can_ack = 1;
	WRITE_ONCE(msk->fully_established, true);
}

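/* build the child socket for a passively opened subflow; on MP_CAPABLE
 * additionally create the new msk, on MP_JOIN validate the third ACK;
 * fall back to plain TCP when MPTCP cannot be used (fatal only for MP_JOIN)
 */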
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* After child creation we must look for 'mp_capable' even when options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join ||
		    !mptcp_can_accept_new_subflow(subflow_req->msk) ||
		    !subflow_hmac_valid(req, &mp_opt)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fall back on ctx allocation failure and on the
		 * prerequisite checks above. In the latter scenario we
		 * additionally need to reset the context to a non-MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			subflow_drop_ctx(child);
			goto out;
		}

		if (ctx->mp_capable) {
			/* this can't race with mptcp_close(), as the msk is
			 * not yet exposed to user-space
			 */
			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);

			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			if (mp_opt.mp_capable)
				mptcp_subflow_fully_established(ctx, &mp_opt);
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner)
				goto dispose_child;

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the leftover mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just helps
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY
};

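/* expand a 32-bit MPTCP data sequence number to 64 bits, using the most
 * recent mapping as reference: e.g. with old_seq=0x1fffffffe and
 * old_data_len=4, a 32-bit seq of 0x00000002 expands to 0x200000002
 */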
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
				  subflow->map_data_len))) {
		/* Mapping covers only past subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

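/* parse the DSS mapping (if any) carried by the skb at the head of the ssk
 * receive queue, and update the subflow mapping state accordingly
 */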
static enum mapping_status get_mapping_status(struct sock *ssk,
					      struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0-len FIN pkts to the receive
			 * queue; those are the only 0-len pkts ever expected
			 * here, and we can admit no mapping only for 0-len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			mptcp_update_rcv_data_fin(msk, mpext->data_seq);
			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				return MAPPING_DATA_FIN;
			}
		} else {
			mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len);
			pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len);
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		subflow->use_64bit_ack = 0;
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
		subflow->use_64bit_ack = 1;
	}

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb data are fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate the current mapping on each new skb, because we must
	 * ensure the skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

static int subflow_read_actor(read_descriptor_t *desc,
			      struct sk_buff *skb,
			      unsigned int offset, size_t len)
{
	size_t copy_len = min(desc->count, len);

	desc->count -= copy_len;

	pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
	return copy_len;
}

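/* check whether new in-sequence data is available on this subflow; mapped
 * data that does not start at the msk-level ack_seq is discarded via
 * tcp_read_sock(), relying on retransmissions for in-sequence delivery
 */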
static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u32 map_remaining;
		size_t delta;
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk, msk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			return true;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack)
			break;

		/* only accept in-sequence mappings. Old values are spurious
		 * retransmissions; we can hit "future" values on active backup
		 * subflow switch, and we rely on retransmissions to get
		 * in-sequence data.
		 * Concurrent subflow support will require subflow data
		 * reordering
		 */
		map_remaining = subflow->map_data_len -
				mptcp_subflow_get_map_offset(subflow);
		if (before64(ack_seq, old_ack))
			delta = min_t(size_t, old_ack - ack_seq, map_remaining);
		else
			delta = min_t(size_t, ack_seq - old_ack, map_remaining);

		/* discard mapped data */
		pr_debug("discarding %zu bytes, current map len=%d", delta,
			 map_remaining);
		if (delta) {
			read_descriptor_t desc = {
				.count = delta,
			};
			int ret;

			ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
			if (ret < 0) {
				ssk->sk_err = -ret;
				goto fatal;
			}
			if (ret < delta)
				return false;
			if (delta == map_remaining)
				subflow->map_valid = 0;
		}
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	return false;
}

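/* check how much in-sequence data the current subflow can provide to the
 * msk, updating the mapping state as mappings are fully consumed
 */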
bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sk_buff *skb;

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	if (!subflow_check_data_avail(sk)) {
		subflow->data_avail = 0;
		return false;
	}

	skb = skb_peek(&sk->sk_receive_queue);
	subflow->data_avail = skb &&
		       before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
	return subflow->data_avail;
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored,
 * as far as the mptcp peer is concerned that data is still inflight.
 * DSS ACK is updated when the skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = tcp_space(sk);
	*full_space = tcp_full_space(sk);
}

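/* subflow sk_data_ready callback: wake the msk listener for incoming
 * connection requests, otherwise propagate the data-ready event to the
 * mptcp parent socket
 */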
static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	sk_stream_write_space(sk);
	if (sk_stream_is_writeable(sk)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
		sk_stream_write_space(parent);
	}
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
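/* switch an IPv6 subflow carrying a v4-mapped address to the mixed af_ops
 * (IPv4 transmit paths) and back, depending on the 'mapped' flag
 */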
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

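/* create, bind and connect a new subflow towards 'remote', joining the
 * existing MPTCP connection owned by 'sk'
 */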
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
			    const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	int local_id = loc->id;
	struct socket *sf;
	struct sock *ssk;
	u32 remote_token;
	int addrlen;
	int err;

	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	ssk = sf->sk;
	subflow = mptcp_subflow_ctx(ssk);
	do {
		get_random_bytes(&subflow->local_nonce, sizeof(u32));
	} while (!subflow->local_nonce);

	if (!local_id) {
		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
		if (err < 0)
			goto failed;

		local_id = err;
	}

	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	ssk->sk_bound_dev_if = ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token,
		 local_id);
	subflow->remote_token = remote_token;
	subflow->local_id = local_id;
	subflow->request_join = 1;
	subflow->request_bkup = 1;
	mptcp_info2sockaddr(remote, &addr);

	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);

	return err;

failed:
	sock_release(sf);
	return err;
}

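/* allocate a kernel TCP socket, attach the mptcp ULP to it and tie it to
 * the owning MPTCP socket, so that it can later be used as a subflow
 */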
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	/* un-accepted server sockets can reach here - on bad configuration
	 * bail early to avoid greater trouble later
	 */
	if (unlikely(!sk->sk_socket))
		return -EINVAL;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	if (subflow_simultaneous_connect(sk)) {
		mptcp_do_fallback(sk);
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		pr_fallback(mptcp_sk(parent));
		subflow->conn_finished = 1;
		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
			inet_sk_state_store(parent, TCP_ESTABLISHED);
			parent->sk_state_change(parent);
		}
	}

	/* as recvmsg() does not acquire the subflow socket for ssk selection,
	 * a fin packet carrying a DSS can go unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
	    !(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
out:
	return err;
}

static void subflow_ulp_release(struct sock *sk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

	if (!ctx)
		return;

	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

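/* clone the MPTCP state carried by the request sock into the context of
 * the newly created subflow; fall back to plain TCP when the handshake did
 * not negotiate MPTCP
 */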
static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
		/* see comments in subflow_syn_recv_sock(), MPTCP connection
		 * is fully established only after we receive the remote key
		 */
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

void __init mptcp_subflow_init(void)
{
	mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}