net/mptcp/options.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Multipath TCP
3  *
4  * Copyright (c) 2017 - 2019, Intel Corporation.
5  */
6
7 #define pr_fmt(fmt) "MPTCP: " fmt
8
9 #include <linux/kernel.h>
10 #include <crypto/sha2.h>
11 #include <net/tcp.h>
12 #include <net/mptcp.h>
13 #include "protocol.h"
14 #include "mib.h"
15
16 static bool mptcp_cap_flag_sha256(u8 flags)
17 {
18         return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
19 }
20
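/* Decode a single MPTCP suboption into @mp_opt. @ptr points at the byte
 * following the TCP option kind/length pair (i.e. the subtype/version
 * byte) and @opsize is the full option length. Suboptions failing the size
 * or flag checks below are silently ignored.
 */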
21 static void mptcp_parse_option(const struct sk_buff *skb,
22                                const unsigned char *ptr, int opsize,
23                                struct mptcp_options_received *mp_opt)
24 {
25         u8 subtype = *ptr >> 4;
26         int expected_opsize;
27         u8 version;
28         u8 flags;
29         u8 i;
30
31         switch (subtype) {
32         case MPTCPOPT_MP_CAPABLE:
33                 /* strict size checking */
34                 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
35                         if (skb->len > tcp_hdr(skb)->doff << 2)
36                                 expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
37                         else
38                                 expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
39                 } else {
40                         if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
41                                 expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
42                         else
43                                 expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
44                 }
45                 if (opsize != expected_opsize)
46                         break;
47
48                 /* try to be gentle vs future versions on the initial syn */
49                 version = *ptr++ & MPTCP_VERSION_MASK;
50                 if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
51                         if (version != MPTCP_SUPPORTED_VERSION)
52                                 break;
53                 } else if (version < MPTCP_SUPPORTED_VERSION) {
54                         break;
55                 }
56
57                 flags = *ptr++;
58                 if (!mptcp_cap_flag_sha256(flags) ||
59                     (flags & MPTCP_CAP_EXTENSIBILITY))
60                         break;
61
62                 /* RFC 6824, Section 3.1:
63                  * "For the Checksum Required bit (labeled "A"), if either
64                  * host requires the use of checksums, checksums MUST be used.
65                  * In other words, the only way for checksums not to be used
66                  * is if both hosts in their SYNs set A=0."
67                  *
68                  * Section 3.3.0:
69                  * "If a checksum is not present when its use has been
70                  * negotiated, the receiver MUST close the subflow with a RST as
71                  * it is considered broken."
72                  *
73                  * We don't implement DSS checksum - fall back to TCP.
74                  */
75                 if (flags & MPTCP_CAP_CHECKSUM_REQD)
76                         break;
77
78                 mp_opt->mp_capable = 1;
79                 if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
80                         mp_opt->sndr_key = get_unaligned_be64(ptr);
81                         ptr += 8;
82                 }
83                 if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
84                         mp_opt->rcvr_key = get_unaligned_be64(ptr);
85                         ptr += 8;
86                 }
87                 if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
88                         /* Section 3.1.:
89                          * "the data parameters in a MP_CAPABLE are semantically
90                          * equivalent to those in a DSS option and can be used
91                          * interchangeably."
92                          */
93                         mp_opt->dss = 1;
94                         mp_opt->use_map = 1;
95                         mp_opt->mpc_map = 1;
96                         mp_opt->data_len = get_unaligned_be16(ptr);
97                         ptr += 2;
98                 }
99                 pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
100                          version, flags, opsize, mp_opt->sndr_key,
101                          mp_opt->rcvr_key, mp_opt->data_len);
102                 break;
103
104         case MPTCPOPT_MP_JOIN:
105                 mp_opt->mp_join = 1;
106                 if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
107                         mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
108                         mp_opt->join_id = *ptr++;
109                         mp_opt->token = get_unaligned_be32(ptr);
110                         ptr += 4;
111                         mp_opt->nonce = get_unaligned_be32(ptr);
112                         ptr += 4;
113                         pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
114                                  mp_opt->backup, mp_opt->join_id,
115                                  mp_opt->token, mp_opt->nonce);
116                 } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
117                         mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
118                         mp_opt->join_id = *ptr++;
119                         mp_opt->thmac = get_unaligned_be64(ptr);
120                         ptr += 8;
121                         mp_opt->nonce = get_unaligned_be32(ptr);
122                         ptr += 4;
123                         pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
124                                  mp_opt->backup, mp_opt->join_id,
125                                  mp_opt->thmac, mp_opt->nonce);
126                 } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
127                         ptr += 2;
128                         memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
129                         pr_debug("MP_JOIN hmac");
130                 } else {
131                         pr_warn("MP_JOIN bad option size");
132                         mp_opt->mp_join = 0;
133                 }
134                 break;
135
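        /* A DSS suboption carries an optional data-level ack (4 or 8 bytes)
         * followed by an optional mapping: data sequence number (4 or 8
         * bytes), subflow sequence number (4 bytes) and data-level length
         * (2 bytes). The expected size is derived from the flags; a trailing
         * DSS checksum, if present, is accepted but ignored.
         */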
136         case MPTCPOPT_DSS:
137                 pr_debug("DSS");
138                 ptr++;
139
140                 /* we must clear 'mpc_map' to be able to detect MP_CAPABLE
141                  * map vs DSS map in mptcp_incoming_options(), and reconstruct
142                  * map info accordingly
143                  */
144                 mp_opt->mpc_map = 0;
145                 flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
146                 mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
147                 mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
148                 mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0;
149                 mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
150                 mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);
151
152                 pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
153                          mp_opt->data_fin, mp_opt->dsn64,
154                          mp_opt->use_map, mp_opt->ack64,
155                          mp_opt->use_ack);
156
157                 expected_opsize = TCPOLEN_MPTCP_DSS_BASE;
158
159                 if (mp_opt->use_ack) {
160                         if (mp_opt->ack64)
161                                 expected_opsize += TCPOLEN_MPTCP_DSS_ACK64;
162                         else
163                                 expected_opsize += TCPOLEN_MPTCP_DSS_ACK32;
164                 }
165
166                 if (mp_opt->use_map) {
167                         if (mp_opt->dsn64)
168                                 expected_opsize += TCPOLEN_MPTCP_DSS_MAP64;
169                         else
170                                 expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
171                 }
172
173                 /* RFC 6824, Section 3.3:
174                  * If a checksum is present, but its use had
175                  * not been negotiated in the MP_CAPABLE handshake,
176                  * the checksum field MUST be ignored.
177                  */
178                 if (opsize != expected_opsize &&
179                     opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
180                         break;
181
182                 mp_opt->dss = 1;
183
184                 if (mp_opt->use_ack) {
185                         if (mp_opt->ack64) {
186                                 mp_opt->data_ack = get_unaligned_be64(ptr);
187                                 ptr += 8;
188                         } else {
189                                 mp_opt->data_ack = get_unaligned_be32(ptr);
190                                 ptr += 4;
191                         }
192
193                         pr_debug("data_ack=%llu", mp_opt->data_ack);
194                 }
195
196                 if (mp_opt->use_map) {
197                         if (mp_opt->dsn64) {
198                                 mp_opt->data_seq = get_unaligned_be64(ptr);
199                                 ptr += 8;
200                         } else {
201                                 mp_opt->data_seq = get_unaligned_be32(ptr);
202                                 ptr += 4;
203                         }
204
205                         mp_opt->subflow_seq = get_unaligned_be32(ptr);
206                         ptr += 4;
207
208                         mp_opt->data_len = get_unaligned_be16(ptr);
209                         ptr += 2;
210
211                         pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
212                                  mp_opt->data_seq, mp_opt->subflow_seq,
213                                  mp_opt->data_len);
214                 }
215
216                 break;
217
218         case MPTCPOPT_ADD_ADDR:
219                 mp_opt->echo = (*ptr++) & MPTCP_ADDR_ECHO;
220                 if (!mp_opt->echo) {
221                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
222                             opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
223                                 mp_opt->addr.family = AF_INET;
224 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
225                         else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
226                                  opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
227                                 mp_opt->addr.family = AF_INET6;
228 #endif
229                         else
230                                 break;
231                 } else {
232                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
233                             opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
234                                 mp_opt->addr.family = AF_INET;
235 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
236                         else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
237                                  opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
238                                 mp_opt->addr.family = AF_INET6;
239 #endif
240                         else
241                                 break;
242                 }
243
244                 mp_opt->add_addr = 1;
245                 mp_opt->addr.id = *ptr++;
246                 if (mp_opt->addr.family == AF_INET) {
247                         memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
248                         ptr += 4;
249                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
250                             opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
251                                 mp_opt->addr.port = htons(get_unaligned_be16(ptr));
252                                 ptr += 2;
253                         }
254                 }
255 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
256                 else {
257                         memcpy(mp_opt->addr.addr6.s6_addr, (u8 *)ptr, 16);
258                         ptr += 16;
259                         if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
260                             opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
261                                 mp_opt->addr.port = htons(get_unaligned_be16(ptr));
262                                 ptr += 2;
263                         }
264                 }
265 #endif
266                 if (!mp_opt->echo) {
267                         mp_opt->ahmac = get_unaligned_be64(ptr);
268                         ptr += 8;
269                 }
270                 pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
271                          (mp_opt->addr.family == AF_INET6) ? "6" : "",
272                          mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port));
273                 break;
274
275         case MPTCPOPT_RM_ADDR:
276                 if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 ||
277                     opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX)
278                         break;
279
280                 ptr++;
281
282                 mp_opt->rm_addr = 1;
283                 mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
284                 for (i = 0; i < mp_opt->rm_list.nr; i++)
285                         mp_opt->rm_list.ids[i] = *ptr++;
286                 pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr);
287                 break;
288
289         case MPTCPOPT_MP_PRIO:
290                 if (opsize != TCPOLEN_MPTCP_PRIO)
291                         break;
292
293                 mp_opt->mp_prio = 1;
294                 mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
295                 pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
296                 break;
297
298         case MPTCPOPT_MP_FASTCLOSE:
299                 if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
300                         break;
301
302                 ptr += 2;
303                 mp_opt->rcvr_key = get_unaligned_be64(ptr);
304                 ptr += 8;
305                 mp_opt->fastclose = 1;
306                 break;
307
308         case MPTCPOPT_RST:
309                 if (opsize != TCPOLEN_MPTCP_RST)
310                         break;
311
312                 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
313                         break;
314                 mp_opt->reset = 1;
315                 flags = *ptr++;
316                 mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
317                 mp_opt->reset_reason = *ptr;
318                 break;
319
320         default:
321                 break;
322         }
323 }
324
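/* Walk the TCP option space of @skb using the standard kind/length loop and
 * feed every TCPOPT_MPTCP option found to mptcp_parse_option(). The fields
 * of @mp_opt checked by the callers are cleared first, so a missing option
 * reads back as "not present".
 */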
325 void mptcp_get_options(const struct sk_buff *skb,
326                        struct mptcp_options_received *mp_opt)
327 {
328         const struct tcphdr *th = tcp_hdr(skb);
329         const unsigned char *ptr;
330         int length;
331
332         /* initialize option status */
333         mp_opt->mp_capable = 0;
334         mp_opt->mp_join = 0;
335         mp_opt->add_addr = 0;
336         mp_opt->ahmac = 0;
337         mp_opt->fastclose = 0;
338         mp_opt->addr.port = 0;
339         mp_opt->rm_addr = 0;
340         mp_opt->dss = 0;
341         mp_opt->mp_prio = 0;
342         mp_opt->reset = 0;
343
344         length = (th->doff * 4) - sizeof(struct tcphdr);
345         ptr = (const unsigned char *)(th + 1);
346
347         while (length > 0) {
348                 int opcode = *ptr++;
349                 int opsize;
350
351                 switch (opcode) {
352                 case TCPOPT_EOL:
353                         return;
354                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
355                         length--;
356                         continue;
357                 default:
358                         opsize = *ptr++;
359                         if (opsize < 2) /* "silly options" */
360                                 return;
361                         if (opsize > length)
362                                 return; /* don't parse partial options */
363                         if (opcode == TCPOPT_MPTCP)
364                                 mptcp_parse_option(skb, ptr, opsize, mp_opt);
365                         ptr += opsize - 2;
366                         length -= opsize;
367                 }
368         }
369 }
370
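/* Called when sending a SYN: emit either MP_CAPABLE (new MPTCP connection)
 * or MP_JOIN (additional subflow), depending on what the subflow context
 * requested; return false if the SYN must go out as plain TCP.
 */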
371 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
372                        unsigned int *size, struct mptcp_out_options *opts)
373 {
374         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
375
376         /* we will use snd_isn to detect first pkt [re]transmission
377          * in mptcp_established_options_mp()
378          */
379         subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
380         if (subflow->request_mptcp) {
381                 opts->suboptions = OPTION_MPTCP_MPC_SYN;
382                 *size = TCPOLEN_MPTCP_MPC_SYN;
383                 return true;
384         } else if (subflow->request_join) {
385                 pr_debug("remote_token=%u, nonce=%u", subflow->remote_token,
386                          subflow->local_nonce);
387                 opts->suboptions = OPTION_MPTCP_MPJ_SYN;
388                 opts->join_id = subflow->local_id;
389                 opts->token = subflow->remote_token;
390                 opts->nonce = subflow->local_nonce;
391                 opts->backup = subflow->request_bkup;
392                 *size = TCPOLEN_MPTCP_MPJ_SYN;
393                 return true;
394         }
395         return false;
396 }
397
398 /* MP_JOIN client subflow must wait for 4th ack before sending any data:
399  * TCP can't schedule delack timer before the subflow is fully established.
400  * MPTCP uses the delack timer to do 3rd ack retransmissions
401  */
402 static void schedule_3rdack_retransmission(struct sock *sk)
403 {
404         struct inet_connection_sock *icsk = inet_csk(sk);
405         struct tcp_sock *tp = tcp_sk(sk);
406         unsigned long timeout;
407
408         /* reschedule with a timeout above the RTT, as we only need to detect drops */
409         if (tp->srtt_us)
410                 timeout = tp->srtt_us << 1;
411         else
412                 timeout = TCP_TIMEOUT_INIT;
413
414         WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
415         icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
416         icsk->icsk_ack.timeout = timeout;
417         sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
418 }
419
420 static void clear_3rdack_retransmission(struct sock *sk)
421 {
422         struct inet_connection_sock *icsk = inet_csk(sk);
423
424         sk_stop_timer(sk, &icsk->icsk_delack_timer);
425         icsk->icsk_ack.timeout = 0;
426         icsk->icsk_ack.ato = 0;
427         icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER);
428 }
429
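/* Emit MP_CAPABLE or MP_JOIN on the third ack. Only the first packet after
 * the handshake (detected via snd_isn) may carry it; once the subflow is
 * fully established, or a DATA_FIN is pending, the DSS path below is used
 * instead.
 */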
430 static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
431                                          bool snd_data_fin_enable,
432                                          unsigned int *size,
433                                          unsigned int remaining,
434                                          struct mptcp_out_options *opts)
435 {
436         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
437         struct mptcp_ext *mpext;
438         unsigned int data_len;
439
440         /* When skb is not available, we better over-estimate the emitted
441          * options len. A full DSS option (28 bytes) is longer than
442          * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so
443          * tell the caller to defer the estimate to
444          * mptcp_established_options_dss(), which will reserve enough space.
445          */
446         if (!skb)
447                 return false;
448
449         /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
450         if (subflow->fully_established || snd_data_fin_enable ||
451             subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
452             sk->sk_state != TCP_ESTABLISHED)
453                 return false;
454
455         if (subflow->mp_capable) {
456                 mpext = mptcp_get_ext(skb);
457                 data_len = mpext ? mpext->data_len : 0;
458
459                 /* we will check ext_copy.data_len in mptcp_write_options() to
460                  * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
461                  * TCPOLEN_MPTCP_MPC_ACK
462                  */
463                 opts->ext_copy.data_len = data_len;
464                 opts->suboptions = OPTION_MPTCP_MPC_ACK;
465                 opts->sndr_key = subflow->local_key;
466                 opts->rcvr_key = subflow->remote_key;
467
468                 /* Section 3.1.
469                  * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
470                  * packets that start the first subflow of an MPTCP connection,
471                  * as well as the first packet that carries data
472                  */
473                 if (data_len > 0)
474                         *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
475                 else
476                         *size = TCPOLEN_MPTCP_MPC_ACK;
477
478                 pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
479                          subflow, subflow->local_key, subflow->remote_key,
480                          data_len);
481
482                 return true;
483         } else if (subflow->mp_join) {
484                 opts->suboptions = OPTION_MPTCP_MPJ_ACK;
485                 memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN);
486                 *size = TCPOLEN_MPTCP_MPJ_ACK;
487                 pr_debug("subflow=%p", subflow);
488
489                 schedule_3rdack_retransmission(sk);
490                 return true;
491         }
492         return false;
493 }
494
495 static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
496                                  struct sk_buff *skb, struct mptcp_ext *ext)
497 {
498         /* The write_seq value has already been incremented, so the actual
499          * sequence number for the DATA_FIN is one less.
500          */
501         u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1;
502
503         if (!ext->use_map || !skb->len) {
504                 /* RFC6824 requires a DSS mapping with specific values
505                  * if DATA_FIN is set but no data payload is mapped
506                  */
507                 ext->data_fin = 1;
508                 ext->use_map = 1;
509                 ext->dsn64 = 1;
510                 ext->data_seq = data_fin_tx_seq;
511                 ext->subflow_seq = 0;
512                 ext->data_len = 1;
513         } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) {
514                 /* If there's an existing DSS mapping and it is the
515                  * final mapping, DATA_FIN consumes 1 additional byte of
516                  * mapping space.
517                  */
518                 ext->data_fin = 1;
519                 ext->data_len++;
520         }
521 }
522
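/* Build the DSS suboption: reserve room for a full 64-bit mapping whenever a
 * map or DATA_FIN must be sent, and append a data-level ack once the msk
 * knows the peer key ('can_ack'). The returned size is padded to a 4-byte
 * boundary.
 */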
523 static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
524                                           bool snd_data_fin_enable,
525                                           unsigned int *size,
526                                           unsigned int remaining,
527                                           struct mptcp_out_options *opts)
528 {
529         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
530         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
531         unsigned int dss_size = 0;
532         struct mptcp_ext *mpext;
533         unsigned int ack_size;
534         bool ret = false;
535         u64 ack_seq;
536
537         mpext = skb ? mptcp_get_ext(skb) : NULL;
538
539         if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
540                 unsigned int map_size;
541
542                 map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
543
544                 remaining -= map_size;
545                 dss_size = map_size;
546                 if (mpext)
547                         opts->ext_copy = *mpext;
548
549                 if (skb && snd_data_fin_enable)
550                         mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
551                 ret = true;
552         }
553
554         /* on passive sockets the msk will set 'can_ack' after accept(), even
555          * if the first subflow may already have the remote key handy
556          */
557         opts->ext_copy.use_ack = 0;
558         if (!READ_ONCE(msk->can_ack)) {
559                 *size = ALIGN(dss_size, 4);
560                 return ret;
561         }
562
563         ack_seq = READ_ONCE(msk->ack_seq);
564         if (READ_ONCE(msk->use_64bit_ack)) {
565                 ack_size = TCPOLEN_MPTCP_DSS_ACK64;
566                 opts->ext_copy.data_ack = ack_seq;
567                 opts->ext_copy.ack64 = 1;
568         } else {
569                 ack_size = TCPOLEN_MPTCP_DSS_ACK32;
570                 opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
571                 opts->ext_copy.ack64 = 0;
572         }
573         opts->ext_copy.use_ack = 1;
574         WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
575
576         /* Add kind/length/subtype/flag overhead if mapping is not populated */
577         if (dss_size == 0)
578                 ack_size += TCPOLEN_MPTCP_DSS_BASE;
579
580         dss_size += ack_size;
581
582         *size = ALIGN(dss_size, 4);
583         return true;
584 }
585
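/* ADD_ADDR HMAC: hash the address id, the address itself and the port with
 * the two connection keys, then carry the rightmost 64 bits of the SHA-256
 * based HMAC in the option.
 */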
586 static u64 add_addr_generate_hmac(u64 key1, u64 key2,
587                                   struct mptcp_addr_info *addr)
588 {
589         u16 port = ntohs(addr->port);
590         u8 hmac[SHA256_DIGEST_SIZE];
591         u8 msg[19];
592         int i = 0;
593
594         msg[i++] = addr->id;
595         if (addr->family == AF_INET) {
596                 memcpy(&msg[i], &addr->addr.s_addr, 4);
597                 i += 4;
598         }
599 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
600         else if (addr->family == AF_INET6) {
601                 memcpy(&msg[i], &addr->addr6.s6_addr, 16);
602                 i += 16;
603         }
604 #endif
605         msg[i++] = port >> 8;
606         msg[i++] = port & 0xFF;
607
608         mptcp_crypto_hmac_sha(key1, key2, msg, i, hmac);
609
610         return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
611 }
612
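/* Append an ADD_ADDR suboption if the PM has an address to signal. For the
 * larger IPv6/port variants the already computed suboptions may be dropped,
 * but only on pure ack packets, to free up enough option space.
 */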
613 static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
614                                                unsigned int *size,
615                                                unsigned int remaining,
616                                                struct mptcp_out_options *opts)
617 {
618         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
619         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
620         bool drop_other_suboptions = false;
621         unsigned int opt_size = *size;
622         bool echo;
623         bool port;
624         int len;
625
626         if ((mptcp_pm_should_add_signal_ipv6(msk) ||
627              mptcp_pm_should_add_signal_port(msk)) &&
628             skb && skb_is_tcp_pure_ack(skb)) {
629                 pr_debug("drop other suboptions");
630                 opts->suboptions = 0;
631                 opts->ext_copy.use_ack = 0;
632                 opts->ext_copy.use_map = 0;
633                 remaining += opt_size;
634                 drop_other_suboptions = true;
635         }
636
637         if (!mptcp_pm_should_add_signal(msk) ||
638             !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port)))
639                 return false;
640
641         len = mptcp_add_addr_len(opts->addr.family, echo, port);
642         if (remaining < len)
643                 return false;
644
645         *size = len;
646         if (drop_other_suboptions)
647                 *size -= opt_size;
648         opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
649         if (!echo) {
650                 opts->ahmac = add_addr_generate_hmac(msk->local_key,
651                                                      msk->remote_key,
652                                                      &opts->addr);
653         }
654         pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
655                  opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
656
657         return true;
658 }
659
660 static bool mptcp_established_options_rm_addr(struct sock *sk,
661                                               unsigned int *size,
662                                               unsigned int remaining,
663                                               struct mptcp_out_options *opts)
664 {
665         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
666         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
667         struct mptcp_rm_list rm_list;
668         int i, len;
669
670         if (!mptcp_pm_should_rm_signal(msk) ||
671             !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list)))
672                 return false;
673
674         len = mptcp_rm_addr_len(&rm_list);
675         if (len < 0)
676                 return false;
677         if (remaining < len)
678                 return false;
679
680         *size = len;
681         opts->suboptions |= OPTION_MPTCP_RM_ADDR;
682         opts->rm_list = rm_list;
683
684         for (i = 0; i < opts->rm_list.nr; i++)
685                 pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
686
687         return true;
688 }
689
690 static bool mptcp_established_options_mp_prio(struct sock *sk,
691                                               unsigned int *size,
692                                               unsigned int remaining,
693                                               struct mptcp_out_options *opts)
694 {
695         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
696
697         if (!subflow->send_mp_prio)
698                 return false;
699
700         /* account for the trailing 'nop' option */
701         if (remaining < TCPOLEN_MPTCP_PRIO_ALIGN)
702                 return false;
703
704         *size = TCPOLEN_MPTCP_PRIO_ALIGN;
705         opts->suboptions |= OPTION_MPTCP_PRIO;
706         opts->backup = subflow->request_bkup;
707
708         pr_debug("prio=%d", opts->backup);
709
710         return true;
711 }
712
713 static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
714                                                    unsigned int *size,
715                                                    unsigned int remaining,
716                                                    struct mptcp_out_options *opts)
717 {
718         const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
719
720         if (remaining < TCPOLEN_MPTCP_RST)
721                 return;
722
723         *size = TCPOLEN_MPTCP_RST;
724         opts->suboptions |= OPTION_MPTCP_RST;
725         opts->reset_transient = subflow->reset_transient;
726         opts->reset_reason = subflow->reset_reason;
727 }
728
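/* Main option builder for established subflows. MP_RST short-circuits
 * everything else; the MP_CAPABLE/MP_JOIN ack and DSS are mutually
 * exclusive; ADD_ADDR, RM_ADDR and MP_PRIO are then appended as long as
 * option space remains.
 */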
729 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
730                                unsigned int *size, unsigned int remaining,
731                                struct mptcp_out_options *opts)
732 {
733         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
734         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
735         unsigned int opt_size = 0;
736         bool snd_data_fin;
737         bool ret = false;
738
739         opts->suboptions = 0;
740
741         if (unlikely(__mptcp_check_fallback(msk)))
742                 return false;
743
744         if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
745                 mptcp_established_options_rst(sk, skb, size, remaining, opts);
746                 return true;
747         }
748
749         snd_data_fin = mptcp_data_fin_enabled(msk);
750         if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
751                 ret = true;
752         else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
753                 ret = true;
754
755         /* we reserved enough space for the above options, and exceeding the
756          * TCP option space would be fatal
757          */
758         if (WARN_ON_ONCE(opt_size > remaining))
759                 return false;
760
761         *size += opt_size;
762         remaining -= opt_size;
763         if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) {
764                 *size += opt_size;
765                 remaining -= opt_size;
766                 ret = true;
767         } else if (mptcp_established_options_rm_addr(sk, &opt_size, remaining, opts)) {
768                 *size += opt_size;
769                 remaining -= opt_size;
770                 ret = true;
771         }
772
773         if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) {
774                 *size += opt_size;
775                 remaining -= opt_size;
776                 ret = true;
777         }
778
779         return ret;
780 }
781
782 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
783                           struct mptcp_out_options *opts)
784 {
785         struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
786
787         if (subflow_req->mp_capable) {
788                 opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
789                 opts->sndr_key = subflow_req->local_key;
790                 *size = TCPOLEN_MPTCP_MPC_SYNACK;
791                 pr_debug("subflow_req=%p, local_key=%llu",
792                          subflow_req, subflow_req->local_key);
793                 return true;
794         } else if (subflow_req->mp_join) {
795                 opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
796                 opts->backup = subflow_req->backup;
797                 opts->join_id = subflow_req->local_id;
798                 opts->thmac = subflow_req->thmac;
799                 opts->nonce = subflow_req->local_nonce;
800                 pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
801                          subflow_req, opts->backup, opts->join_id,
802                          opts->thmac, opts->nonce);
803                 *size = TCPOLEN_MPTCP_MPJ_SYNACK;
804                 return true;
805         }
806         return false;
807 }
808
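/* Track the subflow handshake state on incoming packets: handle 3rd ack
 * retransmissions, promote the subflow (and the msk) to fully established
 * when appropriate, or fall back / reset the subflow when the expected
 * MPTCP options are missing. Returns false if the caller must stop
 * processing this packet.
 */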
809 static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
810                                     struct mptcp_subflow_context *subflow,
811                                     struct sk_buff *skb,
812                                     struct mptcp_options_received *mp_opt)
813 {
814         /* here we can process OoO, in-window pkts; only the in-sequence 4th ack
815          * will make the subflow fully established
816          */
817         if (likely(subflow->fully_established)) {
818                 /* on passive sockets, check for 3rd ack retransmission
819                  * note that msk is always set by subflow_syn_recv_sock()
820                  * for mp_join subflows
821                  */
822                 if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
823                     TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
824                     subflow->mp_join && mp_opt->mp_join &&
825                     READ_ONCE(msk->pm.server_side))
826                         tcp_send_ack(ssk);
827                 goto fully_established;
828         }
829
830         /* we must process OoO packets before the first subflow is fully
831          * established. OoO packets are instead a protocol violation
832          * for MP_JOIN subflows as the peer must not send any data
833          * before receiving the forth ack - cfr. RFC 8684 section 3.2.
834          */
835         if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
836                 if (subflow->mp_join)
837                         goto reset;
838                 return subflow->mp_capable;
839         }
840
841         if (mp_opt->dss && mp_opt->use_ack) {
842                 /* subflows are fully established as soon as we get any
843                  * additional ack.
844                  */
845                 subflow->fully_established = 1;
846                 WRITE_ONCE(msk->fully_established, true);
847                 goto fully_established;
848         }
849
850         if (mp_opt->add_addr) {
851                 WRITE_ONCE(msk->fully_established, true);
852                 return true;
853         }
854
855         /* If the first established packet does not contain MP_CAPABLE + data
856          * then fall back to TCP. Fallback scenarios require a reset for
857          * MP_JOIN subflows.
858          */
859         if (!mp_opt->mp_capable) {
860                 if (subflow->mp_join)
861                         goto reset;
862                 subflow->mp_capable = 0;
863                 pr_fallback(msk);
864                 __mptcp_do_fallback(msk);
865                 return false;
866         }
867
868         if (unlikely(!READ_ONCE(msk->pm.server_side)))
869                 pr_warn_once("bogus mpc option on established client sk");
870         mptcp_subflow_fully_established(subflow, mp_opt);
871
872 fully_established:
873         /* if the subflow is not already linked into the conn_list, we can't
874          * notify the PM: this subflow is still on the listener queue
875          * and the PM, possibly acquiring the subflow lock, could race with
876          * the listener close
877          */
878         if (likely(subflow->pm_notified) || list_empty(&subflow->node))
879                 return true;
880
881         subflow->pm_notified = 1;
882         if (subflow->mp_join) {
883                 clear_3rdack_retransmission(ssk);
884                 mptcp_pm_subflow_established(msk);
885         } else {
886                 mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
887         }
888         return true;
889
890 reset:
891         mptcp_subflow_reset(ssk);
892         return false;
893 }
894
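/* Expand a 32-bit data ack into the 64-bit MPTCP sequence space, using the
 * previous value as reference and accounting for a possible 32-bit wrap.
 */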
895 static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
896 {
897         u32 old_ack32, cur_ack32;
898
899         if (use_64bit)
900                 return cur_ack;
901
902         old_ack32 = (u32)old_ack;
903         cur_ack32 = (u32)cur_ack;
904         cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32;
905         if (unlikely(before(cur_ack32, old_ack32)))
906                 return cur_ack + (1LL << 32);
907         return cur_ack;
908 }
909
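/* Propagate a DSS data ack to the msk under the msk data lock: expand it to
 * 64 bits, ignore acks for data not sent yet, move the send window right
 * edge forward and wake up the relevant paths when new data was acked or
 * more window space became available.
 */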
910 static void ack_update_msk(struct mptcp_sock *msk,
911                            struct sock *ssk,
912                            struct mptcp_options_received *mp_opt)
913 {
914         u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt);
915         struct sock *sk = (struct sock *)msk;
916         u64 old_snd_una;
917
918         mptcp_data_lock(sk);
919
920         /* avoid ack expansion on update conflict, to reduce the risk of
921          * wrongly expanding to a future ack sequence number, which is way
922          * more dangerous than missing an ack
923          */
924         old_snd_una = msk->snd_una;
925         new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
926
927         /* ACK for data not even sent yet? Ignore. */
928         if (after64(new_snd_una, snd_nxt))
929                 new_snd_una = old_snd_una;
930
931         new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
932
933         if (after64(new_wnd_end, msk->wnd_end))
934                 msk->wnd_end = new_wnd_end;
935
936         /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
937         if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
938                 __mptcp_check_push(sk, ssk);
939
940         if (after64(new_snd_una, old_snd_una)) {
941                 msk->snd_una = new_snd_una;
942                 __mptcp_data_acked(sk);
943         }
944         mptcp_data_unlock(sk);
945 }
946
947 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
948 {
949         /* Skip if DATA_FIN was already received.
950          * If updating simultaneously with the recvmsg loop, values
951          * should match. If they mismatch, the peer is misbehaving and
952          * we will prefer the most recent information.
953          */
954         if (READ_ONCE(msk->rcv_data_fin))
955                 return false;
956
957         WRITE_ONCE(msk->rcv_data_fin_seq,
958                    expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
959         WRITE_ONCE(msk->rcv_data_fin, 1);
960
961         return true;
962 }
963
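/* Validate the peer's ADD_ADDR HMAC by recomputing it with the keys swapped
 * (remote key first) and comparing. An echoed ADD_ADDR carries no HMAC and
 * is always accepted here.
 */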
964 static bool add_addr_hmac_valid(struct mptcp_sock *msk,
965                                 struct mptcp_options_received *mp_opt)
966 {
967         u64 hmac = 0;
968
969         if (mp_opt->echo)
970                 return true;
971
972         hmac = add_addr_generate_hmac(msk->remote_key,
973                                       msk->local_key,
974                                       &mp_opt->addr);
975
976         pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
977                  msk, (unsigned long long)hmac,
978                  (unsigned long long)mp_opt->ahmac);
979
980         return hmac == mp_opt->ahmac;
981 }
982
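/* Per-packet MPTCP input processing: parse the options, update the
 * connection-level state (handshake completion, fastclose, PM events, data
 * ack) and, when a mapping is present, attach it to the skb as an MPTCP
 * extension for the receive path.
 */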
983 void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
984 {
985         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
986         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
987         struct mptcp_options_received mp_opt;
988         struct mptcp_ext *mpext;
989
990         if (__mptcp_check_fallback(msk)) {
991                 /* Keep it simple and unconditionally trigger send data cleanup and
992                  * pending queue spooling. We will need to acquire the data lock
993                  * for more accurate checks, and once the lock is acquired, such
994                  * helpers are cheap.
995                  */
996                 mptcp_data_lock(subflow->conn);
997                 if (sk_stream_memory_free(sk))
998                         __mptcp_check_push(subflow->conn, sk);
999                 __mptcp_data_acked(subflow->conn);
1000                 mptcp_data_unlock(subflow->conn);
1001                 return;
1002         }
1003
1004         mptcp_get_options(skb, &mp_opt);
1005         if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
1006                 return;
1007
1008         if (mp_opt.fastclose &&
1009             msk->local_key == mp_opt.rcvr_key) {
1010                 WRITE_ONCE(msk->rcv_fastclose, true);
1011                 mptcp_schedule_work((struct sock *)msk);
1012         }
1013
1014         if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
1015                 if (!mp_opt.echo) {
1016                         mptcp_pm_add_addr_received(msk, &mp_opt.addr);
1017                         MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
1018                 } else {
1019                         mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
1020                         mptcp_pm_del_add_timer(msk, &mp_opt.addr);
1021                         MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
1022                 }
1023
1024                 if (mp_opt.addr.port)
1025                         MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
1026
1027                 mp_opt.add_addr = 0;
1028         }
1029
1030         if (mp_opt.rm_addr) {
1031                 mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
1032                 mp_opt.rm_addr = 0;
1033         }
1034
1035         if (mp_opt.mp_prio) {
1036                 mptcp_pm_mp_prio_received(sk, mp_opt.backup);
1037                 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
1038                 mp_opt.mp_prio = 0;
1039         }
1040
1041         if (mp_opt.reset) {
1042                 subflow->reset_seen = 1;
1043                 subflow->reset_reason = mp_opt.reset_reason;
1044                 subflow->reset_transient = mp_opt.reset_transient;
1045         }
1046
1047         if (!mp_opt.dss)
1048                 return;
1049
1050         /* we can't wait for recvmsg() to update the ack_seq, otherwise
1051          * monodirectional flows would get stuck
1052          */
1053         if (mp_opt.use_ack)
1054                 ack_update_msk(msk, sk, &mp_opt);
1055
1056         /* Zero-data-length packets are dropped by the caller and not
1057          * propagated to the MPTCP layer, so the skb extension does not
1058          * need to be allocated or populated. DATA_FIN information, if
1059          * present, needs to be updated here before the skb is freed.
1060          */
1061         if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
1062                 if (mp_opt.data_fin && mp_opt.data_len == 1 &&
1063                     mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
1064                     schedule_work(&msk->work))
1065                         sock_hold(subflow->conn);
1066
1067                 return;
1068         }
1069
1070         mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
1071         if (!mpext)
1072                 return;
1073
1074         memset(mpext, 0, sizeof(*mpext));
1075
1076         if (mp_opt.use_map) {
1077                 if (mp_opt.mpc_map) {
1078                         /* this is an MP_CAPABLE carrying MPTCP data
1079                          * we know this maps the first chunk of data
1080                          */
1081                         mptcp_crypto_key_sha(subflow->remote_key, NULL,
1082                                              &mpext->data_seq);
1083                         mpext->data_seq++;
1084                         mpext->subflow_seq = 1;
1085                         mpext->dsn64 = 1;
1086                         mpext->mpc_map = 1;
1087                         mpext->data_fin = 0;
1088                 } else {
1089                         mpext->data_seq = mp_opt.data_seq;
1090                         mpext->subflow_seq = mp_opt.subflow_seq;
1091                         mpext->dsn64 = mp_opt.dsn64;
1092                         mpext->data_fin = mp_opt.data_fin;
1093                 }
1094                 mpext->data_len = mp_opt.data_len;
1095                 mpext->use_map = 1;
1096         }
1097 }
1098
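/* Track the right edge of the MPTCP-level receive window advertised to the
 * peer (ack_seq + rcv_wnd), making sure it never moves backward across
 * subflows.
 */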
1099 static void mptcp_set_rwin(const struct tcp_sock *tp)
1100 {
1101         const struct sock *ssk = (const struct sock *)tp;
1102         const struct mptcp_subflow_context *subflow;
1103         struct mptcp_sock *msk;
1104         u64 ack_seq;
1105
1106         subflow = mptcp_subflow_ctx(ssk);
1107         msk = mptcp_sk(subflow->conn);
1108
1109         ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
1110
1111         if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
1112                 WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
1113 }
1114
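/* Serialize @opts into the TCP option space at @ptr. @ptr is a __be32
 * pointer, so every "ptr += n" below advances by n 32-bit words; suboptions
 * whose length is not a multiple of four are padded with TCPOPT_NOP bytes.
 */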
1115 void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
1116                          struct mptcp_out_options *opts)
1117 {
1118         if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
1119              OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
1120                 u8 len;
1121
1122                 if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
1123                         len = TCPOLEN_MPTCP_MPC_SYN;
1124                 else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
1125                         len = TCPOLEN_MPTCP_MPC_SYNACK;
1126                 else if (opts->ext_copy.data_len)
1127                         len = TCPOLEN_MPTCP_MPC_ACK_DATA;
1128                 else
1129                         len = TCPOLEN_MPTCP_MPC_ACK;
1130
1131                 *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
1132                                       MPTCP_SUPPORTED_VERSION,
1133                                       MPTCP_CAP_HMAC_SHA256);
1134
1135                 if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
1136                     opts->suboptions))
1137                         goto mp_capable_done;
1138
1139                 put_unaligned_be64(opts->sndr_key, ptr);
1140                 ptr += 2;
1141                 if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
1142                         goto mp_capable_done;
1143
1144                 put_unaligned_be64(opts->rcvr_key, ptr);
1145                 ptr += 2;
1146                 if (!opts->ext_copy.data_len)
1147                         goto mp_capable_done;
1148
1149                 put_unaligned_be32(opts->ext_copy.data_len << 16 |
1150                                    TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1151                 ptr += 1;
1152         }
1153
1154 mp_capable_done:
1155         if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
1156                 u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
1157                 u8 echo = MPTCP_ADDR_ECHO;
1158
1159 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
1160                 if (opts->addr.family == AF_INET6)
1161                         len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
1162 #endif
1163
1164                 if (opts->addr.port)
1165                         len += TCPOLEN_MPTCP_PORT_LEN;
1166
1167                 if (opts->ahmac) {
1168                         len += sizeof(opts->ahmac);
1169                         echo = 0;
1170                 }
1171
1172                 *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
1173                                       len, echo, opts->addr.id);
1174                 if (opts->addr.family == AF_INET) {
1175                         memcpy((u8 *)ptr, (u8 *)&opts->addr.addr.s_addr, 4);
1176                         ptr += 1;
1177                 }
1178 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
1179                 else if (opts->addr.family == AF_INET6) {
1180                         memcpy((u8 *)ptr, opts->addr.addr6.s6_addr, 16);
1181                         ptr += 4;
1182                 }
1183 #endif
1184
1185                 if (!opts->addr.port) {
1186                         if (opts->ahmac) {
1187                                 put_unaligned_be64(opts->ahmac, ptr);
1188                                 ptr += 2;
1189                         }
1190                 } else {
1191                         u16 port = ntohs(opts->addr.port);
1192
1193                         if (opts->ahmac) {
1194                                 u8 *bptr = (u8 *)ptr;
1195
1196                                 put_unaligned_be16(port, bptr);
1197                                 bptr += 2;
1198                                 put_unaligned_be64(opts->ahmac, bptr);
1199                                 bptr += 8;
1200                                 put_unaligned_be16(TCPOPT_NOP << 8 |
1201                                                    TCPOPT_NOP, bptr);
1202
1203                                 ptr += 3;
1204                         } else {
1205                                 put_unaligned_be32(port << 16 |
1206                                                    TCPOPT_NOP << 8 |
1207                                                    TCPOPT_NOP, ptr);
1208                                 ptr += 1;
1209                         }
1210                 }
1211         }
1212
1213         if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
1214                 u8 i = 1;
1215
1216                 *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
1217                                       TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr,
1218                                       0, opts->rm_list.ids[0]);
1219
1220                 while (i < opts->rm_list.nr) {
1221                         u8 id1, id2, id3, id4;
1222
1223                         id1 = opts->rm_list.ids[i];
1224                         id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP;
1225                         id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP;
1226                         id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP;
1227                         put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr);
1228                         ptr += 1;
1229                         i += 4;
1230                 }
1231         }
1232
1233         if (OPTION_MPTCP_PRIO & opts->suboptions) {
1234                 const struct sock *ssk = (const struct sock *)tp;
1235                 struct mptcp_subflow_context *subflow;
1236
1237                 subflow = mptcp_subflow_ctx(ssk);
1238                 subflow->send_mp_prio = 0;
1239
1240                 *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
1241                                       TCPOLEN_MPTCP_PRIO,
1242                                       opts->backup, TCPOPT_NOP);
1243         }
1244
1245         if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
1246                 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1247                                       TCPOLEN_MPTCP_MPJ_SYN,
1248                                       opts->backup, opts->join_id);
1249                 put_unaligned_be32(opts->token, ptr);
1250                 ptr += 1;
1251                 put_unaligned_be32(opts->nonce, ptr);
1252                 ptr += 1;
1253         }
1254
1255         if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
1256                 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1257                                       TCPOLEN_MPTCP_MPJ_SYNACK,
1258                                       opts->backup, opts->join_id);
1259                 put_unaligned_be64(opts->thmac, ptr);
1260                 ptr += 2;
1261                 put_unaligned_be32(opts->nonce, ptr);
1262                 ptr += 1;
1263         }
1264
1265         if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
1266                 *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
1267                                       TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
1268                 memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
1269                 ptr += 5;
1270         }
1271
1272         if (OPTION_MPTCP_RST & opts->suboptions)
1273                 *ptr++ = mptcp_option(MPTCPOPT_RST,
1274                                       TCPOLEN_MPTCP_RST,
1275                                       opts->reset_transient,
1276                                       opts->reset_reason);
1277
1278         if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
1279                 struct mptcp_ext *mpext = &opts->ext_copy;
1280                 u8 len = TCPOLEN_MPTCP_DSS_BASE;
1281                 u8 flags = 0;
1282
1283                 if (mpext->use_ack) {
1284                         flags = MPTCP_DSS_HAS_ACK;
1285                         if (mpext->ack64) {
1286                                 len += TCPOLEN_MPTCP_DSS_ACK64;
1287                                 flags |= MPTCP_DSS_ACK64;
1288                         } else {
1289                                 len += TCPOLEN_MPTCP_DSS_ACK32;
1290                         }
1291                 }
1292
1293                 if (mpext->use_map) {
1294                         len += TCPOLEN_MPTCP_DSS_MAP64;
1295
1296                         /* Use only 64-bit mapping flags for now, add
1297                          * support for optional 32-bit mappings later.
1298                          */
1299                         flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
1300                         if (mpext->data_fin)
1301                                 flags |= MPTCP_DSS_DATA_FIN;
1302                 }
1303
1304                 *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
1305
1306                 if (mpext->use_ack) {
1307                         if (mpext->ack64) {
1308                                 put_unaligned_be64(mpext->data_ack, ptr);
1309                                 ptr += 2;
1310                         } else {
1311                                 put_unaligned_be32(mpext->data_ack32, ptr);
1312                                 ptr += 1;
1313                         }
1314                 }
1315
1316                 if (mpext->use_map) {
1317                         put_unaligned_be64(mpext->data_seq, ptr);
1318                         ptr += 2;
1319                         put_unaligned_be32(mpext->subflow_seq, ptr);
1320                         ptr += 1;
1321                         put_unaligned_be32(mpext->data_len << 16 |
1322                                            TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
1323                 }
1324         }
1325
1326         if (tp)
1327                 mptcp_set_rwin(tp);
1328 }
1329
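/* Build the MP_TCPRST option word for an outgoing TCP reset from the skb's
 * MPTCP extension, carrying the reset reason and the transient flag; return
 * 0 if the skb has no MPTCP extension.
 */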
1330 __be32 mptcp_get_reset_option(const struct sk_buff *skb)
1331 {
1332         const struct mptcp_ext *ext = mptcp_get_ext(skb);
1333         u8 flags, reason;
1334
1335         if (ext) {
1336                 flags = ext->reset_transient;
1337                 reason = ext->reset_reason;
1338
1339                 return mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST,
1340                                     flags, reason);
1341         }
1342
1343         return htonl(0u);
1344 }
1345 EXPORT_SYMBOL_GPL(mptcp_get_reset_option);