mptcp: push pending frames when subflow has free space
[linux-2.6-microblaze.git] / net / mptcp / options.c
index 90cd52d..e0d21c0 100644 (file)
@@ -242,7 +242,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 
                mp_opt->add_addr = 1;
                mp_opt->addr_id = *ptr++;
-               pr_debug("ADD_ADDR: id=%d, echo=%d", mp_opt->addr_id, mp_opt->echo);
                if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
                        memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
                        ptr += 4;
@@ -267,6 +266,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                        mp_opt->ahmac = get_unaligned_be64(ptr);
                        ptr += 8;
                }
+               pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
+                        (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
+                        mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port);
                break;
 
        case MPTCPOPT_RM_ADDR:
@@ -280,6 +282,16 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
                break;
 
+       case MPTCPOPT_MP_FASTCLOSE:
+               if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
+                       break;
+
+               ptr += 2;
+               mp_opt->rcvr_key = get_unaligned_be64(ptr);
+               ptr += 8;
+               mp_opt->fastclose = 1;
+               break;
+
        default:
                break;
        }
@@ -297,6 +309,7 @@ void mptcp_get_options(const struct sk_buff *skb,
        mp_opt->mp_join = 0;
        mp_opt->add_addr = 0;
        mp_opt->ahmac = 0;
+       mp_opt->fastclose = 0;
        mp_opt->port = 0;
        mp_opt->rm_addr = 0;
        mp_opt->dss = 0;
@@ -492,7 +505,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
        bool ret = false;
 
        mpext = skb ? mptcp_get_ext(skb) : NULL;
-       snd_data_fin_enable = READ_ONCE(msk->snd_data_fin_enable);
+       snd_data_fin_enable = mptcp_data_fin_enabled(msk);
 
        if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
                unsigned int map_size;
@@ -528,6 +541,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
                opts->ext_copy.ack64 = 0;
        }
        opts->ext_copy.use_ack = 1;
+       WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
 
        /* Add kind/length/subtype/flag overhead if mapping is not populated */
        if (dss_size == 0)
@@ -573,27 +587,45 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
 }
 #endif
 
-static bool mptcp_established_options_add_addr(struct sock *sk,
+static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
                                               unsigned int *size,
                                               unsigned int remaining,
                                               struct mptcp_out_options *opts)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+       bool drop_other_suboptions = false;
+       unsigned int opt_size = *size;
        struct mptcp_addr_info saddr;
        bool echo;
+       bool port;
        int len;
 
+       if ((mptcp_pm_should_add_signal_ipv6(msk) ||
+            mptcp_pm_should_add_signal_port(msk)) &&
+           skb && skb_is_tcp_pure_ack(skb)) {
+               pr_debug("drop other suboptions");
+               opts->suboptions = 0;
+               opts->ext_copy.use_ack = 0;
+               opts->ext_copy.use_map = 0;
+               remaining += opt_size;
+               drop_other_suboptions = true;
+       }
+
        if (!mptcp_pm_should_add_signal(msk) ||
-           !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo)))
+           !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port)))
                return false;
 
-       len = mptcp_add_addr_len(saddr.family, echo);
+       len = mptcp_add_addr_len(saddr.family, echo, port);
        if (remaining < len)
                return false;
 
        *size = len;
+       if (drop_other_suboptions)
+               *size -= opt_size;
        opts->addr_id = saddr.id;
+       if (port)
+               opts->port = ntohs(saddr.port);
        if (saddr.family == AF_INET) {
                opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
                opts->addr = saddr.addr;
@@ -616,7 +648,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk,
                }
        }
 #endif
-       pr_debug("addr_id=%d, ahmac=%llu, echo=%d", opts->addr_id, opts->ahmac, echo);
+       pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
+                opts->addr_id, opts->ahmac, echo, opts->port);
 
        return true;
 }
@@ -678,7 +711,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 
        *size += opt_size;
        remaining -= opt_size;
-       if (mptcp_established_options_add_addr(sk, &opt_size, remaining, opts)) {
+       if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) {
                *size += opt_size;
                remaining -= opt_size;
                ret = true;
@@ -759,6 +792,11 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
                goto fully_established;
        }
 
+       if (mp_opt->add_addr) {
+               WRITE_ONCE(msk->fully_established, true);
+               return true;
+       }
+
        /* If the first established packet does not contain MP_CAPABLE + data
         * then fallback to TCP. Fallback scenarios requires a reset for
         * MP_JOIN subflows.
@@ -777,7 +815,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
        mptcp_subflow_fully_established(subflow, mp_opt);
 
 fully_established:
-       if (likely(subflow->pm_notified))
+       /* if the subflow is not already linked into the conn_list, we can't
+        * notify the PM: this subflow is still on the listener queue
+        * and the PM possibly acquiring the subflow lock could race with
+        * the listener close
+        */
+       if (likely(subflow->pm_notified) || list_empty(&subflow->node))
                return true;
 
        subflow->pm_notified = 1;
@@ -809,31 +852,42 @@ static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
        return cur_ack;
 }
 
-static void update_una(struct mptcp_sock *msk,
-                      struct mptcp_options_received *mp_opt)
+static void ack_update_msk(struct mptcp_sock *msk,
+                          struct sock *ssk,
+                          struct mptcp_options_received *mp_opt)
 {
-       u64 new_snd_una, snd_una, old_snd_una = atomic64_read(&msk->snd_una);
-       u64 write_seq = READ_ONCE(msk->write_seq);
+       u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt);
+       struct sock *sk = (struct sock *)msk;
+       u64 old_snd_una;
+
+       mptcp_data_lock(sk);
 
        /* avoid ack expansion on update conflict, to reduce the risk of
         * wrongly expanding to a future ack sequence number, which is way
         * more dangerous than missing an ack
         */
+       old_snd_una = msk->snd_una;
        new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
 
        /* ACK for data not even sent yet? Ignore. */
-       if (after64(new_snd_una, write_seq))
+       if (after64(new_snd_una, snd_nxt))
                new_snd_una = old_snd_una;
 
-       while (after64(new_snd_una, old_snd_una)) {
-               snd_una = old_snd_una;
-               old_snd_una = atomic64_cmpxchg(&msk->snd_una, snd_una,
-                                              new_snd_una);
-               if (old_snd_una == snd_una) {
-                       mptcp_data_acked((struct sock *)msk);
-                       break;
-               }
+       new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
+
+       if (after64(new_wnd_end, msk->wnd_end))
+               msk->wnd_end = new_wnd_end;
+
+       /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
+       if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) &&
+           sk_stream_memory_free(ssk))
+               __mptcp_check_push(sk, ssk);
+
+       if (after64(new_snd_una, old_snd_una)) {
+               msk->snd_una = new_snd_una;
+               __mptcp_data_acked(sk);
        }
+       mptcp_data_unlock(sk);
 }
 
 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
@@ -886,13 +940,30 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
        struct mptcp_options_received mp_opt;
        struct mptcp_ext *mpext;
 
-       if (__mptcp_check_fallback(msk))
+       if (__mptcp_check_fallback(msk)) {
+               /* Keep it simple and unconditionally trigger send data cleanup and
+                * pending queue spooling. We will need to acquire the data lock
+                * for more accurate checks, and once the lock is acquired, such
+                * helpers are cheap.
+                */
+               mptcp_data_lock(subflow->conn);
+               if (sk_stream_memory_free(sk))
+                       __mptcp_check_push(subflow->conn, sk);
+               __mptcp_data_acked(subflow->conn);
+               mptcp_data_unlock(subflow->conn);
                return;
+       }
 
        mptcp_get_options(skb, &mp_opt);
        if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
                return;
 
+       if (mp_opt.fastclose &&
+           msk->local_key == mp_opt.rcvr_key) {
+               WRITE_ONCE(msk->rcv_fastclose, true);
+               mptcp_schedule_work((struct sock *)msk);
+       }
+
        if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
                struct mptcp_addr_info addr;
 
@@ -930,7 +1001,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
         * monodirectional flows will stuck
         */
        if (mp_opt.use_ack)
-               update_una(msk, &mp_opt);
+               ack_update_msk(msk, sk, &mp_opt);
 
        /* Zero-data-length packets are dropped by the caller and not
         * propagated to the MPTCP layer, so the skb extension does not
@@ -975,7 +1046,24 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
        }
 }
 
-void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
+static void mptcp_set_rwin(const struct tcp_sock *tp)
+{
+       const struct sock *ssk = (const struct sock *)tp;
+       const struct mptcp_subflow_context *subflow;
+       struct mptcp_sock *msk;
+       u64 ack_seq;
+
+       subflow = mptcp_subflow_ctx(ssk);
+       msk = mptcp_sk(subflow->conn);
+
+       ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
+
+       if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
+               WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+}
+
+void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+                        struct mptcp_out_options *opts)
 {
        if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
             OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
@@ -1014,44 +1102,66 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
        }
 
 mp_capable_done:
-       if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
-               if (opts->ahmac)
-                       *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
-                                             TCPOLEN_MPTCP_ADD_ADDR, 0,
-                                             opts->addr_id);
-               else
-                       *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
-                                             TCPOLEN_MPTCP_ADD_ADDR_BASE,
-                                             MPTCP_ADDR_ECHO,
-                                             opts->addr_id);
-               memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
-               ptr += 1;
+       if ((OPTION_MPTCP_ADD_ADDR
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+            | OPTION_MPTCP_ADD_ADDR6
+#endif
+           ) & opts->suboptions) {
+               u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
+               u8 echo = MPTCP_ADDR_ECHO;
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+               if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions)
+                       len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
+#endif
+
+               if (opts->port)
+                       len += TCPOLEN_MPTCP_PORT_LEN;
+
                if (opts->ahmac) {
-                       put_unaligned_be64(opts->ahmac, ptr);
-                       ptr += 2;
+                       len += sizeof(opts->ahmac);
+                       echo = 0;
                }
-       }
 
+               *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
+                                     len, echo, opts->addr_id);
+               if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
+                       memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
+                       ptr += 1;
+               }
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-       if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
-               if (opts->ahmac)
-                       *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
-                                             TCPOLEN_MPTCP_ADD_ADDR6, 0,
-                                             opts->addr_id);
-               else
-                       *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
-                                             TCPOLEN_MPTCP_ADD_ADDR6_BASE,
-                                             MPTCP_ADDR_ECHO,
-                                             opts->addr_id);
-               memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
-               ptr += 4;
-               if (opts->ahmac) {
-                       put_unaligned_be64(opts->ahmac, ptr);
-                       ptr += 2;
+               else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
+                       memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
+                       ptr += 4;
                }
-       }
 #endif
 
+               if (!opts->port) {
+                       if (opts->ahmac) {
+                               put_unaligned_be64(opts->ahmac, ptr);
+                               ptr += 2;
+                       }
+               } else {
+                       if (opts->ahmac) {
+                               u8 *bptr = (u8 *)ptr;
+
+                               put_unaligned_be16(opts->port, bptr);
+                               bptr += 2;
+                               put_unaligned_be64(opts->ahmac, bptr);
+                               bptr += 8;
+                               put_unaligned_be16(TCPOPT_NOP << 8 |
+                                                  TCPOPT_NOP, bptr);
+
+                               ptr += 3;
+                       } else {
+                               put_unaligned_be32(opts->port << 16 |
+                                                  TCPOPT_NOP << 8 |
+                                                  TCPOPT_NOP, ptr);
+                               ptr += 1;
+                       }
+               }
+       }
+
        if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
                *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
                                      TCPOLEN_MPTCP_RM_ADDR_BASE,
@@ -1132,4 +1242,7 @@ mp_capable_done:
                                           TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
                }
        }
+
+       if (tp)
+               mptcp_set_rwin(tp);
 }