Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author Jakub Kicinski <kuba@kernel.org>
Thu, 19 May 2022 18:23:59 +0000 (11:23 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 19 May 2022 18:23:59 +0000 (11:23 -0700)
drivers/net/ethernet/mellanox/mlx5/core/main.c
  b33886971dbc ("net/mlx5: Initialize flow steering during driver probe")
  40379a0084c2 ("net/mlx5_fpga: Drop INNOVA TLS support")
  f2b41b32cde8 ("net/mlx5: Remove ipsec_ops function table")
https://lore.kernel.org/all/20220519040345.6yrjromcdistu7vh@sx1/
  16d42d313350 ("net/mlx5: Drain fw_reset when removing device")
  8324a02c342a ("net/mlx5: Add exit route when waiting for FW")
https://lore.kernel.org/all/20220519114119.060ce014@canb.auug.org.au/

tools/testing/selftests/net/mptcp/mptcp_join.sh
  e274f7154008 ("selftests: mptcp: add subflow limits test-cases")
  b6e074e171bc ("selftests: mptcp: add infinite map testcase")
  5ac1d2d63451 ("selftests: mptcp: Add tests for userspace PM type")
https://lore.kernel.org/all/20220516111918.366d747f@canb.auug.org.au/

net/mptcp/options.c
  ba2c89e0ea74 ("mptcp: fix checksum byte order")
  1e39e5a32ad7 ("mptcp: infinite mapping sending")
  ea66758c1795 ("tcp: allow MPTCP to update the announced window")
https://lore.kernel.org/all/20220519115146.751c3a37@canb.auug.org.au/

net/mptcp/pm.c
  95d686517884 ("mptcp: fix subflow accounting on close")
  4d25247d3ae4 ("mptcp: bypass in-kernel PM restrictions for non-kernel PMs")
https://lore.kernel.org/all/20220516111435.72f35dca@canb.auug.org.au/

net/mptcp/subflow.c
  ae66fb2ba6c3 ("mptcp: Do TCP fallback on early DSS checksum failure")
  0348c690ed37 ("mptcp: add the fallback check")
  f8d4bcacff3b ("mptcp: infinite mapping receiving")
https://lore.kernel.org/all/20220519115837.380bb8d4@canb.auug.org.au/

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
36 files changed:
1  2 
MAINTAINERS
arch/arm/boot/dts/aspeed-g6.dtsi
drivers/net/can/m_can/m_can.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_txrx.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/microchip/lan966x/lan966x_main.c
drivers/net/ipa/gsi.c
drivers/net/ipa/ipa_endpoint.c
drivers/net/ppp/pppoe.c
include/linux/netdevice.h
include/net/ip.h
include/net/xfrm.h
net/core/dev.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/ipv4/route.c
net/ipv4/tcp_ipv4.c
net/ipv6/tcp_ipv6.c
net/key/af_key.c
net/mptcp/options.c
net/mptcp/pm.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_flow_offload.c
net/sched/act_pedit.c
tools/testing/selftests/net/forwarding/Makefile
tools/testing/selftests/net/mptcp/mptcp_join.sh

diff --cc MAINTAINERS
Simple merge
Simple merge
Simple merge
@@@ -1181,7 -1192,15 +1190,7 @@@ static int mlx5_load(struct mlx5_core_d
                goto err_fpga_start;
        }
  
-       err = mlx5_init_fs(dev);
 -      mlx5_accel_ipsec_init(dev);
 -
 -      err = mlx5_accel_tls_init(dev);
 -      if (err) {
 -              mlx5_core_err(dev, "TLS device start failed %d\n", err);
 -              goto err_tls_start;
 -      }
 -
+       err = mlx5_fs_core_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init flow steering\n");
                goto err_fs;
@@@ -1226,8 -1245,11 +1235,8 @@@ err_ec
  err_vhca:
        mlx5_vhca_event_stop(dev);
  err_set_hca:
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_cleanup(dev);
  err_fs:
 -      mlx5_accel_tls_cleanup(dev);
 -err_tls_start:
 -      mlx5_accel_ipsec_cleanup(dev);
        mlx5_fpga_device_stop(dev);
  err_fpga_start:
        mlx5_rsc_dump_cleanup(dev);
@@@ -1252,7 -1274,9 +1261,7 @@@ static void mlx5_unload(struct mlx5_cor
        mlx5_ec_cleanup(dev);
        mlx5_sf_hw_table_destroy(dev);
        mlx5_vhca_event_stop(dev);
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_cleanup(dev);
 -      mlx5_accel_ipsec_cleanup(dev);
 -      mlx5_accel_tls_cleanup(dev);
        mlx5_fpga_device_stop(dev);
        mlx5_rsc_dump_cleanup(dev);
        mlx5_hv_vhca_cleanup(dev->hv_vhca);
@@@ -1608,7 -1627,10 +1617,11 @@@ static void remove_one(struct pci_dev *
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct devlink *devlink = priv_to_devlink(dev);
  
+       /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
+        * fw_reset before unregistering the devlink.
+        */
+       mlx5_drain_fw_reset(dev);
 +      set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
        devlink_unregister(devlink);
        mlx5_sriov_disable(pdev);
        mlx5_crdump_disable(dev);
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc net/core/dev.c
Simple merge
diff --cc net/dccp/ipv4.c
Simple merge
diff --cc net/dccp/ipv6.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -1236,53 -1234,13 +1236,53 @@@ static void mptcp_set_rwin(struct tcp_s
        subflow = mptcp_subflow_ctx(ssk);
        msk = mptcp_sk(subflow->conn);
  
 -      ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
 +      ack_seq = READ_ONCE(msk->ack_seq);
 +      rcv_wnd_new = ack_seq + tp->rcv_wnd;
 +
 +      rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent);
 +      if (after64(rcv_wnd_new, rcv_wnd_old)) {
 +              u64 rcv_wnd;
 +
 +              for (;;) {
 +                      rcv_wnd = atomic64_cmpxchg(&msk->rcv_wnd_sent, rcv_wnd_old, rcv_wnd_new);
 +
 +                      if (rcv_wnd == rcv_wnd_old)
 +                              break;
 +                      if (before64(rcv_wnd_new, rcv_wnd)) {
 +                              MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
 +                              goto raise_win;
 +                      }
 +                      MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
 +                      rcv_wnd_old = rcv_wnd;
 +              }
 +              return;
 +      }
 +
 +      if (rcv_wnd_new != rcv_wnd_old) {
 +raise_win:
 +              win = rcv_wnd_old - ack_seq;
 +              tp->rcv_wnd = min_t(u64, win, U32_MAX);
 +              new_win = tp->rcv_wnd;
  
 -      if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
 -              WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
 +              /* Make sure we do not exceed the maximum possible
 +               * scaled window.
 +               */
 +              if (unlikely(th->syn))
 +                      new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
 +              if (!tp->rx_opt.rcv_wscale &&
 +                  sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)
 +                      new_win = min(new_win, MAX_TCP_WINDOW);
 +              else
 +                      new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
 +
 +              /* RFC1323 scaling applied */
 +              new_win >>= tp->rx_opt.rcv_wscale;
 +              th->window = htons(new_win);
 +              MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
 +      }
  }
  
- u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+ __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
  {
        struct csum_pseudo_header header;
        __wsum csum;
@@@ -1307,7 -1265,17 +1307,17 @@@ static __sum16 mptcp_make_csum(const st
                                 ~csum_unfold(mpext->csum));
  }
  
 -void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+ static void put_len_csum(u16 len, __sum16 csum, void *data)
+ {
+       __sum16 *sumptr = data + 2;
+       __be16 *ptr = data;
+       put_unaligned_be16(len, ptr);
+       put_unaligned(csum, sumptr);
+ }
 +void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
                         struct mptcp_out_options *opts)
  {
        const struct sock *ssk = (const struct sock *)tp;
                        put_unaligned_be32(mpext->subflow_seq, ptr);
                        ptr += 1;
                        if (opts->csum_reqd) {
-                               put_unaligned_be32(mpext->data_len << 16 |
-                                                  (mpext->data_len ? mptcp_make_csum(mpext) : 0),
-                                                  ptr);
 +                              /* data_len == 0 is reserved for the infinite mapping,
 +                               * the checksum will also be set to 0.
 +                               */
 -                                           mptcp_make_csum(mpext),
+                               put_len_csum(mpext->data_len,
++                                           (mpext->data_len ? mptcp_make_csum(mpext) : 0),
+                                            ptr);
                        } else {
                                put_unaligned_be32(mpext->data_len << 16 |
                                                   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
diff --cc net/mptcp/pm.c
@@@ -181,9 -178,7 +181,8 @@@ void mptcp_pm_subflow_check_next(struc
        struct mptcp_pm_data *pm = &msk->pm;
        bool update_subflows;
  
-       update_subflows = (ssk->sk_state == TCP_CLOSE) &&
-                         (subflow->request_join || subflow->mp_join) &&
 -      update_subflows = subflow->request_join || subflow->mp_join;
++      update_subflows = (subflow->request_join || subflow->mp_join) &&
 +                        mptcp_pm_is_kernel(msk);
        if (!READ_ONCE(pm->work_pending) && !update_subflows)
                return;
  
@@@ -466,9 -443,9 +466,10 @@@ struct mptcp_subflow_context 
                can_ack : 1,        /* only after processing the remote a key */
                disposable : 1,     /* ctx can be free at ulp release time */
                stale : 1,          /* unable to snd/rcv data, do not use for xmit */
-               local_id_valid : 1; /* local_id is correctly initialized */
+               local_id_valid : 1, /* local_id is correctly initialized */
+               valid_csum_seen : 1;        /* at least one csum validated */
        enum mptcp_data_avail data_avail;
 +      bool    mp_fail_response_expect;
        u32     remote_nonce;
        u64     thmac;
        u32     local_nonce;
@@@ -1215,45 -1218,35 +1230,45 @@@ no_data
        return false;
  
  fallback:
 -      /* RFC 8684 section 3.7. */
 -      if (subflow->send_mp_fail) {
 -              if (mptcp_has_another_subflow(ssk)) {
 -                      while ((skb = skb_peek(&ssk->sk_receive_queue)))
 -                              sk_eat_skb(ssk, skb);
 +      if (!__mptcp_check_fallback(msk)) {
 +              /* RFC 8684 section 3.7. */
 +              if (subflow->send_mp_fail) {
 +                      if (mptcp_has_another_subflow(ssk) ||
 +                          !READ_ONCE(msk->allow_infinite_fallback)) {
 +                              ssk->sk_err = EBADMSG;
 +                              tcp_set_state(ssk, TCP_CLOSE);
 +                              subflow->reset_transient = 0;
 +                              subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
 +                              tcp_send_active_reset(ssk, GFP_ATOMIC);
 +                              while ((skb = skb_peek(&ssk->sk_receive_queue)))
 +                                      sk_eat_skb(ssk, skb);
 +                      } else {
 +                              WRITE_ONCE(subflow->mp_fail_response_expect, true);
 +                              /* The data lock is acquired in __mptcp_move_skbs() */
 +                              sk_reset_timer((struct sock *)msk,
 +                                             &((struct sock *)msk)->sk_timer,
 +                                             jiffies + TCP_RTO_MAX);
 +                      }
 +                      WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
 +                      return true;
                }
 -              ssk->sk_err = EBADMSG;
 -              tcp_set_state(ssk, TCP_CLOSE);
 -              subflow->reset_transient = 0;
 -              subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
 -              tcp_send_active_reset(ssk, GFP_ATOMIC);
 -              WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
 -              return true;
 -      }
  
-               if ((subflow->mp_join || subflow->fully_established) && subflow->map_data_len) {
 -      if (!subflow_can_fallback(subflow)) {
 -              /* fatal protocol error, close the socket.
 -               * subflow_error_report() will introduce the appropriate barriers
 -               */
 -              ssk->sk_err = EBADMSG;
 -              tcp_set_state(ssk, TCP_CLOSE);
 -              subflow->reset_transient = 0;
 -              subflow->reset_reason = MPTCP_RST_EMPTCP;
 -              tcp_send_active_reset(ssk, GFP_ATOMIC);
 -              WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
 -              return false;
++              if (!subflow_can_fallback(subflow) && subflow->map_data_len) {
 +                      /* fatal protocol error, close the socket.
 +                       * subflow_error_report() will introduce the appropriate barriers
 +                       */
 +                      ssk->sk_err = EBADMSG;
 +                      tcp_set_state(ssk, TCP_CLOSE);
 +                      subflow->reset_transient = 0;
 +                      subflow->reset_reason = MPTCP_RST_EMPTCP;
 +                      tcp_send_active_reset(ssk, GFP_ATOMIC);
 +                      WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
 +                      return false;
 +              }
 +
 +              __mptcp_do_fallback(msk);
        }
  
 -      __mptcp_do_fallback(msk);
        skb = skb_peek(&ssk->sk_receive_queue);
        subflow->map_valid = 1;
        subflow->map_seq = READ_ONCE(msk->ack_seq);
Simple merge
Simple merge
Simple merge
@@@ -2690,92 -2583,7 +2717,92 @@@ fastclose_tests(
        fi
  }
  
- implicit_tests()
 +pedit_action_pkts()
 +{
 +      tc -n $ns2 -j -s action show action pedit index 100 | \
 +              grep "packets" | \
 +              sed 's/.*"packets":\([0-9]\+\),.*/\1/'
 +}
 +
 +fail_tests()
 +{
 +      # single subflow
 +      if reset_with_fail "Infinite map" 1; then
 +              run_tests $ns1 $ns2 10.0.1.1 128
 +              chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
 +              chk_fail_nr 1 -1 invert
 +      fi
 +}
 +
 +userspace_tests()
 +{
 +      # userspace pm type prevents add_addr
 +      if reset "userspace pm type prevents add_addr"; then
 +              set_userspace_pm $ns1
 +              pm_nl_set_limits $ns1 0 2
 +              pm_nl_set_limits $ns2 0 2
 +              pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 +              run_tests $ns1 $ns2 10.0.1.1
 +              chk_join_nr 0 0 0
 +              chk_add_nr 0 0
 +      fi
 +
 +      # userspace pm type does not echo add_addr without daemon
 +      if reset "userspace pm no echo w/o daemon"; then
 +              set_userspace_pm $ns2
 +              pm_nl_set_limits $ns1 0 2
 +              pm_nl_set_limits $ns2 0 2
 +              pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 +              run_tests $ns1 $ns2 10.0.1.1
 +              chk_join_nr 0 0 0
 +              chk_add_nr 1 0
 +      fi
 +
 +      # userspace pm type rejects join
 +      if reset "userspace pm type rejects join"; then
 +              set_userspace_pm $ns1
 +              pm_nl_set_limits $ns1 1 1
 +              pm_nl_set_limits $ns2 1 1
 +              pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 +              run_tests $ns1 $ns2 10.0.1.1
 +              chk_join_nr 1 1 0
 +      fi
 +
 +      # userspace pm type does not send join
 +      if reset "userspace pm type does not send join"; then
 +              set_userspace_pm $ns2
 +              pm_nl_set_limits $ns1 1 1
 +              pm_nl_set_limits $ns2 1 1
 +              pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 +              run_tests $ns1 $ns2 10.0.1.1
 +              chk_join_nr 0 0 0
 +      fi
 +
 +      # userspace pm type prevents mp_prio
 +      if reset "userspace pm type prevents mp_prio"; then
 +              set_userspace_pm $ns1
 +              pm_nl_set_limits $ns1 1 1
 +              pm_nl_set_limits $ns2 1 1
 +              pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 +              run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
 +              chk_join_nr 1 1 0
 +              chk_prio_nr 0 0
 +      fi
 +
 +      # userspace pm type prevents rm_addr
 +      if reset "userspace pm type prevents rm_addr"; then
 +              set_userspace_pm $ns1
 +              set_userspace_pm $ns2
 +              pm_nl_set_limits $ns1 0 1
 +              pm_nl_set_limits $ns2 0 1
 +              pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 +              run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
 +              chk_join_nr 0 0 0
 +              chk_rm_nr 0 0
 +      fi
 +}
 +
+ endpoint_tests()
  {
        # userspace pm type prevents add_addr
        if reset "implicit EP"; then
@@@ -2843,9 -2668,7 +2887,9 @@@ all_tests_sorted=
        d@deny_join_id0_tests
        m@fullmesh_tests
        z@fastclose_tests
-       I@implicit_tests
 +      F@fail_tests
 +      u@userspace_tests
+       I@endpoint_tests
  )
  
  all_tests_args=""