Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 2 Dec 2019 04:35:03 +0000 (20:35 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 2 Dec 2019 04:35:03 +0000 (20:35 -0800)
Pull networking fixes from David Miller:

 1) Fix several scatter gather list issues in kTLS code, from Jakub
    Kicinski.

 2) macb driver device remove has to kill the hresp_err_tasklet. From
    Chuhong Yuan.

 3) Several memory leak and reference count bug fixes in tipc, from Tung
    Nguyen.

 4) Fix mlx5 build error w/o ipv6, from Yue Haibing.

 5) Fix jumbo frame and other regressions in r8169, from Heiner
    Kallweit.

 6) Undo some BUG_ON()'s and replace them with WARN_ON_ONCE and proper
    error propagation/handling. From Paolo Abeni.

* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (24 commits)
  openvswitch: remove another BUG_ON()
  openvswitch: drop unneeded BUG_ON() in ovs_flow_cmd_build_info()
  net: phy: realtek: fix using paged operations with RTL8105e / RTL8208
  r8169: fix resume on cable plug-in
  r8169: fix jumbo configuration for RTL8168evl
  net: emulex: benet: indent a Kconfig depends continuation line
  selftests: forwarding: fix race between packet receive and tc check
  net: sched: fix `tc -s class show` no bstats on class with nolock subqueues
  net: ethernet: ti: ale: ensure vlan/mdb deleted when no members
  net/mlx5e: Fix build error without IPV6
  selftests: pmtu: use -oneline for ip route list cache
  tipc: fix duplicate SYN messages under link congestion
  tipc: fix wrong timeout input for tipc_wait_for_cond()
  tipc: fix wrong socket reference counter after tipc_sk_timeout() returns
  tipc: fix potential memory leak in __tipc_sendmsg()
  net: macb: add missed tasklet_kill
  selftests: bpf: correct perror strings
  selftests: bpf: test_sockmap: handle file creation failures gracefully
  net/tls: use sg_next() to walk sg entries
  net/tls: remove the dead inplace_crypto code
  ...

24 files changed:
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/emulex/benet/Kconfig
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/ti/cpsw_ale.c
drivers/net/phy/realtek.c
include/linux/skmsg.h
include/net/tls.h
net/core/filter.c
net/core/skmsg.c
net/ipv4/tcp_bpf.c
net/openvswitch/datapath.c
net/sched/sch_mq.c
net/sched/sch_mqprio.c
net/sched/sch_multiq.c
net/sched/sch_prio.c
net/tipc/socket.c
net/tls/tls_main.c
net/tls/tls_sw.c
tools/testing/selftests/bpf/test_sockmap.c
tools/testing/selftests/bpf/xdping.c
tools/testing/selftests/net/forwarding/tc_common.sh
tools/testing/selftests/net/pmtu.sh
tools/testing/selftests/net/tls.c

index d5ae2e1..9c767ee 100644 (file)
@@ -4422,6 +4422,7 @@ static int macb_remove(struct platform_device *pdev)
                mdiobus_free(bp->mii_bus);
 
                unregister_netdev(dev);
+               tasklet_kill(&bp->hresp_err_tasklet);
                pm_runtime_disable(&pdev->dev);
                pm_runtime_dont_use_autosuspend(&pdev->dev);
                if (!pm_runtime_suspended(&pdev->dev)) {
index 17d300e..f51dca1 100644 (file)
@@ -49,4 +49,4 @@ config BE2NET_SKYHAWK
 
 comment "WARNING: be2net is useless without any enabled chip"
        depends on BE2NET_BE2=n && BE2NET_BE3=n && BE2NET_LANCER=n && \
-       BE2NET_SKYHAWK=n && BE2NET
+               BE2NET_SKYHAWK=n && BE2NET
index 784b1e2..6ed8753 100644 (file)
@@ -130,42 +130,6 @@ static const char *mlx5e_netdev_kind(struct net_device *dev)
                return "unknown";
 }
 
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
-                                  struct net_device *mirred_dev,
-                                  struct net_device **out_dev,
-                                  struct net_device **route_dev,
-                                  struct flowi6 *fl6,
-                                  struct neighbour **out_n,
-                                  u8 *out_ttl)
-{
-       struct dst_entry *dst;
-       struct neighbour *n;
-
-       int ret;
-
-       ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
-                                        fl6);
-       if (ret < 0)
-               return ret;
-
-       if (!(*out_ttl))
-               *out_ttl = ip6_dst_hoplimit(dst);
-
-       ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
-       if (ret < 0) {
-               dst_release(dst);
-               return ret;
-       }
-
-       n = dst_neigh_lookup(dst, &fl6->daddr);
-       dst_release(dst);
-       if (!n)
-               return -ENOMEM;
-
-       *out_n = n;
-       return 0;
-}
-
 static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
                                      struct mlx5e_encap_entry *e)
 {
@@ -319,6 +283,43 @@ release_neigh:
        return err;
 }
 
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+                                  struct net_device *mirred_dev,
+                                  struct net_device **out_dev,
+                                  struct net_device **route_dev,
+                                  struct flowi6 *fl6,
+                                  struct neighbour **out_n,
+                                  u8 *out_ttl)
+{
+       struct dst_entry *dst;
+       struct neighbour *n;
+
+       int ret;
+
+       ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
+                                        fl6);
+       if (ret < 0)
+               return ret;
+
+       if (!(*out_ttl))
+               *out_ttl = ip6_dst_hoplimit(dst);
+
+       ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
+       if (ret < 0) {
+               dst_release(dst);
+               return ret;
+       }
+
+       n = dst_neigh_lookup(dst, &fl6->daddr);
+       dst_release(dst);
+       if (!n)
+               return -ENOMEM;
+
+       *out_n = n;
+       return 0;
+}
+
 int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
                                    struct net_device *mirred_dev,
                                    struct mlx5e_encap_entry *e)
@@ -436,6 +437,7 @@ release_neigh:
        neigh_release(n);
        return err;
 }
+#endif
 
 bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
                                    struct net_device *netdev)
index d47a038..38d2126 100644 (file)
@@ -1542,6 +1542,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
        rtl_lock_config_regs(tp);
 
        device_set_wakeup_enable(tp_to_dev(tp), wolopts);
+       tp->dev->wol_enabled = wolopts ? 1 : 0;
 }
 
 static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -3872,7 +3873,7 @@ static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_27 ... RTL_GIGA_MAC_VER_28:
                r8168dp_hw_jumbo_enable(tp);
                break;
-       case RTL_GIGA_MAC_VER_31 ... RTL_GIGA_MAC_VER_34:
+       case RTL_GIGA_MAC_VER_31 ... RTL_GIGA_MAC_VER_33:
                r8168e_hw_jumbo_enable(tp);
                break;
        default:
index 929f3d3..ecdbde5 100644 (file)
@@ -384,7 +384,7 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask,
                       int flags, u16 vid)
 {
        u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
-       int mcast_members;
+       int mcast_members = 0;
        int idx;
 
        idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0);
@@ -397,11 +397,13 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask,
                mcast_members = cpsw_ale_get_port_mask(ale_entry,
                                                       ale->port_mask_bits);
                mcast_members &= ~port_mask;
+       }
+
+       if (mcast_members)
                cpsw_ale_set_port_mask(ale_entry, mcast_members,
                                       ale->port_mask_bits);
-       } else {
+       else
                cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
-       }
 
        cpsw_ale_write(ale, idx, ale_entry);
        return 0;
@@ -478,6 +480,10 @@ static void cpsw_ale_del_vlan_modify(struct cpsw_ale *ale, u32 *ale_entry,
        members = cpsw_ale_get_vlan_member_list(ale_entry,
                                                ale->vlan_field_bits);
        members &= ~port_mask;
+       if (!members) {
+               cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
+               return;
+       }
 
        untag = cpsw_ale_get_vlan_untag_force(ale_entry,
                                              ale->vlan_field_bits);
index 677c459..476db53 100644 (file)
@@ -439,6 +439,15 @@ static struct phy_driver realtek_drvs[] = {
                .resume         = genphy_resume,
                .read_page      = rtl821x_read_page,
                .write_page     = rtl821x_write_page,
+       }, {
+               PHY_ID_MATCH_MODEL(0x001cc880),
+               .name           = "RTL8208 Fast Ethernet",
+               .read_mmd       = genphy_read_mmd_unsupported,
+               .write_mmd      = genphy_write_mmd_unsupported,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .read_page      = rtl821x_read_page,
+               .write_page     = rtl821x_write_page,
        }, {
                PHY_ID_MATCH_EXACT(0x001cc910),
                .name           = "RTL8211 Gigabit Ethernet",
index 6cb077b..ef7031f 100644 (file)
@@ -14,6 +14,7 @@
 #include <net/strparser.h>
 
 #define MAX_MSG_FRAGS                  MAX_SKB_FRAGS
+#define NR_MSG_FRAG_IDS                        (MAX_MSG_FRAGS + 1)
 
 enum __sk_action {
        __SK_DROP = 0,
@@ -29,13 +30,15 @@ struct sk_msg_sg {
        u32                             size;
        u32                             copybreak;
        unsigned long                   copy;
-       /* The extra element is used for chaining the front and sections when
-        * the list becomes partitioned (e.g. end < start). The crypto APIs
-        * require the chaining.
+       /* The extra two elements:
+        * 1) used for chaining the front and sections when the list becomes
+        *    partitioned (e.g. end < start). The crypto APIs require the
+        *    chaining;
+        * 2) to chain tailer SG entries after the message.
         */
-       struct scatterlist              data[MAX_MSG_FRAGS + 1];
+       struct scatterlist              data[MAX_MSG_FRAGS + 2];
 };
-static_assert(BITS_PER_LONG >= MAX_MSG_FRAGS);
+static_assert(BITS_PER_LONG >= NR_MSG_FRAG_IDS);
 
 /* UAPI in filter.c depends on struct sk_msg_sg being first element. */
 struct sk_msg {
@@ -142,13 +145,13 @@ static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes)
 
 static inline u32 sk_msg_iter_dist(u32 start, u32 end)
 {
-       return end >= start ? end - start : end + (MAX_MSG_FRAGS - start);
+       return end >= start ? end - start : end + (NR_MSG_FRAG_IDS - start);
 }
 
 #define sk_msg_iter_var_prev(var)                      \
        do {                                            \
                if (var == 0)                           \
-                       var = MAX_MSG_FRAGS - 1;        \
+                       var = NR_MSG_FRAG_IDS - 1;      \
                else                                    \
                        var--;                          \
        } while (0)
@@ -156,7 +159,7 @@ static inline u32 sk_msg_iter_dist(u32 start, u32 end)
 #define sk_msg_iter_var_next(var)                      \
        do {                                            \
                var++;                                  \
-               if (var == MAX_MSG_FRAGS)               \
+               if (var == NR_MSG_FRAG_IDS)             \
                        var = 0;                        \
        } while (0)
 
@@ -173,9 +176,9 @@ static inline void sk_msg_clear_meta(struct sk_msg *msg)
 
 static inline void sk_msg_init(struct sk_msg *msg)
 {
-       BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != MAX_MSG_FRAGS);
+       BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS);
        memset(msg, 0, sizeof(*msg));
-       sg_init_marker(msg->sg.data, MAX_MSG_FRAGS);
+       sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
 }
 
 static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
@@ -196,14 +199,11 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src)
 
 static inline bool sk_msg_full(const struct sk_msg *msg)
 {
-       return (msg->sg.end == msg->sg.start) && msg->sg.size;
+       return sk_msg_iter_dist(msg->sg.start, msg->sg.end) == MAX_MSG_FRAGS;
 }
 
 static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
 {
-       if (sk_msg_full(msg))
-               return MAX_MSG_FRAGS;
-
        return sk_msg_iter_dist(msg->sg.start, msg->sg.end);
 }
 
index 6ed91e8..df630f5 100644 (file)
@@ -100,7 +100,6 @@ struct tls_rec {
        struct list_head list;
        int tx_ready;
        int tx_flags;
-       int inplace_crypto;
 
        struct sk_msg msg_plaintext;
        struct sk_msg msg_encrypted;
@@ -377,7 +376,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx,
                int flags);
 int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
                            int flags);
-bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
+void tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
 
 static inline struct tls_msg *tls_msg(struct sk_buff *skb)
 {
index b0ed048..f1e703e 100644 (file)
@@ -2299,7 +2299,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
        WARN_ON_ONCE(last_sge == first_sge);
        shift = last_sge > first_sge ?
                last_sge - first_sge - 1 :
-               MAX_SKB_FRAGS - first_sge + last_sge - 1;
+               NR_MSG_FRAG_IDS - first_sge + last_sge - 1;
        if (!shift)
                goto out;
 
@@ -2308,8 +2308,8 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
        do {
                u32 move_from;
 
-               if (i + shift >= MAX_MSG_FRAGS)
-                       move_from = i + shift - MAX_MSG_FRAGS;
+               if (i + shift >= NR_MSG_FRAG_IDS)
+                       move_from = i + shift - NR_MSG_FRAG_IDS;
                else
                        move_from = i + shift;
                if (move_from == msg->sg.end)
@@ -2323,7 +2323,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
        } while (1);
 
        msg->sg.end = msg->sg.end - shift > msg->sg.end ?
-                     msg->sg.end - shift + MAX_MSG_FRAGS :
+                     msg->sg.end - shift + NR_MSG_FRAG_IDS :
                      msg->sg.end - shift;
 out:
        msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
index a469d21..ded2d52 100644 (file)
@@ -421,7 +421,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
        copied = skb->len;
        msg->sg.start = 0;
        msg->sg.size = copied;
-       msg->sg.end = num_sge == MAX_MSG_FRAGS ? 0 : num_sge;
+       msg->sg.end = num_sge;
        msg->skb = skb;
 
        sk_psock_queue_msg(psock, msg);
index 8a56e09..e387051 100644 (file)
@@ -301,7 +301,7 @@ EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
 static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
                                struct sk_msg *msg, int *copied, int flags)
 {
-       bool cork = false, enospc = msg->sg.start == msg->sg.end;
+       bool cork = false, enospc = sk_msg_full(msg);
        struct sock *sk_redir;
        u32 tosend, delta = 0;
        int ret;
index 293d528..1047e80 100644 (file)
@@ -905,7 +905,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
-       BUG_ON(retval < 0);
+       if (WARN_ON_ONCE(retval < 0)) {
+               kfree_skb(skb);
+               skb = ERR_PTR(retval);
+       }
        return skb;
 }
 
@@ -1369,7 +1372,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
-                       BUG_ON(err < 0);
+                       if (WARN_ON_ONCE(err < 0)) {
+                               kfree_skb(reply);
+                               goto out_free;
+                       }
 
                        ovs_notify(&dp_flow_genl_family, reply, info);
                } else {
@@ -1377,6 +1383,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
                }
        }
 
+out_free:
        ovs_flow_free(flow, true);
        return 0;
 unlock:
index 0d57833..278c0b2 100644 (file)
@@ -245,7 +245,8 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
        struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 
        sch = dev_queue->qdisc_sleeping;
-       if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+       if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
+                                 &sch->bstats) < 0 ||
            qdisc_qstats_copy(d, sch) < 0)
                return -1;
        return 0;
index 46980b8..0d0113a 100644 (file)
@@ -557,8 +557,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
                struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
 
                sch = dev_queue->qdisc_sleeping;
-               if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                         d, NULL, &sch->bstats) < 0 ||
+               if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
+                                         sch->cpu_bstats, &sch->bstats) < 0 ||
                    qdisc_qstats_copy(d, sch) < 0)
                        return -1;
        }
index b2b7fdb..1330ad2 100644 (file)
@@ -339,7 +339,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
        cl_q = q->queues[cl - 1];
        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, NULL, &cl_q->bstats) < 0 ||
+                                 d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
            qdisc_qstats_copy(d, cl_q) < 0)
                return -1;
 
index 0f8fedb..18b884c 100644 (file)
@@ -356,7 +356,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
        cl_q = q->queues[cl - 1];
        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-                                 d, NULL, &cl_q->bstats) < 0 ||
+                                 d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
            qdisc_qstats_copy(d, cl_q) < 0)
                return -1;
 
index a1c8d72..41688da 100644 (file)
@@ -532,7 +532,7 @@ static void __tipc_shutdown(struct socket *sock, int error)
        struct sock *sk = sock->sk;
        struct tipc_sock *tsk = tipc_sk(sk);
        struct net *net = sock_net(sk);
-       long timeout = CONN_TIMEOUT_DEFAULT;
+       long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
        u32 dnode = tsk_peer_node(tsk);
        struct sk_buff *skb;
 
@@ -540,12 +540,10 @@ static void __tipc_shutdown(struct socket *sock, int error)
        tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
                                            !tsk_conn_cong(tsk)));
 
-       /* Push out unsent messages or remove if pending SYN */
-       skb = skb_peek(&sk->sk_write_queue);
-       if (skb && !msg_is_syn(buf_msg(skb)))
-               tipc_sk_push_backlog(tsk);
-       else
-               __skb_queue_purge(&sk->sk_write_queue);
+       /* Push out delayed messages if in Nagle mode */
+       tipc_sk_push_backlog(tsk);
+       /* Remove pending SYN */
+       __skb_queue_purge(&sk->sk_write_queue);
 
        /* Reject all unreceived messages, except on an active connection
         * (which disconnects locally & sends a 'FIN+' to peer).
@@ -1248,9 +1246,14 @@ static void tipc_sk_push_backlog(struct tipc_sock *tsk)
        struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
        struct net *net = sock_net(&tsk->sk);
        u32 dnode = tsk_peer_node(tsk);
+       struct sk_buff *skb = skb_peek(txq);
        int rc;
 
-       if (skb_queue_empty(txq) || tsk->cong_link_cnt)
+       if (!skb || tsk->cong_link_cnt)
+               return;
+
+       /* Do not send SYN again after congestion */
+       if (msg_is_syn(buf_msg(skb)))
                return;
 
        tsk->snt_unacked += tsk->snd_backlog;
@@ -1447,8 +1450,10 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
        rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
        if (unlikely(rc != dlen))
                return rc;
-       if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue)))
+       if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) {
+               __skb_queue_purge(&pkts);
                return -ENOMEM;
+       }
 
        trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
        rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
@@ -2757,6 +2762,7 @@ static void tipc_sk_timeout(struct timer_list *t)
        if (sock_owned_by_user(sk)) {
                sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
                bh_unlock_sock(sk);
+               sock_put(sk);
                return;
        }
 
index bdca31f..b3da6c5 100644 (file)
@@ -209,24 +209,15 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
        return tls_push_sg(sk, ctx, sg, offset, flags);
 }
 
-bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx)
+void tls_free_partial_record(struct sock *sk, struct tls_context *ctx)
 {
        struct scatterlist *sg;
 
-       sg = ctx->partially_sent_record;
-       if (!sg)
-               return false;
-
-       while (1) {
+       for (sg = ctx->partially_sent_record; sg; sg = sg_next(sg)) {
                put_page(sg_page(sg));
                sk_mem_uncharge(sk, sg->length);
-
-               if (sg_is_last(sg))
-                       break;
-               sg++;
        }
        ctx->partially_sent_record = NULL;
-       return true;
 }
 
 static void tls_write_space(struct sock *sk)
index da9f9ce..2b2d0ba 100644 (file)
@@ -710,8 +710,7 @@ static int tls_push_record(struct sock *sk, int flags,
        }
 
        i = msg_pl->sg.start;
-       sg_chain(rec->sg_aead_in, 2, rec->inplace_crypto ?
-                &msg_en->sg.data[i] : &msg_pl->sg.data[i]);
+       sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]);
 
        i = msg_en->sg.end;
        sk_msg_iter_var_prev(i);
@@ -771,8 +770,14 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
 
        policy = !(flags & MSG_SENDPAGE_NOPOLICY);
        psock = sk_psock_get(sk);
-       if (!psock || !policy)
-               return tls_push_record(sk, flags, record_type);
+       if (!psock || !policy) {
+               err = tls_push_record(sk, flags, record_type);
+               if (err) {
+                       *copied -= sk_msg_free(sk, msg);
+                       tls_free_open_rec(sk);
+               }
+               return err;
+       }
 more_data:
        enospc = sk_msg_full(msg);
        if (psock->eval == __SK_NONE) {
@@ -970,8 +975,6 @@ alloc_encrypted:
                        if (ret)
                                goto fallback_to_reg_send;
 
-                       rec->inplace_crypto = 0;
-
                        num_zc++;
                        copied += try_to_copy;
 
@@ -984,7 +987,7 @@ alloc_encrypted:
                                        num_async++;
                                else if (ret == -ENOMEM)
                                        goto wait_for_memory;
-                               else if (ret == -ENOSPC)
+                               else if (ctx->open_rec && ret == -ENOSPC)
                                        goto rollback_iter;
                                else if (ret != -EAGAIN)
                                        goto send_end;
@@ -1053,11 +1056,12 @@ wait_for_memory:
                ret = sk_stream_wait_memory(sk, &timeo);
                if (ret) {
 trim_sgl:
-                       tls_trim_both_msgs(sk, orig_size);
+                       if (ctx->open_rec)
+                               tls_trim_both_msgs(sk, orig_size);
                        goto send_end;
                }
 
-               if (msg_en->sg.size < required_size)
+               if (ctx->open_rec && msg_en->sg.size < required_size)
                        goto alloc_encrypted;
        }
 
@@ -1169,7 +1173,6 @@ alloc_payload:
 
                tls_ctx->pending_open_record_frags = true;
                if (full_record || eor || sk_msg_full(msg_pl)) {
-                       rec->inplace_crypto = 0;
                        ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
                                                  record_type, &copied, flags);
                        if (ret) {
@@ -1190,11 +1193,13 @@ wait_for_sndbuf:
 wait_for_memory:
                ret = sk_stream_wait_memory(sk, &timeo);
                if (ret) {
-                       tls_trim_both_msgs(sk, msg_pl->sg.size);
+                       if (ctx->open_rec)
+                               tls_trim_both_msgs(sk, msg_pl->sg.size);
                        goto sendpage_end;
                }
 
-               goto alloc_payload;
+               if (ctx->open_rec)
+                       goto alloc_payload;
        }
 
        if (num_async) {
@@ -2084,7 +2089,8 @@ void tls_sw_release_resources_tx(struct sock *sk)
        /* Free up un-sent records in tx_list. First, free
         * the partially sent record if any at head of tx_list.
         */
-       if (tls_free_partial_record(sk, tls_ctx)) {
+       if (tls_ctx->partially_sent_record) {
+               tls_free_partial_record(sk, tls_ctx);
                rec = list_first_entry(&ctx->tx_list,
                                       struct tls_rec, list);
                list_del(&rec->list);
index 3845144..4a85151 100644 (file)
@@ -240,14 +240,14 @@ static int sockmap_init_sockets(int verbose)
        addr.sin_port = htons(S1_PORT);
        err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
        if (err < 0) {
-               perror("bind s1 failed()\n");
+               perror("bind s1 failed()");
                return errno;
        }
 
        addr.sin_port = htons(S2_PORT);
        err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
        if (err < 0) {
-               perror("bind s2 failed()\n");
+               perror("bind s2 failed()");
                return errno;
        }
 
@@ -255,14 +255,14 @@ static int sockmap_init_sockets(int verbose)
        addr.sin_port = htons(S1_PORT);
        err = listen(s1, 32);
        if (err < 0) {
-               perror("listen s1 failed()\n");
+               perror("listen s1 failed()");
                return errno;
        }
 
        addr.sin_port = htons(S2_PORT);
        err = listen(s2, 32);
        if (err < 0) {
-               perror("listen s1 failed()\n");
+               perror("listen s1 failed()");
                return errno;
        }
 
@@ -270,14 +270,14 @@ static int sockmap_init_sockets(int verbose)
        addr.sin_port = htons(S1_PORT);
        err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
        if (err < 0 && errno != EINPROGRESS) {
-               perror("connect c1 failed()\n");
+               perror("connect c1 failed()");
                return errno;
        }
 
        addr.sin_port = htons(S2_PORT);
        err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
        if (err < 0 && errno != EINPROGRESS) {
-               perror("connect c2 failed()\n");
+               perror("connect c2 failed()");
                return errno;
        } else if (err < 0) {
                err = 0;
@@ -286,13 +286,13 @@ static int sockmap_init_sockets(int verbose)
        /* Accept Connecrtions */
        p1 = accept(s1, NULL, NULL);
        if (p1 < 0) {
-               perror("accept s1 failed()\n");
+               perror("accept s1 failed()");
                return errno;
        }
 
        p2 = accept(s2, NULL, NULL);
        if (p2 < 0) {
-               perror("accept s1 failed()\n");
+               perror("accept s1 failed()");
                return errno;
        }
 
@@ -332,6 +332,10 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
        int i, fp;
 
        file = fopen(".sendpage_tst.tmp", "w+");
+       if (!file) {
+               perror("create file for sendpage");
+               return 1;
+       }
        for (i = 0; i < iov_length * cnt; i++, k++)
                fwrite(&k, sizeof(char), 1, file);
        fflush(file);
@@ -339,12 +343,17 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
        fclose(file);
 
        fp = open(".sendpage_tst.tmp", O_RDONLY);
+       if (fp < 0) {
+               perror("reopen file for sendpage");
+               return 1;
+       }
+
        clock_gettime(CLOCK_MONOTONIC, &s->start);
        for (i = 0; i < cnt; i++) {
                int sent = sendfile(fd, fp, NULL, iov_length);
 
                if (!drop && sent < 0) {
-                       perror("send loop error:");
+                       perror("send loop error");
                        close(fp);
                        return sent;
                } else if (drop && sent >= 0) {
@@ -463,7 +472,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
                        int sent = sendmsg(fd, &msg, flags);
 
                        if (!drop && sent < 0) {
-                               perror("send loop error:");
+                               perror("send loop error");
                                goto out_errno;
                        } else if (drop && sent >= 0) {
                                printf("send loop error expected: %i\n", sent);
@@ -499,7 +508,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
                total_bytes -= txmsg_pop_total;
                err = clock_gettime(CLOCK_MONOTONIC, &s->start);
                if (err < 0)
-                       perror("recv start time");
+                       perror("recv start time");
                while (s->bytes_recvd < total_bytes) {
                        if (txmsg_cork) {
                                timeout.tv_sec = 0;
@@ -543,7 +552,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
                        if (recv < 0) {
                                if (errno != EWOULDBLOCK) {
                                        clock_gettime(CLOCK_MONOTONIC, &s->end);
-                                       perror("recv failed()\n");
+                                       perror("recv failed()");
                                        goto out_errno;
                                }
                        }
@@ -557,7 +566,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 
                                errno = msg_verify_data(&msg, recv, chunk_sz);
                                if (errno) {
-                                       perror("data verify msg failed\n");
+                                       perror("data verify msg failed");
                                        goto out_errno;
                                }
                                if (recvp) {
@@ -565,7 +574,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
                                                                recvp,
                                                                chunk_sz);
                                        if (errno) {
-                                               perror("data verify msg_peek failed\n");
+                                               perror("data verify msg_peek failed");
                                                goto out_errno;
                                        }
                                }
@@ -654,7 +663,7 @@ static int sendmsg_test(struct sockmap_options *opt)
                        err = 0;
                exit(err ? 1 : 0);
        } else if (rxpid == -1) {
-               perror("msg_loop_rx");
+               perror("msg_loop_rx");
                return errno;
        }
 
@@ -681,7 +690,7 @@ static int sendmsg_test(struct sockmap_options *opt)
                                s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
                exit(err ? 1 : 0);
        } else if (txpid == -1) {
-               perror("msg_loop_tx");
+               perror("msg_loop_tx");
                return errno;
        }
 
@@ -715,7 +724,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
        /* Ping/Pong data from client to server */
        sc = send(c1, buf, sizeof(buf), 0);
        if (sc < 0) {
-               perror("send failed()\n");
+               perror("send failed()");
                return sc;
        }
 
@@ -748,7 +757,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
                        rc = recv(i, buf, sizeof(buf), 0);
                        if (rc < 0) {
                                if (errno != EWOULDBLOCK) {
-                                       perror("recv failed()\n");
+                                       perror("recv failed()");
                                        return rc;
                                }
                        }
@@ -760,7 +769,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
 
                        sc = send(i, buf, rc, 0);
                        if (sc < 0) {
-                               perror("send failed()\n");
+                               perror("send failed()");
                                return sc;
                        }
                }
index d60a343..842d915 100644 (file)
@@ -45,7 +45,7 @@ static int get_stats(int fd, __u16 count, __u32 raddr)
        printf("\nXDP RTT data:\n");
 
        if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) {
-               perror("bpf_map_lookup elem");
+               perror("bpf_map_lookup elem");
                return 1;
        }
 
index d93589b..64f6526 100644 (file)
@@ -3,16 +3,42 @@
 
 CHECK_TC="yes"
 
+# Can be overridden by the configuration file. See lib.sh
+TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
+
+__tc_check_packets() # Re-query a tc filter until its packet counter satisfies "<packets> $operator $count", or TC_HIT_TIMEOUT ms have elapsed.
+{
+       local id=$1 # spliced verbatim into "tc -j -s filter show $id"
+       local handle=$2 # compared against .options.handle to pick the filter
+       local count=$3 # right-hand side of the packet-counter comparison
+       local operator=$4 # comparison operator spliced into the jq expression; callers in this file pass "==" or ">"
+
+       start_time="$(date -u +%s%3N)" # seconds followed by three sub-second digits, i.e. ms (matches the "# ms" unit of TC_HIT_TIMEOUT); not declared local
+       while true
+       do
+               cmd_jq "tc -j -s filter show $id" \
+                      ".[] | select(.options.handle == $handle) | \
+                           select(.options.actions[0].stats.packets $operator $count)" \
+                   &> /dev/null
+               ret=$?
+               if [[ $ret -eq 0 ]]; then # cmd_jq succeeded; presumably that means the jq selection matched (cmd_jq is defined elsewhere, confirm)
+                       return $ret
+               fi
+               current_time="$(date -u +%s%3N)"
+               diff=$(expr $current_time - $start_time) # elapsed time since polling began, same unit as start_time
+               if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then
+                       return 1 # timed out without the condition ever holding
+               fi
+       done # note: there is no sleep between iterations, tc is re-queried back to back
+}
+
 tc_check_packets()
 {
        local id=$1
        local handle=$2
        local count=$3
 
-       cmd_jq "tc -j -s filter show $id" \
-              ".[] | select(.options.handle == $handle) | \
-                     select(.options.actions[0].stats.packets == $count)" \
-              &> /dev/null
+       __tc_check_packets "$id" "$handle" "$count" "==" # retries until the counter equals $count exactly, bounded by TC_HIT_TIMEOUT
 }
 
 tc_check_packets_hitting()
@@ -20,8 +46,5 @@ tc_check_packets_hitting()
        local id=$1
        local handle=$2
 
-       cmd_jq "tc -j -s filter show $id" \
-              ".[] | select(.options.handle == $handle) | \
-                     select(.options.actions[0].stats.packets > 0)" \
-              &> /dev/null
+       __tc_check_packets "$id" "$handle" 0 ">"
 }
index ab367e7..d697815 100755 (executable)
@@ -1249,8 +1249,7 @@ test_list_flush_ipv4_exception() {
        done
        run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"
 
-       # Each exception is printed as two lines
-       if [ "$(${ns_a} ip route list cache | wc -l)" -ne 202 ]; then
+       if [ "$(${ns_a} ip -oneline route list cache | wc -l)" -ne 101 ]; then
                err "  can't list cached exceptions"
                fail=1
        fi
@@ -1300,7 +1299,7 @@ test_list_flush_ipv6_exception() {
                run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
        done
        run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
-       if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 101 ]; then
+       if [ "$(${ns_a} ip -oneline -6 route list cache | wc -l)" -ne 101 ]; then
                err "  can't list cached exceptions"
                fail=1
        fi
index 1c8f194..46abcae 100644 (file)
@@ -268,6 +268,38 @@ TEST_F(tls, sendmsg_single)
        EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
+#define MAX_FRAGS      64 /* largest iovec count exercised by the test below */
+#define SEND_LEN       13 /* bytes per fragment: the 12 characters of "test_sendmsg" plus its NUL */
+TEST_F(tls, sendmsg_fragmented) /* a single sendmsg() split over 1..MAX_FRAGS iovecs must be received complete and unmodified */
+{
+       char const *test_str = "test_sendmsg";
+       char buf[SEND_LEN * MAX_FRAGS]; /* sized for the largest iteration */
+       struct iovec vec[MAX_FRAGS];
+       struct msghdr msg;
+       int i, frags;
+
+       for (frags = 1; frags <= MAX_FRAGS; frags++) { /* one send/receive round per fragment count */
+               for (i = 0; i < frags; i++) { /* every fragment points at the same source string */
+                       vec[i].iov_base = (char *)test_str;
+                       vec[i].iov_len = SEND_LEN;
+               }
+
+               memset(&msg, 0, sizeof(struct msghdr)); /* only the iovec fields are set; everything else stays zero */
+               msg.msg_iov = vec;
+               msg.msg_iovlen = frags;
+
+               EXPECT_EQ(sendmsg(self->fd, &msg, 0), SEND_LEN * frags);
+               EXPECT_EQ(recv(self->cfd, buf, SEND_LEN * frags, MSG_WAITALL),
+                         SEND_LEN * frags); /* MSG_WAITALL: the whole payload is requested in one recv() */
+
+               for (i = 0; i < frags; i++) /* verify each received fragment against the source string */
+                       EXPECT_EQ(memcmp(buf + SEND_LEN * i,
+                                        test_str, SEND_LEN), 0);
+       }
+}
+#undef MAX_FRAGS /* both macros are dropped right after the test that uses them */
+#undef SEND_LEN
+
 TEST_F(tls, sendmsg_large)
 {
        void *mem = malloc(16384);
@@ -694,6 +726,34 @@ TEST_F(tls, recv_lowat)
        EXPECT_EQ(memcmp(send_mem, recv_mem + 10, 5), 0);
 }
 
+TEST_F(tls, recv_rcvbuf) /* recv() expectations after SO_RCVBUF on the receiving socket is set to 1024 */
+{
+       char send_mem[4096];
+       char recv_mem[4096];
+       int rcv_buf = 1024; /* value handed to SO_RCVBUF below */
+
+       memset(send_mem, 0x1c, sizeof(send_mem)); /* recognisable fill pattern for the memcmp() check */
+
+       EXPECT_EQ(setsockopt(self->cfd, SOL_SOCKET, SO_RCVBUF,
+                            &rcv_buf, sizeof(rcv_buf)), 0);
+
+       EXPECT_EQ(send(self->fd, send_mem, 512, 0), 512); /* 512-byte send: expected to be received whole */
+       memset(recv_mem, 0, sizeof(recv_mem));
+       EXPECT_EQ(recv(self->cfd, recv_mem, sizeof(recv_mem), 0), 512);
+       EXPECT_EQ(memcmp(send_mem, recv_mem, 512), 0);
+
+       if (self->notls) /* the EMSGSIZE expectations below are only checked when notls is unset */
+               return;
+
+       EXPECT_EQ(send(self->fd, send_mem, 4096, 0), 4096); /* 4096-byte send: the receive side is expected to fail */
+       memset(recv_mem, 0, sizeof(recv_mem));
+       EXPECT_EQ(recv(self->cfd, recv_mem, sizeof(recv_mem), 0), -1);
+       EXPECT_EQ(errno, EMSGSIZE);
+
+       EXPECT_EQ(recv(self->cfd, recv_mem, sizeof(recv_mem), 0), -1); /* a second attempt must fail the same way */
+       EXPECT_EQ(errno, EMSGSIZE);
+}
+
 TEST_F(tls, bidir)
 {
        char const *test_str = "test_read";