net-zerocopy: Set zerocopy hint when data is copied
author Arjun Roy <arjunroy@google.com>
Wed, 2 Dec 2020 22:53:48 +0000 (14:53 -0800)
committer Jakub Kicinski <kuba@kernel.org>
Fri, 4 Dec 2020 21:40:53 +0000 (13:40 -0800)
Set zerocopy hint, even when falling back to copy, so that the
pending data can be efficiently received using zerocopy when
possible.

Signed-off-by: Arjun Roy <arjunroy@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/tcp.c

index 05ec656..df6dd15 100644 (file)
@@ -1800,6 +1800,43 @@ static int find_next_mappable_frag(const skb_frag_t *frag,
        return offset;
 }
 
+static void tcp_zerocopy_set_hint_for_skb(struct sock *sk,
+                                         struct tcp_zerocopy_receive *zc,
+                                         struct sk_buff *skb, u32 offset)
+{
+       u32 frag_offset, partial_frag_remainder = 0;
+       int mappable_offset;
+       skb_frag_t *frag;
+
+       /* Compute zc->recv_skip_hint for the data in @skb starting at
+        * @offset. The result is stored in @zc; nothing is returned, and
+        * @sk is not referenced in this function.
+        *
+        * Worst case: skip to next skb, i.e. everything left in this skb
+        * from @offset onward. Try to improve on this case below.
+        */
+       zc->recv_skip_hint = skb->len - offset;
+
+       /* Find the frag containing this offset (and how far into that frag).
+        * If no frag is returned, the worst-case hint set above is kept.
+        */
+       frag = skb_advance_to_frag(skb, offset, &frag_offset);
+       if (!frag)
+               return;
+
+       if (frag_offset) {
+               struct skb_shared_info *info = skb_shinfo(skb);
+
+               /* We read part of the last frag, must recvmsg() rest of skb.
+                * The hint therefore stays at the worst-case value set above.
+                */
+               if (frag == &info->frags[info->nr_frags - 1])
+                       return;
+
+               /* Else, we must at least read the remainder in this frag.
+                * Take those bytes out of the hint for now and continue the
+                * search from the following frag; they are added back below.
+                */
+               partial_frag_remainder = skb_frag_size(frag) - frag_offset;
+               zc->recv_skip_hint -= partial_frag_remainder;
+               ++frag;
+       }
+
+       /* partial_frag_remainder: If part way through a frag, must read rest.
+        * mappable_offset: Bytes till next mappable frag, *not* counting bytes
+        * in partial_frag_remainder.
+        * The final hint is the sum of the two.
+        */
+       mappable_offset = find_next_mappable_frag(frag, zc->recv_skip_hint);
+       zc->recv_skip_hint = mappable_offset + partial_frag_remainder;
+}
+
 static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
                              int nonblock, int flags,
                              struct scm_timestamping_internal *tss,
@@ -1830,6 +1867,14 @@ static int receive_fallback_to_copy(struct sock *sk,
                return err;
 
        zc->copybuf_len = err;
+       if (likely(zc->copybuf_len)) {
+               struct sk_buff *skb;
+               u32 offset;
+
+               skb = tcp_recv_skb(sk, tcp_sk(sk)->copied_seq, &offset);
+               if (skb)
+                       tcp_zerocopy_set_hint_for_skb(sk, zc, skb, offset);
+       }
        return 0;
 }