net: Add skb->mono_delivery_time to distinguish mono delivery_time from (rcv) timestamp

author Martin KaFai Lau <kafai@fb.com>

Wed, 2 Mar 2022 19:55:25 +0000 (11:55 -0800)

committer David S. Miller <davem@davemloft.net>

Thu, 3 Mar 2022 14:38:48 +0000 (14:38 +0000)
author Martin KaFai Lau <kafai@fb.com>
Wed, 2 Mar 2022 19:55:25 +0000 (11:55 -0800)
committer David S. Miller <davem@davemloft.net>
Thu, 3 Mar 2022 14:38:48 +0000 (14:38 +0000)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

index d67941f..803ffa6 100644 (file)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -795,6 +795,10 @@ typedef unsigned char *sk_buff_data_t;
   *     @dst_pending_confirm: need to confirm neighbour
   *     @decrypted: Decrypted SKB
   *     @slow_gro: state present at GRO time, slower prepare step required
+ *     @mono_delivery_time: When set, skb->tstamp has the
+ *             delivery_time in mono clock base (i.e. EDT).  Otherwise, the
+ *             skb->tstamp has the (rcv) timestamp at ingress and
+ *             delivery_time at egress.
   *     @napi_id: id of the NAPI struct this skb came from
   *     @sender_cpu: (aka @napi_id) source CPU in XPS
   *     @secmark: security marking
@@ -965,6 +969,7 @@ struct sk_buff {
         __u8                    decrypted:1;
  #endif
         __u8                    slow_gro:1;
+       __u8                    mono_delivery_time:1;
  
  #ifdef CONFIG_NET_SCHED
         __u16                   tc_index;       /* traffic control index */
@@ -3983,6 +3988,14 @@ static inline ktime_t net_timedelta(ktime_t t)
         return ktime_sub(ktime_get_real(), t);
  }
  
+static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
+                                        bool mono)
+{
+       skb->tstamp = kt;
+       /* @mono is ignored for now; setting mono_delivery_time is enabled later */
+       skb->mono_delivery_time = 0;
+}
+
  static inline u8 skb_metadata_len(const struct sk_buff *skb)
  {
         return skb_shinfo(skb)->meta_len;
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c

index fdbed31..ebfb2a5 100644 (file)
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -32,6 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
                                            struct sk_buff *))
  {
         int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+       bool mono_delivery_time = skb->mono_delivery_time;
         unsigned int hlen, ll_rs, mtu;
         ktime_t tstamp = skb->tstamp;
         struct ip_frag_state state;
@@ -81,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
                         if (iter.frag)
                                 ip_fraglist_prepare(skb, &iter);
  
-                       skb->tstamp = tstamp;
+                       skb_set_delivery_time(skb, tstamp, mono_delivery_time);
                         err = output(net, sk, data, skb);
                         if (err || !iter.frag)
                                 break;
@@ -112,7 +113,7 @@ slow_path:
                         goto blackhole;
                 }
  
-               skb2->tstamp = tstamp;
+               skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
                 err = output(net, sk, data, skb2);
                 if (err)
                         goto blackhole;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 6df3545..a9588e0 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -761,6 +761,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
  {
         struct iphdr *iph;
         struct sk_buff *skb2;
+       bool mono_delivery_time = skb->mono_delivery_time;
         struct rtable *rt = skb_rtable(skb);
         unsigned int mtu, hlen, ll_rs;
         struct ip_fraglist_iter iter;
@@ -852,7 +853,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                                 }
                         }
  
-                       skb->tstamp = tstamp;
+                       skb_set_delivery_time(skb, tstamp, mono_delivery_time);
                         err = output(net, sk, skb);
  
                         if (!err)
@@ -908,7 +909,7 @@ slow_path:
                 /*
                  *      Put this fragment into the sending queue.
                  */
-               skb2->tstamp = tstamp;
+               skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
                 err = output(net, sk, skb2);
                 if (err)
                         goto fail;
@@ -1727,6 +1728,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
                           arg->csumoffset) = csum_fold(csum_add(nskb->csum,
                                                                 arg->csum));
                 nskb->ip_summed = CHECKSUM_NONE;
+               /* Setting mono_delivery_time will be enabled later */
+               nskb->mono_delivery_time = 0;
                 ip_push_pending_frames(sk, &fl4);
         }
  out:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index e76bf1e..2319531 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1253,7 +1253,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
         tp = tcp_sk(sk);
         prior_wstamp = tp->tcp_wstamp_ns;
         tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
-       skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+       skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
         if (clone_it) {
                 oskb = skb;
  
@@ -1589,7 +1589,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
  
         skb_split(skb, buff, len);
  
-       buff->tstamp = skb->tstamp;
+       skb_set_delivery_time(buff, skb->tstamp, true);
         tcp_fragment_tstamp(skb, buff);
  
         old_factor = tcp_skb_pcount(skb);
@@ -2616,7 +2616,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
  
                 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
                         /* "skb_mstamp_ns" is used as a start point for the retransmit timer */
-                       skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+                       tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+                       skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
                         list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
                         tcp_init_tso_segs(skb, mss_now);
                         goto repair; /* Skip network transmission */
@@ -3541,11 +3542,12 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
         now = tcp_clock_ns();
  #ifdef CONFIG_SYN_COOKIES
         if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
-               skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
+               skb_set_delivery_time(skb, cookie_init_timestamp(req, now),
+                                     true);
         else
  #endif
         {
-               skb->skb_mstamp_ns = now;
+               skb_set_delivery_time(skb, now, true);
                 if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
                         tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
         }
@@ -3594,7 +3596,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
         bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
                                 synack_type, &opts);
  
-       skb->skb_mstamp_ns = now;
+       skb_set_delivery_time(skb, now, true);
         tcp_add_tx_delay(skb, tp);
  
         return skb;
@@ -3771,7 +3773,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
  
         err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
  
-       syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
+       skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true);
  
         /* Now full SYN+DATA was cloned and sent (or not),
          * remove the SYN from the original skb (syn_data)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c

index c5edc86..dad4e3d 100644 (file)
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -813,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
         struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                 inet6_sk(skb->sk) : NULL;
+       bool mono_delivery_time = skb->mono_delivery_time;
         struct ip6_frag_state state;
         unsigned int mtu, hlen, nexthdr_offset;
         ktime_t tstamp = skb->tstamp;
@@ -903,7 +904,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                         if (iter.frag)
                                 ip6_fraglist_prepare(skb, &iter);
  
-                       skb->tstamp = tstamp;
+                       skb_set_delivery_time(skb, tstamp, mono_delivery_time);
                         err = output(net, sk, skb);
                         if (!err)
                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -962,7 +963,7 @@ slow_path:
                 /*
                  *      Put this fragment into the sending queue.
                  */
-               frag->tstamp = tstamp;
+               skb_set_delivery_time(frag, tstamp, mono_delivery_time);
                 err = output(net, sk, frag);
                 if (err)
                         goto fail;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c

index 6ab710b..1da3324 100644 (file)
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -121,6 +121,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                                   struct sk_buff *))
  {
         int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+       bool mono_delivery_time = skb->mono_delivery_time;
         ktime_t tstamp = skb->tstamp;
         struct ip6_frag_state state;
         u8 *prevhdr, nexthdr = 0;
@@ -186,7 +187,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                         if (iter.frag)
                                 ip6_fraglist_prepare(skb, &iter);
  
-                       skb->tstamp = tstamp;
+                       skb_set_delivery_time(skb, tstamp, mono_delivery_time);
                         err = output(net, sk, data, skb);
                         if (err || !iter.frag)
                                 break;
@@ -219,7 +220,7 @@ slow_path:
                         goto blackhole;
                 }
  
-               skb2->tstamp = tstamp;
+               skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
                 err = output(net, sk, data, skb2);
                 if (err)
                         goto blackhole;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c

index e98af86..cb2bb7d 100644 (file)
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -940,7 +940,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
                 } else {
                         mark = sk->sk_mark;
                 }
-               buff->tstamp = tcp_transmit_time(sk);
+               skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
         }
         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
         fl6.fl6_dport = t1->dest;
author	Martin KaFai Lau <kafai@fb.com>
	Wed, 2 Mar 2022 19:55:25 +0000 (11:55 -0800)
committer	David S. Miller <davem@davemloft.net>
	Thu, 3 Mar 2022 14:38:48 +0000 (14:38 +0000)
include/linux/skbuff.h		patch \| blob \| history
net/bridge/netfilter/nf_conntrack_bridge.c		patch \| blob \| history
net/ipv4/ip_output.c		patch \| blob \| history
net/ipv4/tcp_output.c		patch \| blob \| history
net/ipv6/ip6_output.c		patch \| blob \| history
net/ipv6/netfilter.c		patch \| blob \| history
net/ipv6/tcp_ipv6.c		patch \| blob \| history