Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

[linux-2.6-microblaze.git] / net / ipv4 / tcp_input.c
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 3ec4edc..ab5f0ea 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
  
         if (sk->sk_sndbuf < sndmem)
                 WRITE_ONCE(sk->sk_sndbuf,
-                          min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
+                          min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
  }
  
  /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
         struct tcp_sock *tp = tcp_sk(sk);
         /* Optimize this! */
         int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
-       int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+       int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
  
         while (tp->rcv_ssthresh <= window) {
                 if (truesize <= skb->len)
@@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
   */
  static void tcp_init_buffer_space(struct sock *sk)
  {
-       int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+       int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
         struct tcp_sock *tp = tcp_sk(sk);
         int maxwin;
  
@@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk)
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct net *net = sock_net(sk);
+       int rmem2;
  
         icsk->icsk_ack.quick = 0;
+       rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
  
-       if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
+       if (sk->sk_rcvbuf < rmem2 &&
             !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
             !tcp_under_memory_pressure(sk) &&
             sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
                 WRITE_ONCE(sk->sk_rcvbuf,
-                          min(atomic_read(&sk->sk_rmem_alloc),
-                              net->ipv4.sysctl_tcp_rmem[2]));
+                          min(atomic_read(&sk->sk_rmem_alloc), rmem2));
         }
         if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
                 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -724,7 +725,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
          * <prev RTT . ><current RTT .. ><next RTT .... >
          */
  
-       if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+       if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
             !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
                 int rcvmem, rcvbuf;
                 u64 rcvwin, grow;
@@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
  
                 do_div(rcvwin, tp->advmss);
                 rcvbuf = min_t(u64, rcvwin * rcvmem,
-                              sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+                              READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
                 if (rcvbuf > sk->sk_rcvbuf) {
                         WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
  
@@ -805,7 +806,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
                          * restart window, so that we send ACKs quickly.
                          */
                         tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
-                       sk_mem_reclaim(sk);
                 }
         }
         icsk->icsk_ack.lrcvtime = now;
@@ -910,9 +910,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
          *       end of slow start and should slow down.
          */
         if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
-               rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
+               rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
         else
-               rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
+               rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
  
         rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
  
@@ -1051,7 +1051,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
                          tp->undo_marker ? tp->undo_retrans : 0);
  #endif
                 tp->reordering = min_t(u32, (metric + mss - 1) / mss,
-                                      sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+                                      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
         }
  
         /* This exciting event is worth to be remembered. 8) */
@@ -2030,7 +2030,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
                 return;
  
         tp->reordering = min_t(u32, tp->packets_out + addend,
-                              sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+                              READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
         tp->reord_seen++;
         NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
  }
@@ -2095,7 +2095,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
  
  static bool tcp_is_rack(const struct sock *sk)
  {
-       return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+       return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+               TCP_RACK_LOSS_DETECTION;
  }
  
  /* If we detect SACK reneging, forget all SACK information
@@ -2139,6 +2140,7 @@ void tcp_enter_loss(struct sock *sk)
         struct tcp_sock *tp = tcp_sk(sk);
         struct net *net = sock_net(sk);
         bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+       u8 reordering;
  
         tcp_timeout_mark_lost(sk);
  
@@ -2159,10 +2161,12 @@ void tcp_enter_loss(struct sock *sk)
         /* Timeout in disordered state after receiving substantial DUPACKs
          * suggests that the degree of reordering is over-estimated.
          */
+       reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
         if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
-           tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+           tp->sacked_out >= reordering)
                 tp->reordering = min_t(unsigned int, tp->reordering,
-                                      net->ipv4.sysctl_tcp_reordering);
+                                      reordering);
+
         tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->snd_nxt;
         tcp_ecn_queue_cwr(tp);
@@ -2171,7 +2175,7 @@ void tcp_enter_loss(struct sock *sk)
          * loss recovery is underway except recurring timeout(s) on
          * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
          */
-       tp->frto = net->ipv4.sysctl_tcp_frto &&
+       tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
                    (new_recovery || icsk->icsk_retransmits) &&
                    !inet_csk(sk)->icsk_mtup.probe_size;
  }
@@ -3054,7 +3058,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
  
  static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
  {
-       u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+       u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
         struct tcp_sock *tp = tcp_sk(sk);
  
         if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
@@ -3464,7 +3468,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
          * new SACK or ECE mark may first advance cwnd here and later reduce
          * cwnd in tcp_fastretrans_alert() based on more states.
          */
-       if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+       if (tcp_sk(sk)->reordering >
+           READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
                 return flag & FLAG_FORWARD_PROGRESS;
  
         return flag & FLAG_DATA_ACKED;
@@ -3576,7 +3581,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
         if (*last_oow_ack_time) {
                 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
  
-               if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+               if (0 <= elapsed &&
+                   elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
                         NET_INC_STATS(net, mib_idx);
                         return true;    /* rate-limited: don't send yet! */
                 }
@@ -3624,7 +3630,7 @@ static void tcp_send_challenge_ack(struct sock *sk)
         /* Then check host-wide RFC 5961 rate limit. */
         now = jiffies / HZ;
         if (now != challenge_timestamp) {
-               u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+               u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
                 u32 half = (ack_limit + 1) >> 1;
  
                 challenge_timestamp = now;
@@ -3967,7 +3973,7 @@ static bool smc_parse_options(const struct tcphdr *th,
  /* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
   * value on success.
   */
-static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
+u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
  {
         const unsigned char *ptr = (const unsigned char *)(th + 1);
         int length = (th->doff * 4) - sizeof(struct tcphdr);
@@ -4006,6 +4012,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
         }
         return mss;
  }
+EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
  
  /* Look for tcp options. Normally only called on SYN and SYNACK packets.
   * But, this can also be called on packets in the established flow when
@@ -4056,7 +4063,7 @@ void tcp_parse_options(const struct net *net,
                                 break;
                         case TCPOPT_WINDOW:
                                 if (opsize == TCPOLEN_WINDOW && th->syn &&
-                                   !estab && net->ipv4.sysctl_tcp_window_scaling) {
+                                   !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
                                         __u8 snd_wscale = *(__u8 *)ptr;
                                         opt_rx->wscale_ok = 1;
                                         if (snd_wscale > TCP_MAX_WSCALE) {
@@ -4072,7 +4079,7 @@ void tcp_parse_options(const struct net *net,
                         case TCPOPT_TIMESTAMP:
                                 if ((opsize == TCPOLEN_TIMESTAMP) &&
                                     ((estab && opt_rx->tstamp_ok) ||
-                                    (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+                                    (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
                                         opt_rx->saw_tstamp = 1;
                                         opt_rx->rcv_tsval = get_unaligned_be32(ptr);
                                         opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -4080,7 +4087,7 @@ void tcp_parse_options(const struct net *net,
                                 break;
                         case TCPOPT_SACK_PERM:
                                 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
-                                   !estab && net->ipv4.sysctl_tcp_sack) {
+                                   !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
                                         opt_rx->sack_ok = TCP_SACK_SEEN;
                                         tcp_sack_reset(opt_rx);
                                 }
@@ -4390,7 +4397,6 @@ void tcp_fin(struct sock *sk)
         skb_rbtree_purge(&tp->out_of_order_queue);
         if (tcp_is_sack(tp))
                 tcp_sack_reset(&tp->rx_opt);
-       sk_mem_reclaim(sk);
  
         if (!sock_flag(sk, SOCK_DEAD)) {
                 sk->sk_state_change(sk);
@@ -4421,7 +4427,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
-       if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+       if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
                 int mib_idx;
  
                 if (before(seq, tp->rcv_nxt))
@@ -4468,7 +4474,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
                 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
                 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
  
-               if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+               if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
                         u32 end_seq = TCP_SKB_CB(skb)->end_seq;
  
                         tcp_rcv_spurious_retrans(sk, skb);
@@ -5287,7 +5293,7 @@ new_range:
                     before(TCP_SKB_CB(skb)->end_seq, start)) {
                         /* Do not attempt collapsing tiny skbs */
                         if (range_truesize != head->truesize ||
-                           end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+                           end - start >= SKB_WITH_OVERHEAD(PAGE_SIZE)) {
                                 tcp_collapse(sk, NULL, &tp->out_of_order_queue,
                                              head, skb, start, end);
                         } else {
@@ -5336,7 +5342,6 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
                 tcp_drop_reason(sk, rb_to_skb(node),
                                 SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
                 if (!prev || goal <= 0) {
-                       sk_mem_reclaim(sk);
                         if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
                             !tcp_under_memory_pressure(sk))
                                 break;
@@ -5383,7 +5388,6 @@ static int tcp_prune_queue(struct sock *sk)
                              skb_peek(&sk->sk_receive_queue),
                              NULL,
                              tp->copied_seq, tp->rcv_nxt);
-       sk_mem_reclaim(sk);
  
         if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
                 return 0;
@@ -5514,7 +5518,7 @@ send_now:
         }
  
         if (!tcp_is_sack(tp) ||
-           tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+           tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
                 goto send_now;
  
         if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5535,11 +5539,12 @@ send_now:
         if (tp->srtt_us && tp->srtt_us < rtt)
                 rtt = tp->srtt_us;
  
-       delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+       delay = min_t(unsigned long,
+                     READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
                       rtt * (NSEC_PER_USEC >> 3)/20);
         sock_hold(sk);
         hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
-                              sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+                              READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
                                HRTIMER_MODE_REL_PINNED_SOFT);
  }
  
@@ -5567,7 +5572,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
         struct tcp_sock *tp = tcp_sk(sk);
         u32 ptr = ntohs(th->urg_ptr);
  
-       if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+       if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
                 ptr--;
         ptr += ntohl(th->seq);
  
@@ -6797,11 +6802,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
  {
         struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
         const char *msg = "Dropping request";
-       bool want_cookie = false;
         struct net *net = sock_net(sk);
+       bool want_cookie = false;
+       u8 syncookies;
+
+       syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
  
  #ifdef CONFIG_SYN_COOKIES
-       if (net->ipv4.sysctl_tcp_syncookies) {
+       if (syncookies) {
                 msg = "Sending cookies";
                 want_cookie = true;
                 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6809,8 +6817,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
  #endif
                 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
  
-       if (!queue->synflood_warned &&
-           net->ipv4.sysctl_tcp_syncookies != 2 &&
+       if (!queue->synflood_warned && syncookies != 2 &&
             xchg(&queue->synflood_warned, 1) == 0)
                 net_info_ratelimited("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
                                      proto, sk->sk_num, msg);
@@ -6859,7 +6866,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
         struct tcp_sock *tp = tcp_sk(sk);
         u16 mss;
  
-       if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+       if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
             !inet_csk_reqsk_queue_is_full(sk))
                 return 0;
  
@@ -6893,13 +6900,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
         bool want_cookie = false;
         struct dst_entry *dst;
         struct flowi fl;
+       u8 syncookies;
+
+       syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
  
         /* TW buckets are converted to open requests without
          * limitations, they conserve resources and peer is
          * evidently real one.
          */
-       if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
-            inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+       if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
                 want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
                 if (!want_cookie)
                         goto drop;
@@ -6948,10 +6957,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                 tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
  
         if (!want_cookie && !isn) {
+               int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
+
                 /* Kill the following clause, if you dislike this way. */
-               if (!net->ipv4.sysctl_tcp_syncookies &&
-                   (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-                    (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+               if (!syncookies &&
+                   (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+                    (max_syn_backlog >> 2)) &&
                     !tcp_peer_is_proven(req, dst)) {
                         /* Without syncookies last quarter of
                          * backlog is filled with destinations,