Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2faaaaf..ba2bdc8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -294,6 +294,8 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
 
 atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp;       /* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
+DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
+EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc);
 
 #if IS_ENABLED(CONFIG_SMC)
 DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
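The new per-CPU counter above is what lets TCP charge and uncharge small amounts of memory without touching the shared tcp_memory_allocated atomic on every operation. Below is a rough standalone model of that batching, assuming the fold-into-the-atomic scheme the counter is consumed by elsewhere in the series; the helper name, the ~1 MB threshold and the plain long standing in for the atomic are illustrative only, not kernel code.

#include <stdio.h>

/* Rough model of per-CPU forward-alloc batching: small charges accumulate
 * in a CPU-local reserve, and the shared counter (an atomic_long_t in the
 * kernel) is only updated once the reserve exceeds a batch threshold.
 */
#define PCPU_RESERVE_PAGES (1 << (20 - 12))    /* ~1 MB in 4 KB pages */

static long memory_allocated;   /* models the shared tcp_memory_allocated */
static int  per_cpu_fw_alloc;   /* models one CPU's private counter       */

static void memory_allocated_add(int pages)
{
        per_cpu_fw_alloc += pages;
        if (per_cpu_fw_alloc >= PCPU_RESERVE_PAGES) {
                memory_allocated += per_cpu_fw_alloc;   /* one shared update */
                per_cpu_fw_alloc = 0;
        }
}

int main(void)
{
        /* 300 one-page charges dirty the shared counter exactly once. */
        for (int i = 0; i < 300; i++)
                memory_allocated_add(1);
        printf("shared=%ld pages, local reserve=%d pages\n",
               memory_allocated, per_cpu_fw_alloc);
        return 0;
}

In the model, 256 of the 300 pages are folded into the shared counter in a single update and the remaining 44 stay in the CPU-local reserve.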
@@ -856,9 +858,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 {
        struct sk_buff *skb;
 
-       if (unlikely(tcp_under_memory_pressure(sk)))
-               sk_mem_reclaim_partial(sk);
-
        skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
        if (likely(skb)) {
                bool mem_scheduled;
@@ -952,6 +951,24 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
        return 0;
 }
 
+
+static int tcp_wmem_schedule(struct sock *sk, int copy)
+{
+       int left;
+
+       if (likely(sk_wmem_schedule(sk, copy)))
+               return copy;
+
+       /* We could be in trouble if we have nothing queued.
+        * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0]
+        * to guarantee some progress.
+        */
+       left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued;
+       if (left > 0)
+               sk_forced_mem_schedule(sk, min(left, copy));
+       return min(copy, sk->sk_forward_alloc);
+}
+
 static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
                                      struct page *page, int offset, size_t *size)
 {
@@ -987,7 +1004,11 @@ new_segment:
                tcp_mark_push(tp, skb);
                goto new_segment;
        }
-       if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy))
+       if (tcp_downgrade_zcopy_pure(sk, skb))
+               return NULL;
+
+       copy = tcp_wmem_schedule(sk, copy);
+       if (!copy)
                return NULL;
 
        if (can_coalesce) {
@@ -1203,17 +1224,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
        flags = msg->msg_flags;
 
-       if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
+       if ((flags & MSG_ZEROCOPY) && size) {
                skb = tcp_write_queue_tail(sk);
-               uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
-               if (!uarg) {
-                       err = -ENOBUFS;
-                       goto out_err;
-               }
 
-               zc = sk->sk_route_caps & NETIF_F_SG;
-               if (!zc)
-                       uarg->zerocopy = 0;
+               if (msg->msg_ubuf) {
+                       uarg = msg->msg_ubuf;
+                       net_zcopy_get(uarg);
+                       zc = sk->sk_route_caps & NETIF_F_SG;
+               } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+                       uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
+                       if (!uarg) {
+                               err = -ENOBUFS;
+                               goto out_err;
+                       }
+                       zc = sk->sk_route_caps & NETIF_F_SG;
+                       if (!zc)
+                               uarg->zerocopy = 0;
+               }
        }
 
        if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
@@ -1336,8 +1363,14 @@ new_segment:
 
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
-                       if (tcp_downgrade_zcopy_pure(sk, skb) ||
-                           !sk_wmem_schedule(sk, copy))
+                       if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
+                               if (tcp_downgrade_zcopy_pure(sk, skb))
+                                       goto wait_for_space;
+                               skb_zcopy_downgrade_managed(skb);
+                       }
+
+                       copy = tcp_wmem_schedule(sk, copy);
+                       if (!copy)
                                goto wait_for_space;
 
                        err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
@@ -1364,7 +1397,8 @@ new_segment:
                                skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
 
                        if (!skb_zcopy_pure(skb)) {
-                               if (!sk_wmem_schedule(sk, copy))
+                               copy = tcp_wmem_schedule(sk, copy);
+                               if (!copy)
                                        goto wait_for_space;
                        }
 
@@ -1710,6 +1744,50 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 }
 EXPORT_SYMBOL(tcp_read_sock);
 
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       u32 seq = tp->copied_seq;
+       struct sk_buff *skb;
+       int copied = 0;
+       u32 offset;
+
+       if (sk->sk_state == TCP_LISTEN)
+               return -ENOTCONN;
+
+       while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+               int used;
+
+               __skb_unlink(skb, &sk->sk_receive_queue);
+               used = recv_actor(sk, skb);
+               if (used <= 0) {
+                       if (!copied)
+                               copied = used;
+                       break;
+               }
+               seq += used;
+               copied += used;
+
+               if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+                       consume_skb(skb);
+                       ++seq;
+                       break;
+               }
+               consume_skb(skb);
+               break;
+       }
+       WRITE_ONCE(tp->copied_seq, seq);
+
+       tcp_rcv_space_adjust(sk);
+
+       /* Clean up data we have read: This will do ACK frames. */
+       if (copied > 0)
+               tcp_cleanup_rbuf(sk, copied);
+
+       return copied;
+}
+EXPORT_SYMBOL(tcp_read_skb);
+
 int tcp_peek_len(struct socket *sock)
 {
        return tcp_inq(sock->sk);
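tcp_read_skb() above consumes at most one skb per call: the skb is unlinked from the receive queue, handed to the recv_actor callback, and copied_seq advances by whatever the actor reported, plus one sequence number if the skb carried a FIN. The following standalone sketch models just that accounting, using an invented segment struct and a stand-in actor; it is not kernel code.

#include <stdio.h>
#include <stdbool.h>

/* Invented stand-in for one skb's worth of receive-queue state. */
struct segment {
        int len;        /* payload bytes in this skb      */
        bool fin;       /* does the skb carry a FIN flag? */
};

/* Stand-in for the skb_read_actor_t callback: consume everything. */
static int recv_actor(const struct segment *seg)
{
        return seg->len;
}

int main(void)
{
        unsigned int copied_seq = 1000;         /* tp->copied_seq */
        struct segment seg = { .len = 1448, .fin = true };
        int copied = 0;

        int used = recv_actor(&seg);
        if (used > 0) {
                copied_seq += used;             /* seq += used            */
                copied += used;
                if (seg.fin)
                        copied_seq++;           /* FIN occupies one seq   */
        } else if (!copied) {
                copied = used;                  /* report the actor error */
        }

        printf("copied=%d bytes, copied_seq advanced to %u\n",
               copied, copied_seq);
        return 0;
}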
@@ -2764,8 +2842,6 @@ void __tcp_close(struct sock *sk, long timeout)
                __kfree_skb(skb);
        }
 
-       sk_mem_reclaim(sk);
-
        /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
        if (sk->sk_state == TCP_CLOSE)
                goto adjudge_to_death;
@@ -2873,7 +2949,6 @@ adjudge_to_death:
                }
        }
        if (sk->sk_state != TCP_CLOSE) {
-               sk_mem_reclaim(sk);
                if (tcp_check_oom(sk, 0)) {
                        tcp_set_state(sk, TCP_CLOSE);
                        tcp_send_active_reset(sk, GFP_ATOMIC);
@@ -2951,7 +3026,6 @@ void tcp_write_queue_purge(struct sock *sk)
        }
        tcp_rtx_queue_purge(sk);
        INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
-       sk_mem_reclaim(sk);
        tcp_clear_all_retrans_hints(tcp_sk(sk));
        tcp_sk(sk)->packets_out = 0;
        inet_csk(sk)->icsk_backoff = 0;
@@ -4514,16 +4588,24 @@ EXPORT_SYMBOL_GPL(tcp_done);
 
 int tcp_abort(struct sock *sk, int err)
 {
-       if (!sk_fullsock(sk)) {
-               if (sk->sk_state == TCP_NEW_SYN_RECV) {
-                       struct request_sock *req = inet_reqsk(sk);
+       int state = inet_sk_state_load(sk);
 
-                       local_bh_disable();
-                       inet_csk_reqsk_queue_drop(req->rsk_listener, req);
-                       local_bh_enable();
-                       return 0;
-               }
-               return -EOPNOTSUPP;
+       if (state == TCP_NEW_SYN_RECV) {
+               struct request_sock *req = inet_reqsk(sk);
+
+               local_bh_disable();
+               inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+               local_bh_enable();
+               return 0;
+       }
+       if (state == TCP_TIME_WAIT) {
+               struct inet_timewait_sock *tw = inet_twsk(sk);
+
+               refcount_inc(&tw->tw_refcnt);
+               local_bh_disable();
+               inet_twsk_deschedule_put(tw);
+               local_bh_enable();
+               return 0;
        }
 
        /* Don't race with userspace socket closes such as tcp_close. */
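The rewritten entry of tcp_abort() dispatches on the lockless inet_sk_state_load() value: request sockets are dropped from their listener's queue, TIME_WAIT sockets are descheduled and released, and only full sockets fall through to the locked reset path, so states that previously returned -EOPNOTSUPP can now be aborted. A small standalone sketch of that dispatch follows; the enum and return codes are invented stand-ins, not kernel code.

#include <stdio.h>

/* Toy model of the state dispatch at the top of tcp_abort().  The real
 * code operates on struct request_sock, inet_timewait_sock and full
 * struct sock objects.
 */
enum toy_state { TOY_NEW_SYN_RECV, TOY_TIME_WAIT, TOY_FULL_SOCK };

static int toy_abort(enum toy_state state)
{
        switch (state) {
        case TOY_NEW_SYN_RECV:
                /* models inet_csk_reqsk_queue_drop(): unlink the request
                 * socket from its listener, under local_bh_disable() */
                return 0;
        case TOY_TIME_WAIT:
                /* models inet_twsk_deschedule_put(): cancel the timewait
                 * timer and drop the reference taken just before */
                return 0;
        default:
                /* full sockets continue into the locked reset path */
                return 1;
        }
}

int main(void)
{
        printf("TIME_WAIT aborted early: %s\n",
               toy_abort(TOY_TIME_WAIT) == 0 ? "yes" : "no");
        return 0;
}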
@@ -4655,11 +4737,11 @@ void __init tcp_init(void)
        max_wshare = min(4UL*1024*1024, limit);
        max_rshare = min(6UL*1024*1024, limit);
 
-       init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
+       init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE;
        init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
        init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
-       init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
+       init_net.ipv4.sysctl_tcp_rmem[0] = PAGE_SIZE;
        init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
        init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);