net: deal with most data-races in sk_wait_event()
authorEric Dumazet <edumazet@google.com>
Tue, 9 May 2023 18:29:48 +0000 (18:29 +0000)
committerDavid S. Miller <davem@davemloft.net>
Wed, 10 May 2023 09:03:32 +0000 (10:03 +0100)
__condition is evaluated twice in sk_wait_event() macro.

First invocation is lockless, and reads can race with writes,
as spotted by syzbot.

BUG: KCSAN: data-race in sk_stream_wait_connect / tcp_disconnect

write to 0xffff88812d83d6a0 of 4 bytes by task 9065 on cpu 1:
tcp_disconnect+0x2cd/0xdb0
inet_shutdown+0x19e/0x1f0 net/ipv4/af_inet.c:911
__sys_shutdown_sock net/socket.c:2343 [inline]
__sys_shutdown net/socket.c:2355 [inline]
__do_sys_shutdown net/socket.c:2363 [inline]
__se_sys_shutdown+0xf8/0x140 net/socket.c:2361
__x64_sys_shutdown+0x31/0x40 net/socket.c:2361
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff88812d83d6a0 of 4 bytes by task 9040 on cpu 0:
sk_stream_wait_connect+0x1de/0x3a0 net/core/stream.c:75
tcp_sendmsg_locked+0x2e4/0x2120 net/ipv4/tcp.c:1266
tcp_sendmsg+0x30/0x50 net/ipv4/tcp.c:1484
inet6_sendmsg+0x63/0x80 net/ipv6/af_inet6.c:651
sock_sendmsg_nosec net/socket.c:724 [inline]
sock_sendmsg net/socket.c:747 [inline]
__sys_sendto+0x246/0x300 net/socket.c:2142
__do_sys_sendto net/socket.c:2154 [inline]
__se_sys_sendto net/socket.c:2150 [inline]
__x64_sys_sendto+0x78/0x90 net/socket.c:2150
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0x00000000 -> 0x00000068

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/core/stream.c
net/ipv4/tcp_bpf.c
net/llc/af_llc.c
net/smc/smc_close.c
net/smc/smc_rx.c
net/smc/smc_tx.c
net/tipc/socket.c
net/tls/tls_main.c

index 434446a..f5c4e47 100644 (file)
@@ -73,8 +73,8 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
                add_wait_queue(sk_sleep(sk), &wait);
                sk->sk_write_pending++;
                done = sk_wait_event(sk, timeo_p,
-                                    !sk->sk_err &&
-                                    !((1 << sk->sk_state) &
+                                    !READ_ONCE(sk->sk_err) &&
+                                    !((1 << READ_ONCE(sk->sk_state)) &
                                       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
                remove_wait_queue(sk_sleep(sk), &wait);
                sk->sk_write_pending--;
@@ -87,9 +87,9 @@ EXPORT_SYMBOL(sk_stream_wait_connect);
  * sk_stream_closing - Return 1 if we still have things to send in our buffers.
  * @sk: socket to verify
  */
-static inline int sk_stream_closing(struct sock *sk)
+static int sk_stream_closing(const struct sock *sk)
 {
-       return (1 << sk->sk_state) &
+       return (1 << READ_ONCE(sk->sk_state)) &
               (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
 }
 
@@ -142,8 +142,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                sk->sk_write_pending++;
-               sk_wait_event(sk, &current_timeo, sk->sk_err ||
-                                                 (sk->sk_shutdown & SEND_SHUTDOWN) ||
+               sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) ||
+                                                 (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
                                                  (sk_stream_memory_free(sk) &&
                                                  !vm_wait), &wait);
                sk->sk_write_pending--;
index ebf9175..2e95474 100644 (file)
@@ -168,7 +168,7 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        ret = sk_wait_event(sk, &timeo,
                            !list_empty(&psock->ingress_msg) ||
-                           !skb_queue_empty(&sk->sk_receive_queue), &wait);
+                           !skb_queue_empty_lockless(&sk->sk_receive_queue), &wait);
        sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        remove_wait_queue(sk_sleep(sk), &wait);
        return ret;
index da7fe94..9ffbc66 100644 (file)
@@ -583,7 +583,8 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
 
        add_wait_queue(sk_sleep(sk), &wait);
        while (1) {
-               if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait))
+               if (sk_wait_event(sk, &timeout,
+                                 READ_ONCE(sk->sk_state) == TCP_CLOSE, &wait))
                        break;
                rc = -ERESTARTSYS;
                if (signal_pending(current))
@@ -603,7 +604,8 @@ static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
 
        add_wait_queue(sk_sleep(sk), &wait);
        while (1) {
-               if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait))
+               if (sk_wait_event(sk, &timeout,
+                                 READ_ONCE(sk->sk_state) != TCP_SYN_SENT, &wait))
                        break;
                if (signal_pending(current) || !timeout)
                        break;
@@ -622,7 +624,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
        while (1) {
                rc = 0;
                if (sk_wait_event(sk, &timeout,
-                                 (sk->sk_shutdown & RCV_SHUTDOWN) ||
+                                 (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN) ||
                                  (!llc_data_accept_state(llc->state) &&
                                   !llc->remote_busy_flag &&
                                   !llc->p_flag), &wait))
index 31db743..dbdf03e 100644 (file)
@@ -67,8 +67,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
 
                rc = sk_wait_event(sk, &timeout,
                                   !smc_tx_prepared_sends(&smc->conn) ||
-                                  sk->sk_err == ECONNABORTED ||
-                                  sk->sk_err == ECONNRESET ||
+                                  READ_ONCE(sk->sk_err) == ECONNABORTED ||
+                                  READ_ONCE(sk->sk_err) == ECONNRESET ||
                                   smc->conn.killed,
                                   &wait);
                if (rc)
index 4380d32..9a2f363 100644 (file)
@@ -267,9 +267,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        add_wait_queue(sk_sleep(sk), &wait);
        rc = sk_wait_event(sk, timeo,
-                          sk->sk_err ||
+                          READ_ONCE(sk->sk_err) ||
                           cflags->peer_conn_abort ||
-                          sk->sk_shutdown & RCV_SHUTDOWN ||
+                          READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN ||
                           conn->killed ||
                           fcrit(conn),
                           &wait);
index f4b6a71..4512844 100644 (file)
@@ -113,8 +113,8 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
                        break; /* at least 1 byte of free & no urgent data */
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                sk_wait_event(sk, &timeo,
-                             sk->sk_err ||
-                             (sk->sk_shutdown & SEND_SHUTDOWN) ||
+                             READ_ONCE(sk->sk_err) ||
+                             (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
                              smc_cdc_rxed_any_close(conn) ||
                              (atomic_read(&conn->sndbuf_space) &&
                               !conn->urg_tx_pend),
index 37edfe1..dd73d71 100644 (file)
@@ -314,9 +314,9 @@ static void tsk_rej_rx_queue(struct sock *sk, int error)
                tipc_sk_respond(sk, skb, error);
 }
 
-static bool tipc_sk_connected(struct sock *sk)
+static bool tipc_sk_connected(const struct sock *sk)
 {
-       return sk->sk_state == TIPC_ESTABLISHED;
+       return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED;
 }
 
 /* tipc_sk_type_connectionless - check if the socket is datagram socket
index b32c112..f2e7302 100644 (file)
@@ -111,7 +111,8 @@ int wait_on_pending_writer(struct sock *sk, long *timeo)
                        break;
                }
 
-               if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait))
+               if (sk_wait_event(sk, timeo,
+                                 !READ_ONCE(sk->sk_write_pending), &wait))
                        break;
        }
        remove_wait_queue(sk_sleep(sk), &wait);