net: implement lockless SO_PRIORITY
author Eric Dumazet <edumazet@google.com>
Thu, 21 Sep 2023 20:28:11 +0000 (20:28 +0000)
committer David S. Miller <davem@davemloft.net>
Sun, 1 Oct 2023 18:09:54 +0000 (19:09 +0100)
This is a follow-up to 8bf43be799d4 ("net: annotate data-races
around sk->sk_priority").

sk->sk_priority can be read and written without holding the socket lock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
24 files changed:
drivers/net/ppp/pppoe.c
include/net/bluetooth/bluetooth.h
net/appletalk/aarp.c
net/ax25/af_ax25.c
net/bluetooth/l2cap_sock.c
net/can/j1939/socket.c
net/can/raw.c
net/core/sock.c
net/dccp/ipv6.c
net/ipv4/inet_diag.c
net/ipv4/ip_output.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv6/inet6_connection_sock.c
net/ipv6/ip6_output.c
net/ipv6/tcp_ipv6.c
net/mptcp/sockopt.c
net/netrom/af_netrom.c
net/rose/af_rose.c
net/sched/em_meta.c
net/sctp/ipv6.c
net/smc/af_smc.c
net/x25/af_x25.c
net/xdp/xsk.c

index ba8b6bd..8e7238e 100644 (file)
@@ -877,7 +877,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
 
        skb->dev = dev;
 
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
        skb->protocol = cpu_to_be16(ETH_P_PPP_SES);
 
        ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr));
index aa90adc..7ffa8c1 100644 (file)
@@ -541,7 +541,7 @@ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk,
                return ERR_PTR(-EFAULT);
        }
 
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
 
        return skb;
 }
index c7236da..9fa0b24 100644 (file)
@@ -664,7 +664,7 @@ out_unlock:
 
 sendit:
        if (skb->sk)
-               skb->priority = skb->sk->sk_priority;
+               skb->priority = READ_ONCE(skb->sk->sk_priority);
        if (dev_queue_xmit(skb))
                goto drop;
 sent:
index 5db805d..558e158 100644 (file)
@@ -939,7 +939,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
        sock_init_data(NULL, sk);
 
        sk->sk_type     = osk->sk_type;
-       sk->sk_priority = osk->sk_priority;
+       sk->sk_priority = READ_ONCE(osk->sk_priority);
        sk->sk_protocol = osk->sk_protocol;
        sk->sk_rcvbuf   = osk->sk_rcvbuf;
        sk->sk_sndbuf   = osk->sk_sndbuf;
index 3bdfc3f..e50d3d1 100644 (file)
@@ -1615,7 +1615,7 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
                return ERR_PTR(-ENOTCONN);
        }
 
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
 
        bt_cb(skb)->l2cap.chan = chan;
 
index b28c976..14c4316 100644 (file)
@@ -884,7 +884,7 @@ static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev,
        skcb = j1939_skb_to_cb(skb);
        memset(skcb, 0, sizeof(*skcb));
        skcb->addr = jsk->addr;
-       skcb->priority = j1939_prio(sk->sk_priority);
+       skcb->priority = j1939_prio(READ_ONCE(sk->sk_priority));
 
        if (msg->msg_name) {
                struct sockaddr_can *addr = msg->msg_name;
index d50c3f3..73468d2 100644 (file)
@@ -881,7 +881,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        }
 
        skb->dev = dev;
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = READ_ONCE(sk->sk_mark);
        skb->tstamp = sockc.transmit_time;
 
index a599575..1fdc0a0 100644 (file)
@@ -806,9 +806,7 @@ EXPORT_SYMBOL(sock_no_linger);
 
 void sock_set_priority(struct sock *sk, u32 priority)
 {
-       lock_sock(sk);
        WRITE_ONCE(sk->sk_priority, priority);
-       release_sock(sk);
 }
 EXPORT_SYMBOL(sock_set_priority);
 
@@ -1118,6 +1116,18 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
 
        valbool = val ? 1 : 0;
 
+       /* handle options which do not require locking the socket. */
+       switch (optname) {
+       case SO_PRIORITY:
+               if ((val >= 0 && val <= 6) ||
+                   sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
+                   sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+                       sock_set_priority(sk, val);
+                       return 0;
+               }
+               return -EPERM;
+       }
+
        sockopt_lock_sock(sk);
 
        switch (optname) {
@@ -1213,15 +1223,6 @@ set_sndbuf:
                sk->sk_no_check_tx = valbool;
                break;
 
-       case SO_PRIORITY:
-               if ((val >= 0 && val <= 6) ||
-                   sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
-                   sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
-                       WRITE_ONCE(sk->sk_priority, val);
-               else
-                       ret = -EPERM;
-               break;
-
        case SO_LINGER:
                if (optlen < sizeof(ling)) {
                        ret = -EINVAL;  /* 1003.1g */
index 80b956b..8d344b2 100644 (file)
@@ -239,7 +239,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
-                              np->tclass, sk->sk_priority);
+                              np->tclass, READ_ONCE(sk->sk_priority));
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }
index e13a844..9f0bd51 100644 (file)
@@ -165,7 +165,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
                 * For cgroup2 classid is always zero.
                 */
                if (!classid)
-                       classid = sk->sk_priority;
+                       classid = READ_ONCE(sk->sk_priority);
 
                if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
                        goto errout;
index 4ab877c..6b14097 100644 (file)
@@ -1449,7 +1449,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
                ip_options_build(skb, opt, cork->addr, rt);
        }
 
-       skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
+       skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
        skb->mark = cork->mark;
        skb->tstamp = cork->transmit_time;
        /*
index f13eb7e..95e972b 100644 (file)
@@ -828,7 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
                                   inet_twsk(sk)->tw_mark : sk->sk_mark;
                ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
-                                  inet_twsk(sk)->tw_priority : sk->sk_priority;
+                                  inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
                transmit_time = tcp_transmit_time(sk);
                xfrm_sk_clone_policy(ctl_sk, sk);
                txhash = (sk->sk_state == TCP_TIME_WAIT) ?
index eee8ab1..3f87611 100644 (file)
@@ -292,7 +292,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 
                tw->tw_transparent      = inet_test_bit(TRANSPARENT, sk);
                tw->tw_mark             = sk->sk_mark;
-               tw->tw_priority         = sk->sk_priority;
+               tw->tw_priority         = READ_ONCE(sk->sk_priority);
                tw->tw_rcv_wscale       = tp->rx_opt.rcv_wscale;
                tcptw->tw_rcv_nxt       = tp->rcv_nxt;
                tcptw->tw_snd_nxt       = tp->snd_nxt;
index 0c50dcd..80043e4 100644 (file)
@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
        fl6.daddr = sk->sk_v6_daddr;
 
        res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
-                      np->tclass,  sk->sk_priority);
+                      np->tclass, READ_ONCE(sk->sk_priority));
        rcu_read_unlock();
        return res;
 }
index 951ba80..cdaa927 100644 (file)
@@ -1984,7 +1984,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;
 
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = cork->base.mark;
        skb->tstamp = cork->base.transmit_time;
 
index 94afb8d..8a6e2e9 100644 (file)
@@ -565,7 +565,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
-                              opt, tclass, sk->sk_priority);
+                              opt, tclass, READ_ONCE(sk->sk_priority));
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }
@@ -1058,7 +1058,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
                        trace_tcp_send_reset(sk, skb);
                        if (inet6_test_bit(REPFLOW, sk))
                                label = ip6_flowlabel(ipv6h);
-                       priority = sk->sk_priority;
+                       priority = READ_ONCE(sk->sk_priority);
                        txhash = sk->sk_txhash;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
index 8260202..f3485a6 100644 (file)
@@ -89,7 +89,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
                        sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
                        break;
                case SO_PRIORITY:
-                       ssk->sk_priority = val;
+                       WRITE_ONCE(ssk->sk_priority, val);
                        break;
                case SO_SNDBUF:
                case SO_SNDBUFFORCE:
index 96e91ab..0eed001 100644 (file)
@@ -487,7 +487,7 @@ static struct sock *nr_make_new(struct sock *osk)
        sock_init_data(NULL, sk);
 
        sk->sk_type     = osk->sk_type;
-       sk->sk_priority = osk->sk_priority;
+       sk->sk_priority = READ_ONCE(osk->sk_priority);
        sk->sk_protocol = osk->sk_protocol;
        sk->sk_rcvbuf   = osk->sk_rcvbuf;
        sk->sk_sndbuf   = osk->sk_sndbuf;
index 49dafe9..0cc5a4e 100644 (file)
@@ -583,7 +583,7 @@ static struct sock *rose_make_new(struct sock *osk)
 #endif
 
        sk->sk_type     = osk->sk_type;
-       sk->sk_priority = osk->sk_priority;
+       sk->sk_priority = READ_ONCE(osk->sk_priority);
        sk->sk_protocol = osk->sk_protocol;
        sk->sk_rcvbuf   = osk->sk_rcvbuf;
        sk->sk_sndbuf   = osk->sk_sndbuf;
index da34fd4..09d8afd 100644 (file)
@@ -546,7 +546,7 @@ META_COLLECTOR(int_sk_prio)
                *err = -1;
                return;
        }
-       dst->value = sk->sk_priority;
+       dst->value = READ_ONCE(sk->sk_priority);
 }
 
 META_COLLECTOR(int_sk_rcvlowat)
index 5c0ed59..24368f7 100644 (file)
@@ -247,7 +247,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
                rcu_read_lock();
                res = ip6_xmit(sk, skb, fl6, sk->sk_mark,
                               rcu_dereference(np->opt),
-                              tclass, sk->sk_priority);
+                              tclass, READ_ONCE(sk->sk_priority));
                rcu_read_unlock();
                return res;
        }
index bacdd97..2976816 100644 (file)
@@ -493,7 +493,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
        nsk->sk_sndtimeo = osk->sk_sndtimeo;
        nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
        nsk->sk_mark = READ_ONCE(osk->sk_mark);
-       nsk->sk_priority = osk->sk_priority;
+       nsk->sk_priority = READ_ONCE(osk->sk_priority);
        nsk->sk_rcvlowat = osk->sk_rcvlowat;
        nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
        nsk->sk_err = osk->sk_err;
index 0fb5143..aad8ffe 100644 (file)
@@ -598,7 +598,7 @@ static struct sock *x25_make_new(struct sock *osk)
        x25 = x25_sk(sk);
 
        sk->sk_type        = osk->sk_type;
-       sk->sk_priority    = osk->sk_priority;
+       sk->sk_priority    = READ_ONCE(osk->sk_priority);
        sk->sk_protocol    = osk->sk_protocol;
        sk->sk_rcvbuf      = osk->sk_rcvbuf;
        sk->sk_sndbuf      = osk->sk_sndbuf;
index 7482d0a..f5e96e0 100644 (file)
@@ -684,7 +684,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
        }
 
        skb->dev = dev;
-       skb->priority = xs->sk.sk_priority;
+       skb->priority = READ_ONCE(xs->sk.sk_priority);
        skb->mark = READ_ONCE(xs->sk.sk_mark);
        skb->destructor = xsk_destruct_skb;
        xsk_set_destructor_arg(skb);