net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
   5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
  28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  29  *                                      by the above two patches.
  30  *           Andrea Arcangeli   :       If possible we block in connect(2)
  31  *                                      if the max backlog of the listen socket
  32  *                                      has been reached. This won't break
  33  *                                      old apps and it will avoid a huge number
  34  *                                      of hashed socks (this is for unix_gc()
  35  *                                      performance reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116
 117 #include "scm.h"
 118
 119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 120 EXPORT_SYMBOL_GPL(unix_socket_table);
 121 DEFINE_SPINLOCK(unix_table_lock);
 122 EXPORT_SYMBOL_GPL(unix_table_lock);
 123 static atomic_long_t unix_nr_socks;
 124
 125
 126 static struct hlist_head *unix_sockets_unbound(void *addr)
 127 {
 128         unsigned long hash = (unsigned long)addr;
 129
 130         hash ^= hash >> 16;
 131         hash ^= hash >> 8;
 132         hash %= UNIX_HASH_SIZE;
 133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134 }
 135
 136 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
 138 #ifdef CONFIG_SECURITY_NETWORK
 139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 140 {
 141         UNIXCB(skb).secid = scm->secid;
 142 }
 143
 144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 145 {
 146         scm->secid = UNIXCB(skb).secid;
 147 }
 148
 149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 150 {
 151         return (scm->secid == UNIXCB(skb).secid);
 152 }
 153 #else
 154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 155 { }
 156
 157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 161 {
 162         return true;
 163 }
 164 #endif /* CONFIG_SECURITY_NETWORK */
 165
 166 /*
 167  *  SMP locking strategy:
 168  *    hash table is protected with spinlock unix_table_lock
 169  *    each socket state is protected by separate spin lock.
 170  */
 171
 172 static inline unsigned int unix_hash_fold(__wsum n)
 173 {
 174         unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176         hash ^= hash>>8;
 177         return hash&(UNIX_HASH_SIZE-1);
 178 }
 179
 180 #define unix_peer(sk) (unix_sk(sk)->peer)
 181
 182 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 183 {
 184         return unix_peer(osk) == sk;
 185 }
 186
 187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188 {
 189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190 }
 191
 192 static inline int unix_recvq_full(struct sock const *sk)
 193 {
 194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 195 }
 196
 197 struct sock *unix_peer_get(struct sock *s)
 198 {
 199         struct sock *peer;
 200
 201         unix_state_lock(s);
 202         peer = unix_peer(s);
 203         if (peer)
 204                 sock_hold(peer);
 205         unix_state_unlock(s);
 206         return peer;
 207 }
 208 EXPORT_SYMBOL_GPL(unix_peer_get);
 209
 210 static inline void unix_release_addr(struct unix_address *addr)
 211 {
 212         if (refcount_dec_and_test(&addr->refcnt))
 213                 kfree(addr);
 214 }
 215
 216 /*
 217  *      Check unix socket name:
 218  *              - should be not zero length.
 219  *              - if started by not zero, should be NULL terminated (FS object)
 220  *              - if started by zero, it is abstract name.
 221  */
 222
 223 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 224 {
 225         *hashp = 0;
 226
 227         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 228                 return -EINVAL;
 229         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 230                 return -EINVAL;
 231         if (sunaddr->sun_path[0]) {
 232                 /*
 233                  * This may look like an off by one error but it is a bit more
 234                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 235                  * sun_path[108] doesn't as such exist.  However in kernel space
 236                  * we are guaranteed that it is a valid memory location in our
 237                  * kernel address buffer.
 238                  */
 239                 ((char *)sunaddr)[len] = 0;
 240                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 241                 return len;
 242         }
 243
 244         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 245         return len;
 246 }
 247
 248 static void __unix_remove_socket(struct sock *sk)
 249 {
 250         sk_del_node_init(sk);
 251 }
 252
 253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 254 {
 255         WARN_ON(!sk_unhashed(sk));
 256         sk_add_node(sk, list);
 257 }
 258
 259 static inline void unix_remove_socket(struct sock *sk)
 260 {
 261         spin_lock(&unix_table_lock);
 262         __unix_remove_socket(sk);
 263         spin_unlock(&unix_table_lock);
 264 }
 265
 266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 267 {
 268         spin_lock(&unix_table_lock);
 269         __unix_insert_socket(list, sk);
 270         spin_unlock(&unix_table_lock);
 271 }
 272
 273 static struct sock *__unix_find_socket_byname(struct net *net,
 274                                               struct sockaddr_un *sunname,
 275                                               int len, int type, unsigned int hash)
 276 {
 277         struct sock *s;
 278
 279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 280                 struct unix_sock *u = unix_sk(s);
 281
 282                 if (!net_eq(sock_net(s), net))
 283                         continue;
 284
 285                 if (u->addr->len == len &&
 286                     !memcmp(u->addr->name, sunname, len))
 287                         return s;
 288         }
 289         return NULL;
 290 }
 291
 292 static inline struct sock *unix_find_socket_byname(struct net *net,
 293                                                    struct sockaddr_un *sunname,
 294                                                    int len, int type,
 295                                                    unsigned int hash)
 296 {
 297         struct sock *s;
 298
 299         spin_lock(&unix_table_lock);
 300         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 301         if (s)
 302                 sock_hold(s);
 303         spin_unlock(&unix_table_lock);
 304         return s;
 305 }
 306
 307 static struct sock *unix_find_socket_byinode(struct inode *i)
 308 {
 309         struct sock *s;
 310
 311         spin_lock(&unix_table_lock);
 312         sk_for_each(s,
 313                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 314                 struct dentry *dentry = unix_sk(s)->path.dentry;
 315
 316                 if (dentry && d_backing_inode(dentry) == i) {
 317                         sock_hold(s);
 318                         goto found;
 319                 }
 320         }
 321         s = NULL;
 322 found:
 323         spin_unlock(&unix_table_lock);
 324         return s;
 325 }
 326
 327 /* Support code for asymmetrically connected dgram sockets
 328  *
 329  * If a datagram socket is connected to a socket not itself connected
 330  * to the first socket (eg, /dev/log), clients may only enqueue more
 331  * messages if the present receive queue of the server socket is not
 332  * "too large". This means there's a second writeability condition
 333  * poll and sendmsg need to test. The dgram recv code will do a wake
 334  * up on the peer_wait wait queue of a socket upon reception of a
 335  * datagram which needs to be propagated to sleeping would-be writers
 336  * since these might not have sent anything so far. This can't be
 337  * accomplished via poll_wait because the lifetime of the server
 338  * socket might be less than that of its clients if these break their
 339  * association with it or if the server socket is closed while clients
 340  * are still connected to it and there's no way to inform "a polling
 341  * implementation" that it should let go of a certain wait queue
 342  *
 343  * In order to propagate a wake up, a wait_queue_entry_t of the client
 344  * socket is enqueued on the peer_wait queue of the server socket
 345  * whose wake function does a wake_up on the ordinary client socket
 346  * wait queue. This connection is established whenever a write (or
 347  * poll for write) hit the flow control condition and broken when the
 348  * association to the server socket is dissolved or after a wake up
 349  * was relayed.
 350  */
 351
 352 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 353                                       void *key)
 354 {
 355         struct unix_sock *u;
 356         wait_queue_head_t *u_sleep;
 357
 358         u = container_of(q, struct unix_sock, peer_wake);
 359
 360         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 361                             q);
 362         u->peer_wake.private = NULL;
 363
 364         /* relaying can only happen while the wq still exists */
 365         u_sleep = sk_sleep(&u->sk);
 366         if (u_sleep)
 367                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 368
 369         return 0;
 370 }
 371
 372 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 373 {
 374         struct unix_sock *u, *u_other;
 375         int rc;
 376
 377         u = unix_sk(sk);
 378         u_other = unix_sk(other);
 379         rc = 0;
 380         spin_lock(&u_other->peer_wait.lock);
 381
 382         if (!u->peer_wake.private) {
 383                 u->peer_wake.private = other;
 384                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 385
 386                 rc = 1;
 387         }
 388
 389         spin_unlock(&u_other->peer_wait.lock);
 390         return rc;
 391 }
 392
 393 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 394                                             struct sock *other)
 395 {
 396         struct unix_sock *u, *u_other;
 397
 398         u = unix_sk(sk);
 399         u_other = unix_sk(other);
 400         spin_lock(&u_other->peer_wait.lock);
 401
 402         if (u->peer_wake.private == other) {
 403                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 404                 u->peer_wake.private = NULL;
 405         }
 406
 407         spin_unlock(&u_other->peer_wait.lock);
 408 }
 409
 410 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 411                                                    struct sock *other)
 412 {
 413         unix_dgram_peer_wake_disconnect(sk, other);
 414         wake_up_interruptible_poll(sk_sleep(sk),
 415                                    EPOLLOUT |
 416                                    EPOLLWRNORM |
 417                                    EPOLLWRBAND);
 418 }
 419
 420 /* preconditions:
 421  *      - unix_peer(sk) == other
 422  *      - association is stable
 423  */
 424 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 425 {
 426         int connected;
 427
 428         connected = unix_dgram_peer_wake_connect(sk, other);
 429
 430         /* If other is SOCK_DEAD, we want to make sure we signal
 431          * POLLOUT, such that a subsequent write() can get a
 432          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 433          * to other and its full, we will hang waiting for POLLOUT.
 434          */
 435         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 436                 return 1;
 437
 438         if (connected)
 439                 unix_dgram_peer_wake_disconnect(sk, other);
 440
 441         return 0;
 442 }
 443
 444 static int unix_writable(const struct sock *sk)
 445 {
 446         return sk->sk_state != TCP_LISTEN &&
 447                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 448 }
 449
 450 static void unix_write_space(struct sock *sk)
 451 {
 452         struct socket_wq *wq;
 453
 454         rcu_read_lock();
 455         if (unix_writable(sk)) {
 456                 wq = rcu_dereference(sk->sk_wq);
 457                 if (skwq_has_sleeper(wq))
 458                         wake_up_interruptible_sync_poll(&wq->wait,
 459                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 460                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 461         }
 462         rcu_read_unlock();
 463 }
 464
 465 /* When dgram socket disconnects (or changes its peer), we clear its receive
 466  * queue of packets arrived from previous peer. First, it allows to do
 467  * flow control based only on wmem_alloc; second, sk connected to peer
 468  * may receive messages only from that peer. */
 469 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 470 {
 471         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 472                 skb_queue_purge(&sk->sk_receive_queue);
 473                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 474
 475                 /* If one link of bidirectional dgram pipe is disconnected,
 476                  * we signal error. Messages are lost. Do not make this,
 477                  * when peer was not connected to us.
 478                  */
 479                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 480                         other->sk_err = ECONNRESET;
 481                         other->sk_error_report(other);
 482                 }
 483         }
 484 }
 485
 486 static void unix_sock_destructor(struct sock *sk)
 487 {
 488         struct unix_sock *u = unix_sk(sk);
 489
 490         skb_queue_purge(&sk->sk_receive_queue);
 491
 492         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 493         WARN_ON(!sk_unhashed(sk));
 494         WARN_ON(sk->sk_socket);
 495         if (!sock_flag(sk, SOCK_DEAD)) {
 496                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 497                 return;
 498         }
 499
 500         if (u->addr)
 501                 unix_release_addr(u->addr);
 502
 503         atomic_long_dec(&unix_nr_socks);
 504         local_bh_disable();
 505         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 506         local_bh_enable();
 507 #ifdef UNIX_REFCNT_DEBUG
 508         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 509                 atomic_long_read(&unix_nr_socks));
 510 #endif
 511 }
 512
 513 static void unix_release_sock(struct sock *sk, int embrion)
 514 {
 515         struct unix_sock *u = unix_sk(sk);
 516         struct path path;
 517         struct sock *skpair;
 518         struct sk_buff *skb;
 519         int state;
 520
 521         unix_remove_socket(sk);
 522
 523         /* Clear state */
 524         unix_state_lock(sk);
 525         sock_orphan(sk);
 526         sk->sk_shutdown = SHUTDOWN_MASK;
 527         path         = u->path;
 528         u->path.dentry = NULL;
 529         u->path.mnt = NULL;
 530         state = sk->sk_state;
 531         sk->sk_state = TCP_CLOSE;
 532         unix_state_unlock(sk);
 533
 534         wake_up_interruptible_all(&u->peer_wait);
 535
 536         skpair = unix_peer(sk);
 537
 538         if (skpair != NULL) {
 539                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 540                         unix_state_lock(skpair);
 541                         /* No more writes */
 542                         skpair->sk_shutdown = SHUTDOWN_MASK;
 543                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 544                                 skpair->sk_err = ECONNRESET;
 545                         unix_state_unlock(skpair);
 546                         skpair->sk_state_change(skpair);
 547                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 548                 }
 549
 550                 unix_dgram_peer_wake_disconnect(sk, skpair);
 551                 sock_put(skpair); /* It may now die */
 552                 unix_peer(sk) = NULL;
 553         }
 554
 555         /* Try to flush out this socket. Throw out buffers at least */
 556
 557         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 558                 if (state == TCP_LISTEN)
 559                         unix_release_sock(skb->sk, 1);
 560                 /* passed fds are erased in the kfree_skb hook        */
 561                 UNIXCB(skb).consumed = skb->len;
 562                 kfree_skb(skb);
 563         }
 564
 565         if (path.dentry)
 566                 path_put(&path);
 567
 568         sock_put(sk);
 569
 570         /* ---- Socket is dead now and most probably destroyed ---- */
 571
 572         /*
 573          * Fixme: BSD difference: In BSD all sockets connected to us get
 574          *        ECONNRESET and we die on the spot. In Linux we behave
 575          *        like files and pipes do and wait for the last
 576          *        dereference.
 577          *
 578          * Can't we simply set sock->err?
 579          *
 580          *        What the above comment does talk about? --ANK(980817)
 581          */
 582
 583         if (unix_tot_inflight)
 584                 unix_gc();              /* Garbage collect fds */
 585 }
 586
 587 static void init_peercred(struct sock *sk)
 588 {
 589         put_pid(sk->sk_peer_pid);
 590         if (sk->sk_peer_cred)
 591                 put_cred(sk->sk_peer_cred);
 592         sk->sk_peer_pid  = get_pid(task_tgid(current));
 593         sk->sk_peer_cred = get_current_cred();
 594 }
 595
 596 static void copy_peercred(struct sock *sk, struct sock *peersk)
 597 {
 598         put_pid(sk->sk_peer_pid);
 599         if (sk->sk_peer_cred)
 600                 put_cred(sk->sk_peer_cred);
 601         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 602         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 603 }
 604
 605 static int unix_listen(struct socket *sock, int backlog)
 606 {
 607         int err;
 608         struct sock *sk = sock->sk;
 609         struct unix_sock *u = unix_sk(sk);
 610         struct pid *old_pid = NULL;
 611
 612         err = -EOPNOTSUPP;
 613         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 614                 goto out;       /* Only stream/seqpacket sockets accept */
 615         err = -EINVAL;
 616         if (!u->addr)
 617                 goto out;       /* No listens on an unbound socket */
 618         unix_state_lock(sk);
 619         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 620                 goto out_unlock;
 621         if (backlog > sk->sk_max_ack_backlog)
 622                 wake_up_interruptible_all(&u->peer_wait);
 623         sk->sk_max_ack_backlog  = backlog;
 624         sk->sk_state            = TCP_LISTEN;
 625         /* set credentials so connect can copy them */
 626         init_peercred(sk);
 627         err = 0;
 628
 629 out_unlock:
 630         unix_state_unlock(sk);
 631         put_pid(old_pid);
 632 out:
 633         return err;
 634 }
 635
 636 static int unix_release(struct socket *);
 637 static int unix_bind(struct socket *, struct sockaddr *, int);
 638 static int unix_stream_connect(struct socket *, struct sockaddr *,
 639                                int addr_len, int flags);
 640 static int unix_socketpair(struct socket *, struct socket *);
 641 static int unix_accept(struct socket *, struct socket *, int, bool);
 642 static int unix_getname(struct socket *, struct sockaddr *, int);
 643 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 644 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 645                                     poll_table *);
 646 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 647 #ifdef CONFIG_COMPAT
 648 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 649 #endif
 650 static int unix_shutdown(struct socket *, int);
 651 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 652 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 653 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 654                                     size_t size, int flags);
 655 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 656                                        struct pipe_inode_info *, size_t size,
 657                                        unsigned int flags);
 658 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 659 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 660 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 661                               int, int);
 662 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 663 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 664                                   int);
 665
 666 static int unix_set_peek_off(struct sock *sk, int val)
 667 {
 668         struct unix_sock *u = unix_sk(sk);
 669
 670         if (mutex_lock_interruptible(&u->iolock))
 671                 return -EINTR;
 672
 673         sk->sk_peek_off = val;
 674         mutex_unlock(&u->iolock);
 675
 676         return 0;
 677 }
 678
 679
 680 static const struct proto_ops unix_stream_ops = {
 681         .family =       PF_UNIX,
 682         .owner =        THIS_MODULE,
 683         .release =      unix_release,
 684         .bind =         unix_bind,
 685         .connect =      unix_stream_connect,
 686         .socketpair =   unix_socketpair,
 687         .accept =       unix_accept,
 688         .getname =      unix_getname,
 689         .poll =         unix_poll,
 690         .ioctl =        unix_ioctl,
 691 #ifdef CONFIG_COMPAT
 692         .compat_ioctl = unix_compat_ioctl,
 693 #endif
 694         .listen =       unix_listen,
 695         .shutdown =     unix_shutdown,
 696         .setsockopt =   sock_no_setsockopt,
 697         .getsockopt =   sock_no_getsockopt,
 698         .sendmsg =      unix_stream_sendmsg,
 699         .recvmsg =      unix_stream_recvmsg,
 700         .mmap =         sock_no_mmap,
 701         .sendpage =     unix_stream_sendpage,
 702         .splice_read =  unix_stream_splice_read,
 703         .set_peek_off = unix_set_peek_off,
 704 };
 705
 706 static const struct proto_ops unix_dgram_ops = {
 707         .family =       PF_UNIX,
 708         .owner =        THIS_MODULE,
 709         .release =      unix_release,
 710         .bind =         unix_bind,
 711         .connect =      unix_dgram_connect,
 712         .socketpair =   unix_socketpair,
 713         .accept =       sock_no_accept,
 714         .getname =      unix_getname,
 715         .poll =         unix_dgram_poll,
 716         .ioctl =        unix_ioctl,
 717 #ifdef CONFIG_COMPAT
 718         .compat_ioctl = unix_compat_ioctl,
 719 #endif
 720         .listen =       sock_no_listen,
 721         .shutdown =     unix_shutdown,
 722         .setsockopt =   sock_no_setsockopt,
 723         .getsockopt =   sock_no_getsockopt,
 724         .sendmsg =      unix_dgram_sendmsg,
 725         .recvmsg =      unix_dgram_recvmsg,
 726         .mmap =         sock_no_mmap,
 727         .sendpage =     sock_no_sendpage,
 728         .set_peek_off = unix_set_peek_off,
 729 };
 730
 731 static const struct proto_ops unix_seqpacket_ops = {
 732         .family =       PF_UNIX,
 733         .owner =        THIS_MODULE,
 734         .release =      unix_release,
 735         .bind =         unix_bind,
 736         .connect =      unix_stream_connect,
 737         .socketpair =   unix_socketpair,
 738         .accept =       unix_accept,
 739         .getname =      unix_getname,
 740         .poll =         unix_dgram_poll,
 741         .ioctl =        unix_ioctl,
 742 #ifdef CONFIG_COMPAT
 743         .compat_ioctl = unix_compat_ioctl,
 744 #endif
 745         .listen =       unix_listen,
 746         .shutdown =     unix_shutdown,
 747         .setsockopt =   sock_no_setsockopt,
 748         .getsockopt =   sock_no_getsockopt,
 749         .sendmsg =      unix_seqpacket_sendmsg,
 750         .recvmsg =      unix_seqpacket_recvmsg,
 751         .mmap =         sock_no_mmap,
 752         .sendpage =     sock_no_sendpage,
 753         .set_peek_off = unix_set_peek_off,
 754 };
 755
 756 static struct proto unix_proto = {
 757         .name                   = "UNIX",
 758         .owner                  = THIS_MODULE,
 759         .obj_size               = sizeof(struct unix_sock),
 760 };
 761
 762 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 763 {
 764         struct sock *sk = NULL;
 765         struct unix_sock *u;
 766
 767         atomic_long_inc(&unix_nr_socks);
 768         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 769                 goto out;
 770
 771         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 772         if (!sk)
 773                 goto out;
 774
 775         sock_init_data(sock, sk);
 776
 777         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 778         sk->sk_write_space      = unix_write_space;
 779         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 780         sk->sk_destruct         = unix_sock_destructor;
 781         u         = unix_sk(sk);
 782         u->path.dentry = NULL;
 783         u->path.mnt = NULL;
 784         spin_lock_init(&u->lock);
 785         atomic_long_set(&u->inflight, 0);
 786         INIT_LIST_HEAD(&u->link);
 787         mutex_init(&u->iolock); /* single task reading lock */
 788         mutex_init(&u->bindlock); /* single task binding lock */
 789         init_waitqueue_head(&u->peer_wait);
 790         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 791         unix_insert_socket(unix_sockets_unbound(sk), sk);
 792 out:
 793         if (sk == NULL)
 794                 atomic_long_dec(&unix_nr_socks);
 795         else {
 796                 local_bh_disable();
 797                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 798                 local_bh_enable();
 799         }
 800         return sk;
 801 }
 802
 803 static int unix_create(struct net *net, struct socket *sock, int protocol,
 804                        int kern)
 805 {
 806         if (protocol && protocol != PF_UNIX)
 807                 return -EPROTONOSUPPORT;
 808
 809         sock->state = SS_UNCONNECTED;
 810
 811         switch (sock->type) {
 812         case SOCK_STREAM:
 813                 sock->ops = &unix_stream_ops;
 814                 break;
 815                 /*
 816                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 817                  *      nothing uses it.
 818                  */
 819         case SOCK_RAW:
 820                 sock->type = SOCK_DGRAM;
 821                 /* fall through */
 822         case SOCK_DGRAM:
 823                 sock->ops = &unix_dgram_ops;
 824                 break;
 825         case SOCK_SEQPACKET:
 826                 sock->ops = &unix_seqpacket_ops;
 827                 break;
 828         default:
 829                 return -ESOCKTNOSUPPORT;
 830         }
 831
 832         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 833 }
 834
 835 static int unix_release(struct socket *sock)
 836 {
 837         struct sock *sk = sock->sk;
 838
 839         if (!sk)
 840                 return 0;
 841
 842         unix_release_sock(sk, 0);
 843         sock->sk = NULL;
 844
 845         return 0;
 846 }
 847
 /*
  * Bind a still-unbound socket to an automatically generated name.
  *
  * The address is zero-allocated, so sun_path[0] stays NUL; five hex
  * digits taken from the static counter 'ordernum' are written after it.
  * Candidate names are tried until __unix_find_socket_byname() finds no
  * socket using the name.
  *
  * Returns 0 on success or when the socket already has an address, the
  * error from mutex_lock_interruptible(), -ENOMEM when the address cannot
  * be allocated, or -ENOSPC once 0x100000 candidates were found in use.
  */
 848 static int unix_autobind(struct socket *sock)
 849 {
 850         struct sock *sk = sock->sk;
 851         struct net *net = sock_net(sk);
 852         struct unix_sock *u = unix_sk(sk);
 853         static u32 ordernum = 1;
 854         struct unix_address *addr;
 855         int err;
 856         unsigned int retries = 0;
 857
 858         err = mutex_lock_interruptible(&u->bindlock);
 859         if (err)
 860                 return err;
 861
             /* Already bound: nothing to do, report success. */
 862         err = 0;
 863         if (u->addr)
 864                 goto out;
 865
             /* Header, sun_family and 16 bytes for the generated name. */
 866         err = -ENOMEM;
 867         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 868         if (!addr)
 869                 goto out;
 870
 871         addr->name->sun_family = AF_UNIX;
 872         refcount_set(&addr->refcnt, 1);
 873
 874 retry:
             /* Length: hex digits + the leading NUL byte + sun_family. */
 875         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 876         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 877
 878         spin_lock(&unix_table_lock);
             /* The counter is advanced under unix_table_lock, wrapping at 20 bits. */
 879         ordernum = (ordernum+1)&0xFFFFF;
 880
 881         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 882                                       addr->hash)) {
 883                 spin_unlock(&unix_table_lock);
 884                 /*
 885                  * __unix_find_socket_byname() may take a long time if many
 886                  * names are already in use.
 887                  */
 888                 cond_resched();
 889                 /* Give up if all names seem to be in use. */
 890                 if (retries++ == 0xFFFFF) {
 891                         err = -ENOSPC;
 892                         kfree(addr);
 893                         goto out;
 894                 }
 895                 goto retry;
 896         }
             /* Fold the socket type into the hash, as unix_bind() does. */
 897         addr->hash ^= sk->sk_type;
 898
             /*
              * Still under unix_table_lock: take the sock off its current
              * chain, publish the address with release semantics and put
              * the sock on the chain selected by the new hash.
              */
 899         __unix_remove_socket(sk);
 900         smp_store_release(&u->addr, addr);
 901         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 902         spin_unlock(&unix_table_lock);
 903         err = 0;
 904
 905 out:    mutex_unlock(&u->bindlock);
 906         return err;
 907 }
 908
 /*
  * Find the socket that a caller-supplied address refers to.
  *
  * A name whose sun_path starts with a non-NUL byte is resolved with
  * kern_path(); inode_permission(MAY_WRITE) must succeed, the inode must
  * be a socket and the sock found for it must have the requested type.
  * Any other name is looked up with unix_find_socket_byname().
  *
  * Returns the sock on success and leaves *error untouched.  On failure
  * NULL is returned and *error holds the negative errno.
  */
 static struct sock *unix_find_other(struct net *net,
                                     struct sockaddr_un *sunname, int len,
                                     int type, unsigned int hash, int *error)
 {
         struct sock *found;
         struct inode *inode;
         struct path path;
         int err;

         if (!sunname->sun_path[0]) {
                 /* Name starts with NUL: lookup by name only. */
                 found = unix_find_socket_byname(net, sunname, len, type, hash);
                 if (!found) {
                         err = -ECONNREFUSED;
                         goto fail;
                 }
                 if (unix_sk(found)->path.dentry)
                         touch_atime(&unix_sk(found)->path);
                 return found;
         }

         err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
         if (err)
                 goto fail;

         inode = d_backing_inode(path.dentry);
         err = inode_permission(inode, MAY_WRITE);
         if (err)
                 goto put_fail;

         err = -ECONNREFUSED;
         if (!S_ISSOCK(inode->i_mode))
                 goto put_fail;

         found = unix_find_socket_byinode(inode);
         if (!found)
                 goto put_fail;

         if (found->sk_type != type) {
                 /* Wrong socket type: no atime update, drop both references. */
                 path_put(&path);
                 sock_put(found);
                 err = -EPROTOTYPE;
                 goto fail;
         }

         touch_atime(&path);
         path_put(&path);
         return found;

 put_fail:
         path_put(&path);
 fail:
         *error = err;
         return NULL;
 }
 963
 964 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 965 {
 966         struct dentry *dentry;
 967         struct path path;
 968         int err = 0;
 969         /*
 970          * Get the parent directory, calculate the hash for last
 971          * component.
 972          */
 973         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 974         err = PTR_ERR(dentry);
 975         if (IS_ERR(dentry))
 976                 return err;
 977
 978         /*
 979          * All right, let's create it.
 980          */
 981         err = security_path_mknod(&path, dentry, mode, 0);
 982         if (!err) {
 983                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 984                 if (!err) {
 985                         res->mnt = mntget(path.mnt);
 986                         res->dentry = dget(dentry);
 987                 }
 988         }
 989         done_path_create(&path, dentry);
 990         return err;
 991 }
 992
 /*
  * Bind a socket to the address supplied by the caller.
  *
  * An address that consists of the family only (addr_len == sizeof(short))
  * is handed to unix_autobind().  Otherwise the name is passed through
  * unix_mkname(), which yields the hash and the length used from then on.
  * For a name whose sun_path starts with a non-NUL byte a socket node is
  * created with unix_mknod() and the sock is hashed by inode number.  Any
  * other name must not be found by __unix_find_socket_byname() and the
  * sock is hashed by name.
  *
  * Returns 0 on success or a negative errno: -EINVAL for a too-short
  * address, a family other than AF_UNIX or an already bound socket;
  * -EADDRINUSE when the name is taken (including -EEXIST from
  * unix_mknod()); -ENOMEM; or the error returned by unix_mkname(),
  * unix_mknod(), unix_autobind() or mutex_lock_interruptible().
  */
 993 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 994 {
 995         struct sock *sk = sock->sk;
 996         struct net *net = sock_net(sk);
 997         struct unix_sock *u = unix_sk(sk);
 998         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 999         char *sun_path = sunaddr->sun_path;
1000         int err;
1001         unsigned int hash;
1002         struct unix_address *addr;
1003         struct hlist_head *list;
1004         struct path path = { };
1005
1006         err = -EINVAL;
1007         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1008             sunaddr->sun_family != AF_UNIX)
1009                 goto out;
1010
             /* Family only, no name: pick a name automatically. */
1011         if (addr_len == sizeof(short)) {
1012                 err = unix_autobind(sock);
1013                 goto out;
1014         }
1015
             /* A non-negative return is the address length to use below. */
1016         err = unix_mkname(sunaddr, addr_len, &hash);
1017         if (err < 0)
1018                 goto out;
1019         addr_len = err;
1020
             /*
              * Create the socket node before bindlock is taken.
              * NOTE(review): the later error paths only drop the path
              * reference (out_put); the node created here is not removed
              * again.  Confirm whether that is intended.
              */
1021         if (sun_path[0]) {
1022                 umode_t mode = S_IFSOCK |
1023                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1024                 err = unix_mknod(sun_path, mode, &path);
1025                 if (err) {
1026                         if (err == -EEXIST)
1027                                 err = -EADDRINUSE;
1028                         goto out;
1029                 }
1030         }
1031
1032         err = mutex_lock_interruptible(&u->bindlock);
1033         if (err)
1034                 goto out_put;
1035
             /* A socket that already has an address cannot be bound again. */
1036         err = -EINVAL;
1037         if (u->addr)
1038                 goto out_up;
1039
1040         err = -ENOMEM;
1041         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1042         if (!addr)
1043                 goto out_up;
1044
1045         memcpy(addr->name, sunaddr, addr_len);
1046         addr->len = addr_len;
1047         addr->hash = hash ^ sk->sk_type;
1048         refcount_set(&addr->refcnt, 1);
1049
1050         if (sun_path[0]) {
                     /*
                      * Filesystem name: the chain is chosen from the inode
                      * number.  addr->hash is overwritten with UNIX_HASH_SIZE,
                      * presumably to mark the address as not hashed by name
                      * (TODO confirm).
                      */
1051                 addr->hash = UNIX_HASH_SIZE;
1052                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1053                 spin_lock(&unix_table_lock);
1054                 u->path = path;
1055                 list = &unix_socket_table[hash];
1056         } else {
                     /* Name starting with NUL: it must not be in use already. */
1057                 spin_lock(&unix_table_lock);
1058                 err = -EADDRINUSE;
1059                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1060                                               sk->sk_type, hash)) {
1061                         unix_release_addr(addr);
1062                         goto out_unlock;
1063                 }
1064
1065                 list = &unix_socket_table[addr->hash];
1066         }
1067
             /*
              * Under unix_table_lock: move the sock from its current chain
              * to the chosen one and publish the address with release
              * semantics.
              */
1068         err = 0;
1069         __unix_remove_socket(sk);
1070         smp_store_release(&u->addr, addr);
1071         __unix_insert_socket(list, sk);
1072
1073 out_unlock:
1074         spin_unlock(&unix_table_lock);
1075 out_up:
1076         mutex_unlock(&u->bindlock);
1077 out_put:
             /* On success the path reference now belongs to u->path. */
1078         if (err)
1079                 path_put(&path);
1080 out:
1081         return err;
1082 }
1083
/*
 * Take the state locks of two socks.
 *
 * When sk2 is NULL or identical to sk1 only sk1 is locked.  Otherwise the
 * sock with the lower address is locked first and the other one is taken
 * with unix_state_lock_nested().
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first = sk1;
        struct sock *second = sk2;

        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 > sk2) {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1098
/*
 * Release the state locks taken by unix_state_double_lock().
 *
 * sk1 is always unlocked; sk2 only when it is non-NULL and differs
 * from sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (sk2 && likely(sk1 != sk2))
                unix_state_unlock(sk2);
}
1108
/*
 * Set, replace or clear the peer of a socket.
 *
 * With a family other than AF_UNSPEC the destination is looked up with
 * unix_find_other() and, if unix_may_send() and security_unix_may_send()
 * allow it, becomes the new peer.  With AF_UNSPEC the peer is set to NULL.
 *
 * Returns 0 on success, -EINVAL when alen is too short to hold sa_family,
 * -EPERM when unix_may_send() refuses, or the error produced by
 * unix_mkname(), unix_autobind(), unix_find_other() or
 * security_unix_may_send().
 */
1109 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1110                               int alen, int flags)
1111 {
1112         struct sock *sk = sock->sk;
1113         struct net *net = sock_net(sk);
1114         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1115         struct sock *other;
1116         unsigned int hash;
1117         int err;
1118
1119         err = -EINVAL;
1120         if (alen < offsetofend(struct sockaddr, sa_family))
1121                 goto out;
1122
1123         if (addr->sa_family != AF_UNSPEC) {
1124                 err = unix_mkname(sunaddr, alen, &hash);
1125                 if (err < 0)
1126                         goto out;
1127                 alen = err;
1128
                     /* With SOCK_PASSCRED set an unbound socket is autobound first. */
1129                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1130                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1131                         goto out;
1132
1133 restart:
                     /*
                      * On failure unix_find_other() has stored the error in
                      * err.  On success the sock is dropped again with
                      * sock_put() on every failure path below.
                      */
1134                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1135                 if (!other)
1136                         goto out;
1137
1138                 unix_state_double_lock(sk, other);
1139
1140                 /* Apparently VFS overslept socket death. Retry. */
1141                 if (sock_flag(other, SOCK_DEAD)) {
1142                         unix_state_double_unlock(sk, other);
1143                         sock_put(other);
1144                         goto restart;
1145                 }
1146
1147                 err = -EPERM;
1148                 if (!unix_may_send(sk, other))
1149                         goto out_unlock;
1150
1151                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1152                 if (err)
1153                         goto out_unlock;
1154
1155         } else {
1156                 /*
1157                  *      1003.1g breaking connected state with AF_UNSPEC
1158                  */
1159                 other = NULL;
                     /* With other == NULL only sk's state lock is taken. */
1160                 unix_state_double_lock(sk, other);
1161         }
1162
1163         /*
1164          * If it was connected, reconnect.
1165          */
1166         if (unix_peer(sk)) {
1167                 struct sock *old_peer = unix_peer(sk);
1168                 unix_peer(sk) = other;
1169                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1170
1171                 unix_state_double_unlock(sk, other);
1172
                     /* Runs after the locks are dropped; skipped when the peer is unchanged. */
1173                 if (other != old_peer)
1174                         unix_dgram_disconnected(sk, old_peer);
                     /* Drop the reference that went with the old peer pointer. */
1175                 sock_put(old_peer);
1176         } else {
                     /*
                      * No sock_put(other) on success: the reference from
                      * unix_find_other() stays with unix_peer(sk).
                      */
1177                 unix_peer(sk) = other;
1178                 unix_state_double_unlock(sk, other);
1179         }
1180         return 0;
1181
1182 out_unlock:
1183         unix_state_double_unlock(sk, other);
1184         sock_put(other);
1185 out:
1186         return err;
1187 }
1188
/*
 * Wait on other's peer_wait queue for at most timeo.
 *
 * The caller must hold other's state lock; it is released here with
 * unix_state_unlock() and is not re-acquired.  The task only sleeps if
 * other is not SOCK_DEAD, has no RCV_SHUTDOWN set and unix_recvq_full()
 * still reports a full queue.
 *
 * Returns the value of schedule_timeout() when the task slept, otherwise
 * timeo unchanged.
 */
1189 static long unix_wait_for_peer(struct sock *other, long timeo)
1190 {
1191         struct unix_sock *u = unix_sk(other);
1192         int sched;
1193         DEFINE_WAIT(wait);
1194
             /*
              * Get on the wait queue before testing the condition,
              * presumably so that a wakeup arriving between the test and
              * schedule_timeout() is not missed.
              */
1195         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1196
             /* Evaluated while other's state lock is still held. */
1197         sched = !sock_flag(other, SOCK_DEAD) &&
1198                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1199                 unix_recvq_full(other);
1200
1201         unix_state_unlock(other);
1202
1203         if (sched)
1204                 timeo = schedule_timeout(timeo);
1205
1206         finish_wait(&u->peer_wait, &wait);
1207         return timeo;
1208 }
1209
/*
 * Connect a socket to a listening socket.
 *
 * The sock for the far end of the connection (newsk) and a one-byte skb
 * are allocated up front.  The listener named by uaddr is then looked up
 * and its state lock is taken.  Once our own state lock is held as well,
 * sk and newsk are made each other's peer, newsk inherits the listener's
 * address and path, and the skb is queued on the listener's receive
 * queue.  Presumably sock_wmalloc() ties the skb to newsk, which would be
 * how unix_accept() finds newsk through skb->sk (TODO confirm).
 *
 * Returns 0 on success or a negative errno:
 *   -ENOMEM        newsk or the skb could not be allocated
 *   -ECONNREFUSED  target is not in TCP_LISTEN or has RCV_SHUTDOWN set
 *   -EAGAIN        listener's queue is full and the send timeout is zero
 *   -EISCONN       sk is already TCP_ESTABLISHED
 *   -EINVAL        sk is in any state other than TCP_CLOSE/TCP_ESTABLISHED
 *   sock_intr_errno(timeo) when a signal is pending after waiting
 *   or the error from unix_mkname(), unix_autobind(), unix_find_other()
 *   or security_unix_stream_connect().
 */
1210 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1211                                int addr_len, int flags)
1212 {
1213         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1214         struct sock *sk = sock->sk;
1215         struct net *net = sock_net(sk);
1216         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1217         struct sock *newsk = NULL;
1218         struct sock *other = NULL;
1219         struct sk_buff *skb = NULL;
1220         unsigned int hash;
1221         int st;
1222         int err;
1223         long timeo;
1224
1225         err = unix_mkname(sunaddr, addr_len, &hash);
1226         if (err < 0)
1227                 goto out;
1228         addr_len = err;
1229
             /* With SOCK_PASSCRED set an unbound socket is autobound first. */
1230         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1231             (err = unix_autobind(sock)) != 0)
1232                 goto out;
1233
1234         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1235
1236         /* First of all allocate resources.
1237            If we will make it after state is locked,
1238            we will have to recheck all again in any case.
1239          */
1240
1241         err = -ENOMEM;
1242
1243         /* create new sock for complete connection */
1244         newsk = unix_create1(sock_net(sk), NULL, 0);
1245         if (newsk == NULL)
1246                 goto out;
1247
1248         /* Allocate skb for sending to listening sock */
1249         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1250         if (skb == NULL)
1251                 goto out;
1252
1253 restart:
1254         /*  Find listening sock. */
1255         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1256         if (!other)
1257                 goto out;
1258
1259         /* Latch state of peer */
1260         unix_state_lock(other);
1261
1262         /* Apparently VFS overslept socket death. Retry. */
1263         if (sock_flag(other, SOCK_DEAD)) {
1264                 unix_state_unlock(other);
1265                 sock_put(other);
1266                 goto restart;
1267         }
1268
1269         err = -ECONNREFUSED;
1270         if (other->sk_state != TCP_LISTEN)
1271                 goto out_unlock;
1272         if (other->sk_shutdown & RCV_SHUTDOWN)
1273                 goto out_unlock;
1274
1275         if (unix_recvq_full(other)) {
1276                 err = -EAGAIN;
1277                 if (!timeo)
1278                         goto out_unlock;
1279
                     /* unix_wait_for_peer() releases other's state lock. */
1280                 timeo = unix_wait_for_peer(other, timeo);
1281
1282                 err = sock_intr_errno(timeo);
                     /* 'out' still drops the reference on other. */
1283                 if (signal_pending(current))
1284                         goto out;
1285                 sock_put(other);
1286                 goto restart;
1287         }
1288
1289         /* Latch our state.
1290
1291            It is a tricky place. We need to grab our state lock and cannot
1292            drop lock on peer. It is dangerous because deadlock is
1293            possible. Connect to self case and simultaneous
1294            attempt to connect are eliminated by checking socket
1295            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1296            check this before attempt to grab lock.
1297
1298            Well, and we have to recheck the state after socket locked.
1299          */
1300         st = sk->sk_state;
1301
1302         switch (st) {
1303         case TCP_CLOSE:
1304                 /* This is ok... continue with connect */
1305                 break;
1306         case TCP_ESTABLISHED:
1307                 /* Socket is already connected */
1308                 err = -EISCONN;
1309                 goto out_unlock;
1310         default:
1311                 err = -EINVAL;
1312                 goto out_unlock;
1313         }
1314
1315         unix_state_lock_nested(sk);
1316
             /* State changed between the unlocked read and the lock: start over. */
1317         if (sk->sk_state != st) {
1318                 unix_state_unlock(sk);
1319                 unix_state_unlock(other);
1320                 sock_put(other);
1321                 goto restart;
1322         }
1323
1324         err = security_unix_stream_connect(sk, other, newsk);
1325         if (err) {
1326                 unix_state_unlock(sk);
1327                 goto out_unlock;
1328         }
1329
1330         /* The way is open! Fastly set all the necessary fields... */
1331
             /* Reference held for newsk's peer pointer to sk. */
1332         sock_hold(sk);
1333         unix_peer(newsk)        = sk;
1334         newsk->sk_state         = TCP_ESTABLISHED;
1335         newsk->sk_type          = sk->sk_type;
1336         init_peercred(newsk);
1337         newu = unix_sk(newsk);
1338         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1339         otheru = unix_sk(other);
1340
1341         /* copy address information from listening to new sock
1342          *
1343          * The contents of *(otheru->addr) and otheru->path
1344          * are seen fully set up here, since we have found
1345          * otheru in hash under unix_table_lock.  Insertion
1346          * into the hash chain we'd found it in had been done
1347          * in an earlier critical area protected by unix_table_lock,
1348          * the same one where we'd set *(otheru->addr) contents,
1349          * as well as otheru->path and otheru->addr itself.
1350          *
1351          * Using smp_store_release() here to set newu->addr
1352          * is enough to make those stores, as well as stores
1353          * to newu->path visible to anyone who gets newu->addr
1354          * by smp_load_acquire().  IOW, the same warranties
1355          * as for unix_sock instances bound in unix_bind() or
1356          * in unix_autobind().
1357          */
1358         if (otheru->path.dentry) {
1359                 path_get(&otheru->path);
1360                 newu->path = otheru->path;
1361         }
1362         refcount_inc(&otheru->addr->refcnt);
1363         smp_store_release(&newu->addr, otheru->addr);
1364
1365         /* Set credentials */
1366         copy_peercred(sk, other);
1367
1368         sock->state     = SS_CONNECTED;
1369         sk->sk_state    = TCP_ESTABLISHED;
             /* Reference held for sk's peer pointer to newsk. */
1370         sock_hold(newsk);
1371
1372         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1373         unix_peer(sk)   = newsk;
1374
1375         unix_state_unlock(sk);
1376
1377         /* take ten and send info to listening sock */
1378         spin_lock(&other->sk_receive_queue.lock);
1379         __skb_queue_tail(&other->sk_receive_queue, skb);
1380         spin_unlock(&other->sk_receive_queue.lock);
1381         unix_state_unlock(other);
1382         other->sk_data_ready(other);
1383         sock_put(other);
1384         return 0;
1385
1386 out_unlock:
1387         if (other)
1388                 unix_state_unlock(other);
1389
1390 out:
             /* Shared cleanup: skb, newsk and other may each be NULL here. */
1391         kfree_skb(skb);
1392         if (newsk)
1393                 unix_release_sock(newsk, 0);
1394         if (other)
1395                 sock_put(other);
1396         return err;
1397 }
1398
1399 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1400 {
1401         struct sock *ska = socka->sk, *skb = sockb->sk;
1402
1403         /* Join our sockets back to back */
1404         sock_hold(ska);
1405         sock_hold(skb);
1406         unix_peer(ska) = skb;
1407         unix_peer(skb) = ska;
1408         init_peercred(ska);
1409         init_peercred(skb);
1410
1411         if (ska->sk_type != SOCK_DGRAM) {
1412                 ska->sk_state = TCP_ESTABLISHED;
1413                 skb->sk_state = TCP_ESTABLISHED;
1414                 socka->state  = SS_CONNECTED;
1415                 sockb->state  = SS_CONNECTED;
1416         }
1417         return 0;
1418 }
1419
1420 static void unix_sock_inherit_flags(const struct socket *old,
1421                                     struct socket *new)
1422 {
1423         if (test_bit(SOCK_PASSCRED, &old->flags))
1424                 set_bit(SOCK_PASSCRED, &new->flags);
1425         if (test_bit(SOCK_PASSSEC, &old->flags))
1426                 set_bit(SOCK_PASSSEC, &new->flags);
1427 }
1428
1429 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1430                        bool kern)
1431 {
1432         struct sock *sk = sock->sk;
1433         struct sock *tsk;
1434         struct sk_buff *skb;
1435         int err;
1436
1437         err = -EOPNOTSUPP;
1438         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1439                 goto out;
1440
1441         err = -EINVAL;
1442         if (sk->sk_state != TCP_LISTEN)
1443                 goto out;
1444
1445         /* If socket state is TCP_LISTEN it cannot change (for now...),
1446          * so that no locks are necessary.
1447          */
1448
1449         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1450         if (!skb) {
1451                 /* This means receive shutdown. */
1452                 if (err == 0)
1453                         err = -EINVAL;
1454                 goto out;
1455         }
1456
1457         tsk = skb->sk;
1458         skb_free_datagram(sk, skb);
1459         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1460
1461         /* attach accepted sock to socket */
1462         unix_state_lock(tsk);
1463         newsock->state = SS_CONNECTED;
1464         unix_sock_inherit_flags(sock, newsock);
1465         sock_graft(tsk, newsock);
1466         unix_state_unlock(tsk);
1467         return 0;
1468
1469 out:
1470         return err;
1471 }
1472
1473
1474 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1475 {
1476         struct sock *sk = sock->sk;
1477         struct unix_address *addr;
1478         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1479         int err = 0;
1480
1481         if (peer) {
1482                 sk = unix_peer_get(sk);
1483
1484                 err = -ENOTCONN;
1485                 if (!sk)
1486                         goto out;
1487                 err = 0;
1488         } else {
1489                 sock_hold(sk);
1490         }
1491
1492         addr = smp_load_acquire(&unix_sk(sk)->addr);
1493         if (!addr) {
1494                 sunaddr->sun_family = AF_UNIX;
1495                 sunaddr->sun_path[0] = 0;
1496                 err = sizeof(short);
1497         } else {
1498                 err = addr->len;
1499                 memcpy(sunaddr, addr->name, addr->len);
1500         }
1501         sock_put(sk);
1502 out:
1503         return err;
1504 }
1505
1506 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1507 {
1508         int err = 0;
1509
1510         UNIXCB(skb).pid  = get_pid(scm->pid);
1511         UNIXCB(skb).uid = scm->creds.uid;
1512         UNIXCB(skb).gid = scm->creds.gid;
1513         UNIXCB(skb).fp = NULL;
1514         unix_get_secdata(scm, skb);
1515         if (scm->fp && send_fds)
1516                 err = unix_attach_fds(scm, skb);
1517
1518         skb->destructor = unix_destruct_scm;
1519         return err;
1520 }
1521
1522 static bool unix_passcred_enabled(const struct socket *sock,
1523                                   const struct sock *other)
1524 {
1525         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1526                !other->sk_socket ||
1527                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1528 }
1529
1530 /*
1531  * Some apps rely on write() giving SCM_CREDENTIALS
1532  * We include credentials if source or destination socket
1533  * asserted SOCK_PASSCRED.
1534  */
1535 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1536                             const struct sock *other)
1537 {
1538         if (UNIXCB(skb).pid)
1539                 return;
1540         if (unix_passcred_enabled(sock, other)) {
1541                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1542                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1543         }
1544 }
1545
1546 static int maybe_init_creds(struct scm_cookie *scm,
1547                             struct socket *socket,
1548                             const struct sock *other)
1549 {
1550         int err;
1551         struct msghdr msg = { .msg_controllen = 0 };
1552
1553         err = scm_send(socket, &msg, scm, false);
1554         if (err)
1555                 return err;
1556
1557         if (unix_passcred_enabled(socket, other)) {
1558                 scm->pid = get_pid(task_tgid(current));
1559                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1560         }
1561         return err;
1562 }
1563
1564 static bool unix_skb_scm_eq(struct sk_buff *skb,
1565                             struct scm_cookie *scm)
1566 {
1567         const struct unix_skb_parms *u = &UNIXCB(skb);
1568
1569         return u->pid == scm->pid &&
1570                uid_eq(u->uid, scm->creds.uid) &&
1571                gid_eq(u->gid, scm->creds.gid) &&
1572                unix_secdata_eq(scm, skb);
1573 }
1574
1575 /*
1576  *      Send AF_UNIX data.
1577  */
1578
1579 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1580                               size_t len)
1581 {
1582         struct sock *sk = sock->sk;
1583         struct net *net = sock_net(sk);
1584         struct unix_sock *u = unix_sk(sk);
1585         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1586         struct sock *other = NULL;
1587         int namelen = 0; /* fake GCC */
1588         int err;
1589         unsigned int hash;
1590         struct sk_buff *skb;
1591         long timeo;
1592         struct scm_cookie scm;
1593         int data_len = 0;
1594         int sk_locked;
1595
1596         wait_for_unix_gc();
1597         err = scm_send(sock, msg, &scm, false);
1598         if (err < 0)
1599                 return err;
1600
1601         err = -EOPNOTSUPP;
1602         if (msg->msg_flags&MSG_OOB)
1603                 goto out;
1604
1605         if (msg->msg_namelen) {
1606                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1607                 if (err < 0)
1608                         goto out;
1609                 namelen = err;
1610         } else {
1611                 sunaddr = NULL;
1612                 err = -ENOTCONN;
1613                 other = unix_peer_get(sk);
1614                 if (!other)
1615                         goto out;
1616         }
1617
1618         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1619             && (err = unix_autobind(sock)) != 0)
1620                 goto out;
1621
1622         err = -EMSGSIZE;
1623         if (len > sk->sk_sndbuf - 32)
1624                 goto out;
1625
1626         if (len > SKB_MAX_ALLOC) {
1627                 data_len = min_t(size_t,
1628                                  len - SKB_MAX_ALLOC,
1629                                  MAX_SKB_FRAGS * PAGE_SIZE);
1630                 data_len = PAGE_ALIGN(data_len);
1631
1632                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1633         }
1634
1635         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1636                                    msg->msg_flags & MSG_DONTWAIT, &err,
1637                                    PAGE_ALLOC_COSTLY_ORDER);
1638         if (skb == NULL)
1639                 goto out;
1640
1641         err = unix_scm_to_skb(&scm, skb, true);
1642         if (err < 0)
1643                 goto out_free;
1644
1645         skb_put(skb, len - data_len);
1646         skb->data_len = data_len;
1647         skb->len = len;
1648         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1649         if (err)
1650                 goto out_free;
1651
1652         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1653
1654 restart:
1655         if (!other) {
1656                 err = -ECONNRESET;
1657                 if (sunaddr == NULL)
1658                         goto out_free;
1659
1660                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1661                                         hash, &err);
1662                 if (other == NULL)
1663                         goto out_free;
1664         }
1665
1666         if (sk_filter(other, skb) < 0) {
1667                 /* Toss the packet but do not return any error to the sender */
1668                 err = len;
1669                 goto out_free;
1670         }
1671
1672         sk_locked = 0;
1673         unix_state_lock(other);
1674 restart_locked:
1675         err = -EPERM;
1676         if (!unix_may_send(sk, other))
1677                 goto out_unlock;
1678
1679         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1680                 /*
1681                  *      Check with 1003.1g - what should
1682                  *      datagram error
1683                  */
1684                 unix_state_unlock(other);
1685                 sock_put(other);
1686
1687                 if (!sk_locked)
1688                         unix_state_lock(sk);
1689
1690                 err = 0;
1691                 if (unix_peer(sk) == other) {
1692                         unix_peer(sk) = NULL;
1693                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1694
1695                         unix_state_unlock(sk);
1696
1697                         unix_dgram_disconnected(sk, other);
1698                         sock_put(other);
1699                         err = -ECONNREFUSED;
1700                 } else {
1701                         unix_state_unlock(sk);
1702                 }
1703
1704                 other = NULL;
1705                 if (err)
1706                         goto out_free;
1707                 goto restart;
1708         }
1709
1710         err = -EPIPE;
1711         if (other->sk_shutdown & RCV_SHUTDOWN)
1712                 goto out_unlock;
1713
1714         if (sk->sk_type != SOCK_SEQPACKET) {
1715                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1716                 if (err)
1717                         goto out_unlock;
1718         }
1719
1720         /* other == sk && unix_peer(other) != sk if
1721          * - unix_peer(sk) == NULL, destination address bound to sk
1722          * - unix_peer(sk) == sk by time of get but disconnected before lock
1723          */
1724         if (other != sk &&
1725             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1726                 if (timeo) {
1727                         timeo = unix_wait_for_peer(other, timeo);
1728
1729                         err = sock_intr_errno(timeo);
1730                         if (signal_pending(current))
1731                                 goto out_free;
1732
1733                         goto restart;
1734                 }
1735
1736                 if (!sk_locked) {
1737                         unix_state_unlock(other);
1738                         unix_state_double_lock(sk, other);
1739                 }
1740
1741                 if (unix_peer(sk) != other ||
1742                     unix_dgram_peer_wake_me(sk, other)) {
1743                         err = -EAGAIN;
1744                         sk_locked = 1;
1745                         goto out_unlock;
1746                 }
1747
1748                 if (!sk_locked) {
1749                         sk_locked = 1;
1750                         goto restart_locked;
1751                 }
1752         }
1753
1754         if (unlikely(sk_locked))
1755                 unix_state_unlock(sk);
1756
1757         if (sock_flag(other, SOCK_RCVTSTAMP))
1758                 __net_timestamp(skb);
1759         maybe_add_creds(skb, sock, other);
1760         skb_queue_tail(&other->sk_receive_queue, skb);
1761         unix_state_unlock(other);
1762         other->sk_data_ready(other);
1763         sock_put(other);
1764         scm_destroy(&scm);
1765         return len;
1766
1767 out_unlock:
1768         if (sk_locked)
1769                 unix_state_unlock(sk);
1770         unix_state_unlock(other);
1771 out_free:
1772         kfree_skb(skb);
1773 out:
1774         if (other)
1775                 sock_put(other);
1776         scm_destroy(&scm);
1777         return err;
1778 }
1779
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 *
 * The value is PAGE_SIZE shifted left by the allocation order that
 * get_order() reports for 32768 bytes.  unix_stream_sendmsg() uses it both
 * as the cap on the paged part of one skb and (via get_order()) as the
 * maximum page order it passes to sock_alloc_send_pskb().
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1784
/*
 * unix_stream_sendmsg - sendmsg() handler for connected stream sockets.
 * @sock: sending socket
 * @msg:  message header; msg_iter supplies the payload, msg_flags the MSG_*
 *        flags, and msg_namelen must be zero (no explicit destination)
 * @len:  number of payload bytes requested
 *
 * The payload is cut into skbs which are appended, one at a time, to the
 * receive queue of the connected peer.
 *
 * Return: the number of bytes queued if anything was queued at all,
 * otherwise a negative errno (-EOPNOTSUPP, -EISCONN, -ENOTCONN, -EPIPE, or
 * whatever scm_send()/allocation/copy reported).
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                               size_t len)
{
        struct sock *sk = sock->sk;
        struct sock *other = NULL;
        int err, size;
        struct sk_buff *skb;
        int sent = 0;
        struct scm_cookie scm;
        bool fds_sent = false;
        int data_len;

        /* NOTE(review): presumably throttles senders while in-flight fd
         * garbage collection is running - confirm against garbage.c.
         */
        wait_for_unix_gc();
        err = scm_send(sock, msg, &scm, false);
        if (err < 0)
                return err;

        /* From here on scm is initialised: every exit must scm_destroy() it */
        err = -EOPNOTSUPP;
        if (msg->msg_flags&MSG_OOB)
                goto out_err;

        if (msg->msg_namelen) {
                /* An explicit destination address is never accepted here */
                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
                goto out_err;
        } else {
                err = -ENOTCONN;
                other = unix_peer(sk);
                if (!other)
                        goto out_err;
        }

        if (sk->sk_shutdown & SEND_SHUTDOWN)
                goto pipe_err;

        while (sent < len) {
                size = len - sent;

                /* Keep two messages in the pipe so it schedules better */
                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

                /* allow fallback to order-0 allocations */
                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

                /* Bytes that do not fit the linear head go into page frags */
                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

                /* Round the paged part up to whole pages, but never beyond
                 * the chunk itself.
                 */
                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
                                           msg->msg_flags & MSG_DONTWAIT, &err,
                                           get_order(UNIX_SKB_FRAGS_SZ));
                if (!skb)
                        goto out_err;

                /* Only send the fds in the first buffer */
                err = unix_scm_to_skb(&scm, skb, !fds_sent);
                if (err < 0) {
                        kfree_skb(skb);
                        goto out_err;
                }
                fds_sent = true;

                /* Describe the linear/paged split, then fill it from the
                 * caller's iterator.
                 */
                skb_put(skb, size - data_len);
                skb->data_len = data_len;
                skb->len = size;
                err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
                if (err) {
                        kfree_skb(skb);
                        goto out_err;
                }

                unix_state_lock(other);

                /* Peer gone or no longer reading: the chunk is dropped */
                if (sock_flag(other, SOCK_DEAD) ||
                    (other->sk_shutdown & RCV_SHUTDOWN))
                        goto pipe_err_free;

                maybe_add_creds(skb, sock, other);
                skb_queue_tail(&other->sk_receive_queue, skb);
                unix_state_unlock(other);
                /* Notify the receiver only after its state lock is dropped */
                other->sk_data_ready(other);
                sent += size;
        }

        scm_destroy(&scm);

        return sent;

pipe_err_free:
        /* Entered with other's state lock held and skb not yet queued */
        unix_state_unlock(other);
        kfree_skb(skb);
pipe_err:
        /* SIGPIPE is raised only if nothing was sent and the caller did not
         * ask for MSG_NOSIGNAL.
         */
        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
                send_sig(SIGPIPE, current, 0);
        err = -EPIPE;
out_err:
        scm_destroy(&scm);
        /* A partial send reports the byte count and hides the error */
        return sent ? : err;
}
1883
1884 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1885                                     int offset, size_t size, int flags)
1886 {
1887         int err;
1888         bool send_sigpipe = false;
1889         bool init_scm = true;
1890         struct scm_cookie scm;
1891         struct sock *other, *sk = socket->sk;
1892         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1893
1894         if (flags & MSG_OOB)
1895                 return -EOPNOTSUPP;
1896
1897         other = unix_peer(sk);
1898         if (!other || sk->sk_state != TCP_ESTABLISHED)
1899                 return -ENOTCONN;
1900
1901         if (false) {
1902 alloc_skb:
1903                 unix_state_unlock(other);
1904                 mutex_unlock(&unix_sk(other)->iolock);
1905                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1906                                               &err, 0);
1907                 if (!newskb)
1908                         goto err;
1909         }
1910
1911         /* we must acquire iolock as we modify already present
1912          * skbs in the sk_receive_queue and mess with skb->len
1913          */
1914         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1915         if (err) {
1916                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1917                 goto err;
1918         }
1919
1920         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1921                 err = -EPIPE;
1922                 send_sigpipe = true;
1923                 goto err_unlock;
1924         }
1925
1926         unix_state_lock(other);
1927
1928         if (sock_flag(other, SOCK_DEAD) ||
1929             other->sk_shutdown & RCV_SHUTDOWN) {
1930                 err = -EPIPE;
1931                 send_sigpipe = true;
1932                 goto err_state_unlock;
1933         }
1934
1935         if (init_scm) {
1936                 err = maybe_init_creds(&scm, socket, other);
1937                 if (err)
1938                         goto err_state_unlock;
1939                 init_scm = false;
1940         }
1941
1942         skb = skb_peek_tail(&other->sk_receive_queue);
1943         if (tail && tail == skb) {
1944                 skb = newskb;
1945         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1946                 if (newskb) {
1947                         skb = newskb;
1948                 } else {
1949                         tail = skb;
1950                         goto alloc_skb;
1951                 }
1952         } else if (newskb) {
1953                 /* this is fast path, we don't necessarily need to
1954                  * call to kfree_skb even though with newskb == NULL
1955                  * this - does no harm
1956                  */
1957                 consume_skb(newskb);
1958                 newskb = NULL;
1959         }
1960
1961         if (skb_append_pagefrags(skb, page, offset, size)) {
1962                 tail = skb;
1963                 goto alloc_skb;
1964         }
1965
1966         skb->len += size;
1967         skb->data_len += size;
1968         skb->truesize += size;
1969         refcount_add(size, &sk->sk_wmem_alloc);
1970
1971         if (newskb) {
1972                 err = unix_scm_to_skb(&scm, skb, false);
1973                 if (err)
1974                         goto err_state_unlock;
1975                 spin_lock(&other->sk_receive_queue.lock);
1976                 __skb_queue_tail(&other->sk_receive_queue, newskb);
1977                 spin_unlock(&other->sk_receive_queue.lock);
1978         }
1979
1980         unix_state_unlock(other);
1981         mutex_unlock(&unix_sk(other)->iolock);
1982
1983         other->sk_data_ready(other);
1984         scm_destroy(&scm);
1985         return size;
1986
1987 err_state_unlock:
1988         unix_state_unlock(other);
1989 err_unlock:
1990         mutex_unlock(&unix_sk(other)->iolock);
1991 err:
1992         kfree_skb(newskb);
1993         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1994                 send_sig(SIGPIPE, current, 0);
1995         if (!init_scm)
1996                 scm_destroy(&scm);
1997         return err;
1998 }
1999
2000 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2001                                   size_t len)
2002 {
2003         int err;
2004         struct sock *sk = sock->sk;
2005
2006         err = sock_error(sk);
2007         if (err)
2008                 return err;
2009
2010         if (sk->sk_state != TCP_ESTABLISHED)
2011                 return -ENOTCONN;
2012
2013         if (msg->msg_namelen)
2014                 msg->msg_namelen = 0;
2015
2016         return unix_dgram_sendmsg(sock, msg, len);
2017 }
2018
2019 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2020                                   size_t size, int flags)
2021 {
2022         struct sock *sk = sock->sk;
2023
2024         if (sk->sk_state != TCP_ESTABLISHED)
2025                 return -ENOTCONN;
2026
2027         return unix_dgram_recvmsg(sock, msg, size, flags);
2028 }
2029
2030 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2031 {
2032         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2033
2034         if (addr) {
2035                 msg->msg_namelen = addr->len;
2036                 memcpy(msg->msg_name, addr->name, addr->len);
2037         }
2038 }
2039
/*
 * unix_dgram_recvmsg - receive one datagram (also used for SOCK_SEQPACKET).
 * @sock:  receiving socket
 * @msg:   destination; msg_name (if set) receives the sender's address and
 *         msg_flags may gain MSG_TRUNC
 * @size:  capacity of the caller's buffer
 * @flags: MSG_* flags (MSG_OOB is rejected; MSG_PEEK, MSG_DONTWAIT and
 *         MSG_TRUNC are honoured)
 *
 * Return: number of bytes copied (or, when MSG_TRUNC was passed in @flags,
 * the remaining length of the datagram), 0 for end-of-file on a shut-down
 * SEQPACKET socket with nothing queued, or a negative errno.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
                              size_t size, int flags)
{
        struct scm_cookie scm;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        struct sk_buff *skb, *last;
        long timeo;
        int skip;
        int err;

        err = -EOPNOTSUPP;
        if (flags&MSG_OOB)
                goto out;

        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

        /* Try to dequeue under iolock; when the queue is empty, drop the
         * lock and sleep for more packets until the timeout runs out.
         * Leaving the loop with an skb means iolock is still held.
         */
        do {
                mutex_lock(&u->iolock);

                skip = sk_peek_offset(sk, flags);
                skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
                                              NULL, &skip, &err, &last);
                if (skb)
                        break;

                mutex_unlock(&u->iolock);

                if (err != -EAGAIN)
                        break;
        } while (timeo &&
                 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
                                              &err, &timeo, last));

        if (!skb) { /* implies iolock unlocked */
                unix_state_lock(sk);
                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
                    (sk->sk_shutdown & RCV_SHUTDOWN))
                        err = 0;
                unix_state_unlock(sk);
                goto out;
        }

        /* Wake anyone sleeping on this socket's peer_wait queue with
         * write-readiness events.
         */
        if (wq_has_sleeper(&u->peer_wait))
                wake_up_interruptible_sync_poll(&u->peer_wait,
                                                EPOLLOUT | EPOLLWRNORM |
                                                EPOLLWRBAND);

        if (msg->msg_name)
                unix_copy_addr(msg, skb->sk);

        /* Clamp to what is left of the datagram past the peek offset, or
         * flag truncation when the caller's buffer is too small.
         */
        if (size > skb->len - skip)
                size = skb->len - skip;
        else if (size < skb->len - skip)
                msg->msg_flags |= MSG_TRUNC;

        err = skb_copy_datagram_msg(skb, skip, msg, size);
        if (err)
                goto out_free;

        if (sock_flag(sk, SOCK_RCVTSTAMP))
                __sock_recv_timestamp(msg, sk, skb);

        memset(&scm, 0, sizeof(scm));

        /* Rebuild the ancillary data from what the sender stored in the skb */
        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
        unix_set_secdata(&scm, skb);

        if (!(flags & MSG_PEEK)) {
                /* Real read: passed fds move from the skb to the receiver */
                if (UNIXCB(skb).fp)
                        unix_detach_fds(&scm, skb);

                sk_peek_offset_bwd(sk, skb->len);
        } else {
                /* It is questionable: on PEEK we could:
                   - do not return fds - good, but too simple 8)
                   - return fds, and do not return them on read (old strategy,
                     apparently wrong)
                   - clone fds (I chose it for now, it is the most universal
                     solution)

                   POSIX 1003.1g does not actually define this clearly
                   at all. POSIX 1003.1g doesn't define a lot of things
                   clearly however!

                */

                sk_peek_offset_fwd(sk, size);

                if (UNIXCB(skb).fp)
                        scm.fp = scm_fp_dup(UNIXCB(skb).fp);
        }
        /* With MSG_TRUNC in flags the full remaining length is reported */
        err = (flags & MSG_TRUNC) ? skb->len - skip : size;

        scm_recv(sock, msg, &scm, flags);

out_free:
        skb_free_datagram(sk, skb);
        mutex_unlock(&u->iolock);
out:
        return err;
}
2143
/*
 *      Sleep until more data has arrived. But check for races..
 *
 *      @sk:        socket whose receive queue is watched
 *      @timeo:     remaining timeout; 0 means do not sleep at all
 *      @last:      tail skb the caller saw (NULL if the queue was empty)
 *      @last_len:  length of @last as the caller saw it
 *      @freezable: use the freezable variant of schedule_timeout()
 *
 *      Returns the remaining timeout.  The wait ends as soon as the queue
 *      tail differs from @last or has changed length, the socket has an
 *      error, receive is shut down, a signal is pending, the timeout has
 *      expired, or the socket is dead.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
                                  struct sk_buff *last, unsigned int last_len,
                                  bool freezable)
{
        struct sk_buff *tail;
        DEFINE_WAIT(wait);

        unix_state_lock(sk);

        for (;;) {
                /* Register on the wait queue before testing the conditions */
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

                tail = skb_peek_tail(&sk->sk_receive_queue);
                if (tail != last ||
                    (tail && tail->len != last_len) ||
                    sk->sk_err ||
                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
                    signal_pending(current) ||
                    !timeo)
                        break;

                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                /* The state lock is not held while sleeping */
                unix_state_unlock(sk);
                if (freezable)
                        timeo = freezable_schedule_timeout(timeo);
                else
                        timeo = schedule_timeout(timeo);
                unix_state_lock(sk);

                /* On a dead socket we leave with the WAITDATA bit still set */
                if (sock_flag(sk, SOCK_DEAD))
                        break;

                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        }

        finish_wait(sk_sleep(sk), &wait);
        unix_state_unlock(sk);
        return timeo;
}
2186
2187 static unsigned int unix_skb_len(const struct sk_buff *skb)
2188 {
2189         return skb->len - UNIXCB(skb).consumed;
2190 }
2191
/*
 * Parameters of one stream read, shared by the recvmsg() and splice_read()
 * front ends and consumed by unix_stream_read_generic().
 */
struct unix_stream_read_state {
        /* Moves up to 'chunk' bytes out of the skb, starting 'skip' bytes
         * past the already consumed part; returns the number of bytes moved
         * or a negative value on failure.
         */
        int (*recv_actor)(struct sk_buff *, int, int,
                          struct unix_stream_read_state *);
        struct socket *socket;          /* socket being read */
        struct msghdr *msg;             /* recvmsg destination; left NULL by splice */
        struct pipe_inode_info *pipe;   /* splice destination; left NULL by recvmsg */
        size_t size;                    /* maximum number of bytes to read */
        int flags;                      /* MSG_* flags */
        unsigned int splice_flags;      /* flags forwarded to skb_splice_bits() */
};
2202
/*
 * unix_stream_read_generic - common body of stream recvmsg() and splice read.
 * @state:     what to read and where to put it; state->recv_actor moves
 *             the bytes
 * @freezable: passed through to unix_stream_data_wait()
 *
 * Walks the receive queue under the socket's iolock, handing each skb (or
 * the unread part of it) to the actor until the request is satisfied, the
 * queue runs dry, or a boundary is hit (different sender credentials,
 * passed file descriptors, an skb that is only partly read, an error).
 *
 * Return: number of bytes read if any were read, otherwise 0 or a negative
 * errno (see "copied ? : err" at the end).
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
                                    bool freezable)
{
        struct scm_cookie scm;
        struct socket *sock = state->socket;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        int copied = 0;
        int flags = state->flags;
        int noblock = flags & MSG_DONTWAIT;
        bool check_creds = false;
        int target;
        int err = 0;
        long timeo;
        int skip;
        size_t size = state->size;
        unsigned int last_len;

        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
                err = -EINVAL;
                goto out;
        }

        if (unlikely(flags & MSG_OOB)) {
                err = -EOPNOTSUPP;
                goto out;
        }

        /* target: how many bytes must be read before returning is allowed */
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
        timeo = sock_rcvtimeo(sk, noblock);

        memset(&scm, 0, sizeof(scm));

        /* Lock the socket to prevent queue disordering
         * while sleeps in memcpy_tomsg
         */
        mutex_lock(&u->iolock);

        /* Bytes to step over first; a negative peek offset counts as 0 */
        skip = max(sk_peek_offset(sk, flags), 0);

        do {
                int chunk;
                bool drop_skb;
                struct sk_buff *skb, *last;

redo:
                unix_state_lock(sk);
                if (sock_flag(sk, SOCK_DEAD)) {
                        err = -ECONNRESET;
                        goto unlock;
                }
                /* last/last_len record what we saw, for the wait below */
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;
again:
                if (skb == NULL) {
                        /* Nothing (more) to read right now */
                        if (copied >= target)
                                goto unlock;

                        /*
                         *      POSIX 1003.1g mandates this order.
                         */

                        err = sock_error(sk);
                        if (err)
                                goto unlock;
                        if (sk->sk_shutdown & RCV_SHUTDOWN)
                                goto unlock;

                        unix_state_unlock(sk);
                        if (!timeo) {
                                err = -EAGAIN;
                                break;
                        }

                        /* iolock is released for the duration of the sleep */
                        mutex_unlock(&u->iolock);

                        timeo = unix_stream_data_wait(sk, timeo, last,
                                                      last_len, freezable);

                        if (signal_pending(current)) {
                                /* iolock is not held here, so leave through
                                 * "out" and release scm by hand.
                                 */
                                err = sock_intr_errno(timeo);
                                scm_destroy(&scm);
                                goto out;
                        }

                        mutex_lock(&u->iolock);
                        goto redo;
unlock:
                        unix_state_unlock(sk);
                        break;
                }

                /* Step over whole skbs covered by the peek offset */
                while (skip >= unix_skb_len(skb)) {
                        skip -= unix_skb_len(skb);
                        last = skb;
                        last_len = skb->len;
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (!skb)
                                goto again;
                }

                unix_state_unlock(sk);

                if (check_creds) {
                        /* Never glue messages from different writers */
                        if (!unix_skb_scm_eq(skb, &scm))
                                break;
                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
                        /* Copy credentials */
                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
                        unix_set_secdata(&scm, skb);
                        check_creds = true;
                }

                /* Copy address just once */
                /* NOTE(review): sunaddr is a fresh local on every pass, so
                 * clearing it below does not stop the address being copied
                 * again on later passes - confirm whether that is intended.
                 */
                if (state->msg && state->msg->msg_name) {
                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
                                         state->msg->msg_name);
                        unix_copy_addr(state->msg, skb->sk);
                        sunaddr = NULL;
                }

                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                /* Extra reference keeps the skb alive across the actor call */
                skb_get(skb);
                chunk = state->recv_actor(skb, skip, chunk, state);
                drop_skb = !unix_skb_len(skb);
                /* skb is only safe to use if !drop_skb */
                consume_skb(skb);
                if (chunk < 0) {
                        if (copied == 0)
                                copied = -EFAULT;
                        break;
                }
                copied += chunk;
                size -= chunk;

                if (drop_skb) {
                        /* the skb was touched by a concurrent reader;
                         * we should not expect anything from this skb
                         * anymore and assume it invalid - we can be
                         * sure it was dropped from the socket queue
                         *
                         * let's report a short read
                         */
                        err = 0;
                        break;
                }

                /* Mark read part of skb as used */
                if (!(flags & MSG_PEEK)) {
                        UNIXCB(skb).consumed += chunk;

                        sk_peek_offset_bwd(sk, chunk);

                        if (UNIXCB(skb).fp)
                                unix_detach_fds(&scm, skb);

                        /* skb still has unread data: stop here */
                        if (unix_skb_len(skb))
                                break;

                        /* Fully read: drop it from the queue and free it */
                        skb_unlink(skb, &sk->sk_receive_queue);
                        consume_skb(skb);

                        /* Passed fds end this read */
                        if (scm.fp)
                                break;
                } else {
                        /* It is questionable, see note in unix_dgram_recvmsg.
                         */
                        if (UNIXCB(skb).fp)
                                scm.fp = scm_fp_dup(UNIXCB(skb).fp);

                        sk_peek_offset_fwd(sk, chunk);

                        if (UNIXCB(skb).fp)
                                break;

                        /* Peeking leaves the skb queued: move on to the next
                         * one, re-entering at "again" with the state lock
                         * held.
                         */
                        skip = 0;
                        last = skb;
                        last_len = skb->len;
                        unix_state_lock(sk);
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (skb)
                                goto again;
                        unix_state_unlock(sk);
                        break;
                }
        } while (size);

        mutex_unlock(&u->iolock);
        /* Deliver ancillary data if there is a msghdr to carry it,
         * otherwise just release it.
         */
        if (state->msg)
                scm_recv(sock, state->msg, &scm, flags);
        else
                scm_destroy(&scm);
out:
        /* A non-zero byte count (or the -EFAULT stored in it) wins over err */
        return copied ? : err;
}
2399
2400 static int unix_stream_read_actor(struct sk_buff *skb,
2401                                   int skip, int chunk,
2402                                   struct unix_stream_read_state *state)
2403 {
2404         int ret;
2405
2406         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2407                                     state->msg, chunk);
2408         return ret ?: chunk;
2409 }
2410
2411 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2412                                size_t size, int flags)
2413 {
2414         struct unix_stream_read_state state = {
2415                 .recv_actor = unix_stream_read_actor,
2416                 .socket = sock,
2417                 .msg = msg,
2418                 .size = size,
2419                 .flags = flags
2420         };
2421
2422         return unix_stream_read_generic(&state, true);
2423 }
2424
2425 static int unix_stream_splice_actor(struct sk_buff *skb,
2426                                     int skip, int chunk,
2427                                     struct unix_stream_read_state *state)
2428 {
2429         return skb_splice_bits(skb, state->socket->sk,
2430                                UNIXCB(skb).consumed + skip,
2431                                state->pipe, chunk, state->splice_flags);
2432 }
2433
2434 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2435                                        struct pipe_inode_info *pipe,
2436                                        size_t size, unsigned int flags)
2437 {
2438         struct unix_stream_read_state state = {
2439                 .recv_actor = unix_stream_splice_actor,
2440                 .socket = sock,
2441                 .pipe = pipe,
2442                 .size = size,
2443                 .splice_flags = flags,
2444         };
2445
2446         if (unlikely(*ppos))
2447                 return -ESPIPE;
2448
2449         if (sock->file->f_flags & O_NONBLOCK ||
2450             flags & SPLICE_F_NONBLOCK)
2451                 state.flags = MSG_DONTWAIT;
2452
2453         return unix_stream_read_generic(&state, false);
2454 }
2455
2456 static int unix_shutdown(struct socket *sock, int mode)
2457 {
2458         struct sock *sk = sock->sk;
2459         struct sock *other;
2460
2461         if (mode < SHUT_RD || mode > SHUT_RDWR)
2462                 return -EINVAL;
2463         /* This maps:
2464          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2465          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2466          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2467          */
2468         ++mode;
2469
2470         unix_state_lock(sk);
2471         sk->sk_shutdown |= mode;
2472         other = unix_peer(sk);
2473         if (other)
2474                 sock_hold(other);
2475         unix_state_unlock(sk);
2476         sk->sk_state_change(sk);
2477
2478         if (other &&
2479                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2480
2481                 int peer_mode = 0;
2482
2483                 if (mode&RCV_SHUTDOWN)
2484                         peer_mode |= SEND_SHUTDOWN;
2485                 if (mode&SEND_SHUTDOWN)
2486                         peer_mode |= RCV_SHUTDOWN;
2487                 unix_state_lock(other);
2488                 other->sk_shutdown |= peer_mode;
2489                 unix_state_unlock(other);
2490                 other->sk_state_change(other);
2491                 if (peer_mode == SHUTDOWN_MASK)
2492                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2493                 else if (peer_mode & RCV_SHUTDOWN)
2494                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2495         }
2496         if (other)
2497                 sock_put(other);
2498
2499         return 0;
2500 }
2501
2502 long unix_inq_len(struct sock *sk)
2503 {
2504         struct sk_buff *skb;
2505         long amount = 0;
2506
2507         if (sk->sk_state == TCP_LISTEN)
2508                 return -EINVAL;
2509
2510         spin_lock(&sk->sk_receive_queue.lock);
2511         if (sk->sk_type == SOCK_STREAM ||
2512             sk->sk_type == SOCK_SEQPACKET) {
2513                 skb_queue_walk(&sk->sk_receive_queue, skb)
2514                         amount += unix_skb_len(skb);
2515         } else {
2516                 skb = skb_peek(&sk->sk_receive_queue);
2517                 if (skb)
2518                         amount = skb->len;
2519         }
2520         spin_unlock(&sk->sk_receive_queue.lock);
2521
2522         return amount;
2523 }
2524 EXPORT_SYMBOL_GPL(unix_inq_len);
2525
/* Send-side memory currently charged to @sk, as given by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2531
2532 static int unix_open_file(struct sock *sk)
2533 {
2534         struct path path;
2535         struct file *f;
2536         int fd;
2537
2538         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2539                 return -EPERM;
2540
2541         if (!smp_load_acquire(&unix_sk(sk)->addr))
2542                 return -ENOENT;
2543
2544         path = unix_sk(sk)->path;
2545         if (!path.dentry)
2546                 return -ENOENT;
2547
2548         path_get(&path);
2549
2550         fd = get_unused_fd_flags(O_CLOEXEC);
2551         if (fd < 0)
2552                 goto out;
2553
2554         f = dentry_open(&path, O_PATH, current_cred());
2555         if (IS_ERR(f)) {
2556                 put_unused_fd(fd);
2557                 fd = PTR_ERR(f);
2558                 goto out;
2559         }
2560
2561         fd_install(fd, f);
2562 out:
2563         path_put(&path);
2564
2565         return fd;
2566 }
2567
2568 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2569 {
2570         struct sock *sk = sock->sk;
2571         long amount = 0;
2572         int err;
2573
2574         switch (cmd) {
2575         case SIOCOUTQ:
2576                 amount = unix_outq_len(sk);
2577                 err = put_user(amount, (int __user *)arg);
2578                 break;
2579         case SIOCINQ:
2580                 amount = unix_inq_len(sk);
2581                 if (amount < 0)
2582                         err = amount;
2583                 else
2584                         err = put_user(amount, (int __user *)arg);
2585                 break;
2586         case SIOCUNIXFILE:
2587                 err = unix_open_file(sk);
2588                 break;
2589         default:
2590                 err = -ENOIOCTLCMD;
2591                 break;
2592         }
2593         return err;
2594 }
2595
#ifdef CONFIG_COMPAT
/* Compat ioctl(): convert @arg with compat_ptr() and reuse unix_ioctl(). */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        unsigned long native_arg = (unsigned long)compat_ptr(arg);

        return unix_ioctl(sock, cmd, native_arg);
}
#endif
2602
2603 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2604 {
2605         struct sock *sk = sock->sk;
2606         __poll_t mask;
2607
2608         sock_poll_wait(file, sock, wait);
2609         mask = 0;
2610
2611         /* exceptional events? */
2612         if (sk->sk_err)
2613                 mask |= EPOLLERR;
2614         if (sk->sk_shutdown == SHUTDOWN_MASK)
2615                 mask |= EPOLLHUP;
2616         if (sk->sk_shutdown & RCV_SHUTDOWN)
2617                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2618
2619         /* readable? */
2620         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2621                 mask |= EPOLLIN | EPOLLRDNORM;
2622
2623         /* Connection-based need to check for termination and startup */
2624         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2625             sk->sk_state == TCP_CLOSE)
2626                 mask |= EPOLLHUP;
2627
2628         /*
2629          * we set writable also when the other side has shut down the
2630          * connection. This prevents stuck sockets.
2631          */
2632         if (unix_writable(sk))
2633                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2634
2635         return mask;
2636 }
2637
2638 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2639                                     poll_table *wait)
2640 {
2641         struct sock *sk = sock->sk, *other;
2642         unsigned int writable;
2643         __poll_t mask;
2644
2645         sock_poll_wait(file, sock, wait);
2646         mask = 0;
2647
2648         /* exceptional events? */
2649         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2650                 mask |= EPOLLERR |
2651                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2652
2653         if (sk->sk_shutdown & RCV_SHUTDOWN)
2654                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2655         if (sk->sk_shutdown == SHUTDOWN_MASK)
2656                 mask |= EPOLLHUP;
2657
2658         /* readable? */
2659         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2660                 mask |= EPOLLIN | EPOLLRDNORM;
2661
2662         /* Connection-based need to check for termination and startup */
2663         if (sk->sk_type == SOCK_SEQPACKET) {
2664                 if (sk->sk_state == TCP_CLOSE)
2665                         mask |= EPOLLHUP;
2666                 /* connection hasn't started yet? */
2667                 if (sk->sk_state == TCP_SYN_SENT)
2668                         return mask;
2669         }
2670
2671         /* No write status requested, avoid expensive OUT tests. */
2672         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2673                 return mask;
2674
2675         writable = unix_writable(sk);
2676         if (writable) {
2677                 unix_state_lock(sk);
2678
2679                 other = unix_peer(sk);
2680                 if (other && unix_peer(other) != sk &&
2681                     unix_recvq_full(other) &&
2682                     unix_dgram_peer_wake_me(sk, other))
2683                         writable = 0;
2684
2685                 unix_state_unlock(sk);
2686         }
2687
2688         if (writable)
2689                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2690         else
2691                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2692
2693         return mask;
2694 }
2695
2696 #ifdef CONFIG_PROC_FS
2697
/*
 * Layout of the /proc seq_file position used by the iterator below: the
 * low BUCKET_SPACE bits hold a 1-based offset within a hash bucket, the
 * bits above them hold the bucket index.
 */
#define BUCKET_SPACE            (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x)           ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x)           ((x) & ((1L << BUCKET_SPACE) - 1))
/* Pack bucket index @b and in-bucket offset @o into one position. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2703
2704 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2705 {
2706         unsigned long offset = get_offset(*pos);
2707         unsigned long bucket = get_bucket(*pos);
2708         struct sock *sk;
2709         unsigned long count = 0;
2710
2711         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2712                 if (sock_net(sk) != seq_file_net(seq))
2713                         continue;
2714                 if (++count == offset)
2715                         break;
2716         }
2717
2718         return sk;
2719 }
2720
2721 static struct sock *unix_next_socket(struct seq_file *seq,
2722                                      struct sock *sk,
2723                                      loff_t *pos)
2724 {
2725         unsigned long bucket;
2726
2727         while (sk > (struct sock *)SEQ_START_TOKEN) {
2728                 sk = sk_next(sk);
2729                 if (!sk)
2730                         goto next_bucket;
2731                 if (sock_net(sk) == seq_file_net(seq))
2732                         return sk;
2733         }
2734
2735         do {
2736                 sk = unix_from_bucket(seq, pos);
2737                 if (sk)
2738                         return sk;
2739
2740 next_bucket:
2741                 bucket = get_bucket(*pos) + 1;
2742                 *pos = set_bucket_offset(bucket, 1);
2743         } while (bucket < ARRAY_SIZE(unix_socket_table));
2744
2745         return NULL;
2746 }
2747
2748 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2749         __acquires(unix_table_lock)
2750 {
2751         spin_lock(&unix_table_lock);
2752
2753         if (!*pos)
2754                 return SEQ_START_TOKEN;
2755
2756         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2757                 return NULL;
2758
2759         return unix_next_socket(seq, NULL, pos);
2760 }
2761
2762 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2763 {
2764         ++*pos;
2765         return unix_next_socket(seq, v, pos);
2766 }
2767
/*
 * seq_file ->stop() callback: releases unix_table_lock, which
 * unix_seq_start() acquires. Neither @seq nor @v is used.
 */
2768 static void unix_seq_stop(struct seq_file *seq, void *v)
2769         __releases(unix_table_lock)
2770 {
2771         spin_unlock(&unix_table_lock);
2772 }
2773
2774 static int unix_seq_show(struct seq_file *seq, void *v)
2775 {
2776
2777         if (v == SEQ_START_TOKEN)
2778                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2779                          "Inode Path\n");
2780         else {
2781                 struct sock *s = v;
2782                 struct unix_sock *u = unix_sk(s);
2783                 unix_state_lock(s);
2784
2785                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2786                         s,
2787                         refcount_read(&s->sk_refcnt),
2788                         0,
2789                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2790                         s->sk_type,
2791                         s->sk_socket ?
2792                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2793                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2794                         sock_i_ino(s));
2795
2796                 if (u->addr) {  // under unix_table_lock here
2797                         int i, len;
2798                         seq_putc(seq, ' ');
2799
2800                         i = 0;
2801                         len = u->addr->len - sizeof(short);
2802                         if (!UNIX_ABSTRACT(s))
2803                                 len--;
2804                         else {
2805                                 seq_putc(seq, '@');
2806                                 i++;
2807                         }
2808                         for ( ; i < len; i++)
2809                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2810                                          '@');
2811                 }
2812                 unix_state_unlock(s);
2813                 seq_putc(seq, '\n');
2814         }
2815
2816         return 0;
2817 }
2818
2819 static const struct seq_operations unix_seq_ops = {
2820         .start  = unix_seq_start,
2821         .next   = unix_seq_next,
2822         .stop   = unix_seq_stop,
2823         .show   = unix_seq_show,
2824 };
2825 #endif
2826
2827 static const struct net_proto_family unix_family_ops = {
2828         .family = PF_UNIX,
2829         .create = unix_create,
2830         .owner  = THIS_MODULE,
2831 };
2832
2833
2834 static int __net_init unix_net_init(struct net *net)
2835 {
2836         int error = -ENOMEM;
2837
2838         net->unx.sysctl_max_dgram_qlen = 10;
2839         if (unix_sysctl_register(net))
2840                 goto out;
2841
2842 #ifdef CONFIG_PROC_FS
2843         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2844                         sizeof(struct seq_net_private))) {
2845                 unix_sysctl_unregister(net);
2846                 goto out;
2847         }
2848 #endif
2849         error = 0;
2850 out:
2851         return error;
2852 }
2853
2854 static void __net_exit unix_net_exit(struct net *net)
2855 {
2856         unix_sysctl_unregister(net);
2857         remove_proc_entry("unix", net->proc_net);
2858 }
2859
2860 static struct pernet_operations unix_net_ops = {
2861         .init = unix_net_init,
2862         .exit = unix_net_exit,
2863 };
2864
2865 static int __init af_unix_init(void)
2866 {
2867         int rc = -1;
2868
2869         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2870
2871         rc = proto_register(&unix_proto, 1);
2872         if (rc != 0) {
2873                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2874                 goto out;
2875         }
2876
2877         sock_register(&unix_family_ops);
2878         register_pernet_subsys(&unix_net_ops);
2879 out:
2880         return rc;
2881 }
2882
/*
 * Module exit: unregisters the PF_UNIX socket family first, then the
 * unix_proto protocol, and finally the per-network-namespace operations.
 */
2883 static void __exit af_unix_exit(void)
2884 {
2885         sock_unregister(PF_UNIX);
2886         proto_unregister(&unix_proto);
2887         unregister_pernet_subsys(&unix_net_ops);
2888 }
2889
2890 /* Earlier than device_initcall() so that other drivers invoking
2891    request_module() don't end up in a loop when modprobe tries
2892    to use a UNIX socket. But later than subsys_initcall() because
2893    we depend on stuff initialised there */
2894 fs_initcall(af_unix_init);
2895 module_exit(af_unix_exit);
2896
2897 MODULE_LICENSE("GPL");
2898 MODULE_ALIAS_NETPROTO(PF_UNIX);