net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  33  *                                      by above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
  36  *                                      has been reached. This won't break
  37  *                                      old apps and it will avoid a huge number
  38  *                                      of socks hashed (this is for unix_gc()
  39  *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched/signal.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <linux/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120 #include <linux/file.h>
 121
 122 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 123 EXPORT_SYMBOL_GPL(unix_socket_table);
 124 DEFINE_SPINLOCK(unix_table_lock);
 125 EXPORT_SYMBOL_GPL(unix_table_lock);
 126 static atomic_long_t unix_nr_socks;
 127
 128
 129 static struct hlist_head *unix_sockets_unbound(void *addr)
 130 {
 131         unsigned long hash = (unsigned long)addr;
 132
 133         hash ^= hash >> 16;
 134         hash ^= hash >> 8;
 135         hash %= UNIX_HASH_SIZE;
 136         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137 }
 138
 139 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141 #ifdef CONFIG_SECURITY_NETWORK
 142 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143 {
 144         UNIXCB(skb).secid = scm->secid;
 145 }
 146
 147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148 {
 149         scm->secid = UNIXCB(skb).secid;
 150 }
 151
 152 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153 {
 154         return (scm->secid == UNIXCB(skb).secid);
 155 }
 156 #else
 157 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161 { }
 162
 163 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164 {
 165         return true;
 166 }
 167 #endif /* CONFIG_SECURITY_NETWORK */
 168
 169 /*
 170  *  SMP locking strategy:
 171  *    hash table is protected with spinlock unix_table_lock
 172  *    each socket state is protected by separate spin lock.
 173  */
 174
 175 static inline unsigned int unix_hash_fold(__wsum n)
 176 {
 177         unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179         hash ^= hash>>8;
 180         return hash&(UNIX_HASH_SIZE-1);
 181 }
 182
 183 #define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186 {
 187         return unix_peer(osk) == sk;
 188 }
 189
 190 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191 {
 192         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193 }
 194
 195 static inline int unix_recvq_full(struct sock const *sk)
 196 {
 197         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198 }
 199
 200 struct sock *unix_peer_get(struct sock *s)
 201 {
 202         struct sock *peer;
 203
 204         unix_state_lock(s);
 205         peer = unix_peer(s);
 206         if (peer)
 207                 sock_hold(peer);
 208         unix_state_unlock(s);
 209         return peer;
 210 }
 211 EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213 static inline void unix_release_addr(struct unix_address *addr)
 214 {
 215         if (refcount_dec_and_test(&addr->refcnt))
 216                 kfree(addr);
 217 }
 218
 219 /*
 220  *      Check unix socket name:
 221  *              - should be not zero length.
 222  *              - if started by not zero, should be NULL terminated (FS object)
 223  *              - if started by zero, it is abstract name.
 224  */
 225
 226 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227 {
 228         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 229                 return -EINVAL;
 230         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 231                 return -EINVAL;
 232         if (sunaddr->sun_path[0]) {
 233                 /*
 234                  * This may look like an off by one error but it is a bit more
 235                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 236                  * sun_path[108] doesn't as such exist.  However in kernel space
 237                  * we are guaranteed that it is a valid memory location in our
 238                  * kernel address buffer.
 239                  */
 240                 ((char *)sunaddr)[len] = 0;
 241                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 242                 return len;
 243         }
 244
 245         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 246         return len;
 247 }
 248
 249 static void __unix_remove_socket(struct sock *sk)
 250 {
 251         sk_del_node_init(sk);
 252 }
 253
 254 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 255 {
 256         WARN_ON(!sk_unhashed(sk));
 257         sk_add_node(sk, list);
 258 }
 259
 260 static inline void unix_remove_socket(struct sock *sk)
 261 {
 262         spin_lock(&unix_table_lock);
 263         __unix_remove_socket(sk);
 264         spin_unlock(&unix_table_lock);
 265 }
 266
 267 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 268 {
 269         spin_lock(&unix_table_lock);
 270         __unix_insert_socket(list, sk);
 271         spin_unlock(&unix_table_lock);
 272 }
 273
 274 static struct sock *__unix_find_socket_byname(struct net *net,
 275                                               struct sockaddr_un *sunname,
 276                                               int len, int type, unsigned int hash)
 277 {
 278         struct sock *s;
 279
 280         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 281                 struct unix_sock *u = unix_sk(s);
 282
 283                 if (!net_eq(sock_net(s), net))
 284                         continue;
 285
 286                 if (u->addr->len == len &&
 287                     !memcmp(u->addr->name, sunname, len))
 288                         goto found;
 289         }
 290         s = NULL;
 291 found:
 292         return s;
 293 }
 294
 295 static inline struct sock *unix_find_socket_byname(struct net *net,
 296                                                    struct sockaddr_un *sunname,
 297                                                    int len, int type,
 298                                                    unsigned int hash)
 299 {
 300         struct sock *s;
 301
 302         spin_lock(&unix_table_lock);
 303         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 304         if (s)
 305                 sock_hold(s);
 306         spin_unlock(&unix_table_lock);
 307         return s;
 308 }
 309
 310 static struct sock *unix_find_socket_byinode(struct inode *i)
 311 {
 312         struct sock *s;
 313
 314         spin_lock(&unix_table_lock);
 315         sk_for_each(s,
 316                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 317                 struct dentry *dentry = unix_sk(s)->path.dentry;
 318
 319                 if (dentry && d_backing_inode(dentry) == i) {
 320                         sock_hold(s);
 321                         goto found;
 322                 }
 323         }
 324         s = NULL;
 325 found:
 326         spin_unlock(&unix_table_lock);
 327         return s;
 328 }
 329
 330 /* Support code for asymmetrically connected dgram sockets
 331  *
 332  * If a datagram socket is connected to a socket not itself connected
 333  * to the first socket (eg, /dev/log), clients may only enqueue more
 334  * messages if the present receive queue of the server socket is not
 335  * "too large". This means there's a second writeability condition
 336  * poll and sendmsg need to test. The dgram recv code will do a wake
 337  * up on the peer_wait wait queue of a socket upon reception of a
 338  * datagram which needs to be propagated to sleeping would-be writers
 339  * since these might not have sent anything so far. This can't be
 340  * accomplished via poll_wait because the lifetime of the server
 341  * socket might be less than that of its clients if these break their
 342  * association with it or if the server socket is closed while clients
 343  * are still connected to it and there's no way to inform "a polling
 344  * implementation" that it should let go of a certain wait queue
 345  *
 346  * In order to propagate a wake up, a wait_queue_entry_t of the client
 347  * socket is enqueued on the peer_wait queue of the server socket
 348  * whose wake function does a wake_up on the ordinary client socket
 349  * wait queue. This connection is established whenever a write (or
 350  * poll for write) hit the flow control condition and broken when the
 351  * association to the server socket is dissolved or after a wake up
 352  * was relayed.
 353  */
 354
 355 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 356                                       void *key)
 357 {
 358         struct unix_sock *u;
 359         wait_queue_head_t *u_sleep;
 360
 361         u = container_of(q, struct unix_sock, peer_wake);
 362
 363         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 364                             q);
 365         u->peer_wake.private = NULL;
 366
 367         /* relaying can only happen while the wq still exists */
 368         u_sleep = sk_sleep(&u->sk);
 369         if (u_sleep)
 370                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 371
 372         return 0;
 373 }
 374
 375 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 376 {
 377         struct unix_sock *u, *u_other;
 378         int rc;
 379
 380         u = unix_sk(sk);
 381         u_other = unix_sk(other);
 382         rc = 0;
 383         spin_lock(&u_other->peer_wait.lock);
 384
 385         if (!u->peer_wake.private) {
 386                 u->peer_wake.private = other;
 387                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 388
 389                 rc = 1;
 390         }
 391
 392         spin_unlock(&u_other->peer_wait.lock);
 393         return rc;
 394 }
 395
 396 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 397                                             struct sock *other)
 398 {
 399         struct unix_sock *u, *u_other;
 400
 401         u = unix_sk(sk);
 402         u_other = unix_sk(other);
 403         spin_lock(&u_other->peer_wait.lock);
 404
 405         if (u->peer_wake.private == other) {
 406                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 407                 u->peer_wake.private = NULL;
 408         }
 409
 410         spin_unlock(&u_other->peer_wait.lock);
 411 }
 412
 413 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 414                                                    struct sock *other)
 415 {
 416         unix_dgram_peer_wake_disconnect(sk, other);
 417         wake_up_interruptible_poll(sk_sleep(sk),
 418                                    EPOLLOUT |
 419                                    EPOLLWRNORM |
 420                                    EPOLLWRBAND);
 421 }
 422
 423 /* preconditions:
 424  *      - unix_peer(sk) == other
 425  *      - association is stable
 426  */
 427 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 428 {
 429         int connected;
 430
 431         connected = unix_dgram_peer_wake_connect(sk, other);
 432
 433         /* If other is SOCK_DEAD, we want to make sure we signal
 434          * POLLOUT, such that a subsequent write() can get a
 435          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 436          * to other and its full, we will hang waiting for POLLOUT.
 437          */
 438         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 439                 return 1;
 440
 441         if (connected)
 442                 unix_dgram_peer_wake_disconnect(sk, other);
 443
 444         return 0;
 445 }
 446
 447 static int unix_writable(const struct sock *sk)
 448 {
 449         return sk->sk_state != TCP_LISTEN &&
 450                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 451 }
 452
 453 static void unix_write_space(struct sock *sk)
 454 {
 455         struct socket_wq *wq;
 456
 457         rcu_read_lock();
 458         if (unix_writable(sk)) {
 459                 wq = rcu_dereference(sk->sk_wq);
 460                 if (skwq_has_sleeper(wq))
 461                         wake_up_interruptible_sync_poll(&wq->wait,
 462                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 463                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 464         }
 465         rcu_read_unlock();
 466 }
 467
 468 /* When dgram socket disconnects (or changes its peer), we clear its receive
 469  * queue of packets arrived from previous peer. First, it allows to do
 470  * flow control based only on wmem_alloc; second, sk connected to peer
 471  * may receive messages only from that peer. */
 472 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 473 {
 474         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 475                 skb_queue_purge(&sk->sk_receive_queue);
 476                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 477
 478                 /* If one link of bidirectional dgram pipe is disconnected,
 479                  * we signal error. Messages are lost. Do not make this,
 480                  * when peer was not connected to us.
 481                  */
 482                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 483                         other->sk_err = ECONNRESET;
 484                         other->sk_error_report(other);
 485                 }
 486         }
 487 }
 488
 489 static void unix_sock_destructor(struct sock *sk)
 490 {
 491         struct unix_sock *u = unix_sk(sk);
 492
 493         skb_queue_purge(&sk->sk_receive_queue);
 494
 495         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 496         WARN_ON(!sk_unhashed(sk));
 497         WARN_ON(sk->sk_socket);
 498         if (!sock_flag(sk, SOCK_DEAD)) {
 499                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 500                 return;
 501         }
 502
 503         if (u->addr)
 504                 unix_release_addr(u->addr);
 505
 506         atomic_long_dec(&unix_nr_socks);
 507         local_bh_disable();
 508         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 509         local_bh_enable();
 510 #ifdef UNIX_REFCNT_DEBUG
 511         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 512                 atomic_long_read(&unix_nr_socks));
 513 #endif
 514 }
 515
 516 static void unix_release_sock(struct sock *sk, int embrion)
 517 {
 518         struct unix_sock *u = unix_sk(sk);
 519         struct path path;
 520         struct sock *skpair;
 521         struct sk_buff *skb;
 522         int state;
 523
 524         unix_remove_socket(sk);
 525
 526         /* Clear state */
 527         unix_state_lock(sk);
 528         sock_orphan(sk);
 529         sk->sk_shutdown = SHUTDOWN_MASK;
 530         path         = u->path;
 531         u->path.dentry = NULL;
 532         u->path.mnt = NULL;
 533         state = sk->sk_state;
 534         sk->sk_state = TCP_CLOSE;
 535         unix_state_unlock(sk);
 536
 537         wake_up_interruptible_all(&u->peer_wait);
 538
 539         skpair = unix_peer(sk);
 540
 541         if (skpair != NULL) {
 542                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 543                         unix_state_lock(skpair);
 544                         /* No more writes */
 545                         skpair->sk_shutdown = SHUTDOWN_MASK;
 546                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 547                                 skpair->sk_err = ECONNRESET;
 548                         unix_state_unlock(skpair);
 549                         skpair->sk_state_change(skpair);
 550                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 551                 }
 552
 553                 unix_dgram_peer_wake_disconnect(sk, skpair);
 554                 sock_put(skpair); /* It may now die */
 555                 unix_peer(sk) = NULL;
 556         }
 557
 558         /* Try to flush out this socket. Throw out buffers at least */
 559
 560         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 561                 if (state == TCP_LISTEN)
 562                         unix_release_sock(skb->sk, 1);
 563                 /* passed fds are erased in the kfree_skb hook        */
 564                 UNIXCB(skb).consumed = skb->len;
 565                 kfree_skb(skb);
 566         }
 567
 568         if (path.dentry)
 569                 path_put(&path);
 570
 571         sock_put(sk);
 572
 573         /* ---- Socket is dead now and most probably destroyed ---- */
 574
 575         /*
 576          * Fixme: BSD difference: In BSD all sockets connected to us get
 577          *        ECONNRESET and we die on the spot. In Linux we behave
 578          *        like files and pipes do and wait for the last
 579          *        dereference.
 580          *
 581          * Can't we simply set sock->err?
 582          *
 583          *        What the above comment does talk about? --ANK(980817)
 584          */
 585
 586         if (unix_tot_inflight)
 587                 unix_gc();              /* Garbage collect fds */
 588 }
 589
 590 static void init_peercred(struct sock *sk)
 591 {
 592         put_pid(sk->sk_peer_pid);
 593         if (sk->sk_peer_cred)
 594                 put_cred(sk->sk_peer_cred);
 595         sk->sk_peer_pid  = get_pid(task_tgid(current));
 596         sk->sk_peer_cred = get_current_cred();
 597 }
 598
 599 static void copy_peercred(struct sock *sk, struct sock *peersk)
 600 {
 601         put_pid(sk->sk_peer_pid);
 602         if (sk->sk_peer_cred)
 603                 put_cred(sk->sk_peer_cred);
 604         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 605         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 606 }
 607
 608 static int unix_listen(struct socket *sock, int backlog)
 609 {
 610         int err;
 611         struct sock *sk = sock->sk;
 612         struct unix_sock *u = unix_sk(sk);
 613         struct pid *old_pid = NULL;
 614
 615         err = -EOPNOTSUPP;
 616         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 617                 goto out;       /* Only stream/seqpacket sockets accept */
 618         err = -EINVAL;
 619         if (!u->addr)
 620                 goto out;       /* No listens on an unbound socket */
 621         unix_state_lock(sk);
 622         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 623                 goto out_unlock;
 624         if (backlog > sk->sk_max_ack_backlog)
 625                 wake_up_interruptible_all(&u->peer_wait);
 626         sk->sk_max_ack_backlog  = backlog;
 627         sk->sk_state            = TCP_LISTEN;
 628         /* set credentials so connect can copy them */
 629         init_peercred(sk);
 630         err = 0;
 631
 632 out_unlock:
 633         unix_state_unlock(sk);
 634         put_pid(old_pid);
 635 out:
 636         return err;
 637 }
 638
 639 static int unix_release(struct socket *);
 640 static int unix_bind(struct socket *, struct sockaddr *, int);
 641 static int unix_stream_connect(struct socket *, struct sockaddr *,
 642                                int addr_len, int flags);
 643 static int unix_socketpair(struct socket *, struct socket *);
 644 static int unix_accept(struct socket *, struct socket *, int, bool);
 645 static int unix_getname(struct socket *, struct sockaddr *, int);
 646 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 647 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 648                                     poll_table *);
 649 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 650 static int unix_shutdown(struct socket *, int);
 651 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 652 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 653 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 654                                     size_t size, int flags);
 655 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 656                                        struct pipe_inode_info *, size_t size,
 657                                        unsigned int flags);
 658 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 659 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 660 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 661                               int, int);
 662 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 663 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 664                                   int);
 665
 666 static int unix_set_peek_off(struct sock *sk, int val)
 667 {
 668         struct unix_sock *u = unix_sk(sk);
 669
 670         if (mutex_lock_interruptible(&u->iolock))
 671                 return -EINTR;
 672
 673         sk->sk_peek_off = val;
 674         mutex_unlock(&u->iolock);
 675
 676         return 0;
 677 }
 678
 679
 680 static const struct proto_ops unix_stream_ops = {
 681         .family =       PF_UNIX,
 682         .owner =        THIS_MODULE,
 683         .release =      unix_release,
 684         .bind =         unix_bind,
 685         .connect =      unix_stream_connect,
 686         .socketpair =   unix_socketpair,
 687         .accept =       unix_accept,
 688         .getname =      unix_getname,
 689         .poll =         unix_poll,
 690         .ioctl =        unix_ioctl,
 691         .listen =       unix_listen,
 692         .shutdown =     unix_shutdown,
 693         .setsockopt =   sock_no_setsockopt,
 694         .getsockopt =   sock_no_getsockopt,
 695         .sendmsg =      unix_stream_sendmsg,
 696         .recvmsg =      unix_stream_recvmsg,
 697         .mmap =         sock_no_mmap,
 698         .sendpage =     unix_stream_sendpage,
 699         .splice_read =  unix_stream_splice_read,
 700         .set_peek_off = unix_set_peek_off,
 701 };
 702
 703 static const struct proto_ops unix_dgram_ops = {
 704         .family =       PF_UNIX,
 705         .owner =        THIS_MODULE,
 706         .release =      unix_release,
 707         .bind =         unix_bind,
 708         .connect =      unix_dgram_connect,
 709         .socketpair =   unix_socketpair,
 710         .accept =       sock_no_accept,
 711         .getname =      unix_getname,
 712         .poll =         unix_dgram_poll,
 713         .ioctl =        unix_ioctl,
 714         .listen =       sock_no_listen,
 715         .shutdown =     unix_shutdown,
 716         .setsockopt =   sock_no_setsockopt,
 717         .getsockopt =   sock_no_getsockopt,
 718         .sendmsg =      unix_dgram_sendmsg,
 719         .recvmsg =      unix_dgram_recvmsg,
 720         .mmap =         sock_no_mmap,
 721         .sendpage =     sock_no_sendpage,
 722         .set_peek_off = unix_set_peek_off,
 723 };
 724
 725 static const struct proto_ops unix_seqpacket_ops = {
 726         .family =       PF_UNIX,
 727         .owner =        THIS_MODULE,
 728         .release =      unix_release,
 729         .bind =         unix_bind,
 730         .connect =      unix_stream_connect,
 731         .socketpair =   unix_socketpair,
 732         .accept =       unix_accept,
 733         .getname =      unix_getname,
 734         .poll =         unix_dgram_poll,
 735         .ioctl =        unix_ioctl,
 736         .listen =       unix_listen,
 737         .shutdown =     unix_shutdown,
 738         .setsockopt =   sock_no_setsockopt,
 739         .getsockopt =   sock_no_getsockopt,
 740         .sendmsg =      unix_seqpacket_sendmsg,
 741         .recvmsg =      unix_seqpacket_recvmsg,
 742         .mmap =         sock_no_mmap,
 743         .sendpage =     sock_no_sendpage,
 744         .set_peek_off = unix_set_peek_off,
 745 };
 746
 747 static struct proto unix_proto = {
 748         .name                   = "UNIX",
 749         .owner                  = THIS_MODULE,
 750         .obj_size               = sizeof(struct unix_sock),
 751 };
 752
     /*
      * Allocate and initialise one AF_UNIX sock.
      *
      * The global unix_nr_socks counter is incremented up front and the
      * request is refused once the counter exceeds 2 * get_max_files(); the
      * increment is undone at "out" whenever no sock is returned.  On success
      * the sock is inserted with unix_insert_socket() into the list returned
      * by unix_sockets_unbound(), and sock_prot_inuse_add() is called with +1.
      *
      * @sock may be NULL: unix_stream_connect() passes NULL for the sock it
      * creates on behalf of the listening side.
      *
      * Returns the new sock, or NULL on failure (the cause is not reported).
      */
 753 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 754 {
 755         struct sock *sk = NULL;
 756         struct unix_sock *u;
 757
             /* Count first, check after; sk is still NULL so "out" undoes it. */
 758         atomic_long_inc(&unix_nr_socks);
 759         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 760                 goto out;
 761
 762         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 763         if (!sk)
 764                 goto out;
 765
 766         sock_init_data(sock, sk);
 767
 768         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 769         sk->sk_write_space      = unix_write_space;
 770         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 771         sk->sk_destruct         = unix_sock_destructor;
 772         u         = unix_sk(sk);
 773         u->path.dentry = NULL;
 774         u->path.mnt = NULL;
 775         spin_lock_init(&u->lock);
 776         atomic_long_set(&u->inflight, 0);
 777         INIT_LIST_HEAD(&u->link);
 778         mutex_init(&u->iolock); /* single task reading lock */
 779         mutex_init(&u->bindlock); /* single task binding lock */
 780         init_waitqueue_head(&u->peer_wait);
 781         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 782         unix_insert_socket(unix_sockets_unbound(sk), sk);
     /* Shared exit: reached on both success (sk != NULL) and failure. */
 783 out:
 784         if (sk == NULL)
 785                 atomic_long_dec(&unix_nr_socks);
 786         else {
 787                 local_bh_disable();
 788                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 789                 local_bh_enable();
 790         }
 791         return sk;
 792 }
 793
     /*
      * Create an AF_UNIX socket for @sock.
      *
      * @protocol must be 0 or PF_UNIX.  The operations table is chosen from
      * sock->type; SOCK_RAW is rewritten to SOCK_DGRAM before falling through
      * to the datagram case.
      *
      * Returns 0 on success, -EPROTONOSUPPORT for a bad protocol,
      * -ESOCKTNOSUPPORT for an unhandled socket type, or -ENOMEM when
      * unix_create1() returns NULL (whatever the reason for that was).
      */
 794 static int unix_create(struct net *net, struct socket *sock, int protocol,
 795                        int kern)
 796 {
 797         if (protocol && protocol != PF_UNIX)
 798                 return -EPROTONOSUPPORT;
 799
 800         sock->state = SS_UNCONNECTED;
 801
 802         switch (sock->type) {
 803         case SOCK_STREAM:
 804                 sock->ops = &unix_stream_ops;
 805                 break;
 806                 /*
 807                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 808                  *      nothing uses it.
 809                  */
 810         case SOCK_RAW:
 811                 sock->type = SOCK_DGRAM;
 812                 /* fall through */
 813         case SOCK_DGRAM:
 814                 sock->ops = &unix_dgram_ops;
 815                 break;
 816         case SOCK_SEQPACKET:
 817                 sock->ops = &unix_seqpacket_ops;
 818                 break;
 819         default:
 820                 return -ESOCKTNOSUPPORT;
 821         }
 822
 823         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 824 }
 825
     /*
      * Release the sock attached to @sock, if any.
      *
      * A socket with no sock attached is a no-op.  Otherwise the sock is
      * handed to unix_release_sock(sk, 0) and sock->sk is cleared.
      * Always returns 0.
      */
 826 static int unix_release(struct socket *sock)
 827 {
 828         struct sock *sk = sock->sk;
 829
 830         if (!sk)
 831                 return 0;
 832
 833         unix_release_sock(sk, 0);
 834         sock->sk = NULL;
 835
 836         return 0;
 837 }
 838
     /*
      * Bind @sock to an automatically chosen name.
      *
      * Runs under u->bindlock.  If the socket already has an address this is
      * a successful no-op.  Otherwise a name is built whose sun_path starts
      * with a NUL byte (left by kzalloc) followed by five hex digits taken
      * from the static counter "ordernum"; candidates are tried until one is
      * not found by __unix_find_socket_byname().
      *
      * Returns 0 on success (or if already bound), -ENOMEM if the address
      * cannot be allocated, -ENOSPC if every candidate was in use, or the
      * error from mutex_lock_interruptible().
      */
 839 static int unix_autobind(struct socket *sock)
 840 {
 841         struct sock *sk = sock->sk;
 842         struct net *net = sock_net(sk);
 843         struct unix_sock *u = unix_sk(sk);
 844         static u32 ordernum = 1;
 845         struct unix_address *addr;
 846         int err;
 847         unsigned int retries = 0;
 848
 849         err = mutex_lock_interruptible(&u->bindlock);
 850         if (err)
 851                 return err;
 852
 853         err = 0;
 854         if (u->addr)
 855                 goto out;
 856
 857         err = -ENOMEM;
             /* Room for the address header, sun_family and 16 bytes of name. */
 858         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 859         if (!addr)
 860                 goto out;
 861
 862         addr->name->sun_family = AF_UNIX;
 863         refcount_set(&addr->refcnt, 1);
 864
 865 retry:
             /* len = hex digits + leading NUL of sun_path + sun_family. */
 866         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 867         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 868
 869         spin_lock(&unix_table_lock);
             /* Advance the counter under the table lock; it wraps at 20 bits. */
 870         ordernum = (ordernum+1)&0xFFFFF;
 871
 872         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 873                                       addr->hash)) {
 874                 spin_unlock(&unix_table_lock);
 875                 /*
 876                  * __unix_find_socket_byname() may take long time if many names
 877                  * are already in use.
 878                  */
 879                 cond_resched();
 880                 /* Give up if all names seems to be in use. */
 881                 if (retries++ == 0xFFFFF) {
 882                         err = -ENOSPC;
 883                         kfree(addr);
 884                         goto out;
 885                 }
 886                 goto retry;
 887         }
             /* Name is free: fold the socket type into the hash and rehash sk. */
 888         addr->hash ^= sk->sk_type;
 889
 890         __unix_remove_socket(sk);
 891         u->addr = addr;
 892         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 893         spin_unlock(&unix_table_lock);
 894         err = 0;
 895
 896 out:    mutex_unlock(&u->bindlock);
 897         return err;
 898 }
 899
     /*
      * Look up the sock that @sunname refers to.
      *
      * If sun_path[0] is non-zero the name is resolved as a filesystem path:
      * the caller needs MAY_WRITE permission on the inode, the inode must be
      * a socket, and the sock is found with unix_find_socket_byinode().
      * Otherwise the sock is found with unix_find_socket_byname().
      *
      * Returns the sock on success; callers in this file drop it with
      * sock_put() when done.  On failure returns NULL and stores a negative
      * errno in *@error (*@error is not written on success).
      */
 900 static struct sock *unix_find_other(struct net *net,
 901                                     struct sockaddr_un *sunname, int len,
 902                                     int type, unsigned int hash, int *error)
 903 {
 904         struct sock *u;
 905         struct path path;
 906         int err = 0;
 907
 908         if (sunname->sun_path[0]) {
 909                 struct inode *inode;
 910                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 911                 if (err)
 912                         goto fail;
 913                 inode = d_backing_inode(path.dentry);
 914                 err = inode_permission(inode, MAY_WRITE);
 915                 if (err)
 916                         goto put_fail;
 917
 918                 err = -ECONNREFUSED;
 919                 if (!S_ISSOCK(inode->i_mode))
 920                         goto put_fail;
 921                 u = unix_find_socket_byinode(inode);
 922                 if (!u)
 923                         goto put_fail;
 924
                     /* Only update atime when the lookup is going to succeed. */
 925                 if (u->sk_type == type)
 926                         touch_atime(&path);
 927
 928                 path_put(&path);
 929
                     /* Path reference is already dropped, so use "fail" here. */
 930                 err = -EPROTOTYPE;
 931                 if (u->sk_type != type) {
 932                         sock_put(u);
 933                         goto fail;
 934                 }
 935         } else {
 936                 err = -ECONNREFUSED;
 937                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 938                 if (u) {
 939                         struct dentry *dentry;
 940                         dentry = unix_sk(u)->path.dentry;
 941                         if (dentry)
 942                                 touch_atime(&unix_sk(u)->path);
 943                 } else
 944                         goto fail;
 945         }
 946         return u;
 947
 948 put_fail:
 949         path_put(&path);
 950 fail:
 951         *error = err;
 952         return NULL;
 953 }
 954
     /*
      * Create the filesystem node for a path-bound socket.
      *
      * @sun_path is resolved relative to AT_FDCWD with kern_path_create();
      * the node is created with vfs_mknod() after security_path_mknod()
      * allows it.  On success *@res receives its own references to the
      * mount and the new dentry (mntget()/dget()); *@res is left untouched
      * on failure.
      *
      * Returns 0 on success or a negative errno.
      */
 955 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 956 {
 957         struct dentry *dentry;
 958         struct path path;
 959         int err = 0;
 960         /*
 961          * Get the parent directory, calculate the hash for last
 962          * component.
 963          */
 964         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 965         err = PTR_ERR(dentry);
 966         if (IS_ERR(dentry))
 967                 return err;
 968
 969         /*
 970          * All right, let's create it.
 971          */
 972         err = security_path_mknod(&path, dentry, mode, 0);
 973         if (!err) {
 974                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 975                 if (!err) {
 976                         res->mnt = mntget(path.mnt);
 977                         res->dentry = dget(dentry);
 978                 }
 979         }
             /* Paired with kern_path_create(); runs on success and failure. */
 980         done_path_create(&path, dentry);
 981         return err;
 982 }
 983
     /*
      * Bind @sock to the address in @uaddr.
      *
      * An address consisting of only sun_family (addr_len == sizeof(short))
      * requests an automatic name via unix_autobind().  A name whose
      * sun_path[0] is non-zero is bound to a filesystem node created by
      * unix_mknod(); any other name must not already be present in the name
      * table.
      *
      * Returns 0 on success, -EINVAL for a short/non-AF_UNIX address or an
      * already bound socket, -EADDRINUSE if the name or node already exists,
      * -ENOMEM on allocation failure, or an error from unix_mkname(),
      * unix_mknod() or mutex_lock_interruptible().
      *
      * NOTE(review): the filesystem node is created before u->bindlock is
      * taken.  If a later step fails (interrupted lock, socket already
      * bound, allocation failure) the only cleanup visible here is
      * path_put(); nothing in this function removes the node again.
      */
 984 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 985 {
 986         struct sock *sk = sock->sk;
 987         struct net *net = sock_net(sk);
 988         struct unix_sock *u = unix_sk(sk);
 989         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 990         char *sun_path = sunaddr->sun_path;
 991         int err;
 992         unsigned int hash;
 993         struct unix_address *addr;
 994         struct hlist_head *list;
 995         struct path path = { };
 996
 997         err = -EINVAL;
 998         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
 999             sunaddr->sun_family != AF_UNIX)
1000                 goto out;
1001
1002         if (addr_len == sizeof(short)) {
1003                 err = unix_autobind(sock);
1004                 goto out;
1005         }
1006
             /* On success unix_mkname() returns the length to use from here on. */
1007         err = unix_mkname(sunaddr, addr_len, &hash);
1008         if (err < 0)
1009                 goto out;
1010         addr_len = err;
1011
1012         if (sun_path[0]) {
1013                 umode_t mode = S_IFSOCK |
1014                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1015                 err = unix_mknod(sun_path, mode, &path);
1016                 if (err) {
1017                         if (err == -EEXIST)
1018                                 err = -EADDRINUSE;
1019                         goto out;
1020                 }
1021         }
1022
1023         err = mutex_lock_interruptible(&u->bindlock);
1024         if (err)
1025                 goto out_put;
1026
1027         err = -EINVAL;
1028         if (u->addr)
1029                 goto out_up;
1030
1031         err = -ENOMEM;
1032         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1033         if (!addr)
1034                 goto out_up;
1035
1036         memcpy(addr->name, sunaddr, addr_len);
1037         addr->len = addr_len;
1038         addr->hash = hash ^ sk->sk_type;
1039         refcount_set(&addr->refcnt, 1);
1040
1041         if (sun_path[0]) {
                     /*
                      * Path-bound: addr->hash is set to UNIX_HASH_SIZE and the
                      * bucket is chosen from the inode number instead.
                      */
1042                 addr->hash = UNIX_HASH_SIZE;
1043                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1044                 spin_lock(&unix_table_lock);
1045                 u->path = path;
1046                 list = &unix_socket_table[hash];
1047         } else {
1048                 spin_lock(&unix_table_lock);
1049                 err = -EADDRINUSE;
1050                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1051                                               sk->sk_type, hash)) {
1052                         unix_release_addr(addr);
1053                         goto out_unlock;
1054                 }
1055
1056                 list = &unix_socket_table[addr->hash];
1057         }
1058
             /* Commit: move sk from its current list to the chosen bucket. */
1059         err = 0;
1060         __unix_remove_socket(sk);
1061         u->addr = addr;
1062         __unix_insert_socket(list, sk);
1063
1064 out_unlock:
1065         spin_unlock(&unix_table_lock);
1066 out_up:
1067         mutex_unlock(&u->bindlock);
1068 out_put:
             /*
              * path is zero-initialised, so this is also reached with an empty
              * path when no node was created -- presumably path_put() copes
              * with that; TODO confirm.
              */
1069         if (err)
1070                 path_put(&path);
1071 out:
1072         return err;
1073 }
1074
     /*
      * Take the state locks of @sk1 and @sk2.
      *
      * If @sk2 is NULL or the same sock as @sk1, only @sk1 is locked.
      * Otherwise the sock with the lower address is locked first and the
      * other one is taken with unix_state_lock_nested(), so two callers
      * passing the same pair in opposite order acquire them in the same
      * order.
      */
1075 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1076 {
1077         if (unlikely(sk1 == sk2) || !sk2) {
1078                 unix_state_lock(sk1);
1079                 return;
1080         }
1081         if (sk1 < sk2) {
1082                 unix_state_lock(sk1);
1083                 unix_state_lock_nested(sk2);
1084         } else {
1085                 unix_state_lock(sk2);
1086                 unix_state_lock_nested(sk1);
1087         }
1088 }
1089
     /*
      * Release what unix_state_double_lock() took for the same arguments:
      * only @sk1 when @sk2 is NULL or equal to @sk1, otherwise both.
      */
1090 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1091 {
1092         if (unlikely(sk1 == sk2) || !sk2) {
1093                 unix_state_unlock(sk1);
1094                 return;
1095         }
1096         unix_state_unlock(sk1);
1097         unix_state_unlock(sk2);
1098 }
1099
     /*
      * Set, replace or clear the peer of @sock.
      *
      * With sa_family == AF_UNSPEC the current peer (if any) is dropped.
      * Otherwise the target is looked up with unix_find_other(), checked
      * with unix_may_send() and security_unix_may_send(), and installed as
      * the peer.  If SOCK_PASSCRED is set and the socket is unbound it is
      * autobound first.
      *
      * The reference returned by unix_find_other() is kept as the peer
      * reference on success; the reference on a replaced peer is dropped.
      *
      * Returns 0 on success, -EINVAL for a too-short address, -EPERM if
      * unix_may_send() refuses, or an error from unix_mkname(),
      * unix_autobind(), unix_find_other() or the security hook.
      */
1100 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1101                               int alen, int flags)
1102 {
1103         struct sock *sk = sock->sk;
1104         struct net *net = sock_net(sk);
1105         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1106         struct sock *other;
1107         unsigned int hash;
1108         int err;
1109
1110         err = -EINVAL;
1111         if (alen < offsetofend(struct sockaddr, sa_family))
1112                 goto out;
1113
1114         if (addr->sa_family != AF_UNSPEC) {
1115                 err = unix_mkname(sunaddr, alen, &hash);
1116                 if (err < 0)
1117                         goto out;
1118                 alen = err;
1119
1120                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1121                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1122                         goto out;
1123
1124 restart:
1125                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1126                 if (!other)
1127                         goto out;
1128
1129                 unix_state_double_lock(sk, other);
1130
1131                 /* Apparently VFS overslept socket death. Retry. */
1132                 if (sock_flag(other, SOCK_DEAD)) {
1133                         unix_state_double_unlock(sk, other);
1134                         sock_put(other);
1135                         goto restart;
1136                 }
1137
1138                 err = -EPERM;
1139                 if (!unix_may_send(sk, other))
1140                         goto out_unlock;
1141
1142                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1143                 if (err)
1144                         goto out_unlock;
1145
1146         } else {
1147                 /*
1148                  *      1003.1g breaking connected state with AF_UNSPEC
1149                  */
                     /* With other == NULL only sk's own state lock is taken. */
1150                 other = NULL;
1151                 unix_state_double_lock(sk, other);
1152         }
1153
1154         /*
1155          * If it was connected, reconnect.
1156          */
1157         if (unix_peer(sk)) {
1158                 struct sock *old_peer = unix_peer(sk);
1159                 unix_peer(sk) = other;
1160                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1161
1162                 unix_state_double_unlock(sk, other);
1163
                     /* Reconnecting to the same peer skips the disconnect step. */
1164                 if (other != old_peer)
1165                         unix_dgram_disconnected(sk, old_peer);
1166                 sock_put(old_peer);
1167         } else {
1168                 unix_peer(sk) = other;
1169                 unix_state_double_unlock(sk, other);
1170         }
1171         return 0;
1172
1173 out_unlock:
1174         unix_state_double_unlock(sk, other);
1175         sock_put(other);
1176 out:
1177         return err;
1178 }
1179
     /*
      * Sleep on @other's peer_wait queue for at most @timeo.
      *
      * Entered with @other's state lock held (the callers in this file take
      * it first); the lock is released here before sleeping and is NOT
      * re-taken.  The task only sleeps if @other is not dead, has not shut
      * down receiving, and its receive queue is still full.
      *
      * Returns the value from schedule_timeout(), or @timeo unchanged when
      * no sleep was needed.
      */
1180 static long unix_wait_for_peer(struct sock *other, long timeo)
1181 {
1182         struct unix_sock *u = unix_sk(other);
1183         int sched;
1184         DEFINE_WAIT(wait);
1185
             /* Queue ourselves before testing the condition below. */
1186         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1187
1188         sched = !sock_flag(other, SOCK_DEAD) &&
1189                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1190                 unix_recvq_full(other);
1191
1192         unix_state_unlock(other);
1193
1194         if (sched)
1195                 timeo = schedule_timeout(timeo);
1196
1197         finish_wait(&u->peer_wait, &wait);
1198         return timeo;
1199 }
1200
     /*
      * Connect @sock to the listening socket named by @uaddr.
      *
      * A new sock (newsk) and a one-byte skb are allocated before any state
      * lock is taken.  After the listener ("other") has been found and
      * validated, newsk is wired up as the peer of sk, and the skb is queued
      * on the listener's receive queue; unix_accept() later dequeues it and
      * uses skb->sk as the accepted sock.
      *
      * Lock order: other's state lock first, then sk's state lock with
      * unix_state_lock_nested().
      *
      * Returns 0 on success.  Errors: whatever unix_mkname(),
      * unix_autobind(), unix_find_other() or the security hook return;
      * -ENOMEM if newsk or the skb cannot be allocated; -ECONNREFUSED if the
      * target is not listening or has shut down receiving; -EAGAIN if its
      * queue is full and the send timeout is zero; sock_intr_errno() if a
      * signal arrives while waiting; -EISCONN if sk is already established;
      * -EINVAL for any other sk state.
      */
1201 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1202                                int addr_len, int flags)
1203 {
1204         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1205         struct sock *sk = sock->sk;
1206         struct net *net = sock_net(sk);
1207         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1208         struct sock *newsk = NULL;
1209         struct sock *other = NULL;
1210         struct sk_buff *skb = NULL;
1211         unsigned int hash;
1212         int st;
1213         int err;
1214         long timeo;
1215
1216         err = unix_mkname(sunaddr, addr_len, &hash);
1217         if (err < 0)
1218                 goto out;
1219         addr_len = err;
1220
1221         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1222             (err = unix_autobind(sock)) != 0)
1223                 goto out;
1224
1225         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1226
1227         /* First of all allocate resources.
1228            If we will make it after state is locked,
1229            we will have to recheck all again in any case.
1230          */
1231
1232         err = -ENOMEM;
1233
1234         /* create new sock for complete connection */
1235         newsk = unix_create1(sock_net(sk), NULL, 0);
1236         if (newsk == NULL)
1237                 goto out;
1238
1239         /* Allocate skb for sending to listening sock */
1240         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1241         if (skb == NULL)
1242                 goto out;
1243
1244 restart:
1245         /*  Find listening sock. */
1246         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1247         if (!other)
1248                 goto out;
1249
1250         /* Latch state of peer */
1251         unix_state_lock(other);
1252
1253         /* Apparently VFS overslept socket death. Retry. */
1254         if (sock_flag(other, SOCK_DEAD)) {
1255                 unix_state_unlock(other);
1256                 sock_put(other);
1257                 goto restart;
1258         }
1259
1260         err = -ECONNREFUSED;
1261         if (other->sk_state != TCP_LISTEN)
1262                 goto out_unlock;
1263         if (other->sk_shutdown & RCV_SHUTDOWN)
1264                 goto out_unlock;
1265
1266         if (unix_recvq_full(other)) {
1267                 err = -EAGAIN;
1268                 if (!timeo)
1269                         goto out_unlock;
1270
                     /*
                      * unix_wait_for_peer() drops other's state lock, so the
                      * exits below must not go through out_unlock.
                      */
1271                 timeo = unix_wait_for_peer(other, timeo);
1272
1273                 err = sock_intr_errno(timeo);
1274                 if (signal_pending(current))
1275                         goto out;
1276                 sock_put(other);
1277                 goto restart;
1278         }
1279
1280         /* Latch our state.
1281
1282            It is tricky place. We need to grab our state lock and cannot
1283            drop lock on peer. It is dangerous because deadlock is
1284            possible. Connect to self case and simultaneous
1285            attempt to connect are eliminated by checking socket
1286            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1287            check this before attempt to grab lock.
1288
1289            Well, and we have to recheck the state after socket locked.
1290          */
1291         st = sk->sk_state;
1292
1293         switch (st) {
1294         case TCP_CLOSE:
1295                 /* This is ok... continue with connect */
1296                 break;
1297         case TCP_ESTABLISHED:
1298                 /* Socket is already connected */
1299                 err = -EISCONN;
1300                 goto out_unlock;
1301         default:
1302                 err = -EINVAL;
1303                 goto out_unlock;
1304         }
1305
1306         unix_state_lock_nested(sk);
1307
             /* State changed between the unlocked read and the lock: retry. */
1308         if (sk->sk_state != st) {
1309                 unix_state_unlock(sk);
1310                 unix_state_unlock(other);
1311                 sock_put(other);
1312                 goto restart;
1313         }
1314
1315         err = security_unix_stream_connect(sk, other, newsk);
1316         if (err) {
1317                 unix_state_unlock(sk);
1318                 goto out_unlock;
1319         }
1320
1321         /* The way is open! Fastly set all the necessary fields... */
1322
             /* newsk holds a reference on sk as its peer. */
1323         sock_hold(sk);
1324         unix_peer(newsk)        = sk;
1325         newsk->sk_state         = TCP_ESTABLISHED;
1326         newsk->sk_type          = sk->sk_type;
1327         init_peercred(newsk);
1328         newu = unix_sk(newsk);
1329         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1330         otheru = unix_sk(other);
1331
1332         /* copy address information from listening to new sock*/
1333         if (otheru->addr) {
1334                 refcount_inc(&otheru->addr->refcnt);
1335                 newu->addr = otheru->addr;
1336         }
1337         if (otheru->path.dentry) {
1338                 path_get(&otheru->path);
1339                 newu->path = otheru->path;
1340         }
1341
1342         /* Set credentials */
1343         copy_peercred(sk, other);
1344
1345         sock->state     = SS_CONNECTED;
1346         sk->sk_state    = TCP_ESTABLISHED;
             /* sk holds a reference on newsk as its peer. */
1347         sock_hold(newsk);
1348
1349         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1350         unix_peer(sk)   = newsk;
1351
1352         unix_state_unlock(sk);
1353
1354         /* take ten and send info to listening sock */
1355         spin_lock(&other->sk_receive_queue.lock);
1356         __skb_queue_tail(&other->sk_receive_queue, skb);
1357         spin_unlock(&other->sk_receive_queue.lock);
1358         unix_state_unlock(other);
1359         other->sk_data_ready(other);
1360         sock_put(other);
1361         return 0;
1362
1363 out_unlock:
1364         if (other)
1365                 unix_state_unlock(other);
1366
1367 out:
             /* Common failure cleanup; skb, newsk and other may each be NULL. */
1368         kfree_skb(skb);
1369         if (newsk)
1370                 unix_release_sock(newsk, 0);
1371         if (other)
1372                 sock_put(other);
1373         return err;
1374 }
1375
     /*
      * Connect two freshly created sockets to each other.
      *
      * Each sock takes a reference on the other and records it as its peer,
      * and peer credentials are initialised on both.  Unless the type is
      * SOCK_DGRAM, both socks and both sockets are also marked
      * established/connected.  Always returns 0.
      *
      * Note: the local "skb" here is socket b's sock, not a socket buffer.
      */
1376 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1377 {
1378         struct sock *ska = socka->sk, *skb = sockb->sk;
1379
1380         /* Join our sockets back to back */
1381         sock_hold(ska);
1382         sock_hold(skb);
1383         unix_peer(ska) = skb;
1384         unix_peer(skb) = ska;
1385         init_peercred(ska);
1386         init_peercred(skb);
1387
1388         if (ska->sk_type != SOCK_DGRAM) {
1389                 ska->sk_state = TCP_ESTABLISHED;
1390                 skb->sk_state = TCP_ESTABLISHED;
1391                 socka->state  = SS_CONNECTED;
1392                 sockb->state  = SS_CONNECTED;
1393         }
1394         return 0;
1395 }
1396
     /*
      * Copy the SOCK_PASSCRED and SOCK_PASSSEC flag bits from @old to @new.
      * Bits are only ever set on @new, never cleared.  Used by unix_accept()
      * to carry the listener's settings over to the accepted socket.
      */
1397 static void unix_sock_inherit_flags(const struct socket *old,
1398                                     struct socket *new)
1399 {
1400         if (test_bit(SOCK_PASSCRED, &old->flags))
1401                 set_bit(SOCK_PASSCRED, &new->flags);
1402         if (test_bit(SOCK_PASSSEC, &old->flags))
1403                 set_bit(SOCK_PASSSEC, &new->flags);
1404 }
1405
     /*
      * Accept one pending connection on listening socket @sock.
      *
      * Dequeues one skb from the listener's receive queue (these are queued
      * by unix_stream_connect()), takes the sock it refers to (skb->sk) and
      * grafts that sock onto @newsock.
      *
      * Returns 0 on success, -EOPNOTSUPP if the socket type is neither
      * SOCK_STREAM nor SOCK_SEQPACKET, -EINVAL if the sock is not in
      * TCP_LISTEN or the receive returned no skb without an error, or the
      * error reported by skb_recv_datagram().
      */
1406 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1407                        bool kern)
1408 {
1409         struct sock *sk = sock->sk;
1410         struct sock *tsk;
1411         struct sk_buff *skb;
1412         int err;
1413
1414         err = -EOPNOTSUPP;
1415         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1416                 goto out;
1417
1418         err = -EINVAL;
1419         if (sk->sk_state != TCP_LISTEN)
1420                 goto out;
1421
1422         /* If socket state is TCP_LISTEN it cannot change (for now...),
1423          * so that no locks are necessary.
1424          */
1425
1426         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1427         if (!skb) {
1428                 /* This means receive shutdown. */
1429                 if (err == 0)
1430                         err = -EINVAL;
1431                 goto out;
1432         }
1433
1434         tsk = skb->sk;
1435         skb_free_datagram(sk, skb);
             /* A queue slot is free again: wake tasks in unix_wait_for_peer(). */
1436         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1437
1438         /* attach accepted sock to socket */
1439         unix_state_lock(tsk);
1440         newsock->state = SS_CONNECTED;
1441         unix_sock_inherit_flags(sock, newsock);
1442         sock_graft(tsk, newsock);
1443         unix_state_unlock(tsk);
1444         return 0;
1445
1446 out:
1447         return err;
1448 }
1449
1449
1450
     /*
      * Copy the local (@peer == 0) or peer (@peer != 0) address of @sock
      * into @uaddr.
      *
      * A reference is held on the sock being inspected for the duration of
      * the call, and its address is read under its state lock.  An unbound
      * sock yields just sun_family with sun_path[0] cleared.
      *
      * Returns the address length in bytes on success (sizeof(short) for an
      * unbound sock), or -ENOTCONN when the peer is requested but there is
      * none.  The size of the buffer behind @uaddr is not checked here.
      */
1451 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1452 {
1453         struct sock *sk = sock->sk;
1454         struct unix_sock *u;
1455         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1456         int err = 0;
1457
1458         if (peer) {
1459                 sk = unix_peer_get(sk);
1460
1461                 err = -ENOTCONN;
1462                 if (!sk)
1463                         goto out;
1464                 err = 0;
1465         } else {
                     /* Balance the sock_put() on the common exit path below. */
1466                 sock_hold(sk);
1467         }
1468
1469         u = unix_sk(sk);
1470         unix_state_lock(sk);
1471         if (!u->addr) {
1472                 sunaddr->sun_family = AF_UNIX;
1473                 sunaddr->sun_path[0] = 0;
1474                 err = sizeof(short);
1475         } else {
1476                 struct unix_address *addr = u->addr;
1477
1478                 err = addr->len;
1479                 memcpy(sunaddr, addr->name, addr->len);
1480         }
1481         unix_state_unlock(sk);
1482         sock_put(sk);
1483 out:
1484         return err;
1485 }
1486
     /*
      * Move the passed-file list from @skb to @scm and call
      * unix_notinflight() for every file in it.
      *
      * The caller must have checked that UNIXCB(skb).fp is non-NULL:
      * scm->fp is dereferenced unconditionally below.
      */
1487 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1488 {
1489         int i;
1490
1491         scm->fp = UNIXCB(skb).fp;
1492         UNIXCB(skb).fp = NULL;
1493
1494         for (i = scm->fp->count-1; i >= 0; i--)
1495                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1496 }
1497
     /*
      * skb destructor installed by unix_scm_to_skb().
      *
      * Builds a temporary scm_cookie from the pid and (if present) the file
      * list stored in the skb's control block, destroys it with
      * scm_destroy(), and then calls sock_wfree() on the skb.
      */
1498 static void unix_destruct_scm(struct sk_buff *skb)
1499 {
1500         struct scm_cookie scm;
1501         memset(&scm, 0, sizeof(scm));
1502         scm.pid  = UNIXCB(skb).pid;
1503         if (UNIXCB(skb).fp)
1504                 unix_detach_fds(&scm, skb);
1505
1506         /* Alas, it calls VFS */
1507         /* So fscking what? fput() had been SMP-safe since the last Summer */
1508         scm_destroy(&scm);
1509         sock_wfree(skb);
1510 }
1511
1512 /*
1513  * The "user->unix_inflight" variable is protected by the garbage
1514  * collection lock, and we just read it locklessly here. If you go
1515  * over the limit, there might be a tiny race in actually noticing
1516  * it across threads. Tough.
      *
      * Returns true when the current user's in-flight count exceeds @p's
      * RLIMIT_NOFILE and the caller has neither CAP_SYS_RESOURCE nor
      * CAP_SYS_ADMIN; false otherwise.
      *
      * Note the count is taken from current_user() while the limit is read
      * from @p; the only caller visible here passes "current".
1517  */
1518 static inline bool too_many_unix_fds(struct task_struct *p)
1519 {
1520         struct user_struct *user = current_user();
1521
1522         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1523                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1524         return false;
1525 }
1526
     /*
      * Attach the files carried in @scm to @skb.
      *
      * The file list is duplicated with scm_fp_dup() and the copy is stored
      * in the skb's control block; unix_inflight() is then called for every
      * file in scm->fp.
      *
      * Returns 0 on success, -ETOOMANYREFS if too_many_unix_fds() refuses,
      * or -ENOMEM if the list cannot be duplicated.  The caller must ensure
      * scm->fp is non-NULL.
      */
1527 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1528 {
1529         int i;
1530
1531         if (too_many_unix_fds(current))
1532                 return -ETOOMANYREFS;
1533
1534         /*
1535          * Need to duplicate file references for the sake of garbage
1536          * collection.  Otherwise a socket in the fps might become a
1537          * candidate for GC while the skb is not yet queued.
1538          */
1539         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1540         if (!UNIXCB(skb).fp)
1541                 return -ENOMEM;
1542
1543         for (i = scm->fp->count - 1; i >= 0; i--)
1544                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1545         return 0;
1546 }
1547
     /*
      * Copy the control information in @scm into @skb's control block.
      *
      * Stores a new pid reference, the uid/gid and the security data, and,
      * if @send_fds is true and @scm carries files, attaches them with
      * unix_attach_fds().  unix_destruct_scm() is installed as the skb
      * destructor in every case, including when attaching files failed.
      *
      * Returns 0 on success or the error from unix_attach_fds().
      */
1548 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1549 {
1550         int err = 0;
1551
1552         UNIXCB(skb).pid  = get_pid(scm->pid);
1553         UNIXCB(skb).uid = scm->creds.uid;
1554         UNIXCB(skb).gid = scm->creds.gid;
1555         UNIXCB(skb).fp = NULL;
1556         unix_get_secdata(scm, skb);
1557         if (scm->fp && send_fds)
1558                 err = unix_attach_fds(scm, skb);
1559
1560         skb->destructor = unix_destruct_scm;
1561         return err;
1562 }
1563
     /*
      * Decide whether credentials should accompany data sent from @sock to
      * @other: true if @sock has SOCK_PASSCRED set, if @other has no socket
      * attached, or if @other's socket has SOCK_PASSCRED set.
      */
1564 static bool unix_passcred_enabled(const struct socket *sock,
1565                                   const struct sock *other)
1566 {
1567         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1568                !other->sk_socket ||
1569                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1570 }
1571
1572 /*
1573  * Some apps rely on write() giving SCM_CREDENTIALS
1574  * We include credentials if source or destination socket
1575  * asserted SOCK_PASSCRED.
      *
      * Nothing is done if @skb already carries a pid.  Otherwise, when
      * unix_passcred_enabled() says so, the current task's thread-group pid
      * (with a new reference) and its uid/gid are stored in the skb's
      * control block.
1576  */
1577 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1578                             const struct sock *other)
1579 {
1580         if (UNIXCB(skb).pid)
1581                 return;
1582         if (unix_passcred_enabled(sock, other)) {
1583                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1584                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1585         }
1586 }
1587
     /*
      * Initialise @scm for a send that has no control message.
      *
      * scm_send() is called with an msghdr whose msg_controllen is 0; then,
      * if unix_passcred_enabled() says so, the current task's thread-group
      * pid (with a new reference) and uid/gid are stored in @scm.
      *
      * Returns 0 on success or the error from scm_send().
      */
1588 static int maybe_init_creds(struct scm_cookie *scm,
1589                             struct socket *socket,
1590                             const struct sock *other)
1591 {
1592         int err;
1593         struct msghdr msg = { .msg_controllen = 0 };
1594
1595         err = scm_send(socket, &msg, scm, false);
1596         if (err)
1597                 return err;
1598
1599         if (unix_passcred_enabled(socket, other)) {
1600                 scm->pid = get_pid(task_tgid(current));
1601                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1602         }
1603         return err;
1604 }
1605
     /*
      * Return true if the credentials stored in @skb's control block match
      * those in @scm: same pid pointer, equal uid and gid, and
      * unix_secdata_eq() reports equal security data.
      */
1606 static bool unix_skb_scm_eq(struct sk_buff *skb,
1607                             struct scm_cookie *scm)
1608 {
1609         const struct unix_skb_parms *u = &UNIXCB(skb);
1610
1611         return u->pid == scm->pid &&
1612                uid_eq(u->uid, scm->creds.uid) &&
1613                gid_eq(u->gid, scm->creds.gid) &&
1614                unix_secdata_eq(scm, skb);
1615 }
1616
1617 /*
1618  *      Send AF_UNIX data.
1619  */
1620
1621 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1622                               size_t len)
1623 {
1624         struct sock *sk = sock->sk;
1625         struct net *net = sock_net(sk);
1626         struct unix_sock *u = unix_sk(sk);
1627         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1628         struct sock *other = NULL;
1629         int namelen = 0; /* fake GCC */
1630         int err;
1631         unsigned int hash;
1632         struct sk_buff *skb;
1633         long timeo;
1634         struct scm_cookie scm;
1635         int data_len = 0;
1636         int sk_locked;
1637
1638         wait_for_unix_gc();
1639         err = scm_send(sock, msg, &scm, false);
1640         if (err < 0)
1641                 return err;
1642
1643         err = -EOPNOTSUPP;
1644         if (msg->msg_flags&MSG_OOB)
1645                 goto out;
1646
1647         if (msg->msg_namelen) {
1648                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1649                 if (err < 0)
1650                         goto out;
1651                 namelen = err;
1652         } else {
1653                 sunaddr = NULL;
1654                 err = -ENOTCONN;
1655                 other = unix_peer_get(sk);
1656                 if (!other)
1657                         goto out;
1658         }
1659
1660         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1661             && (err = unix_autobind(sock)) != 0)
1662                 goto out;
1663
1664         err = -EMSGSIZE;
1665         if (len > sk->sk_sndbuf - 32)
1666                 goto out;
1667
1668         if (len > SKB_MAX_ALLOC) {
1669                 data_len = min_t(size_t,
1670                                  len - SKB_MAX_ALLOC,
1671                                  MAX_SKB_FRAGS * PAGE_SIZE);
1672                 data_len = PAGE_ALIGN(data_len);
1673
1674                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1675         }
1676
1677         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1678                                    msg->msg_flags & MSG_DONTWAIT, &err,
1679                                    PAGE_ALLOC_COSTLY_ORDER);
1680         if (skb == NULL)
1681                 goto out;
1682
1683         err = unix_scm_to_skb(&scm, skb, true);
1684         if (err < 0)
1685                 goto out_free;
1686
1687         skb_put(skb, len - data_len);
1688         skb->data_len = data_len;
1689         skb->len = len;
1690         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1691         if (err)
1692                 goto out_free;
1693
1694         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1695
1696 restart:
1697         if (!other) {
1698                 err = -ECONNRESET;
1699                 if (sunaddr == NULL)
1700                         goto out_free;
1701
1702                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1703                                         hash, &err);
1704                 if (other == NULL)
1705                         goto out_free;
1706         }
1707
1708         if (sk_filter(other, skb) < 0) {
1709                 /* Toss the packet but do not return any error to the sender */
1710                 err = len;
1711                 goto out_free;
1712         }
1713
1714         sk_locked = 0;
1715         unix_state_lock(other);
1716 restart_locked:
1717         err = -EPERM;
1718         if (!unix_may_send(sk, other))
1719                 goto out_unlock;
1720
1721         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1722                 /*
1723                  *      Check with 1003.1g - what should
1724                  *      datagram error
1725                  */
1726                 unix_state_unlock(other);
1727                 sock_put(other);
1728
1729                 if (!sk_locked)
1730                         unix_state_lock(sk);
1731
1732                 err = 0;
1733                 if (unix_peer(sk) == other) {
1734                         unix_peer(sk) = NULL;
1735                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1736
1737                         unix_state_unlock(sk);
1738
1739                         unix_dgram_disconnected(sk, other);
1740                         sock_put(other);
1741                         err = -ECONNREFUSED;
1742                 } else {
1743                         unix_state_unlock(sk);
1744                 }
1745
1746                 other = NULL;
1747                 if (err)
1748                         goto out_free;
1749                 goto restart;
1750         }
1751
1752         err = -EPIPE;
1753         if (other->sk_shutdown & RCV_SHUTDOWN)
1754                 goto out_unlock;
1755
1756         if (sk->sk_type != SOCK_SEQPACKET) {
1757                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1758                 if (err)
1759                         goto out_unlock;
1760         }
1761
1762         /* other == sk && unix_peer(other) != sk if
1763          * - unix_peer(sk) == NULL, destination address bound to sk
1764          * - unix_peer(sk) == sk by time of get but disconnected before lock
1765          */
1766         if (other != sk &&
1767             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1768                 if (timeo) {
1769                         timeo = unix_wait_for_peer(other, timeo);
1770
1771                         err = sock_intr_errno(timeo);
1772                         if (signal_pending(current))
1773                                 goto out_free;
1774
1775                         goto restart;
1776                 }
1777
1778                 if (!sk_locked) {
1779                         unix_state_unlock(other);
1780                         unix_state_double_lock(sk, other);
1781                 }
1782
1783                 if (unix_peer(sk) != other ||
1784                     unix_dgram_peer_wake_me(sk, other)) {
1785                         err = -EAGAIN;
1786                         sk_locked = 1;
1787                         goto out_unlock;
1788                 }
1789
1790                 if (!sk_locked) {
1791                         sk_locked = 1;
1792                         goto restart_locked;
1793                 }
1794         }
1795
1796         if (unlikely(sk_locked))
1797                 unix_state_unlock(sk);
1798
1799         if (sock_flag(other, SOCK_RCVTSTAMP))
1800                 __net_timestamp(skb);
1801         maybe_add_creds(skb, sock, other);
1802         skb_queue_tail(&other->sk_receive_queue, skb);
1803         unix_state_unlock(other);
1804         other->sk_data_ready(other);
1805         sock_put(other);
1806         scm_destroy(&scm);
1807         return len;
1808
1809 out_unlock:
1810         if (sk_locked)
1811                 unix_state_unlock(sk);
1812         unix_state_unlock(other);
1813 out_free:
1814         kfree_skb(skb);
1815 out:
1816         if (other)
1817                 sock_put(other);
1818         scm_destroy(&scm);
1819         return err;
1820 }
1821
1822 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1823  * bytes, and a minimum of a full page.
1824  */
1825 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1826
1827 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1828                                size_t len)
1829 {
1830         struct sock *sk = sock->sk;
1831         struct sock *other = NULL;
1832         int err, size;
1833         struct sk_buff *skb;
1834         int sent = 0;
1835         struct scm_cookie scm;
1836         bool fds_sent = false;
1837         int data_len;
1838
1839         wait_for_unix_gc();
1840         err = scm_send(sock, msg, &scm, false);
1841         if (err < 0)
1842                 return err;
1843
1844         err = -EOPNOTSUPP;
1845         if (msg->msg_flags&MSG_OOB)
1846                 goto out_err;
1847
1848         if (msg->msg_namelen) {
1849                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1850                 goto out_err;
1851         } else {
1852                 err = -ENOTCONN;
1853                 other = unix_peer(sk);
1854                 if (!other)
1855                         goto out_err;
1856         }
1857
1858         if (sk->sk_shutdown & SEND_SHUTDOWN)
1859                 goto pipe_err;
1860
1861         while (sent < len) {
1862                 size = len - sent;
1863
1864                 /* Keep two messages in the pipe so it schedules better */
1865                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1866
1867                 /* allow fallback to order-0 allocations */
1868                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1869
1870                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1871
1872                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1873
1874                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1875                                            msg->msg_flags & MSG_DONTWAIT, &err,
1876                                            get_order(UNIX_SKB_FRAGS_SZ));
1877                 if (!skb)
1878                         goto out_err;
1879
1880                 /* Only send the fds in the first buffer */
1881                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1882                 if (err < 0) {
1883                         kfree_skb(skb);
1884                         goto out_err;
1885                 }
1886                 fds_sent = true;
1887
1888                 skb_put(skb, size - data_len);
1889                 skb->data_len = data_len;
1890                 skb->len = size;
1891                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1892                 if (err) {
1893                         kfree_skb(skb);
1894                         goto out_err;
1895                 }
1896
1897                 unix_state_lock(other);
1898
1899                 if (sock_flag(other, SOCK_DEAD) ||
1900                     (other->sk_shutdown & RCV_SHUTDOWN))
1901                         goto pipe_err_free;
1902
1903                 maybe_add_creds(skb, sock, other);
1904                 skb_queue_tail(&other->sk_receive_queue, skb);
1905                 unix_state_unlock(other);
1906                 other->sk_data_ready(other);
1907                 sent += size;
1908         }
1909
1910         scm_destroy(&scm);
1911
1912         return sent;
1913
1914 pipe_err_free:
1915         unix_state_unlock(other);
1916         kfree_skb(skb);
1917 pipe_err:
1918         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1919                 send_sig(SIGPIPE, current, 0);
1920         err = -EPIPE;
1921 out_err:
1922         scm_destroy(&scm);
1923         return sent ? : err;
1924 }
1925
/*
 * unix_stream_sendpage - queue part of a page on the peer of a stream socket.
 *
 * Appends @size bytes of @page starting at @offset as a page fragment to an
 * skb on the peer's receive queue.  The current tail skb is reused when it
 * exists and unix_skb_scm_eq() reports that it matches the scm set up by
 * maybe_init_creds(); otherwise a new skb is allocated (with zero header and
 * data length), filled and added to the tail of the queue.
 *
 * The peer's iolock mutex and its state lock are both held while the queue
 * is inspected and modified.  They are released around the skb allocation
 * (label alloc_skb), after which the shutdown/dead checks are repeated.
 *
 * Returns @size on success or a negative errno: -EOPNOTSUPP for MSG_OOB,
 * -ENOTCONN without an established peer, -EPIPE if this side is shut down
 * for sending or the peer is dead or shut down for receiving (SIGPIPE is
 * raised too unless MSG_NOSIGNAL is set), -EAGAIN or -ERESTARTSYS if taking
 * iolock was interrupted, or the error reported by sock_alloc_send_pskb(),
 * maybe_init_creds() or unix_scm_to_skb().
 */
1926 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1927                                     int offset, size_t size, int flags)
1928 {
1929         int err;
1930         bool send_sigpipe = false;
1931         bool init_scm = true;
1932         struct scm_cookie scm;
1933         struct sock *other, *sk = socket->sk;
1934         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1935
1936         if (flags & MSG_OOB)
1937                 return -EOPNOTSUPP;
1938
1939         other = unix_peer(sk);
1940         if (!other || sk->sk_state != TCP_ESTABLISHED)
1941                 return -ENOTCONN;
1942
             /* Never entered from the top: this body is reachable only through
              * the alloc_skb label.  It drops both locks, allocates a new skb
              * and then falls through to take the locks again below.
              */
1943         if (false) {
1944 alloc_skb:
1945                 unix_state_unlock(other);
1946                 mutex_unlock(&unix_sk(other)->iolock);
1947                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1948                                               &err, 0);
1949                 if (!newskb)
1950                         goto err;
1951         }
1952
1953         /* we must acquire iolock as we modify already present
1954          * skbs in the sk_receive_queue and mess with skb->len
1955          */
1956         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1957         if (err) {
1958                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1959                 goto err;
1960         }
1961
1962         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1963                 err = -EPIPE;
1964                 send_sigpipe = true;
1965                 goto err_unlock;
1966         }
1967
1968         unix_state_lock(other);
1969
1970         if (sock_flag(other, SOCK_DEAD) ||
1971             other->sk_shutdown & RCV_SHUTDOWN) {
1972                 err = -EPIPE;
1973                 send_sigpipe = true;
1974                 goto err_state_unlock;
1975         }
1976
             /* Set up scm only once, even if we come through here again after
              * alloc_skb.
              */
1977         if (init_scm) {
1978                 err = maybe_init_creds(&scm, socket, other);
1979                 if (err)
1980                         goto err_state_unlock;
1981                 init_scm = false;
1982         }
1983
             /* Pick the skb to append to.  'tail' remembers the queue tail that
              * was rejected before the jump to alloc_skb; if it is still the
              * tail, the freshly allocated skb is used instead.
              */
1984         skb = skb_peek_tail(&other->sk_receive_queue);
1985         if (tail && tail == skb) {
1986                 skb = newskb;
1987         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1988                 if (newskb) {
1989                         skb = newskb;
1990                 } else {
1991                         tail = skb;
1992                         goto alloc_skb;
1993                 }
1994         } else if (newskb) {
1995                 /* this is fast path, we don't necessarily need to
1996                  * call to kfree_skb even though with newskb == NULL
1997                  * this - does no harm
1998                  */
1999                 consume_skb(newskb);
2000                 newskb = NULL;
2001         }
2002
             /* Non-zero return means the page could not be attached to this
              * skb (presumably it has no room for another fragment - confirm
              * against skb_append_pagefrags()); retry with a new skb.
              */
2003         if (skb_append_pagefrags(skb, page, offset, size)) {
2004                 tail = skb;
2005                 goto alloc_skb;
2006         }
2007
             /* Account for the added bytes in the skb and in the sender's
              * sk_wmem_alloc.
              */
2008         skb->len += size;
2009         skb->data_len += size;
2010         skb->truesize += size;
2011         refcount_add(size, &sk->sk_wmem_alloc);
2012
             /* A newly allocated skb still has to be stamped with the scm data
              * and linked onto the peer's receive queue.
              */
2013         if (newskb) {
2014                 err = unix_scm_to_skb(&scm, skb, false);
2015                 if (err)
2016                         goto err_state_unlock;
2017                 spin_lock(&other->sk_receive_queue.lock);
2018                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2019                 spin_unlock(&other->sk_receive_queue.lock);
2020         }
2021
2022         unix_state_unlock(other);
2023         mutex_unlock(&unix_sk(other)->iolock);
2024
2025         other->sk_data_ready(other);
2026         scm_destroy(&scm);
2027         return size;
2028
2029 err_state_unlock:
2030         unix_state_unlock(other);
2031 err_unlock:
2032         mutex_unlock(&unix_sk(other)->iolock);
2033 err:
2034         kfree_skb(newskb);
2035         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2036                 send_sig(SIGPIPE, current, 0);
             /* scm is only valid once maybe_init_creds() has succeeded */
2037         if (!init_scm)
2038                 scm_destroy(&scm);
2039         return err;
2040 }
2041
2042 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2043                                   size_t len)
2044 {
2045         int err;
2046         struct sock *sk = sock->sk;
2047
2048         err = sock_error(sk);
2049         if (err)
2050                 return err;
2051
2052         if (sk->sk_state != TCP_ESTABLISHED)
2053                 return -ENOTCONN;
2054
2055         if (msg->msg_namelen)
2056                 msg->msg_namelen = 0;
2057
2058         return unix_dgram_sendmsg(sock, msg, len);
2059 }
2060
2061 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2062                                   size_t size, int flags)
2063 {
2064         struct sock *sk = sock->sk;
2065
2066         if (sk->sk_state != TCP_ESTABLISHED)
2067                 return -ENOTCONN;
2068
2069         return unix_dgram_recvmsg(sock, msg, size, flags);
2070 }
2071
2072 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2073 {
2074         struct unix_sock *u = unix_sk(sk);
2075
2076         if (u->addr) {
2077                 msg->msg_namelen = u->addr->len;
2078                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
2079         }
2080 }
2081
2082 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2083                               size_t size, int flags)
2084 {
2085         struct scm_cookie scm;
2086         struct sock *sk = sock->sk;
2087         struct unix_sock *u = unix_sk(sk);
2088         struct sk_buff *skb, *last;
2089         long timeo;
2090         int err;
2091         int peeked, skip;
2092
2093         err = -EOPNOTSUPP;
2094         if (flags&MSG_OOB)
2095                 goto out;
2096
2097         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2098
2099         do {
2100                 mutex_lock(&u->iolock);
2101
2102                 skip = sk_peek_offset(sk, flags);
2103                 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2104                                               &err, &last);
2105                 if (skb)
2106                         break;
2107
2108                 mutex_unlock(&u->iolock);
2109
2110                 if (err != -EAGAIN)
2111                         break;
2112         } while (timeo &&
2113                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2114
2115         if (!skb) { /* implies iolock unlocked */
2116                 unix_state_lock(sk);
2117                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2118                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2119                     (sk->sk_shutdown & RCV_SHUTDOWN))
2120                         err = 0;
2121                 unix_state_unlock(sk);
2122                 goto out;
2123         }
2124
2125         if (wq_has_sleeper(&u->peer_wait))
2126                 wake_up_interruptible_sync_poll(&u->peer_wait,
2127                                                 EPOLLOUT | EPOLLWRNORM |
2128                                                 EPOLLWRBAND);
2129
2130         if (msg->msg_name)
2131                 unix_copy_addr(msg, skb->sk);
2132
2133         if (size > skb->len - skip)
2134                 size = skb->len - skip;
2135         else if (size < skb->len - skip)
2136                 msg->msg_flags |= MSG_TRUNC;
2137
2138         err = skb_copy_datagram_msg(skb, skip, msg, size);
2139         if (err)
2140                 goto out_free;
2141
2142         if (sock_flag(sk, SOCK_RCVTSTAMP))
2143                 __sock_recv_timestamp(msg, sk, skb);
2144
2145         memset(&scm, 0, sizeof(scm));
2146
2147         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2148         unix_set_secdata(&scm, skb);
2149
2150         if (!(flags & MSG_PEEK)) {
2151                 if (UNIXCB(skb).fp)
2152                         unix_detach_fds(&scm, skb);
2153
2154                 sk_peek_offset_bwd(sk, skb->len);
2155         } else {
2156                 /* It is questionable: on PEEK we could:
2157                    - do not return fds - good, but too simple 8)
2158                    - return fds, and do not return them on read (old strategy,
2159                      apparently wrong)
2160                    - clone fds (I chose it for now, it is the most universal
2161                      solution)
2162
2163                    POSIX 1003.1g does not actually define this clearly
2164                    at all. POSIX 1003.1g doesn't define a lot of things
2165                    clearly however!
2166
2167                 */
2168
2169                 sk_peek_offset_fwd(sk, size);
2170
2171                 if (UNIXCB(skb).fp)
2172                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2173         }
2174         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2175
2176         scm_recv(sock, msg, &scm, flags);
2177
2178 out_free:
2179         skb_free_datagram(sk, skb);
2180         mutex_unlock(&u->iolock);
2181 out:
2182         return err;
2183 }
2184
2185 /*
2186  *      Sleep until more data has arrived. But check for races..
2187  */
2188 static long unix_stream_data_wait(struct sock *sk, long timeo,
2189                                   struct sk_buff *last, unsigned int last_len,
2190                                   bool freezable)
2191 {
2192         struct sk_buff *tail;
2193         DEFINE_WAIT(wait);
2194
2195         unix_state_lock(sk);
2196
2197         for (;;) {
2198                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2199
2200                 tail = skb_peek_tail(&sk->sk_receive_queue);
2201                 if (tail != last ||
2202                     (tail && tail->len != last_len) ||
2203                     sk->sk_err ||
2204                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2205                     signal_pending(current) ||
2206                     !timeo)
2207                         break;
2208
2209                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2210                 unix_state_unlock(sk);
2211                 if (freezable)
2212                         timeo = freezable_schedule_timeout(timeo);
2213                 else
2214                         timeo = schedule_timeout(timeo);
2215                 unix_state_lock(sk);
2216
2217                 if (sock_flag(sk, SOCK_DEAD))
2218                         break;
2219
2220                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2221         }
2222
2223         finish_wait(sk_sleep(sk), &wait);
2224         unix_state_unlock(sk);
2225         return timeo;
2226 }
2227
2228 static unsigned int unix_skb_len(const struct sk_buff *skb)
2229 {
2230         return skb->len - UNIXCB(skb).consumed;
2231 }
2232
2233 struct unix_stream_read_state {
2234         int (*recv_actor)(struct sk_buff *, int, int,
2235                           struct unix_stream_read_state *);
2236         struct socket *socket;
2237         struct msghdr *msg;
2238         struct pipe_inode_info *pipe;
2239         size_t size;
2240         int flags;
2241         unsigned int splice_flags;
2242 };
2243
/*
 * unix_stream_read_generic - receive loop shared by stream recvmsg and splice.
 *
 * Walks the socket's receive queue and passes skb data to state->recv_actor
 * until state->size bytes have been handled, the queue is empty and the
 * low-water target has been reached, or something ends the read early
 * (socket error, receive shutdown, timeout, signal, an skb whose scm data
 * differs from the first one, or an skb that carries file descriptors).
 *
 * @state:     callback, socket, destination (msg or pipe), size and flags.
 * @freezable: handed to unix_stream_data_wait() to choose the sleep variant.
 *
 * u->iolock is held during the walk and released only around the sleep in
 * unix_stream_data_wait().  The socket state lock is taken just for the
 * periods in which the queue is examined.
 *
 * Returns the byte count if it is non-zero, otherwise err (0 or a negative
 * errno).  -EINVAL is returned if the socket is not TCP_ESTABLISHED,
 * -EOPNOTSUPP for MSG_OOB, and -EFAULT if the actor fails before anything
 * was copied.
 */
2244 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2245                                     bool freezable)
2246 {
2247         struct scm_cookie scm;
2248         struct socket *sock = state->socket;
2249         struct sock *sk = sock->sk;
2250         struct unix_sock *u = unix_sk(sk);
2251         int copied = 0;
2252         int flags = state->flags;
2253         int noblock = flags & MSG_DONTWAIT;
2254         bool check_creds = false;
2255         int target;
2256         int err = 0;
2257         long timeo;
2258         int skip;
2259         size_t size = state->size;
2260         unsigned int last_len;
2261
2262         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2263                 err = -EINVAL;
2264                 goto out;
2265         }
2266
2267         if (unlikely(flags & MSG_OOB)) {
2268                 err = -EOPNOTSUPP;
2269                 goto out;
2270         }
2271
2272         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2273         timeo = sock_rcvtimeo(sk, noblock);
2274
2275         memset(&scm, 0, sizeof(scm));
2276
2277         /* Lock the socket to prevent queue disordering
2278          * while sleeps in memcpy_tomsg
2279          */
2280         mutex_lock(&u->iolock);
2281
             /* A negative peek offset is treated as zero */
2282         skip = max(sk_peek_offset(sk, flags), 0);
2283
2284         do {
2285                 int chunk;
2286                 bool drop_skb;
2287                 struct sk_buff *skb, *last;
2288
2289 redo:
2290                 unix_state_lock(sk);
2291                 if (sock_flag(sk, SOCK_DEAD)) {
2292                         err = -ECONNRESET;
2293                         goto unlock;
2294                 }
                     /* Snapshot of the queue head; it is handed to
                      * unix_stream_data_wait() if we have to sleep.
                      */
2295                 last = skb = skb_peek(&sk->sk_receive_queue);
2296                 last_len = last ? last->len : 0;
2297 again:
2298                 if (skb == NULL) {
                             /* Nothing (more) to read: return what we have
                              * if it is enough, else report the reason or
                              * wait for more.
                              */
2299                         if (copied >= target)
2300                                 goto unlock;
2301
2302                         /*
2303                          *      POSIX 1003.1g mandates this order.
2304                          */
2305
2306                         err = sock_error(sk);
2307                         if (err)
2308                                 goto unlock;
2309                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2310                                 goto unlock;
2311
2312                         unix_state_unlock(sk);
2313                         if (!timeo) {
2314                                 err = -EAGAIN;
2315                                 break;
2316                         }
2317
2318                         mutex_unlock(&u->iolock);
2319
2320                         timeo = unix_stream_data_wait(sk, timeo, last,
2321                                                       last_len, freezable);
2322
                             /* iolock is not held here, so leave through
                              * 'out' and release scm ourselves.
                              */
2323                         if (signal_pending(current)) {
2324                                 err = sock_intr_errno(timeo);
2325                                 scm_destroy(&scm);
2326                                 goto out;
2327                         }
2328
2329                         mutex_lock(&u->iolock);
2330                         goto redo;
2331 unlock:
2332                         unix_state_unlock(sk);
2333                         break;
2334                 }
2335
                     /* Step over skbs that lie completely before the peek
                      * offset.
                      */
2336                 while (skip >= unix_skb_len(skb)) {
2337                         skip -= unix_skb_len(skb);
2338                         last = skb;
2339                         last_len = skb->len;
2340                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2341                         if (!skb)
2342                                 goto again;
2343                 }
2344
2345                 unix_state_unlock(sk);
2346
2347                 if (check_creds) {
2348                         /* Never glue messages from different writers */
2349                         if (!unix_skb_scm_eq(skb, &scm))
2350                                 break;
2351                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2352                         /* Copy credentials */
2353                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2354                         unix_set_secdata(&scm, skb);
2355                         check_creds = true;
2356                 }
2357
                     /* NOTE(review): sunaddr is local to the block below, so
                      * setting it to NULL has no effect outside of it; it
                      * looks like the address is copied for every skb that
                      * gets this far - confirm whether that is intended.
                      */
2358                 /* Copy address just once */
2359                 if (state->msg && state->msg->msg_name) {
2360                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2361                                          state->msg->msg_name);
2362                         unix_copy_addr(state->msg, skb->sk);
2363                         sunaddr = NULL;
2364                 }
2365
                     /* Take an extra reference for the duration of the actor
                      * call; it is dropped again right after.
                      */
2366                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2367                 skb_get(skb);
2368                 chunk = state->recv_actor(skb, skip, chunk, state);
2369                 drop_skb = !unix_skb_len(skb);
2370                 /* skb is only safe to use if !drop_skb */
2371                 consume_skb(skb);
2372                 if (chunk < 0) {
2373                         if (copied == 0)
2374                                 copied = -EFAULT;
2375                         break;
2376                 }
2377                 copied += chunk;
2378                 size -= chunk;
2379
2380                 if (drop_skb) {
2381                         /* the skb was touched by a concurrent reader;
2382                          * we should not expect anything from this skb
2383                          * anymore and assume it invalid - we can be
2384                          * sure it was dropped from the socket queue
2385                          *
2386                          * let's report a short read
2387                          */
2388                         err = 0;
2389                         break;
2390                 }
2391
2392                 /* Mark read part of skb as used */
2393                 if (!(flags & MSG_PEEK)) {
2394                         UNIXCB(skb).consumed += chunk;
2395
2396                         sk_peek_offset_bwd(sk, chunk);
2397
2398                         if (UNIXCB(skb).fp)
2399                                 unix_detach_fds(&scm, skb);
2400
                             /* skb not fully read yet: the request is done */
2401                         if (unix_skb_len(skb))
2402                                 break;
2403
2404                         skb_unlink(skb, &sk->sk_receive_queue);
2405                         consume_skb(skb);
2406
                             /* Stop once scm holds file descriptors */
2407                         if (scm.fp)
2408                                 break;
2409                 } else {
2410                         /* It is questionable, see note in unix_dgram_recvmsg.
2411                          */
2412                         if (UNIXCB(skb).fp)
2413                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2414
2415                         sk_peek_offset_fwd(sk, chunk);
2416
2417                         if (UNIXCB(skb).fp)
2418                                 break;
2419
                             /* Peeking leaves the skb queued, so move on to
                              * the next one under the state lock.
                              */
2420                         skip = 0;
2421                         last = skb;
2422                         last_len = skb->len;
2423                         unix_state_lock(sk);
2424                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2425                         if (skb)
2426                                 goto again;
2427                         unix_state_unlock(sk);
2428                         break;
2429                 }
2430         } while (size);
2431
2432         mutex_unlock(&u->iolock);
             /* With a msghdr the collected scm data is delivered to it;
              * without one (splice) it is simply released.
              */
2433         if (state->msg)
2434                 scm_recv(sock, state->msg, &scm, flags);
2435         else
2436                 scm_destroy(&scm);
2437 out:
2438         return copied ? : err;
2439 }
2440
2441 static int unix_stream_read_actor(struct sk_buff *skb,
2442                                   int skip, int chunk,
2443                                   struct unix_stream_read_state *state)
2444 {
2445         int ret;
2446
2447         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2448                                     state->msg, chunk);
2449         return ret ?: chunk;
2450 }
2451
2452 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2453                                size_t size, int flags)
2454 {
2455         struct unix_stream_read_state state = {
2456                 .recv_actor = unix_stream_read_actor,
2457                 .socket = sock,
2458                 .msg = msg,
2459                 .size = size,
2460                 .flags = flags
2461         };
2462
2463         return unix_stream_read_generic(&state, true);
2464 }
2465
2466 static int unix_stream_splice_actor(struct sk_buff *skb,
2467                                     int skip, int chunk,
2468                                     struct unix_stream_read_state *state)
2469 {
2470         return skb_splice_bits(skb, state->socket->sk,
2471                                UNIXCB(skb).consumed + skip,
2472                                state->pipe, chunk, state->splice_flags);
2473 }
2474
2475 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2476                                        struct pipe_inode_info *pipe,
2477                                        size_t size, unsigned int flags)
2478 {
2479         struct unix_stream_read_state state = {
2480                 .recv_actor = unix_stream_splice_actor,
2481                 .socket = sock,
2482                 .pipe = pipe,
2483                 .size = size,
2484                 .splice_flags = flags,
2485         };
2486
2487         if (unlikely(*ppos))
2488                 return -ESPIPE;
2489
2490         if (sock->file->f_flags & O_NONBLOCK ||
2491             flags & SPLICE_F_NONBLOCK)
2492                 state.flags = MSG_DONTWAIT;
2493
2494         return unix_stream_read_generic(&state, false);
2495 }
2496
2497 static int unix_shutdown(struct socket *sock, int mode)
2498 {
2499         struct sock *sk = sock->sk;
2500         struct sock *other;
2501
2502         if (mode < SHUT_RD || mode > SHUT_RDWR)
2503                 return -EINVAL;
2504         /* This maps:
2505          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2506          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2507          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2508          */
2509         ++mode;
2510
2511         unix_state_lock(sk);
2512         sk->sk_shutdown |= mode;
2513         other = unix_peer(sk);
2514         if (other)
2515                 sock_hold(other);
2516         unix_state_unlock(sk);
2517         sk->sk_state_change(sk);
2518
2519         if (other &&
2520                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2521
2522                 int peer_mode = 0;
2523
2524                 if (mode&RCV_SHUTDOWN)
2525                         peer_mode |= SEND_SHUTDOWN;
2526                 if (mode&SEND_SHUTDOWN)
2527                         peer_mode |= RCV_SHUTDOWN;
2528                 unix_state_lock(other);
2529                 other->sk_shutdown |= peer_mode;
2530                 unix_state_unlock(other);
2531                 other->sk_state_change(other);
2532                 if (peer_mode == SHUTDOWN_MASK)
2533                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2534                 else if (peer_mode & RCV_SHUTDOWN)
2535                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2536         }
2537         if (other)
2538                 sock_put(other);
2539
2540         return 0;
2541 }
2542
2543 long unix_inq_len(struct sock *sk)
2544 {
2545         struct sk_buff *skb;
2546         long amount = 0;
2547
2548         if (sk->sk_state == TCP_LISTEN)
2549                 return -EINVAL;
2550
2551         spin_lock(&sk->sk_receive_queue.lock);
2552         if (sk->sk_type == SOCK_STREAM ||
2553             sk->sk_type == SOCK_SEQPACKET) {
2554                 skb_queue_walk(&sk->sk_receive_queue, skb)
2555                         amount += unix_skb_len(skb);
2556         } else {
2557                 skb = skb_peek(&sk->sk_receive_queue);
2558                 if (skb)
2559                         amount = skb->len;
2560         }
2561         spin_unlock(&sk->sk_receive_queue.lock);
2562
2563         return amount;
2564 }
2565 EXPORT_SYMBOL_GPL(unix_inq_len);
2566
/* Write memory currently accounted to @sk, as given by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2572
2573 static int unix_open_file(struct sock *sk)
2574 {
2575         struct path path;
2576         struct file *f;
2577         int fd;
2578
2579         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2580                 return -EPERM;
2581
2582         unix_state_lock(sk);
2583         path = unix_sk(sk)->path;
2584         if (!path.dentry) {
2585                 unix_state_unlock(sk);
2586                 return -ENOENT;
2587         }
2588
2589         path_get(&path);
2590         unix_state_unlock(sk);
2591
2592         fd = get_unused_fd_flags(O_CLOEXEC);
2593         if (fd < 0)
2594                 goto out;
2595
2596         f = dentry_open(&path, O_PATH, current_cred());
2597         if (IS_ERR(f)) {
2598                 put_unused_fd(fd);
2599                 fd = PTR_ERR(f);
2600                 goto out;
2601         }
2602
2603         fd_install(fd, f);
2604 out:
2605         path_put(&path);
2606
2607         return fd;
2608 }
2609
2610 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2611 {
2612         struct sock *sk = sock->sk;
2613         long amount = 0;
2614         int err;
2615
2616         switch (cmd) {
2617         case SIOCOUTQ:
2618                 amount = unix_outq_len(sk);
2619                 err = put_user(amount, (int __user *)arg);
2620                 break;
2621         case SIOCINQ:
2622                 amount = unix_inq_len(sk);
2623                 if (amount < 0)
2624                         err = amount;
2625                 else
2626                         err = put_user(amount, (int __user *)arg);
2627                 break;
2628         case SIOCUNIXFILE:
2629                 err = unix_open_file(sk);
2630                 break;
2631         default:
2632                 err = -ENOIOCTLCMD;
2633                 break;
2634         }
2635         return err;
2636 }
2637
2638 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2639 {
2640         struct sock *sk = sock->sk;
2641         __poll_t mask;
2642
2643         sock_poll_wait(file, wait);
2644         mask = 0;
2645
2646         /* exceptional events? */
2647         if (sk->sk_err)
2648                 mask |= EPOLLERR;
2649         if (sk->sk_shutdown == SHUTDOWN_MASK)
2650                 mask |= EPOLLHUP;
2651         if (sk->sk_shutdown & RCV_SHUTDOWN)
2652                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2653
2654         /* readable? */
2655         if (!skb_queue_empty(&sk->sk_receive_queue))
2656                 mask |= EPOLLIN | EPOLLRDNORM;
2657
2658         /* Connection-based need to check for termination and startup */
2659         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2660             sk->sk_state == TCP_CLOSE)
2661                 mask |= EPOLLHUP;
2662
2663         /*
2664          * we set writable also when the other side has shut down the
2665          * connection. This prevents stuck sockets.
2666          */
2667         if (unix_writable(sk))
2668                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2669
2670         return mask;
2671 }
2672
2673 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2674                                     poll_table *wait)
2675 {
2676         struct sock *sk = sock->sk, *other;
2677         unsigned int writable;
2678         __poll_t mask;
2679
2680         sock_poll_wait(file, wait);
2681         mask = 0;
2682
2683         /* exceptional events? */
2684         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2685                 mask |= EPOLLERR |
2686                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2687
2688         if (sk->sk_shutdown & RCV_SHUTDOWN)
2689                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2690         if (sk->sk_shutdown == SHUTDOWN_MASK)
2691                 mask |= EPOLLHUP;
2692
2693         /* readable? */
2694         if (!skb_queue_empty(&sk->sk_receive_queue))
2695                 mask |= EPOLLIN | EPOLLRDNORM;
2696
2697         /* Connection-based need to check for termination and startup */
2698         if (sk->sk_type == SOCK_SEQPACKET) {
2699                 if (sk->sk_state == TCP_CLOSE)
2700                         mask |= EPOLLHUP;
2701                 /* connection hasn't started yet? */
2702                 if (sk->sk_state == TCP_SYN_SENT)
2703                         return mask;
2704         }
2705
2706         /* No write status requested, avoid expensive OUT tests. */
2707         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2708                 return mask;
2709
2710         writable = unix_writable(sk);
2711         if (writable) {
2712                 unix_state_lock(sk);
2713
2714                 other = unix_peer(sk);
2715                 if (other && unix_peer(other) != sk &&
2716                     unix_recvq_full(other) &&
2717                     unix_dgram_peer_wake_me(sk, other))
2718                         writable = 0;
2719
2720                 unix_state_unlock(sk);
2721         }
2722
2723         if (writable)
2724                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2725         else
2726                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2727
2728         return mask;
2729 }
2730
2731 #ifdef CONFIG_PROC_FS
2732
/*
 * A seq_file position is split in two: the bits from BUCKET_SPACE upwards
 * hold a bucket index and the bits below BUCKET_SPACE hold an offset
 * within that bucket.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2738
2739 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2740 {
2741         unsigned long offset = get_offset(*pos);
2742         unsigned long bucket = get_bucket(*pos);
2743         struct sock *sk;
2744         unsigned long count = 0;
2745
2746         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2747                 if (sock_net(sk) != seq_file_net(seq))
2748                         continue;
2749                 if (++count == offset)
2750                         break;
2751         }
2752
2753         return sk;
2754 }
2755
2756 static struct sock *unix_next_socket(struct seq_file *seq,
2757                                      struct sock *sk,
2758                                      loff_t *pos)
2759 {
2760         unsigned long bucket;
2761
2762         while (sk > (struct sock *)SEQ_START_TOKEN) {
2763                 sk = sk_next(sk);
2764                 if (!sk)
2765                         goto next_bucket;
2766                 if (sock_net(sk) == seq_file_net(seq))
2767                         return sk;
2768         }
2769
2770         do {
2771                 sk = unix_from_bucket(seq, pos);
2772                 if (sk)
2773                         return sk;
2774
2775 next_bucket:
2776                 bucket = get_bucket(*pos) + 1;
2777                 *pos = set_bucket_offset(bucket, 1);
2778         } while (bucket < ARRAY_SIZE(unix_socket_table));
2779
2780         return NULL;
2781 }
2782
2783 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2784         __acquires(unix_table_lock)
2785 {
2786         spin_lock(&unix_table_lock);
2787
2788         if (!*pos)
2789                 return SEQ_START_TOKEN;
2790
2791         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2792                 return NULL;
2793
2794         return unix_next_socket(seq, NULL, pos);
2795 }
2796
2797 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2798 {
2799         ++*pos;
2800         return unix_next_socket(seq, v, pos);
2801 }
2802
2803 static void unix_seq_stop(struct seq_file *seq, void *v)
2804         __releases(unix_table_lock)
2805 {
2806         spin_unlock(&unix_table_lock);
2807 }
2808
2809 static int unix_seq_show(struct seq_file *seq, void *v)
2810 {
2811
2812         if (v == SEQ_START_TOKEN)
2813                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2814                          "Inode Path\n");
2815         else {
2816                 struct sock *s = v;
2817                 struct unix_sock *u = unix_sk(s);
2818                 unix_state_lock(s);
2819
2820                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2821                         s,
2822                         refcount_read(&s->sk_refcnt),
2823                         0,
2824                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2825                         s->sk_type,
2826                         s->sk_socket ?
2827                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2828                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2829                         sock_i_ino(s));
2830
2831                 if (u->addr) {
2832                         int i, len;
2833                         seq_putc(seq, ' ');
2834
2835                         i = 0;
2836                         len = u->addr->len - sizeof(short);
2837                         if (!UNIX_ABSTRACT(s))
2838                                 len--;
2839                         else {
2840                                 seq_putc(seq, '@');
2841                                 i++;
2842                         }
2843                         for ( ; i < len; i++)
2844                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2845                                          '@');
2846                 }
2847                 unix_state_unlock(s);
2848                 seq_putc(seq, '\n');
2849         }
2850
2851         return 0;
2852 }
2853
2854 static const struct seq_operations unix_seq_ops = {
2855         .start  = unix_seq_start,
2856         .next   = unix_seq_next,
2857         .stop   = unix_seq_stop,
2858         .show   = unix_seq_show,
2859 };
2860 #endif
2861
2862 static const struct net_proto_family unix_family_ops = {
2863         .family = PF_UNIX,
2864         .create = unix_create,
2865         .owner  = THIS_MODULE,
2866 };
2867
2868
2869 static int __net_init unix_net_init(struct net *net)
2870 {
2871         int error = -ENOMEM;
2872
2873         net->unx.sysctl_max_dgram_qlen = 10;
2874         if (unix_sysctl_register(net))
2875                 goto out;
2876
2877 #ifdef CONFIG_PROC_FS
2878         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2879                         sizeof(struct seq_net_private))) {
2880                 unix_sysctl_unregister(net);
2881                 goto out;
2882         }
2883 #endif
2884         error = 0;
2885 out:
2886         return error;
2887 }
2888
2889 static void __net_exit unix_net_exit(struct net *net)
2890 {
2891         unix_sysctl_unregister(net);
2892         remove_proc_entry("unix", net->proc_net);
2893 }
2894
2895 static struct pernet_operations unix_net_ops = {
2896         .init = unix_net_init,
2897         .exit = unix_net_exit,
2898 };
2899
2900 static int __init af_unix_init(void)
2901 {
2902         int rc = -1;
2903
2904         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2905
2906         rc = proto_register(&unix_proto, 1);
2907         if (rc != 0) {
2908                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2909                 goto out;
2910         }
2911
2912         sock_register(&unix_family_ops);
2913         register_pernet_subsys(&unix_net_ops);
2914 out:
2915         return rc;
2916 }
2917
2918 static void __exit af_unix_exit(void)
2919 {
2920         sock_unregister(PF_UNIX);
2921         proto_unregister(&unix_proto);
2922         unregister_pernet_subsys(&unix_net_ops);
2923 }
2924
2925 /* Earlier than device_initcall() so that other drivers invoking
2926    request_module() don't end up in a loop when modprobe tries
2927    to use a UNIX socket. But later than subsys_initcall() because
2928    we depend on stuff initialised there */
2929 fs_initcall(af_unix_init);
2930 module_exit(af_unix_exit);
2931
2932 MODULE_LICENSE("GPL");
2933 MODULE_ALIAS_NETPROTO(PF_UNIX);