net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 *                                      by the above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid a huge number
 *                                      of socks hashed (this is for unix_gc()
 *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched/signal.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <linux/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120 #include <linux/file.h>
 121
/*
 * Global socket hash table with 2 * UNIX_HASH_SIZE buckets.  The upper
 * half, [UNIX_HASH_SIZE, 2 * UNIX_HASH_SIZE), is what
 * unix_sockets_unbound() hands out.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock taken around insertions, removals and lookups in the table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/*
 * Count of unix sockets: incremented in unix_create1() and decremented in
 * unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
 127
 128
 129 static struct hlist_head *unix_sockets_unbound(void *addr)
 130 {
 131         unsigned long hash = (unsigned long)addr;
 132
 133         hash ^= hash >> 16;
 134         hash ^= hash >> 8;
 135         hash %= UNIX_HASH_SIZE;
 136         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137 }
 138
/*
 * True when the hash stored in the socket's address is below
 * UNIX_HASH_SIZE.  Dereferences unix_sk(sk)->addr unconditionally, so it
 * may only be applied to a socket that already has an address.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141 #ifdef CONFIG_SECURITY_NETWORK
 142 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143 {
 144         UNIXCB(skb).secid = scm->secid;
 145 }
 146
 147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148 {
 149         scm->secid = UNIXCB(skb).secid;
 150 }
 151
 152 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153 {
 154         return (scm->secid == UNIXCB(skb).secid);
 155 }
 156 #else
 157 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161 { }
 162
 163 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164 {
 165         return true;
 166 }
 167 #endif /* CONFIG_SECURITY_NETWORK */
 168
 169 /*
 170  *  SMP locking strategy:
 171  *    hash table is protected with spinlock unix_table_lock
 172  *    each socket state is protected by separate spin lock.
 173  */
 174
 175 static inline unsigned int unix_hash_fold(__wsum n)
 176 {
 177         unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179         hash ^= hash>>8;
 180         return hash&(UNIX_HASH_SIZE-1);
 181 }
 182
/* Peer pointer of a unix socket; expands to an lvalue, so it can be assigned. */
#define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186 {
 187         return unix_peer(osk) == sk;
 188 }
 189
 190 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191 {
 192         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193 }
 194
 195 static inline int unix_recvq_full(struct sock const *sk)
 196 {
 197         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198 }
 199
 200 struct sock *unix_peer_get(struct sock *s)
 201 {
 202         struct sock *peer;
 203
 204         unix_state_lock(s);
 205         peer = unix_peer(s);
 206         if (peer)
 207                 sock_hold(peer);
 208         unix_state_unlock(s);
 209         return peer;
 210 }
 211 EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213 static inline void unix_release_addr(struct unix_address *addr)
 214 {
 215         if (refcount_dec_and_test(&addr->refcnt))
 216                 kfree(addr);
 217 }
 218
 219 /*
 220  *      Check unix socket name:
 221  *              - should be not zero length.
 222  *              - if started by not zero, should be NULL terminated (FS object)
 223  *              - if started by zero, it is abstract name.
 224  */
 225
 226 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227 {
 228         *hashp = 0;
 229
 230         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 231                 return -EINVAL;
 232         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 233                 return -EINVAL;
 234         if (sunaddr->sun_path[0]) {
 235                 /*
 236                  * This may look like an off by one error but it is a bit more
 237                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 238                  * sun_path[108] doesn't as such exist.  However in kernel space
 239                  * we are guaranteed that it is a valid memory location in our
 240                  * kernel address buffer.
 241                  */
 242                 ((char *)sunaddr)[len] = 0;
 243                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 244                 return len;
 245         }
 246
 247         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 248         return len;
 249 }
 250
/*
 * Unlink @sk from whatever hash chain it is on.  Takes no lock itself;
 * unix_remove_socket() is the variant that wraps it in unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
 255
/*
 * Add @sk to the chain @list.  Warns if @sk is still hashed.  Takes no
 * lock itself; unix_insert_socket() is the variant that wraps it in
 * unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}
 261
/* Unhash @sk while holding unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}
 268
/* Hash @sk onto @list while holding unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}
 275
 276 static struct sock *__unix_find_socket_byname(struct net *net,
 277                                               struct sockaddr_un *sunname,
 278                                               int len, int type, unsigned int hash)
 279 {
 280         struct sock *s;
 281
 282         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 283                 struct unix_sock *u = unix_sk(s);
 284
 285                 if (!net_eq(sock_net(s), net))
 286                         continue;
 287
 288                 if (u->addr->len == len &&
 289                     !memcmp(u->addr->name, sunname, len))
 290                         goto found;
 291         }
 292         s = NULL;
 293 found:
 294         return s;
 295 }
 296
 297 static inline struct sock *unix_find_socket_byname(struct net *net,
 298                                                    struct sockaddr_un *sunname,
 299                                                    int len, int type,
 300                                                    unsigned int hash)
 301 {
 302         struct sock *s;
 303
 304         spin_lock(&unix_table_lock);
 305         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 306         if (s)
 307                 sock_hold(s);
 308         spin_unlock(&unix_table_lock);
 309         return s;
 310 }
 311
 312 static struct sock *unix_find_socket_byinode(struct inode *i)
 313 {
 314         struct sock *s;
 315
 316         spin_lock(&unix_table_lock);
 317         sk_for_each(s,
 318                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 319                 struct dentry *dentry = unix_sk(s)->path.dentry;
 320
 321                 if (dentry && d_backing_inode(dentry) == i) {
 322                         sock_hold(s);
 323                         goto found;
 324                 }
 325         }
 326         s = NULL;
 327 found:
 328         spin_unlock(&unix_table_lock);
 329         return s;
 330 }
 331
 332 /* Support code for asymmetrically connected dgram sockets
 333  *
 334  * If a datagram socket is connected to a socket not itself connected
 335  * to the first socket (eg, /dev/log), clients may only enqueue more
 336  * messages if the present receive queue of the server socket is not
 337  * "too large". This means there's a second writeability condition
 338  * poll and sendmsg need to test. The dgram recv code will do a wake
 339  * up on the peer_wait wait queue of a socket upon reception of a
 340  * datagram which needs to be propagated to sleeping would-be writers
 341  * since these might not have sent anything so far. This can't be
 342  * accomplished via poll_wait because the lifetime of the server
 343  * socket might be less than that of its clients if these break their
 344  * association with it or if the server socket is closed while clients
 345  * are still connected to it and there's no way to inform "a polling
 346  * implementation" that it should let go of a certain wait queue
 347  *
 348  * In order to propagate a wake up, a wait_queue_entry_t of the client
 349  * socket is enqueued on the peer_wait queue of the server socket
 350  * whose wake function does a wake_up on the ordinary client socket
 351  * wait queue. This connection is established whenever a write (or
 352  * poll for write) hit the flow control condition and broken when the
 353  * association to the server socket is dissolved or after a wake up
 354  * was relayed.
 355  */
 356
 357 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 358                                       void *key)
 359 {
 360         struct unix_sock *u;
 361         wait_queue_head_t *u_sleep;
 362
 363         u = container_of(q, struct unix_sock, peer_wake);
 364
 365         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 366                             q);
 367         u->peer_wake.private = NULL;
 368
 369         /* relaying can only happen while the wq still exists */
 370         u_sleep = sk_sleep(&u->sk);
 371         if (u_sleep)
 372                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 373
 374         return 0;
 375 }
 376
 377 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 378 {
 379         struct unix_sock *u, *u_other;
 380         int rc;
 381
 382         u = unix_sk(sk);
 383         u_other = unix_sk(other);
 384         rc = 0;
 385         spin_lock(&u_other->peer_wait.lock);
 386
 387         if (!u->peer_wake.private) {
 388                 u->peer_wake.private = other;
 389                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 390
 391                 rc = 1;
 392         }
 393
 394         spin_unlock(&u_other->peer_wait.lock);
 395         return rc;
 396 }
 397
 398 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 399                                             struct sock *other)
 400 {
 401         struct unix_sock *u, *u_other;
 402
 403         u = unix_sk(sk);
 404         u_other = unix_sk(other);
 405         spin_lock(&u_other->peer_wait.lock);
 406
 407         if (u->peer_wake.private == other) {
 408                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 409                 u->peer_wake.private = NULL;
 410         }
 411
 412         spin_unlock(&u_other->peer_wait.lock);
 413 }
 414
 415 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 416                                                    struct sock *other)
 417 {
 418         unix_dgram_peer_wake_disconnect(sk, other);
 419         wake_up_interruptible_poll(sk_sleep(sk),
 420                                    EPOLLOUT |
 421                                    EPOLLWRNORM |
 422                                    EPOLLWRBAND);
 423 }
 424
 425 /* preconditions:
 426  *      - unix_peer(sk) == other
 427  *      - association is stable
 428  */
 429 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 430 {
 431         int connected;
 432
 433         connected = unix_dgram_peer_wake_connect(sk, other);
 434
 435         /* If other is SOCK_DEAD, we want to make sure we signal
 436          * POLLOUT, such that a subsequent write() can get a
 437          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 438          * to other and its full, we will hang waiting for POLLOUT.
 439          */
 440         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 441                 return 1;
 442
 443         if (connected)
 444                 unix_dgram_peer_wake_disconnect(sk, other);
 445
 446         return 0;
 447 }
 448
 449 static int unix_writable(const struct sock *sk)
 450 {
 451         return sk->sk_state != TCP_LISTEN &&
 452                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 453 }
 454
 455 static void unix_write_space(struct sock *sk)
 456 {
 457         struct socket_wq *wq;
 458
 459         rcu_read_lock();
 460         if (unix_writable(sk)) {
 461                 wq = rcu_dereference(sk->sk_wq);
 462                 if (skwq_has_sleeper(wq))
 463                         wake_up_interruptible_sync_poll(&wq->wait,
 464                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 465                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 466         }
 467         rcu_read_unlock();
 468 }
 469
 470 /* When dgram socket disconnects (or changes its peer), we clear its receive
 471  * queue of packets arrived from previous peer. First, it allows to do
 472  * flow control based only on wmem_alloc; second, sk connected to peer
 473  * may receive messages only from that peer. */
 474 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 475 {
 476         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 477                 skb_queue_purge(&sk->sk_receive_queue);
 478                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 479
 480                 /* If one link of bidirectional dgram pipe is disconnected,
 481                  * we signal error. Messages are lost. Do not make this,
 482                  * when peer was not connected to us.
 483                  */
 484                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 485                         other->sk_err = ECONNRESET;
 486                         other->sk_error_report(other);
 487                 }
 488         }
 489 }
 490
 491 static void unix_sock_destructor(struct sock *sk)
 492 {
 493         struct unix_sock *u = unix_sk(sk);
 494
 495         skb_queue_purge(&sk->sk_receive_queue);
 496
 497         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 498         WARN_ON(!sk_unhashed(sk));
 499         WARN_ON(sk->sk_socket);
 500         if (!sock_flag(sk, SOCK_DEAD)) {
 501                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 502                 return;
 503         }
 504
 505         if (u->addr)
 506                 unix_release_addr(u->addr);
 507
 508         atomic_long_dec(&unix_nr_socks);
 509         local_bh_disable();
 510         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 511         local_bh_enable();
 512 #ifdef UNIX_REFCNT_DEBUG
 513         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 514                 atomic_long_read(&unix_nr_socks));
 515 #endif
 516 }
 517
/*
 * Tear down a unix socket.
 *
 * @sk:      socket being released.
 * @embrion: non-zero when called recursively for a socket that was still
 *           queued on a listening socket (see the drain loop below); it
 *           forces ECONNRESET on a stream/seqpacket peer.
 *
 * Unhashes the socket, orphans it and marks it closed, notifies the peer,
 * drains the receive queue, drops the bound path and finally drops a
 * reference on @sk.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        /* Take the socket out of the hash table first. */
        unix_remove_socket(sk);

        /*
         * Clear state.  The bound path and the previous sk_state are saved
         * in locals under the state lock because both are needed after the
         * lock has been dropped.
         */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        /* Wake everything sleeping on our peer_wait queue. */
        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /*
                         * Data we never read, or an embryo that was never
                         * accepted, is reported to the peer as a reset.
                         */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                /* Detach our peer_wake entry from the peer's peer_wait queue. */
                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /*
                 * If we were listening, the socket attached to each queued
                 * skb is released recursively as an embryo.
                 */
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        /* Drop the path saved above, if the socket had one. */
        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        /* Run the garbage collector only while unix_tot_inflight is non-zero. */
        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}
 591
 592 static void init_peercred(struct sock *sk)
 593 {
 594         put_pid(sk->sk_peer_pid);
 595         if (sk->sk_peer_cred)
 596                 put_cred(sk->sk_peer_cred);
 597         sk->sk_peer_pid  = get_pid(task_tgid(current));
 598         sk->sk_peer_cred = get_current_cred();
 599 }
 600
 601 static void copy_peercred(struct sock *sk, struct sock *peersk)
 602 {
 603         put_pid(sk->sk_peer_pid);
 604         if (sk->sk_peer_cred)
 605                 put_cred(sk->sk_peer_cred);
 606         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 607         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 608 }
 609
 610 static int unix_listen(struct socket *sock, int backlog)
 611 {
 612         int err;
 613         struct sock *sk = sock->sk;
 614         struct unix_sock *u = unix_sk(sk);
 615         struct pid *old_pid = NULL;
 616
 617         err = -EOPNOTSUPP;
 618         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 619                 goto out;       /* Only stream/seqpacket sockets accept */
 620         err = -EINVAL;
 621         if (!u->addr)
 622                 goto out;       /* No listens on an unbound socket */
 623         unix_state_lock(sk);
 624         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 625                 goto out_unlock;
 626         if (backlog > sk->sk_max_ack_backlog)
 627                 wake_up_interruptible_all(&u->peer_wait);
 628         sk->sk_max_ack_backlog  = backlog;
 629         sk->sk_state            = TCP_LISTEN;
 630         /* set credentials so connect can copy them */
 631         init_peercred(sk);
 632         err = 0;
 633
 634 out_unlock:
 635         unix_state_unlock(sk);
 636         put_pid(old_pid);
 637 out:
 638         return err;
 639 }
 640
/*
 * Forward declarations of the socket operations referenced by the
 * proto_ops tables below.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);
 667
 668 static int unix_set_peek_off(struct sock *sk, int val)
 669 {
 670         struct unix_sock *u = unix_sk(sk);
 671
 672         if (mutex_lock_interruptible(&u->iolock))
 673                 return -EINTR;
 674
 675         sk->sk_peek_off = val;
 676         mutex_unlock(&u->iolock);
 677
 678         return 0;
 679 }
 680
 681
/*
 * Operations installed by unix_create() for SOCK_STREAM sockets.  This is
 * the only table here that uses unix_poll and provides sendpage and
 * splice_read handlers; socket options and mmap use the generic
 * sock_no_*() stubs.
 */
static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
};
 704
/*
 * Operations installed by unix_create() for SOCK_DGRAM sockets (SOCK_RAW
 * is mapped to SOCK_DGRAM there).  accept, listen and sendpage use the
 * generic sock_no_*() stubs.
 */
static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
 726
/*
 * Operations installed by unix_create() for SOCK_SEQPACKET sockets.
 * connect, accept and listen are shared with the stream table, poll is
 * shared with the datagram table, and sendmsg/recvmsg have their own
 * seqpacket handlers.
 */
static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
 748
/*
 * Protocol descriptor passed to sk_alloc() in unix_create1(); obj_size
 * makes each allocated sock a full struct unix_sock.
 */
static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};
 754
 755 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 756 {
 757         struct sock *sk = NULL;
 758         struct unix_sock *u;
 759
 760         atomic_long_inc(&unix_nr_socks);
 761         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 762                 goto out;
 763
 764         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 765         if (!sk)
 766                 goto out;
 767
 768         sock_init_data(sock, sk);
 769
 770         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 771         sk->sk_write_space      = unix_write_space;
 772         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 773         sk->sk_destruct         = unix_sock_destructor;
 774         u         = unix_sk(sk);
 775         u->path.dentry = NULL;
 776         u->path.mnt = NULL;
 777         spin_lock_init(&u->lock);
 778         atomic_long_set(&u->inflight, 0);
 779         INIT_LIST_HEAD(&u->link);
 780         mutex_init(&u->iolock); /* single task reading lock */
 781         mutex_init(&u->bindlock); /* single task binding lock */
 782         init_waitqueue_head(&u->peer_wait);
 783         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 784         unix_insert_socket(unix_sockets_unbound(sk), sk);
 785 out:
 786         if (sk == NULL)
 787                 atomic_long_dec(&unix_nr_socks);
 788         else {
 789                 local_bh_disable();
 790                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 791                 local_bh_enable();
 792         }
 793         return sk;
 794 }
 795
 796 static int unix_create(struct net *net, struct socket *sock, int protocol,
 797                        int kern)
 798 {
 799         if (protocol && protocol != PF_UNIX)
 800                 return -EPROTONOSUPPORT;
 801
 802         sock->state = SS_UNCONNECTED;
 803
 804         switch (sock->type) {
 805         case SOCK_STREAM:
 806                 sock->ops = &unix_stream_ops;
 807                 break;
 808                 /*
 809                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 810                  *      nothing uses it.
 811                  */
 812         case SOCK_RAW:
 813                 sock->type = SOCK_DGRAM;
 814                 /* fall through */
 815         case SOCK_DGRAM:
 816                 sock->ops = &unix_dgram_ops;
 817                 break;
 818         case SOCK_SEQPACKET:
 819                 sock->ops = &unix_seqpacket_ops;
 820                 break;
 821         default:
 822                 return -ESOCKTNOSUPPORT;
 823         }
 824
 825         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 826 }
 827
 828 static int unix_release(struct socket *sock)
 829 {
 830         struct sock *sk = sock->sk;
 831
 832         if (!sk)
 833                 return 0;
 834
 835         unix_release_sock(sk, 0);
 836         sock->sk = NULL;
 837
 838         return 0;
 839 }
 840
/*
 * Bind @sock to an automatically chosen name.
 *
 * The name is built in a zeroed allocation: sun_path[0] stays NUL and the
 * following bytes hold the file-wide counter "ordernum" printed as five
 * hex digits.  Candidates are tried until one is not found by
 * __unix_find_socket_byname().
 *
 * Returns 0 on success or if the socket already has an address, the error
 * from mutex_lock_interruptible() if taking u->bindlock is interrupted,
 * -ENOMEM if the address cannot be allocated, and -ENOSPC once 0x100000
 * consecutive candidates were all found to be in use.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	/* Already bound: nothing to do. */
	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	/*
	 * Length = hex digits written + the leading NUL of sun_path + the
	 * sun_family field.  Note that ordernum is read here before
	 * unix_table_lock is taken.
	 */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	/* Advance the counter for the next attempt; it wraps at 20 bits. */
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seems to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	/* Fold the socket type into the hash, as unix_bind() does. */
	addr->hash ^= sk->sk_type;

	/*
	 * Still under unix_table_lock: move the sock from its current hash
	 * chain to the one selected by the new address, publishing u->addr
	 * with release semantics in between.
	 */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
 901
 902 static struct sock *unix_find_other(struct net *net,
 903                                     struct sockaddr_un *sunname, int len,
 904                                     int type, unsigned int hash, int *error)
 905 {
 906         struct sock *u;
 907         struct path path;
 908         int err = 0;
 909
 910         if (sunname->sun_path[0]) {
 911                 struct inode *inode;
 912                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 913                 if (err)
 914                         goto fail;
 915                 inode = d_backing_inode(path.dentry);
 916                 err = inode_permission(inode, MAY_WRITE);
 917                 if (err)
 918                         goto put_fail;
 919
 920                 err = -ECONNREFUSED;
 921                 if (!S_ISSOCK(inode->i_mode))
 922                         goto put_fail;
 923                 u = unix_find_socket_byinode(inode);
 924                 if (!u)
 925                         goto put_fail;
 926
 927                 if (u->sk_type == type)
 928                         touch_atime(&path);
 929
 930                 path_put(&path);
 931
 932                 err = -EPROTOTYPE;
 933                 if (u->sk_type != type) {
 934                         sock_put(u);
 935                         goto fail;
 936                 }
 937         } else {
 938                 err = -ECONNREFUSED;
 939                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 940                 if (u) {
 941                         struct dentry *dentry;
 942                         dentry = unix_sk(u)->path.dentry;
 943                         if (dentry)
 944                                 touch_atime(&unix_sk(u)->path);
 945                 } else
 946                         goto fail;
 947         }
 948         return u;
 949
 950 put_fail:
 951         path_put(&path);
 952 fail:
 953         *error = err;
 954         return NULL;
 955 }
 956
 957 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 958 {
 959         struct dentry *dentry;
 960         struct path path;
 961         int err = 0;
 962         /*
 963          * Get the parent directory, calculate the hash for last
 964          * component.
 965          */
 966         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 967         err = PTR_ERR(dentry);
 968         if (IS_ERR(dentry))
 969                 return err;
 970
 971         /*
 972          * All right, let's create it.
 973          */
 974         err = security_path_mknod(&path, dentry, mode, 0);
 975         if (!err) {
 976                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 977                 if (!err) {
 978                         res->mnt = mntget(path.mnt);
 979                         res->dentry = dget(dentry);
 980                 }
 981         }
 982         done_path_create(&path, dentry);
 983         return err;
 984 }
 985
/*
 * Bind an AF_UNIX socket to the address in @uaddr.
 *
 * Three cases are distinguished:
 *   - addr_len == sizeof(short) (family only): delegate to unix_autobind();
 *   - sun_path[0] != 0: create the socket node with unix_mknod() and hash
 *     the sock by the inode number of the new node;
 *   - sun_path[0] == 0: hash the sock by name, failing with -EADDRINUSE if
 *     __unix_find_socket_byname() already finds that name.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a too-short or
 * non-AF_UNIX address or when the socket already has an address,
 * -EADDRINUSE when the name or path is taken, -ENOMEM on allocation
 * failure, or whatever unix_mkname(), unix_mknod() or
 * mutex_lock_interruptible() reported.
 *
 * NOTE(review): unix_mknod() runs before u->bindlock is taken.  If a later
 * step fails (interrupted lock, socket already bound, kmalloc failure) the
 * error path only does path_put(); nothing in this function removes the
 * node that was just created.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Only the family was supplied: pick a name automatically. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	/* A non-negative result is the address length to use from here on. */
	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		/* Socket node mode: socket inode's mode bits masked by umask. */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			/* An existing path means the address is taken. */
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	/* A socket can only be bound once. */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/*
		 * Path-bound sockets get addr->hash = UNIX_HASH_SIZE and are
		 * chained by inode number instead of by name hash.
		 */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	/*
	 * Under unix_table_lock: move the sock to its new hash chain and
	 * publish u->addr with release semantics.
	 */
	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	/* On success the path reference now belongs to u->path. */
	if (err)
		path_put(&path);
out:
	return err;
}
1076
/*
 * Take the state locks of @sk1 and @sk2.  When both are distinct and
 * non-NULL, the sock at the lower address is locked first and the other
 * one with the nested variant.  If @sk2 is NULL or equal to @sk1, only
 * @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1091
/*
 * Release the state locks taken by unix_state_double_lock(): @sk1 always,
 * @sk2 only when it is non-NULL and differs from @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && sk2 != sk1)
		unix_state_unlock(sk2);
}
1101
/*
 * connect() for datagram sockets: set, replace or clear the default peer.
 *
 * With an address family other than AF_UNSPEC the target is looked up with
 * unix_find_other() and, after the permission checks, installed as the
 * peer of @sock; the reference returned by the lookup is kept as the peer
 * reference.  With AF_UNSPEC the peer is cleared.
 *
 * Returns 0 on success or a negative errno: -EINVAL if @alen is too short,
 * -EPERM if unix_may_send() refuses the target, or the error reported by
 * unix_mkname(), unix_autobind(), unix_find_other() or
 * security_unix_may_send().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* With SOCK_PASSCRED set, an unbound socket is autobound first. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		/* With a NULL second argument only sk's state lock is taken. */
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		/* Reconnecting to the same peer is not a disconnect. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		/* Drop the reference that was held for the previous peer. */
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1181
1182 static long unix_wait_for_peer(struct sock *other, long timeo)
1183 {
1184         struct unix_sock *u = unix_sk(other);
1185         int sched;
1186         DEFINE_WAIT(wait);
1187
1188         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1189
1190         sched = !sock_flag(other, SOCK_DEAD) &&
1191                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1192                 unix_recvq_full(other);
1193
1194         unix_state_unlock(other);
1195
1196         if (sched)
1197                 timeo = schedule_timeout(timeo);
1198
1199         finish_wait(&u->peer_wait, &wait);
1200         return timeo;
1201 }
1202
/*
 * connect() for stream and seqpacket sockets.
 *
 * A new sock (newsk) representing the server side of the connection and a
 * one-byte skb are allocated up front.  The listener is then looked up and
 * locked; if its receive queue is full the caller waits (subject to the
 * send timeout) and retries.  Once both state locks are held, newsk is
 * paired with @sock's sock, inherits the listener's address and path, and
 * the skb is queued on the listener's receive queue for unix_accept() to
 * pick up.
 *
 * Returns 0 on success or a negative errno: -ENOMEM on allocation failure,
 * -ECONNREFUSED if the target is not listening or has shut down receiving,
 * -EAGAIN if the queue is full and no timeout remains, -EISCONN / -EINVAL
 * depending on the caller's own state, the sock_intr_errno() value when a
 * signal interrupts the wait, or the error reported by unix_mkname(),
 * unix_autobind(), unix_find_other() or security_unix_stream_connect().
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* With SOCK_PASSCRED set, an unbound socket is autobound first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* unix_wait_for_peer() drops other's state lock. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* Our state changed while we were unlocked: start over. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	/* newsk's peer pointer holds a reference on sk. */
	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	/* sk's peer pointer holds a reference on newsk. */
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	/* Shared cleanup: skb, newsk and other may each still be NULL here. */
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1391
1392 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1393 {
1394         struct sock *ska = socka->sk, *skb = sockb->sk;
1395
1396         /* Join our sockets back to back */
1397         sock_hold(ska);
1398         sock_hold(skb);
1399         unix_peer(ska) = skb;
1400         unix_peer(skb) = ska;
1401         init_peercred(ska);
1402         init_peercred(skb);
1403
1404         if (ska->sk_type != SOCK_DGRAM) {
1405                 ska->sk_state = TCP_ESTABLISHED;
1406                 skb->sk_state = TCP_ESTABLISHED;
1407                 socka->state  = SS_CONNECTED;
1408                 sockb->state  = SS_CONNECTED;
1409         }
1410         return 0;
1411 }
1412
1413 static void unix_sock_inherit_flags(const struct socket *old,
1414                                     struct socket *new)
1415 {
1416         if (test_bit(SOCK_PASSCRED, &old->flags))
1417                 set_bit(SOCK_PASSCRED, &new->flags);
1418         if (test_bit(SOCK_PASSSEC, &old->flags))
1419                 set_bit(SOCK_PASSSEC, &new->flags);
1420 }
1421
1422 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1423                        bool kern)
1424 {
1425         struct sock *sk = sock->sk;
1426         struct sock *tsk;
1427         struct sk_buff *skb;
1428         int err;
1429
1430         err = -EOPNOTSUPP;
1431         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1432                 goto out;
1433
1434         err = -EINVAL;
1435         if (sk->sk_state != TCP_LISTEN)
1436                 goto out;
1437
1438         /* If socket state is TCP_LISTEN it cannot change (for now...),
1439          * so that no locks are necessary.
1440          */
1441
1442         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1443         if (!skb) {
1444                 /* This means receive shutdown. */
1445                 if (err == 0)
1446                         err = -EINVAL;
1447                 goto out;
1448         }
1449
1450         tsk = skb->sk;
1451         skb_free_datagram(sk, skb);
1452         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1453
1454         /* attach accepted sock to socket */
1455         unix_state_lock(tsk);
1456         newsock->state = SS_CONNECTED;
1457         unix_sock_inherit_flags(sock, newsock);
1458         sock_graft(tsk, newsock);
1459         unix_state_unlock(tsk);
1460         return 0;
1461
1462 out:
1463         return err;
1464 }
1465
1466
1467 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1468 {
1469         struct sock *sk = sock->sk;
1470         struct unix_address *addr;
1471         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1472         int err = 0;
1473
1474         if (peer) {
1475                 sk = unix_peer_get(sk);
1476
1477                 err = -ENOTCONN;
1478                 if (!sk)
1479                         goto out;
1480                 err = 0;
1481         } else {
1482                 sock_hold(sk);
1483         }
1484
1485         addr = smp_load_acquire(&unix_sk(sk)->addr);
1486         if (!addr) {
1487                 sunaddr->sun_family = AF_UNIX;
1488                 sunaddr->sun_path[0] = 0;
1489                 err = sizeof(short);
1490         } else {
1491                 err = addr->len;
1492                 memcpy(sunaddr, addr->name, addr->len);
1493         }
1494         sock_put(sk);
1495 out:
1496         return err;
1497 }
1498
1499 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1500 {
1501         int i;
1502
1503         scm->fp = UNIXCB(skb).fp;
1504         UNIXCB(skb).fp = NULL;
1505
1506         for (i = scm->fp->count-1; i >= 0; i--)
1507                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1508 }
1509
1510 static void unix_destruct_scm(struct sk_buff *skb)
1511 {
1512         struct scm_cookie scm;
1513         memset(&scm, 0, sizeof(scm));
1514         scm.pid  = UNIXCB(skb).pid;
1515         if (UNIXCB(skb).fp)
1516                 unix_detach_fds(&scm, skb);
1517
1518         /* Alas, it calls VFS */
1519         /* So fscking what? fput() had been SMP-safe since the last Summer */
1520         scm_destroy(&scm);
1521         sock_wfree(skb);
1522 }
1523
1524 /*
1525  * The "user->unix_inflight" variable is protected by the garbage
1526  * collection lock, and we just read it locklessly here. If you go
1527  * over the limit, there might be a tiny race in actually noticing
1528  * it across threads. Tough.
1529  */
1530 static inline bool too_many_unix_fds(struct task_struct *p)
1531 {
1532         struct user_struct *user = current_user();
1533
1534         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1535                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1536         return false;
1537 }
1538
1539 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1540 {
1541         int i;
1542
1543         if (too_many_unix_fds(current))
1544                 return -ETOOMANYREFS;
1545
1546         /*
1547          * Need to duplicate file references for the sake of garbage
1548          * collection.  Otherwise a socket in the fps might become a
1549          * candidate for GC while the skb is not yet queued.
1550          */
1551         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1552         if (!UNIXCB(skb).fp)
1553                 return -ENOMEM;
1554
1555         for (i = scm->fp->count - 1; i >= 0; i--)
1556                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1557         return 0;
1558 }
1559
1560 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1561 {
1562         int err = 0;
1563
1564         UNIXCB(skb).pid  = get_pid(scm->pid);
1565         UNIXCB(skb).uid = scm->creds.uid;
1566         UNIXCB(skb).gid = scm->creds.gid;
1567         UNIXCB(skb).fp = NULL;
1568         unix_get_secdata(scm, skb);
1569         if (scm->fp && send_fds)
1570                 err = unix_attach_fds(scm, skb);
1571
1572         skb->destructor = unix_destruct_scm;
1573         return err;
1574 }
1575
1576 static bool unix_passcred_enabled(const struct socket *sock,
1577                                   const struct sock *other)
1578 {
1579         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1580                !other->sk_socket ||
1581                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1582 }
1583
1584 /*
1585  * Some apps rely on write() giving SCM_CREDENTIALS
1586  * We include credentials if source or destination socket
1587  * asserted SOCK_PASSCRED.
1588  */
1589 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1590                             const struct sock *other)
1591 {
1592         if (UNIXCB(skb).pid)
1593                 return;
1594         if (unix_passcred_enabled(sock, other)) {
1595                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1596                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1597         }
1598 }
1599
1600 static int maybe_init_creds(struct scm_cookie *scm,
1601                             struct socket *socket,
1602                             const struct sock *other)
1603 {
1604         int err;
1605         struct msghdr msg = { .msg_controllen = 0 };
1606
1607         err = scm_send(socket, &msg, scm, false);
1608         if (err)
1609                 return err;
1610
1611         if (unix_passcred_enabled(socket, other)) {
1612                 scm->pid = get_pid(task_tgid(current));
1613                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1614         }
1615         return err;
1616 }
1617
1618 static bool unix_skb_scm_eq(struct sk_buff *skb,
1619                             struct scm_cookie *scm)
1620 {
1621         const struct unix_skb_parms *u = &UNIXCB(skb);
1622
1623         return u->pid == scm->pid &&
1624                uid_eq(u->uid, scm->creds.uid) &&
1625                gid_eq(u->gid, scm->creds.gid) &&
1626                unix_secdata_eq(scm, skb);
1627 }
1628
1629 /*
1630  *      Send AF_UNIX data.
1631  */
1632
1633 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1634                               size_t len)
1635 {
1636         struct sock *sk = sock->sk;
1637         struct net *net = sock_net(sk);
1638         struct unix_sock *u = unix_sk(sk);
1639         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1640         struct sock *other = NULL;
1641         int namelen = 0; /* fake GCC */
1642         int err;
1643         unsigned int hash;
1644         struct sk_buff *skb;
1645         long timeo;
1646         struct scm_cookie scm;
1647         int data_len = 0;
1648         int sk_locked;
1649
1650         wait_for_unix_gc();
1651         err = scm_send(sock, msg, &scm, false);
1652         if (err < 0)
1653                 return err;
1654
1655         err = -EOPNOTSUPP;
1656         if (msg->msg_flags&MSG_OOB)
1657                 goto out;
1658
1659         if (msg->msg_namelen) {
1660                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1661                 if (err < 0)
1662                         goto out;
1663                 namelen = err;
1664         } else {
1665                 sunaddr = NULL;
1666                 err = -ENOTCONN;
1667                 other = unix_peer_get(sk);
1668                 if (!other)
1669                         goto out;
1670         }
1671
1672         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1673             && (err = unix_autobind(sock)) != 0)
1674                 goto out;
1675
1676         err = -EMSGSIZE;
1677         if (len > sk->sk_sndbuf - 32)
1678                 goto out;
1679
1680         if (len > SKB_MAX_ALLOC) {
1681                 data_len = min_t(size_t,
1682                                  len - SKB_MAX_ALLOC,
1683                                  MAX_SKB_FRAGS * PAGE_SIZE);
1684                 data_len = PAGE_ALIGN(data_len);
1685
1686                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1687         }
1688
1689         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1690                                    msg->msg_flags & MSG_DONTWAIT, &err,
1691                                    PAGE_ALLOC_COSTLY_ORDER);
1692         if (skb == NULL)
1693                 goto out;
1694
1695         err = unix_scm_to_skb(&scm, skb, true);
1696         if (err < 0)
1697                 goto out_free;
1698
1699         skb_put(skb, len - data_len);
1700         skb->data_len = data_len;
1701         skb->len = len;
1702         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1703         if (err)
1704                 goto out_free;
1705
1706         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1707
1708 restart:
1709         if (!other) {
1710                 err = -ECONNRESET;
1711                 if (sunaddr == NULL)
1712                         goto out_free;
1713
1714                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1715                                         hash, &err);
1716                 if (other == NULL)
1717                         goto out_free;
1718         }
1719
1720         if (sk_filter(other, skb) < 0) {
1721                 /* Toss the packet but do not return any error to the sender */
1722                 err = len;
1723                 goto out_free;
1724         }
1725
1726         sk_locked = 0;
1727         unix_state_lock(other);
1728 restart_locked:
1729         err = -EPERM;
1730         if (!unix_may_send(sk, other))
1731                 goto out_unlock;
1732
1733         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1734                 /*
1735                  *      Check with 1003.1g - what should
1736                  *      datagram error
1737                  */
1738                 unix_state_unlock(other);
1739                 sock_put(other);
1740
1741                 if (!sk_locked)
1742                         unix_state_lock(sk);
1743
1744                 err = 0;
1745                 if (unix_peer(sk) == other) {
1746                         unix_peer(sk) = NULL;
1747                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1748
1749                         unix_state_unlock(sk);
1750
1751                         unix_dgram_disconnected(sk, other);
1752                         sock_put(other);
1753                         err = -ECONNREFUSED;
1754                 } else {
1755                         unix_state_unlock(sk);
1756                 }
1757
1758                 other = NULL;
1759                 if (err)
1760                         goto out_free;
1761                 goto restart;
1762         }
1763
1764         err = -EPIPE;
1765         if (other->sk_shutdown & RCV_SHUTDOWN)
1766                 goto out_unlock;
1767
1768         if (sk->sk_type != SOCK_SEQPACKET) {
1769                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1770                 if (err)
1771                         goto out_unlock;
1772         }
1773
1774         /* other == sk && unix_peer(other) != sk if
1775          * - unix_peer(sk) == NULL, destination address bound to sk
1776          * - unix_peer(sk) == sk by time of get but disconnected before lock
1777          */
1778         if (other != sk &&
1779             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1780                 if (timeo) {
1781                         timeo = unix_wait_for_peer(other, timeo);
1782
1783                         err = sock_intr_errno(timeo);
1784                         if (signal_pending(current))
1785                                 goto out_free;
1786
1787                         goto restart;
1788                 }
1789
1790                 if (!sk_locked) {
1791                         unix_state_unlock(other);
1792                         unix_state_double_lock(sk, other);
1793                 }
1794
1795                 if (unix_peer(sk) != other ||
1796                     unix_dgram_peer_wake_me(sk, other)) {
1797                         err = -EAGAIN;
1798                         sk_locked = 1;
1799                         goto out_unlock;
1800                 }
1801
1802                 if (!sk_locked) {
1803                         sk_locked = 1;
1804                         goto restart_locked;
1805                 }
1806         }
1807
1808         if (unlikely(sk_locked))
1809                 unix_state_unlock(sk);
1810
1811         if (sock_flag(other, SOCK_RCVTSTAMP))
1812                 __net_timestamp(skb);
1813         maybe_add_creds(skb, sock, other);
1814         skb_queue_tail(&other->sk_receive_queue, skb);
1815         unix_state_unlock(other);
1816         other->sk_data_ready(other);
1817         sock_put(other);
1818         scm_destroy(&scm);
1819         return len;
1820
1821 out_unlock:
1822         if (sk_locked)
1823                 unix_state_unlock(sk);
1824         unix_state_unlock(other);
1825 out_free:
1826         kfree_skb(skb);
1827 out:
1828         if (other)
1829                 sock_put(other);
1830         scm_destroy(&scm);
1831         return err;
1832 }
1833
1834 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1835  * bytes, and a minimum of a full page.
1836  */
1837 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1838
1839 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1840                                size_t len)
1841 {
1842         struct sock *sk = sock->sk;
1843         struct sock *other = NULL;
1844         int err, size;
1845         struct sk_buff *skb;
1846         int sent = 0;
1847         struct scm_cookie scm;
1848         bool fds_sent = false;
1849         int data_len;
1850
1851         wait_for_unix_gc();
1852         err = scm_send(sock, msg, &scm, false);
1853         if (err < 0)
1854                 return err;
1855
1856         err = -EOPNOTSUPP;
1857         if (msg->msg_flags&MSG_OOB)
1858                 goto out_err;
1859
1860         if (msg->msg_namelen) {
1861                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1862                 goto out_err;
1863         } else {
1864                 err = -ENOTCONN;
1865                 other = unix_peer(sk);
1866                 if (!other)
1867                         goto out_err;
1868         }
1869
1870         if (sk->sk_shutdown & SEND_SHUTDOWN)
1871                 goto pipe_err;
1872
1873         while (sent < len) {
1874                 size = len - sent;
1875
1876                 /* Keep two messages in the pipe so it schedules better */
1877                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1878
1879                 /* allow fallback to order-0 allocations */
1880                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1881
1882                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1883
1884                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1885
1886                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1887                                            msg->msg_flags & MSG_DONTWAIT, &err,
1888                                            get_order(UNIX_SKB_FRAGS_SZ));
1889                 if (!skb)
1890                         goto out_err;
1891
1892                 /* Only send the fds in the first buffer */
1893                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1894                 if (err < 0) {
1895                         kfree_skb(skb);
1896                         goto out_err;
1897                 }
1898                 fds_sent = true;
1899
1900                 skb_put(skb, size - data_len);
1901                 skb->data_len = data_len;
1902                 skb->len = size;
1903                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1904                 if (err) {
1905                         kfree_skb(skb);
1906                         goto out_err;
1907                 }
1908
1909                 unix_state_lock(other);
1910
1911                 if (sock_flag(other, SOCK_DEAD) ||
1912                     (other->sk_shutdown & RCV_SHUTDOWN))
1913                         goto pipe_err_free;
1914
1915                 maybe_add_creds(skb, sock, other);
1916                 skb_queue_tail(&other->sk_receive_queue, skb);
1917                 unix_state_unlock(other);
1918                 other->sk_data_ready(other);
1919                 sent += size;
1920         }
1921
1922         scm_destroy(&scm);
1923
1924         return sent;
1925
1926 pipe_err_free:
1927         unix_state_unlock(other);
1928         kfree_skb(skb);
1929 pipe_err:
1930         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1931                 send_sig(SIGPIPE, current, 0);
1932         err = -EPIPE;
1933 out_err:
1934         scm_destroy(&scm);
1935         return sent ? : err;
1936 }
1937
1938 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1939                                     int offset, size_t size, int flags)
1940 {
1941         int err;
1942         bool send_sigpipe = false;
1943         bool init_scm = true;
1944         struct scm_cookie scm;
1945         struct sock *other, *sk = socket->sk;
1946         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1947
1948         if (flags & MSG_OOB)
1949                 return -EOPNOTSUPP;
1950
1951         other = unix_peer(sk);
1952         if (!other || sk->sk_state != TCP_ESTABLISHED)
1953                 return -ENOTCONN;
1954
1955         if (false) {
1956 alloc_skb:
1957                 unix_state_unlock(other);
1958                 mutex_unlock(&unix_sk(other)->iolock);
1959                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1960                                               &err, 0);
1961                 if (!newskb)
1962                         goto err;
1963         }
1964
1965         /* we must acquire iolock as we modify already present
1966          * skbs in the sk_receive_queue and mess with skb->len
1967          */
1968         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1969         if (err) {
1970                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1971                 goto err;
1972         }
1973
1974         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1975                 err = -EPIPE;
1976                 send_sigpipe = true;
1977                 goto err_unlock;
1978         }
1979
1980         unix_state_lock(other);
1981
1982         if (sock_flag(other, SOCK_DEAD) ||
1983             other->sk_shutdown & RCV_SHUTDOWN) {
1984                 err = -EPIPE;
1985                 send_sigpipe = true;
1986                 goto err_state_unlock;
1987         }
1988
1989         if (init_scm) {
1990                 err = maybe_init_creds(&scm, socket, other);
1991                 if (err)
1992                         goto err_state_unlock;
1993                 init_scm = false;
1994         }
1995
1996         skb = skb_peek_tail(&other->sk_receive_queue);
1997         if (tail && tail == skb) {
1998                 skb = newskb;
1999         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2000                 if (newskb) {
2001                         skb = newskb;
2002                 } else {
2003                         tail = skb;
2004                         goto alloc_skb;
2005                 }
2006         } else if (newskb) {
2007                 /* this is fast path, we don't necessarily need to
2008                  * call to kfree_skb even though with newskb == NULL
2009                  * this - does no harm
2010                  */
2011                 consume_skb(newskb);
2012                 newskb = NULL;
2013         }
2014
2015         if (skb_append_pagefrags(skb, page, offset, size)) {
2016                 tail = skb;
2017                 goto alloc_skb;
2018         }
2019
2020         skb->len += size;
2021         skb->data_len += size;
2022         skb->truesize += size;
2023         refcount_add(size, &sk->sk_wmem_alloc);
2024
2025         if (newskb) {
2026                 err = unix_scm_to_skb(&scm, skb, false);
2027                 if (err)
2028                         goto err_state_unlock;
2029                 spin_lock(&other->sk_receive_queue.lock);
2030                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2031                 spin_unlock(&other->sk_receive_queue.lock);
2032         }
2033
2034         unix_state_unlock(other);
2035         mutex_unlock(&unix_sk(other)->iolock);
2036
2037         other->sk_data_ready(other);
2038         scm_destroy(&scm);
2039         return size;
2040
2041 err_state_unlock:
2042         unix_state_unlock(other);
2043 err_unlock:
2044         mutex_unlock(&unix_sk(other)->iolock);
2045 err:
2046         kfree_skb(newskb);
2047         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2048                 send_sig(SIGPIPE, current, 0);
2049         if (!init_scm)
2050                 scm_destroy(&scm);
2051         return err;
2052 }
2053
2054 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2055                                   size_t len)
2056 {
2057         int err;
2058         struct sock *sk = sock->sk;
2059
2060         err = sock_error(sk);
2061         if (err)
2062                 return err;
2063
2064         if (sk->sk_state != TCP_ESTABLISHED)
2065                 return -ENOTCONN;
2066
2067         if (msg->msg_namelen)
2068                 msg->msg_namelen = 0;
2069
2070         return unix_dgram_sendmsg(sock, msg, len);
2071 }
2072
2073 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2074                                   size_t size, int flags)
2075 {
2076         struct sock *sk = sock->sk;
2077
2078         if (sk->sk_state != TCP_ESTABLISHED)
2079                 return -ENOTCONN;
2080
2081         return unix_dgram_recvmsg(sock, msg, size, flags);
2082 }
2083
2084 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2085 {
2086         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2087
2088         if (addr) {
2089                 msg->msg_namelen = addr->len;
2090                 memcpy(msg->msg_name, addr->name, addr->len);
2091         }
2092 }
2093
2094 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2095                               size_t size, int flags)
2096 {
2097         struct scm_cookie scm;
2098         struct sock *sk = sock->sk;
2099         struct unix_sock *u = unix_sk(sk);
2100         struct sk_buff *skb, *last;
2101         long timeo;
2102         int err;
2103         int peeked, skip;
2104
2105         err = -EOPNOTSUPP;
2106         if (flags&MSG_OOB)
2107                 goto out;
2108
2109         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2110
2111         do {
2112                 mutex_lock(&u->iolock);
2113
2114                 skip = sk_peek_offset(sk, flags);
2115                 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2116                                               &err, &last);
2117                 if (skb)
2118                         break;
2119
2120                 mutex_unlock(&u->iolock);
2121
2122                 if (err != -EAGAIN)
2123                         break;
2124         } while (timeo &&
2125                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2126
2127         if (!skb) { /* implies iolock unlocked */
2128                 unix_state_lock(sk);
2129                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2130                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2131                     (sk->sk_shutdown & RCV_SHUTDOWN))
2132                         err = 0;
2133                 unix_state_unlock(sk);
2134                 goto out;
2135         }
2136
2137         if (wq_has_sleeper(&u->peer_wait))
2138                 wake_up_interruptible_sync_poll(&u->peer_wait,
2139                                                 EPOLLOUT | EPOLLWRNORM |
2140                                                 EPOLLWRBAND);
2141
2142         if (msg->msg_name)
2143                 unix_copy_addr(msg, skb->sk);
2144
2145         if (size > skb->len - skip)
2146                 size = skb->len - skip;
2147         else if (size < skb->len - skip)
2148                 msg->msg_flags |= MSG_TRUNC;
2149
2150         err = skb_copy_datagram_msg(skb, skip, msg, size);
2151         if (err)
2152                 goto out_free;
2153
2154         if (sock_flag(sk, SOCK_RCVTSTAMP))
2155                 __sock_recv_timestamp(msg, sk, skb);
2156
2157         memset(&scm, 0, sizeof(scm));
2158
2159         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2160         unix_set_secdata(&scm, skb);
2161
2162         if (!(flags & MSG_PEEK)) {
2163                 if (UNIXCB(skb).fp)
2164                         unix_detach_fds(&scm, skb);
2165
2166                 sk_peek_offset_bwd(sk, skb->len);
2167         } else {
2168                 /* It is questionable: on PEEK we could:
2169                    - do not return fds - good, but too simple 8)
2170                    - return fds, and do not return them on read (old strategy,
2171                      apparently wrong)
2172                    - clone fds (I chose it for now, it is the most universal
2173                      solution)
2174
2175                    POSIX 1003.1g does not actually define this clearly
2176                    at all. POSIX 1003.1g doesn't define a lot of things
2177                    clearly however!
2178
2179                 */
2180
2181                 sk_peek_offset_fwd(sk, size);
2182
2183                 if (UNIXCB(skb).fp)
2184                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2185         }
2186         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2187
2188         scm_recv(sock, msg, &scm, flags);
2189
2190 out_free:
2191         skb_free_datagram(sk, skb);
2192         mutex_unlock(&u->iolock);
2193 out:
2194         return err;
2195 }
2196
2197 /*
2198  *      Sleep until more data has arrived. But check for races..
2199  */
2200 static long unix_stream_data_wait(struct sock *sk, long timeo,
2201                                   struct sk_buff *last, unsigned int last_len,
2202                                   bool freezable)
2203 {
2204         struct sk_buff *tail;
2205         DEFINE_WAIT(wait);
2206
2207         unix_state_lock(sk);
2208
2209         for (;;) {
2210                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2211
2212                 tail = skb_peek_tail(&sk->sk_receive_queue);
2213                 if (tail != last ||
2214                     (tail && tail->len != last_len) ||
2215                     sk->sk_err ||
2216                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2217                     signal_pending(current) ||
2218                     !timeo)
2219                         break;
2220
2221                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2222                 unix_state_unlock(sk);
2223                 if (freezable)
2224                         timeo = freezable_schedule_timeout(timeo);
2225                 else
2226                         timeo = schedule_timeout(timeo);
2227                 unix_state_lock(sk);
2228
2229                 if (sock_flag(sk, SOCK_DEAD))
2230                         break;
2231
2232                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2233         }
2234
2235         finish_wait(sk_sleep(sk), &wait);
2236         unix_state_unlock(sk);
2237         return timeo;
2238 }
2239
2240 static unsigned int unix_skb_len(const struct sk_buff *skb)
2241 {
2242         return skb->len - UNIXCB(skb).consumed;
2243 }
2244
/*
 * Arguments of one stream read, handed to unix_stream_read_generic() and
 * passed on by it to the recv_actor callback.
 */
2245 struct unix_stream_read_state {
             /* Delivers data from an skb; called as (skb, skip, chunk, state).
              * A negative return value is treated as failure, any other value
              * as the number of bytes delivered.
              */
2246         int (*recv_actor)(struct sk_buff *, int, int,
2247                           struct unix_stream_read_state *);
             /* socket being read from */
2248         struct socket *socket;
             /* destination message; left unset (NULL) by the splice path */
2249         struct msghdr *msg;
             /* destination pipe, used by the splice actor only */
2250         struct pipe_inode_info *pipe;
             /* maximum number of bytes to read */
2251         size_t size;
             /* MSG_* receive flags */
2252         int flags;
             /* flags forwarded to skb_splice_bits() by the splice actor */
2253         unsigned int splice_flags;
2254 };
2255
/*
 * Common receive loop for stream sockets, shared by unix_stream_recvmsg()
 * and unix_stream_splice_read().
 *
 * Walks the receive queue of state->socket with u->iolock held and hands
 * each skb to state->recv_actor() until state->size bytes were delivered or
 * one of the stop conditions below is hit.  @freezable is only passed on to
 * unix_stream_data_wait().
 *
 * Returns the number of bytes delivered when that is non-zero, otherwise
 * err (0 or a negative errno).  An actor failure before anything was
 * delivered is returned as -EFAULT.
 */
2256 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2257                                     bool freezable)
2258 {
2259         struct scm_cookie scm;
2260         struct socket *sock = state->socket;
2261         struct sock *sk = sock->sk;
2262         struct unix_sock *u = unix_sk(sk);
2263         int copied = 0;
2264         int flags = state->flags;
2265         int noblock = flags & MSG_DONTWAIT;
2266         bool check_creds = false;
2267         int target;
2268         int err = 0;
2269         long timeo;
2270         int skip;
2271         size_t size = state->size;
2272         unsigned int last_len;
2273
2274         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2275                 err = -EINVAL;
2276                 goto out;
2277         }
2278
2279         if (unlikely(flags & MSG_OOB)) {
2280                 err = -EOPNOTSUPP;
2281                 goto out;
2282         }
2283
             /* target: once this many bytes were copied, an empty queue ends
              * the read instead of making us wait for more data.
              */
2284         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2285         timeo = sock_rcvtimeo(sk, noblock);
2286
2287         memset(&scm, 0, sizeof(scm));
2288
2289         /* Lock the socket to prevent queue disordering
2290          * while sleeps in memcpy_tomsg
2291          */
2292         mutex_lock(&u->iolock);
2293
             /* Number of queued bytes to step over before delivering data;
              * a negative sk_peek_offset() result is treated as 0.
              */
2294         skip = max(sk_peek_offset(sk, flags), 0);
2295
2296         do {
2297                 int chunk;
2298                 bool drop_skb;
2299                 struct sk_buff *skb, *last;
2300
                     /* redo: (re)start from the head of the queue, under the
                      * state lock.
                      */
2301 redo:
2302                 unix_state_lock(sk);
2303                 if (sock_flag(sk, SOCK_DEAD)) {
2304                         err = -ECONNRESET;
2305                         goto unlock;
2306                 }
2307                 last = skb = skb_peek(&sk->sk_receive_queue);
2308                 last_len = last ? last->len : 0;
                     /* again: entered with the state lock held and skb set to
                      * the next candidate (NULL when the queue is exhausted).
                      */
2309 again:
2310                 if (skb == NULL) {
2311                         if (copied >= target)
2312                                 goto unlock;
2313
2314                         /*
2315                          *      POSIX 1003.1g mandates this order.
2316                          */
2317
2318                         err = sock_error(sk);
2319                         if (err)
2320                                 goto unlock;
2321                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2322                                 goto unlock;
2323
2324                         unix_state_unlock(sk);
2325                         if (!timeo) {
2326                                 err = -EAGAIN;
2327                                 break;
2328                         }
2329
                             /* iolock is dropped while we sleep waiting for
                              * the queue tail (last/last_len) to change.
                              */
2330                         mutex_unlock(&u->iolock);
2331
2332                         timeo = unix_stream_data_wait(sk, timeo, last,
2333                                                       last_len, freezable);
2334
                             /* iolock is not held here, so the common exit
                              * below (which unlocks it and disposes of scm)
                              * is bypassed; scm is destroyed right here.
                              */
2335                         if (signal_pending(current)) {
2336                                 err = sock_intr_errno(timeo);
2337                                 scm_destroy(&scm);
2338                                 goto out;
2339                         }
2340
2341                         mutex_lock(&u->iolock);
2342                         goto redo;
                             /* unlock: shared exit for every path that leaves
                              * the loop with the state lock still held.
                              */
2343 unlock:
2344                         unix_state_unlock(sk);
2345                         break;
2346                 }
2347
                     /* Step over skbs that lie entirely within the offset to
                      * skip, remembering the last one seen for a later wait.
                      */
2348                 while (skip >= unix_skb_len(skb)) {
2349                         skip -= unix_skb_len(skb);
2350                         last = skb;
2351                         last_len = skb->len;
2352                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2353                         if (!skb)
2354                                 goto again;
2355                 }
2356
2357                 unix_state_unlock(sk);
2358
                     /* The first skb fixes the credentials reported for this
                      * read (only when SOCK_PASSCRED is set); later skbs must
                      * match them or the read stops short.
                      */
2359                 if (check_creds) {
2360                         /* Never glue messages from different writers */
2361                         if (!unix_skb_scm_eq(skb, &scm))
2362                                 break;
2363                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2364                         /* Copy credentials */
2365                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2366                         unix_set_secdata(&scm, skb);
2367                         check_creds = true;
2368                 }
2369
                     /* NOTE(review): the "sunaddr = NULL" below only changes
                      * the local pointer; nothing visible in this function
                      * stops the copy from being repeated for later skbs.
                      */
2370                 /* Copy address just once */
2371                 if (state->msg && state->msg->msg_name) {
2372                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2373                                          state->msg->msg_name);
2374                         unix_copy_addr(state->msg, skb->sk);
2375                         sunaddr = NULL;
2376                 }
2377
                     /* Deliver what is left of this skb past the skip offset,
                      * capped at the remaining request size.  An extra
                      * reference is held across the actor call and dropped
                      * again right after it.
                      */
2378                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2379                 skb_get(skb);
2380                 chunk = state->recv_actor(skb, skip, chunk, state);
2381                 drop_skb = !unix_skb_len(skb);
2382                 /* skb is only safe to use if !drop_skb */
2383                 consume_skb(skb);
                     /* Actor failure: the actor's own error code is not passed
                      * on; -EFAULT is reported, and only if nothing was copied
                      * before.
                      */
2384                 if (chunk < 0) {
2385                         if (copied == 0)
2386                                 copied = -EFAULT;
2387                         break;
2388                 }
2389                 copied += chunk;
2390                 size -= chunk;
2391
2392                 if (drop_skb) {
2393                         /* the skb was touched by a concurrent reader;
2394                          * we should not expect anything from this skb
2395                          * anymore and assume it invalid - we can be
2396                          * sure it was dropped from the socket queue
2397                          *
2398                          * let's report a short read
2399                          */
2400                         err = 0;
2401                         break;
2402                 }
2403
2404                 /* Mark read part of skb as used */
2405                 if (!(flags & MSG_PEEK)) {
2406                         UNIXCB(skb).consumed += chunk;
2407
2408                         sk_peek_offset_bwd(sk, chunk);
2409
2410                         if (UNIXCB(skb).fp)
2411                                 unix_detach_fds(&scm, skb);
2412
                             /* skb not fully read yet: stop here and leave the
                              * rest of it queued.
                              */
2413                         if (unix_skb_len(skb))
2414                                 break;
2415
2416                         skb_unlink(skb, &sk->sk_receive_queue);
2417                         consume_skb(skb);
2418
                             /* Stop once descriptors were picked up. */
2419                         if (scm.fp)
2420                                 break;
2421                 } else {
2422                         /* It is questionable, see note in unix_dgram_recvmsg.
2423                          */
2424                         if (UNIXCB(skb).fp)
2425                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2426
2427                         sk_peek_offset_fwd(sk, chunk);
2428
2429                         if (UNIXCB(skb).fp)
2430                                 break;
2431
                             /* Peek on into the next skb, if any, without
                              * restarting from the head of the queue.
                              */
2432                         skip = 0;
2433                         last = skb;
2434                         last_len = skb->len;
2435                         unix_state_lock(sk);
2436                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2437                         if (skb)
2438                                 goto again;
2439                         unix_state_unlock(sk);
2440                         break;
2441                 }
2442         } while (size);
2443
             /* Common exit: iolock is held here.  Control data goes to the
              * caller's msghdr when there is one; otherwise it is released.
              */
2444         mutex_unlock(&u->iolock);
2445         if (state->msg)
2446                 scm_recv(sock, state->msg, &scm, flags);
2447         else
2448                 scm_destroy(&scm);
2449 out:
2450         return copied ? : err;
2451 }
2452
2453 static int unix_stream_read_actor(struct sk_buff *skb,
2454                                   int skip, int chunk,
2455                                   struct unix_stream_read_state *state)
2456 {
2457         int ret;
2458
2459         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2460                                     state->msg, chunk);
2461         return ret ?: chunk;
2462 }
2463
2464 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2465                                size_t size, int flags)
2466 {
2467         struct unix_stream_read_state state = {
2468                 .recv_actor = unix_stream_read_actor,
2469                 .socket = sock,
2470                 .msg = msg,
2471                 .size = size,
2472                 .flags = flags
2473         };
2474
2475         return unix_stream_read_generic(&state, true);
2476 }
2477
2478 static int unix_stream_splice_actor(struct sk_buff *skb,
2479                                     int skip, int chunk,
2480                                     struct unix_stream_read_state *state)
2481 {
2482         return skb_splice_bits(skb, state->socket->sk,
2483                                UNIXCB(skb).consumed + skip,
2484                                state->pipe, chunk, state->splice_flags);
2485 }
2486
2487 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2488                                        struct pipe_inode_info *pipe,
2489                                        size_t size, unsigned int flags)
2490 {
2491         struct unix_stream_read_state state = {
2492                 .recv_actor = unix_stream_splice_actor,
2493                 .socket = sock,
2494                 .pipe = pipe,
2495                 .size = size,
2496                 .splice_flags = flags,
2497         };
2498
2499         if (unlikely(*ppos))
2500                 return -ESPIPE;
2501
2502         if (sock->file->f_flags & O_NONBLOCK ||
2503             flags & SPLICE_F_NONBLOCK)
2504                 state.flags = MSG_DONTWAIT;
2505
2506         return unix_stream_read_generic(&state, false);
2507 }
2508
2509 static int unix_shutdown(struct socket *sock, int mode)
2510 {
2511         struct sock *sk = sock->sk;
2512         struct sock *other;
2513
2514         if (mode < SHUT_RD || mode > SHUT_RDWR)
2515                 return -EINVAL;
2516         /* This maps:
2517          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2518          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2519          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2520          */
2521         ++mode;
2522
2523         unix_state_lock(sk);
2524         sk->sk_shutdown |= mode;
2525         other = unix_peer(sk);
2526         if (other)
2527                 sock_hold(other);
2528         unix_state_unlock(sk);
2529         sk->sk_state_change(sk);
2530
2531         if (other &&
2532                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2533
2534                 int peer_mode = 0;
2535
2536                 if (mode&RCV_SHUTDOWN)
2537                         peer_mode |= SEND_SHUTDOWN;
2538                 if (mode&SEND_SHUTDOWN)
2539                         peer_mode |= RCV_SHUTDOWN;
2540                 unix_state_lock(other);
2541                 other->sk_shutdown |= peer_mode;
2542                 unix_state_unlock(other);
2543                 other->sk_state_change(other);
2544                 if (peer_mode == SHUTDOWN_MASK)
2545                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2546                 else if (peer_mode & RCV_SHUTDOWN)
2547                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2548         }
2549         if (other)
2550                 sock_put(other);
2551
2552         return 0;
2553 }
2554
2555 long unix_inq_len(struct sock *sk)
2556 {
2557         struct sk_buff *skb;
2558         long amount = 0;
2559
2560         if (sk->sk_state == TCP_LISTEN)
2561                 return -EINVAL;
2562
2563         spin_lock(&sk->sk_receive_queue.lock);
2564         if (sk->sk_type == SOCK_STREAM ||
2565             sk->sk_type == SOCK_SEQPACKET) {
2566                 skb_queue_walk(&sk->sk_receive_queue, skb)
2567                         amount += unix_skb_len(skb);
2568         } else {
2569                 skb = skb_peek(&sk->sk_receive_queue);
2570                 if (skb)
2571                         amount = skb->len;
2572         }
2573         spin_unlock(&sk->sk_receive_queue.lock);
2574
2575         return amount;
2576 }
2577 EXPORT_SYMBOL_GPL(unix_inq_len);
2578
/* Send-side memory currently charged to @sk, as seen by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2584
2585 static int unix_open_file(struct sock *sk)
2586 {
2587         struct path path;
2588         struct file *f;
2589         int fd;
2590
2591         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2592                 return -EPERM;
2593
2594         if (!smp_load_acquire(&unix_sk(sk)->addr))
2595                 return -ENOENT;
2596
2597         path = unix_sk(sk)->path;
2598         if (!path.dentry)
2599                 return -ENOENT;
2600
2601         path_get(&path);
2602
2603         fd = get_unused_fd_flags(O_CLOEXEC);
2604         if (fd < 0)
2605                 goto out;
2606
2607         f = dentry_open(&path, O_PATH, current_cred());
2608         if (IS_ERR(f)) {
2609                 put_unused_fd(fd);
2610                 fd = PTR_ERR(f);
2611                 goto out;
2612         }
2613
2614         fd_install(fd, f);
2615 out:
2616         path_put(&path);
2617
2618         return fd;
2619 }
2620
2621 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2622 {
2623         struct sock *sk = sock->sk;
2624         long amount = 0;
2625         int err;
2626
2627         switch (cmd) {
2628         case SIOCOUTQ:
2629                 amount = unix_outq_len(sk);
2630                 err = put_user(amount, (int __user *)arg);
2631                 break;
2632         case SIOCINQ:
2633                 amount = unix_inq_len(sk);
2634                 if (amount < 0)
2635                         err = amount;
2636                 else
2637                         err = put_user(amount, (int __user *)arg);
2638                 break;
2639         case SIOCUNIXFILE:
2640                 err = unix_open_file(sk);
2641                 break;
2642         default:
2643                 err = -ENOIOCTLCMD;
2644                 break;
2645         }
2646         return err;
2647 }
2648
2649 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2650 {
2651         struct sock *sk = sock->sk;
2652         __poll_t mask;
2653
2654         sock_poll_wait(file, sock, wait);
2655         mask = 0;
2656
2657         /* exceptional events? */
2658         if (sk->sk_err)
2659                 mask |= EPOLLERR;
2660         if (sk->sk_shutdown == SHUTDOWN_MASK)
2661                 mask |= EPOLLHUP;
2662         if (sk->sk_shutdown & RCV_SHUTDOWN)
2663                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2664
2665         /* readable? */
2666         if (!skb_queue_empty(&sk->sk_receive_queue))
2667                 mask |= EPOLLIN | EPOLLRDNORM;
2668
2669         /* Connection-based need to check for termination and startup */
2670         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2671             sk->sk_state == TCP_CLOSE)
2672                 mask |= EPOLLHUP;
2673
2674         /*
2675          * we set writable also when the other side has shut down the
2676          * connection. This prevents stuck sockets.
2677          */
2678         if (unix_writable(sk))
2679                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2680
2681         return mask;
2682 }
2683
2684 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2685                                     poll_table *wait)
2686 {
2687         struct sock *sk = sock->sk, *other;
2688         unsigned int writable;
2689         __poll_t mask;
2690
2691         sock_poll_wait(file, sock, wait);
2692         mask = 0;
2693
2694         /* exceptional events? */
2695         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2696                 mask |= EPOLLERR |
2697                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2698
2699         if (sk->sk_shutdown & RCV_SHUTDOWN)
2700                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2701         if (sk->sk_shutdown == SHUTDOWN_MASK)
2702                 mask |= EPOLLHUP;
2703
2704         /* readable? */
2705         if (!skb_queue_empty(&sk->sk_receive_queue))
2706                 mask |= EPOLLIN | EPOLLRDNORM;
2707
2708         /* Connection-based need to check for termination and startup */
2709         if (sk->sk_type == SOCK_SEQPACKET) {
2710                 if (sk->sk_state == TCP_CLOSE)
2711                         mask |= EPOLLHUP;
2712                 /* connection hasn't started yet? */
2713                 if (sk->sk_state == TCP_SYN_SENT)
2714                         return mask;
2715         }
2716
2717         /* No write status requested, avoid expensive OUT tests. */
2718         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2719                 return mask;
2720
2721         writable = unix_writable(sk);
2722         if (writable) {
2723                 unix_state_lock(sk);
2724
2725                 other = unix_peer(sk);
2726                 if (other && unix_peer(other) != sk &&
2727                     unix_recvq_full(other) &&
2728                     unix_dgram_peer_wake_me(sk, other))
2729                         writable = 0;
2730
2731                 unix_state_unlock(sk);
2732         }
2733
2734         if (writable)
2735                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2736         else
2737                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2738
2739         return mask;
2740 }
2741
2742 #ifdef CONFIG_PROC_FS
2743
/*
 * Encoding of the seq_file position used by the iterator below.
 *
 * A position packs two fields into one integer: the low BUCKET_SPACE bits
 * hold an offset, the bits above them hold an index into
 * unix_socket_table. unix_from_bucket() treats the offset as a 1-based
 * count of matching sockets within that bucket.
 *
 *   get_bucket(x)            extract the bucket index from position x
 *   get_offset(x)            extract the offset from position x
 *   set_bucket_offset(b, o)  build a position from bucket b and offset o
 */
2744 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2745
2746 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2747 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2748 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2749
2750 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2751 {
2752         unsigned long offset = get_offset(*pos);
2753         unsigned long bucket = get_bucket(*pos);
2754         struct sock *sk;
2755         unsigned long count = 0;
2756
2757         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2758                 if (sock_net(sk) != seq_file_net(seq))
2759                         continue;
2760                 if (++count == offset)
2761                         break;
2762         }
2763
2764         return sk;
2765 }
2766
/*
 * Return the next socket of @seq's network namespace for the iterator.
 *
 * @sk is the entry returned previously: a real socket, SEQ_START_TOKEN, or
 * NULL. For a real socket the rest of its chain is searched first. For the
 * other two values the search starts at the position encoded in *pos.
 *
 * Whenever a bucket is exhausted, *pos is rewritten to offset 1 of the next
 * bucket. Returns NULL once the bucket index reaches the size of
 * unix_socket_table.
 */
2767 static struct sock *unix_next_socket(struct seq_file *seq,
2768                                      struct sock *sk,
2769                                      loff_t *pos)
2770 {
2771         unsigned long bucket;
2772
             /* Continue along the chain that the previous socket sits on. */
2773         while (sk > (struct sock *)SEQ_START_TOKEN) {
2774                 sk = sk_next(sk);
2775                 if (!sk)
                             /*
                              * Chain finished: jump straight to the bucket
                              * advance, skipping the lookup of the current
                              * position at the top of the loop below.
                              */
2776                         goto next_bucket;
2777                 if (sock_net(sk) == seq_file_net(seq))
2778                         return sk;
2779         }
2780
2781         do {
                     /* Try the position currently encoded in *pos. */
2782                 sk = unix_from_bucket(seq, pos);
2783                 if (sk)
2784                         return sk;
2785
2786 next_bucket:
                     /* Nothing (left) here: move to the first entry of the next bucket. */
2787                 bucket = get_bucket(*pos) + 1;
2788                 *pos = set_bucket_offset(bucket, 1);
2789         } while (bucket < ARRAY_SIZE(unix_socket_table));
2790
2791         return NULL;
2792 }
2793
2794 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2795         __acquires(unix_table_lock)
2796 {
2797         spin_lock(&unix_table_lock);
2798
2799         if (!*pos)
2800                 return SEQ_START_TOKEN;
2801
2802         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2803                 return NULL;
2804
2805         return unix_next_socket(seq, NULL, pos);
2806 }
2807
2808 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2809 {
2810         ++*pos;
2811         return unix_next_socket(seq, v, pos);
2812 }
2813
/*
 * seq_file ->stop callback: drops unix_table_lock, which unix_seq_start()
 * acquired. Neither argument is used.
 */
2814 static void unix_seq_stop(struct seq_file *seq, void *v)
2815         __releases(unix_table_lock)
2816 {
2817         spin_unlock(&unix_table_lock);
2818 }
2819
2820 static int unix_seq_show(struct seq_file *seq, void *v)
2821 {
2822
2823         if (v == SEQ_START_TOKEN)
2824                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2825                          "Inode Path\n");
2826         else {
2827                 struct sock *s = v;
2828                 struct unix_sock *u = unix_sk(s);
2829                 unix_state_lock(s);
2830
2831                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2832                         s,
2833                         refcount_read(&s->sk_refcnt),
2834                         0,
2835                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2836                         s->sk_type,
2837                         s->sk_socket ?
2838                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2839                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2840                         sock_i_ino(s));
2841
2842                 if (u->addr) {  // under unix_table_lock here
2843                         int i, len;
2844                         seq_putc(seq, ' ');
2845
2846                         i = 0;
2847                         len = u->addr->len - sizeof(short);
2848                         if (!UNIX_ABSTRACT(s))
2849                                 len--;
2850                         else {
2851                                 seq_putc(seq, '@');
2852                                 i++;
2853                         }
2854                         for ( ; i < len; i++)
2855                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2856                                          '@');
2857                 }
2858                 unix_state_unlock(s);
2859                 seq_putc(seq, '\n');
2860         }
2861
2862         return 0;
2863 }
2864
/*
 * seq_file callbacks for the socket listing. unix_net_init() passes this
 * table to proc_create_net() when it creates the "unix" proc entry.
 */
2865 static const struct seq_operations unix_seq_ops = {
2866         .start  = unix_seq_start,
2867         .next   = unix_seq_next,
2868         .stop   = unix_seq_stop,
2869         .show   = unix_seq_show,
2870 };
2871 #endif
2872
/*
 * Descriptor for the PF_UNIX protocol family, with unix_create() as its
 * create hook. af_unix_init() registers it through sock_register().
 */
2873 static const struct net_proto_family unix_family_ops = {
2874         .family = PF_UNIX,
2875         .create = unix_create,
2876         .owner  = THIS_MODULE,
2877 };
2878
2879
2880 static int __net_init unix_net_init(struct net *net)
2881 {
2882         int error = -ENOMEM;
2883
2884         net->unx.sysctl_max_dgram_qlen = 10;
2885         if (unix_sysctl_register(net))
2886                 goto out;
2887
2888 #ifdef CONFIG_PROC_FS
2889         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2890                         sizeof(struct seq_net_private))) {
2891                 unix_sysctl_unregister(net);
2892                 goto out;
2893         }
2894 #endif
2895         error = 0;
2896 out:
2897         return error;
2898 }
2899
/*
 * Per-network-namespace teardown, the counterpart of unix_net_init():
 * unregisters the sysctl table and removes the "unix" proc entry.
 */
2900 static void __net_exit unix_net_exit(struct net *net)
2901 {
2902         unix_sysctl_unregister(net);
2903         remove_proc_entry("unix", net->proc_net);
2904 }
2905
/*
 * Per-network-namespace init/exit hooks. af_unix_init() registers them with
 * register_pernet_subsys() and af_unix_exit() unregisters them.
 */
2906 static struct pernet_operations unix_net_ops = {
2907         .init = unix_net_init,
2908         .exit = unix_net_exit,
2909 };
2910
2911 static int __init af_unix_init(void)
2912 {
2913         int rc = -1;
2914
2915         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2916
2917         rc = proto_register(&unix_proto, 1);
2918         if (rc != 0) {
2919                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2920                 goto out;
2921         }
2922
2923         sock_register(&unix_family_ops);
2924         register_pernet_subsys(&unix_net_ops);
2925 out:
2926         return rc;
2927 }
2928
/*
 * Module exit: unregisters what af_unix_init() registered - the PF_UNIX
 * socket family, the unix_proto protocol and the per-network-namespace
 * operations.
 */
2929 static void __exit af_unix_exit(void)
2930 {
2931         sock_unregister(PF_UNIX);
2932         proto_unregister(&unix_proto);
2933         unregister_pernet_subsys(&unix_net_ops);
2934 }
2935
2936 /* Earlier than device_initcall() so that other drivers invoking
2937    request_module() don't end up in a loop when modprobe tries
2938    to use a UNIX socket. But later than subsys_initcall() because
2939    we depend on stuff initialised there */
2940 fs_initcall(af_unix_init);
/* af_unix_exit() is the matching teardown routine. */
2941 module_exit(af_unix_exit);
2942
2943 MODULE_LICENSE("GPL");
/* NOTE(review): presumably lets a request for the PF_UNIX family load this module - confirm. */
2944 MODULE_ALIAS_NETPROTO(PF_UNIX);